Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
8cd41f8
feat: add ai-cache plugin to installation and configuration
janiussyafiq Jun 19, 2026
1ea1aaa
feat: implement ai-cache plugin with Redis support and testing
janiussyafiq Jun 19, 2026
5c04222
Merge remote-tracking branch 'upstream/master' into feat/ai-cache-exact
janiussyafiq Jun 22, 2026
2d7eb3b
fix(ai-cache): canonical-encode fingerprint and switch bypass to bypa…
janiussyafiq Jun 23, 2026
d91e68a
feat(ai-cache): enhance body filter to handle oversized responses and…
janiussyafiq Jun 23, 2026
652a89f
feat(ai-cache): optimize body caching logic and enforce header valida…
janiussyafiq Jun 23, 2026
4ac2398
docs(ai-cache): add English and Chinese plugin documentation
janiussyafiq Jun 23, 2026
84c5ccf
feat(ai-cache): implement canonical JSON encoding and enhance cache k…
janiussyafiq Jun 23, 2026
9024b70
feat(ai-cache): update tests for exact.ttl validation and add cross-r…
janiussyafiq Jun 23, 2026
6f15de7
fix(json): remove redundant require statement in json.lua
janiussyafiq Jun 24, 2026
4775bfc
feat(ai-cache): enhance error handling for unsupported requests and i…
janiussyafiq Jun 24, 2026
edf5b51
Merge remote-tracking branch 'upstream/master' into feat/ai-cache-exact
janiussyafiq Jun 24, 2026
d6c1570
feat(ai-cache): add model to cache key generation and update test cases
janiussyafiq Jun 25, 2026
db70638
feat(ai-cache): enhance fingerprinting to include client request and …
janiussyafiq Jun 26, 2026
95a9243
feat(ai-cache): refine cache key generation and enhance documentation…
janiussyafiq Jun 26, 2026
feabb7f
style(ai-cache): localize tostring global in key.lua
janiussyafiq Jun 26, 2026
b4a6e9b
style(ai-cache): drop trailing whitespace in ai-cache.t
janiussyafiq Jun 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,9 @@ install: runtime
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-cache
$(ENV_INSTALL) apisix/plugins/ai-cache/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-cache

$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard
$(ENV_INSTALL) apisix/plugins/ai-lakera-guard/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard

Expand Down
1 change: 1 addition & 0 deletions apisix/cli/config.lua
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ local _M = {
"ai-rate-limiting",
"ai-proxy-multi",
"ai-proxy",
"ai-cache",
"ai-aws-content-moderation",
"ai-aliyun-content-moderation",
"ai-lakera-guard",
Expand Down
48 changes: 48 additions & 0 deletions apisix/core/json.lua
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,19 @@ local json_encode = cjson.encode
local json_decode = cjson.decode
local cjson_null = cjson.null
local clear_tab = require("table.clear")
local require = require
local ngx = ngx
local tostring = tostring
local type = type
local pairs = pairs
local ipairs = ipairs
local getmetatable = getmetatable
local cached_tab = {}

local rapidjson
local rapidjson_null
local rapidjson_encode_opts = { sort_keys = true }


cjson.encode_escape_forward_slash(false)
cjson.decode_array_with_array_mt(true)
Expand Down Expand Up @@ -122,6 +129,47 @@ local function encode(data, force)
end
_M.encode = encode


--- Recursively copy a cjson-style Lua value into the shape rapidjson expects.
-- The cjson null sentinel becomes the rapidjson null sentinel, tables tagged
-- with cjson.array_mt become rapidjson arrays, any other table is copied key
-- by key, and every non-table value is returned as-is. The input is never
-- modified. Relies on the `rapidjson` / `rapidjson_null` upvalues having been
-- initialised (canonical_encode does this before calling).
-- @param data Any Lua value.
-- @return The converted value.
local function to_rapidjson_value(data)
    if data == cjson_null then
        return rapidjson_null
    end

    if type(data) ~= "table" then
        return data
    end

    local is_array = getmetatable(data) == cjson.array_mt
    if not is_array then
        -- plain table: copy every key/value pair, converting the values
        local converted = {}
        for key, value in pairs(data) do
            converted[key] = to_rapidjson_value(value)
        end
        return converted
    end

    -- cjson array: copy the sequence part and tag it as a rapidjson array
    local items = {}
    for idx, item in ipairs(data) do
        items[idx] = to_rapidjson_value(item)
    end
    return rapidjson.array(items)
end


--- Serialise a Lua value as canonical JSON (object keys sorted).
-- core.json.encode gives no ordering guarantee for object keys; this function
-- asks rapidjson to sort them, so equal logical values yield the same string.
-- That makes the output usable for hashing, cache keys and signatures.
-- cjson null and cjson.array_mt markers in the input are translated to their
-- rapidjson counterparts before encoding. The rapidjson module is required
-- lazily, on the first call.
-- @param data The value to encode.
-- @return Whatever rapidjson.encode returns for the converted value.
function _M.canonical_encode(data)
    if rapidjson == nil then
        -- first call: load the library and remember its null sentinel
        local lib = require("rapidjson")
        rapidjson = lib
        rapidjson_null = lib.null
    end

    local normalized = to_rapidjson_value(data)
    return rapidjson.encode(normalized, rapidjson_encode_opts)
end

local max_delay_encode_items = 16
local delay_tab_idx = 0
local delay_tab_arr = {}
Expand Down
228 changes: 228 additions & 0 deletions apisix/plugins/ai-cache.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--

local core = require("apisix.core")
local schema = require("apisix.plugins.ai-cache.schema")
local key_mod = require("apisix.plugins.ai-cache.key")
local binding = require("apisix.plugins.ai-protocols.binding")
local redis_util = require("apisix.utils.redis")

local ngx = ngx
local ngx_null = ngx.null
local ipairs = ipairs
local type = type
local concat = table.concat

local CACHE_STATUS_HEADER = "X-AI-Cache-Status"
local CACHE_AGE_HEADER = "X-AI-Cache-Age"
local DEFAULT_TTL = 3600
local DEFAULT_MAX_BODY = 1048576

local _M = {
version = 0.1,
priority = 1035,
name = "ai-cache",
schema = schema,
}


--- Validate a plugin configuration against the ai-cache schema.
-- @param conf The plugin configuration to check.
-- @return The result(s) of core.schema.check, passed through unchanged.
function _M.check_schema(conf)
    local check = core.schema.check
    return check(schema, conf)
end


--- Hand a Redis connection back to the keepalive pool.
-- Uses conf.redis_keepalive_timeout (default 10000) and
-- conf.redis_keepalive_pool (default 100); a failure is only logged.
-- @param conf Plugin configuration.
-- @param red  Redis client object.
local function release(conf, red)
    local idle_timeout = conf.redis_keepalive_timeout or 10000
    local pool_size = conf.redis_keepalive_pool or 100

    local ok, err = red:set_keepalive(idle_timeout, pool_size)
    if ok then
        return
    end
    core.log.warn("ai-cache: failed to set redis keepalive: ", err)
end


--- Answer the request directly from a cache entry.
-- Marks the request as a HIT, optionally emits the cache status / age headers
-- (skipped only when conf.cache_headers is explicitly false), forces a JSON
-- content type and exits with status 200 and the cached body.
-- @param conf   Plugin configuration.
-- @param ctx    Request context; ctx.ai_cache_status is set to "HIT".
-- @param cached Decoded cache envelope with `body` and optional `created_at`.
-- @return The result of core.response.exit.
local function serve_hit(conf, ctx, cached)
    ctx.ai_cache_status = "HIT"

    local emit_headers = conf.cache_headers ~= false
    if emit_headers then
        core.response.set_header(CACHE_STATUS_HEADER, "HIT")

        -- an entry without created_at is reported with age 0
        local now = ngx.time()
        local age = now - (cached.created_at or now)
        if age < 0 then
            -- clamp negative ages to 0
            age = 0
        end
        core.response.set_header(CACHE_AGE_HEADER, age)
    end

    core.response.set_header("Content-Type", "application/json")
    return core.response.exit(200, cached.body)
end
Comment thread
janiussyafiq marked this conversation as resolved.


--- Access-phase handler: look the request up in the cache.
-- Sets ctx.ai_cache_status to "BYPASS", "MISS" or (via serve_hit) "HIT".
-- On a hit the cached response is returned and the request ends here; on a
-- miss the fingerprint and key are left on ctx for the later phases. Every
-- Redis problem is treated as a MISS so a cache outage never fails a request.
-- @param conf Plugin configuration.
-- @param ctx  Request context.
-- @return code, body when binding.on_unsupported handled the request or a
--         cache hit was served; nothing otherwise.
function _M.access(conf, ctx)
    if not ctx.picked_ai_instance then
        local handled, code, body = binding.on_unsupported(
            conf.fail_mode, _M.name, ctx,
            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
            500, "ai-cache must be used with the ai-proxy or ai-proxy-multi plugin")
        if handled then
            return code, body
        end
        ctx.ai_cache_status = "BYPASS"
        return
    end

    -- Streaming responses are not cached in PR-1 (SSE replay is a later
    -- increment). ai-proxy (higher priority) has already classified the
    -- request, so bypass before doing any work.
    if ctx.var.request_type == "ai_stream" then
        ctx.ai_cache_status = "BYPASS"
        return
    end

    -- any configured header rule that matches exactly opts the request out
    if conf.bypass_on then
        for _, rule in ipairs(conf.bypass_on) do
            if core.request.header(ctx, rule.header) == rule.equals then
                ctx.ai_cache_status = "BYPASS"
                return
            end
        end
    end

    local body, err = core.request.get_json_request_body_table()
    if not body then
        core.log.warn("ai-cache: cannot read request body, bypassing: ", err)
        ctx.ai_cache_status = "BYPASS"
        return
    end

    ctx.ai_cache_fingerprint = key_mod.fingerprint(ctx, body)
    ctx.ai_cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
    -- Remember which instance the fingerprint was computed for. ai-proxy-multi
    -- may fall back to a different instance in before_proxy; the log phase uses
    -- this to avoid writing that fallback response under the original key.
    ctx.ai_cache_picked_at_access = ctx.picked_ai_instance

    local red
    red, err = redis_util.new(conf)
    if not red then
        -- fail-open: never let a cache-backend outage break the request.
        core.log.warn("ai-cache: redis unavailable, fail-open as MISS: ", err)
        ctx.ai_cache_status = "MISS"
        return
    end

    local res
    res, err = red:get(ctx.ai_cache_key)
    if err then
        -- the connection may be in a bad state: close it instead of pooling it
        red:close()
        core.log.warn("ai-cache: redis get failed, fail-open as MISS: ", err)
        ctx.ai_cache_status = "MISS"
        return
    end
    release(conf, red)

    if res ~= nil and res ~= ngx_null then
        local cached = core.json.decode(res)
        -- A stored value that is valid JSON but not an object (e.g. `123` or
        -- `true`) decodes to a non-table; indexing a number or boolean would
        -- raise, so require a table before touching `.body`.
        if type(cached) == "table" and cached.body then
            return serve_hit(conf, ctx, cached)
        end
        core.log.warn("ai-cache: discarding malformed cache entry for ", ctx.ai_cache_key)
    end

    ctx.ai_cache_status = "MISS"
end


--- Header-filter handler: expose the cache status on the response.
-- Does nothing when no status was recorded for this request or when
-- conf.cache_headers is explicitly false.
-- @param conf Plugin configuration.
-- @param ctx  Request context.
function _M.header_filter(conf, ctx)
    local status = ctx.ai_cache_status
    if not status or conf.cache_headers == false then
        return
    end
    core.response.set_header(CACHE_STATUS_HEADER, status)
end


--- Body-filter handler: accumulate the response body of a MISS.
-- Each non-empty chunk from ngx.arg[1] is appended to ctx.ai_cache_buf, which
-- tracks the chunk count in `n` and the total size in `bytes`. Once the total
-- exceeds conf.max_cache_body_size (default DEFAULT_MAX_BODY) the buffer is
-- dropped and ctx.ai_cache_oversized is set, so later chunks are ignored.
-- @param conf Plugin configuration.
-- @param ctx  Request context.
function _M.body_filter(conf, ctx)
    -- only a MISS is collected; an oversized response has already been given up
    if ctx.ai_cache_oversized or ctx.ai_cache_status ~= "MISS" then
        return
    end

    local chunk = ngx.arg[1]
    if not chunk or #chunk == 0 then
        return
    end

    local acc = ctx.ai_cache_buf
    if not acc then
        acc = { n = 0, bytes = 0 }
        ctx.ai_cache_buf = acc
    end

    acc.n = acc.n + 1
    acc[acc.n] = chunk
    acc.bytes = acc.bytes + #chunk

    local limit = conf.max_cache_body_size or DEFAULT_MAX_BODY
    if acc.bytes <= limit then
        return
    end

    -- over the limit: release the collected chunks and stop collecting
    ctx.ai_cache_buf = nil
    ctx.ai_cache_oversized = true
end


-- Cosockets are disabled in the phases that capture the response
-- (body_filter / log), so the Redis write is performed from a 0-delay timer,
-- whose light thread is allowed to use them.

--- Timer callback that stores one response in Redis.
-- The value written is a JSON envelope holding the body and the creation
-- time; it expires after conf.exact.ttl seconds (DEFAULT_TTL when unset).
-- Failures are logged and otherwise ignored.
-- @param premature     Timer flag; when true nothing is written.
-- @param conf          Plugin configuration.
-- @param cache_key     Redis key to write.
-- @param response_body Response body string to store.
local function write_to_cache(premature, conf, cache_key, response_body)
    if premature then
        return
    end

    local red, conn_err = redis_util.new(conf)
    if not red then
        core.log.warn("ai-cache: redis unavailable on write: ", conn_err)
        return
    end

    local envelope = core.json.encode({ body = response_body, created_at = ngx.time() })

    local ttl = DEFAULT_TTL
    if conf.exact and conf.exact.ttl then
        ttl = conf.exact.ttl
    end

    local ok, set_err = red:set(cache_key, envelope, "EX", ttl)
    if ok then
        release(conf, red)
        return
    end

    -- do not pool a connection on which the command failed
    red:close()
    core.log.warn("ai-cache: redis set failed: ", set_err)
end


--- Log-phase handler: schedule the cache write for a cacheable MISS.
-- A write is scheduled only when the request was a MISS with a fingerprint,
-- the instance that answered is the one picked at access time, the response
-- status is 200 and a non-empty body was buffered by body_filter.
-- @param conf Plugin configuration.
-- @param ctx  Request context.
function _M.log(conf, ctx)
    local fingerprint = ctx.ai_cache_fingerprint
    if not fingerprint or ctx.ai_cache_status ~= "MISS" then
        return
    end

    -- ai-proxy-multi may reassign the picked instance on fallback/retry during
    -- before_proxy. The fingerprint was frozen for the ORIGINAL instance, so a
    -- response produced by a different (fallback) instance must not be stored
    -- under it: a later hit would replay the wrong instance's response.
    if ctx.ai_cache_picked_at_access ~= ctx.picked_ai_instance then
        return
    end

    if ngx.status ~= 200 then
        return
    end

    local chunks = ctx.ai_cache_buf
    if not chunks or chunks.bytes == 0 then
        return
    end

    local response_body = concat(chunks, "", 1, chunks.n)
    local cache_key = key_mod.build(conf, ctx, fingerprint)

    local scheduled, err = ngx.timer.at(0, write_to_cache, conf, cache_key, response_body)
    if scheduled then
        return
    end
    core.log.warn("ai-cache: failed to schedule cache write: ", err)
end


return _M
Loading
Loading