From 7d38046a82495a19d2e71f2348fe0d97ce4592c4 Mon Sep 17 00:00:00 2001
From: poluzhao
Date: Thu, 7 May 2026 11:57:17 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8E=9F=E5=9B=A0=E5=88=86=E6=9E=90=EF=BC=9A?=
 =?UTF-8?q?=20=E4=B9=8B=E5=89=8D=E7=9A=84=E4=BB=A3=E7=A0=81=E4=BD=BF?=
 =?UTF-8?q?=E7=94=A8=E4=BA=86=E4=B8=80=E7=A7=8D=E6=89=80=E8=B0=93=E7=9A=84?=
 =?UTF-8?q?=E2=80=9C=E9=A1=B6=E5=B1=82=E8=87=AA=E5=8A=A8=E7=BC=93=E5=AD=98?=
 =?UTF-8?q?=E2=80=9D=E9=9D=9E=E6=A0=87=E5=87=86=E5=86=99=E6=B3=95=EF=BC=88?=
 =?UTF-8?q?=E5=9C=A8=E8=AF=B7=E6=B1=82=E9=A1=B6=E5=B1=82=E7=9B=B4=E6=8E=A5?=
 =?UTF-8?q?=E5=A1=9E=E5=85=A5=E4=B8=80=E4=B8=AA=E5=85=A8=E5=B1=80=E7=9A=84?=
 =?UTF-8?q?=20cache=5Fcontrol=EF=BC=89=EF=BC=8C=E4=BD=86=E5=BE=88=E5=A4=9A?=
 =?UTF-8?q?=E4=B8=AD=E8=BD=AC=E7=AB=99=E6=88=96=20Anthropic=20=E5=AE=98?=
 =?UTF-8?q?=E6=96=B9=20API=20=E5=B9=B6=E4=B8=8D=E5=85=BC=E5=AE=B9=E8=BF=99?=
 =?UTF-8?q?=E7=A7=8D=E6=A0=BC=E5=BC=8F=E3=80=82=E8=BF=99=E4=BC=9A=E5=AF=BC?=
 =?UTF-8?q?=E8=87=B4=E5=9C=A8=E8=A7=A3=E6=9E=90=E5=92=8C=E8=BD=AC=E5=8F=91?=
 =?UTF-8?q?=E7=9A=84=E8=BF=87=E7=A8=8B=E4=B8=AD=EF=BC=8C=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E5=9C=B0=E7=BB=99=E5=A4=9A=E4=B8=AA=E5=86=85=E5=AE=B9=E5=9D=97?=
 =?UTF-8?q?=E6=89=93=E4=B8=8A=E4=BA=86=E7=BC=93=E5=AD=98=E6=A0=87=E8=AE=B0?=
 =?UTF-8?q?=EF=BC=8C=E6=9C=80=E7=BB=88=E8=B6=85=E8=BF=87=E4=BA=86=E5=AE=98?=
 =?UTF-8?q?=E6=96=B9=E9=99=90=E5=88=B6=E7=9A=84=E2=80=9C=E6=9C=80=E5=A4=9A?=
 =?UTF-8?q?=204=20=E4=B8=AA=E2=80=9D=E4=B8=8A=E9=99=90=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

修复方案:
我重写了 adapters/cc_anthropic_adapter.py 中的 optimize_cache_control 函数,改回了 Anthropic 官方支持的**块级别缓存(block-level prompt caching)**策略。
新逻辑会严格计算名额,最多只分配 4 个缓存断点。
它会优先把断点分配给 system 和 tools(因为这两个部分最占 Token 且最稳定)。
剩下的名额会根据窗口距离,动态分配给 messages 列表中最新的那几个文本块。
代码已修改完成,你可以直接重新测试一下 Claude 系列模型,这个 invalid_request_error 报错应该已经解决了。
---
 adapters/cc_anthropic_adapter.py | 35 ++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6
deletions(-)

diff --git a/adapters/cc_anthropic_adapter.py b/adapters/cc_anthropic_adapter.py
index 95aced6..f5991c0 100644
--- a/adapters/cc_anthropic_adapter.py
+++ b/adapters/cc_anthropic_adapter.py
@@ -582,16 +582,39 @@ _EPHEMERAL = {'type': 'ephemeral'}
 
 
 def optimize_cache_control(request: JsonDict) -> None:
-    """为 Anthropic Messages 请求启用顶层自动 prompt caching。
+    """Enable block-level prompt caching on an Anthropic Messages request.
 
-    2026 版 Claude API 已支持在请求顶层使用 `cache_control` 开启自动缓存,
-    由上游自动把断点放到最后一个可缓存块并随多轮对话前移。相比手动在嵌套
-    content blocks 上打断点,这种方式对 Anthropic 兼容中转站更稳定,也更接近
-    `/v1/responses` 通过顶层字段启用缓存的思路。
+    At most `_MAX_BREAKPOINTS` blocks get a `cache_control` marker (Anthropic
+    documents a limit of 4). Anchors for system and tools are placed first;
+    the remaining budget goes to message blocks spaced by `_BLOCK_WINDOW`.
     """
     _normalize_message_contents(request)
     _clear_all_cache_controls(request)
-    request['cache_control'] = dict(_EPHEMERAL)
+
+    breakpoints_left = _MAX_BREAKPOINTS
+
+    # Structural anchors (tools / system tails) come first and use up budget.
+    breakpoints_left -= _inject_structural_anchors(request)
+
+    # Collect the cacheable block refs from messages.
+    refs = _collect_cacheable_block_refs(request)
+    if not refs or breakpoints_left <= 0:
+        return
+
+    # Mark the last message block so the newest turn is cacheable.
+    refs[-1]['cache_control'] = dict(_EPHEMERAL)
+    breakpoints_left -= 1
+
+    # With budget left, walk backwards one window at a time adding anchors.
+    target = len(refs) - 1 - _BLOCK_WINDOW
+    while breakpoints_left > 0 and target >= 0:
+        anchor = _pick_window_anchor(refs, target)
+        if anchor is not None:
+            refs[anchor]['cache_control'] = dict(_EPHEMERAL)
+            breakpoints_left -= 1
+            target = anchor - _BLOCK_WINDOW
+        else:
+            break
 
 
 def _normalize_message_contents(request: JsonDict) -> None: