修复缓存问题

This commit is contained in:
h88782481 2026-03-15 13:25:23 +08:00
parent 049f91e549
commit 98f3ae24a0
4 changed files with 79 additions and 5 deletions

View file

@ -42,6 +42,7 @@ from routes.common import (
build_responses_target,
build_route_context,
chat_error_chunk,
ensure_responses_cache_control,
inject_instructions_anthropic,
inject_instructions_cc,
inject_instructions_responses,
@ -311,6 +312,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
responses_payload = cc_to_responses_request(payload)
responses_payload['model'] = ctx.upstream_model
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
responses_payload = ensure_responses_cache_control(responses_payload)
_dbg(
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
+ f' 输入项数={len(responses_payload.get("input", []))}'

View file

@ -195,6 +195,23 @@ def inject_instructions_responses(payload: dict[str, Any], instructions: str, po
return payload
def ensure_responses_cache_control(payload: dict[str, Any]) -> dict[str, Any]:
    """Fill in a default top-level ``cache_control`` on a Responses request.

    Per the original author's note, some upstreams that serve
    ``/v1/responses`` consult the top-level ``cache_control`` field to place
    prompt-cache breakpoints automatically, and Cursor usually does not send
    it, so a conservative default is supplied here when it is absent.

    The payload is modified in place and the same object is returned.

    * A non-dict ``payload`` is returned untouched.
    * An existing ``cache_control`` that is a dict with a truthy ``'type'``
      is kept as-is, so an explicit caller value wins.
    * Anything else (missing key, non-dict value, dict without a truthy
      ``'type'``) is replaced by ``{'type': 'ephemeral'}`` and an info-level
      log line is emitted.

    NOTE(review): the original docstring also says callers can override or
    disable this through ``body_modifications``; that happens outside this
    function, so confirm it against the call sites.
    """
    if isinstance(payload, dict):
        existing = payload.get('cache_control')
        # Only a dict carrying a truthy 'type' counts as already configured.
        already_configured = isinstance(existing, dict) and bool(existing.get('type'))
        if not already_configured:
            payload['cache_control'] = {'type': 'ephemeral'}
            logger.info('已为 Responses 请求自动启用 cache_control=ephemeral')
    return payload
def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]:
"""向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。

View file

@ -27,6 +27,7 @@ from routes.common import (
build_openai_target,
build_responses_target,
build_route_context,
ensure_responses_cache_control,
inject_instructions_anthropic,
inject_instructions_cc,
inject_instructions_responses,
@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
payload = dict(payload)
payload['model'] = ctx.upstream_model
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
payload = ensure_responses_cache_control(payload)
url, headers = build_responses_target(ctx)
payload = apply_body_modifications(payload, ctx.body_modifications)
headers = apply_header_modifications(headers, ctx.header_modifications)
@ -629,4 +631,22 @@ def _finalize_responses_response(
attach_client_response(turn, response_data)
finalize_turn(turn, usage=response_data.get('usage'))
output_items = response_data.get('output', [])
if isinstance(output_items, list):
for item in output_items:
if not isinstance(item, dict) or item.get('type') != 'reasoning':
continue
summary = item.get('summary', [])
if not isinstance(summary, list):
continue
reasoning_text = ''.join(
part.get('text', '')
for part in summary
if isinstance(part, dict) and part.get('type') == 'summary_text'
)
if reasoning_text:
cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', [])
thinking_cache.store_from_response(cc_messages, reasoning_text)
break
return jsonify(response_data)