diff --git a/routes/chat.py b/routes/chat.py index 7e01346..7c72413 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -21,6 +21,7 @@ from routes.common import ( get_outbound, inject_instructions_cc, log_route_context, + should_inject_thinking, ) from utils.request_logger import start_turn from utils.thinking_cache import thinking_cache @@ -58,7 +59,8 @@ def chat_completions(): payload['model'] = ctx.upstream_model payload = normalize_request(payload) - payload['messages'] = thinking_cache.inject(payload.get('messages', [])) + if should_inject_thinking(ctx.backend): + payload['messages'] = thinking_cache.inject(payload.get('messages', [])) payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) outbound = get_outbound(ctx.backend) diff --git a/routes/common.py b/routes/common.py index eba89b2..654900e 100644 --- a/routes/common.py +++ b/routes/common.py @@ -173,6 +173,20 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po return payload +def should_inject_thinking(backend: str) -> bool: + """判断当前后端是否需要注入历史 thinking。 + + 仅对明确能消费历史 reasoning/thinking 的后端启用: + - anthropic + - gemini + - responses + + OpenAI Chat 兼容后端通常不接受 `reasoning_content` 历史字段, + 若注入会导致上游报错,因此显式排除。 + """ + return backend in ('anthropic', 'gemini', 'responses') + + # ─── Body / Header 修改 ────────────────────────── diff --git a/routes/responses.py b/routes/responses.py index 6732660..ce1de4b 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -28,6 +28,7 @@ from routes.common import ( inject_instructions_cc, inject_instructions_responses, log_route_context, + should_inject_thinking, ) from utils.request_logger import start_turn from utils.thinking_cache import thinking_cache @@ -94,6 +95,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx) -> dict[str, Any]: cc_payload = responses_to_cc(payload) cc_payload['model'] = ctx.upstream_model cc_payload = normalize_request(cc_payload) - cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) + if should_inject_thinking(ctx.backend): + cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position) return cc_payload