diff --git a/routes/chat.py b/routes/chat.py
index 7e01346..7c72413 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -21,6 +21,7 @@ from routes.common import (
     get_outbound,
     inject_instructions_cc,
     log_route_context,
+    should_inject_thinking,
 )
 from utils.request_logger import start_turn
 from utils.thinking_cache import thinking_cache
@@ -58,7 +59,8 @@ def chat_completions():
 
     payload['model'] = ctx.upstream_model
     payload = normalize_request(payload)
-    payload['messages'] = thinking_cache.inject(payload.get('messages', []))
+    if should_inject_thinking(ctx.backend):
+        payload['messages'] = thinking_cache.inject(payload.get('messages', []))
     payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position)
 
     outbound = get_outbound(ctx.backend)
diff --git a/routes/common.py b/routes/common.py
index eba89b2..654900e 100644
--- a/routes/common.py
+++ b/routes/common.py
@@ -173,6 +173,20 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po
     return payload
 
 
+def should_inject_thinking(backend: str) -> bool:
+    """Return whether historical thinking should be injected for ``backend``.
+
+    Enabled only for backends that can clearly consume historical reasoning/thinking:
+    - anthropic
+    - gemini
+    - responses
+
+    OpenAI Chat-compatible backends usually do not accept the `reasoning_content`
+    history field and injecting it makes the upstream fail, so they are excluded.
+    """
+    return backend in ('anthropic', 'gemini', 'responses')
+
+
 # ─── Body / Header 修改 ──────────────────────────
 
 
diff --git a/routes/responses.py b/routes/responses.py
index 6732660..ce1de4b 100644
--- a/routes/responses.py
+++ b/routes/responses.py
@@ -28,6 +28,7 @@ from routes.common import (
     inject_instructions_cc,
     inject_instructions_responses,
     log_route_context,
+    should_inject_thinking,
 )
 from utils.request_logger import start_turn
 from utils.thinking_cache import thinking_cache
@@ -94,6 +95,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx) -> dict[str, Any]:
     cc_payload = responses_to_cc(payload)
     cc_payload['model'] = ctx.upstream_model
     cc_payload = normalize_request(cc_payload)
-    cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', []))
+    if should_inject_thinking(ctx.backend):
+        cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', []))
     cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position)
     return cc_payload