From 251437a76085591f53f0f77c774b8fa009722d8d Mon Sep 17 00:00:00 2001
From: h88782481 <54714341+h88782481@users.noreply.github.com>
Date: Tue, 14 Apr 2026 16:14:06 +0800
Subject: [PATCH] =?UTF-8?q?=E5=B0=9D=E8=AF=95=E4=BF=AE=E5=A4=8D/v1/respons?=
 =?UTF-8?q?es=E5=90=8E=E7=AB=AF=E6=B2=A1=E6=9C=89=E5=91=BD=E4=B8=AD?=
 =?UTF-8?q?=E7=BC=93=E5=AD=98=E7=9A=84=E6=83=85=E5=86=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 routes/chat.py      |  2 ++
 routes/common.py    | 18 ++++++++++++++++++
 routes/responses.py |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/routes/chat.py b/routes/chat.py
index be4f775..66e1e67 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -42,6 +42,7 @@ from routes.common import (
     build_responses_target,
     build_route_context,
     chat_error_chunk,
+    ensure_prompt_cache_key,
     inject_instructions_anthropic,
     inject_instructions_cc,
     inject_instructions_responses,
@@ -312,6 +313,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
     responses_payload = cc_to_responses_request(payload)
     responses_payload['model'] = ctx.upstream_model
     responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
+    responses_payload = ensure_prompt_cache_key(responses_payload)
     _dbg(
         '已转换为 Responses 请求：字段=' + str(list(responses_payload.keys()))
         + f' 输入项数={len(responses_payload.get("input", []))}'
diff --git a/routes/common.py b/routes/common.py
index 0ad7518..9f57233 100644
--- a/routes/common.py
+++ b/routes/common.py
@@ -7,6 +7,7 @@ SSE 消息拼装逻辑，避免 `chat.py` 和 `responses.py` 各自维护重复
 from __future__ import annotations
 
 from dataclasses import dataclass
+import hashlib
 import json
 import logging
 from typing import Any
@@ -218,6 +219,23 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po
 # ─── Body / Header 修改 ──────────────────────────
 
 
+def ensure_prompt_cache_key(payload: dict[str, Any]) -> dict[str, Any]:
+    """Ensure a Responses request carries prompt_cache_key so upstream prompt caching works.
+
+    Upstreams (e.g. sub2api) do not generate prompt_cache_key for native /v1/responses requests,
+    so the cache is never hit. Unless a truthy key is already set, a stable key derived from
+    model + instructions is written into ``payload`` so matching conversations share a prefix.
+    """
+    if payload.get('prompt_cache_key'):
+        return payload  # an existing non-empty key is left untouched
+
+    model = payload.get('model', '')
+    instructions = payload.get('instructions', '')
+    seed = f'{model}|{instructions}'
+    payload['prompt_cache_key'] = hashlib.sha256(seed.encode()).hexdigest()[:32]  # first 32 hex chars
+    return payload
+
+
 def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]:
     """对转发请求体应用字段级修改。
 
diff --git a/routes/responses.py b/routes/responses.py
index 4889a40..2496a4b 100644
--- a/routes/responses.py
+++ b/routes/responses.py
@@ -27,6 +27,7 @@ from routes.common import (
     build_openai_target,
     build_responses_target,
     build_route_context,
+    ensure_prompt_cache_key,
     inject_instructions_anthropic,
     inject_instructions_cc,
     inject_instructions_responses,
@@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
     payload = dict(payload)
     payload['model'] = ctx.upstream_model
     payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
+    payload = ensure_prompt_cache_key(payload)
     url, headers = build_responses_target(ctx)
     payload = apply_body_modifications(payload, ctx.body_modifications)
     headers = apply_header_modifications(headers, ctx.header_modifications)