From 251437a76085591f53f0f77c774b8fa009722d8d Mon Sep 17 00:00:00 2001
From: h88782481 <54714341+h88782481@users.noreply.github.com>
Date: Tue, 14 Apr 2026 16:14:06 +0800
Subject: [PATCH] =?UTF-8?q?=E5=B0=9D=E8=AF=95=E4=BF=AE=E5=A4=8D/v1/respons?=
 =?UTF-8?q?es=E5=90=8E=E7=AB=AF=E6=B2=A1=E6=9C=89=E5=91=BD=E4=B8=AD?=
 =?UTF-8?q?=E7=BC=93=E5=AD=98=E7=9A=84=E6=83=85=E5=86=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note (ignored by git am): the routes/common.py hunk was revised
during review -- the new function's docstring is now in English and the
cache-key hash tolerates lone surrogates. The post-image blob id on that
file's "index" line predates this revision.

 routes/chat.py      |  2 ++
 routes/common.py    | 30 ++++++++++++++++++++++++++++++
 routes/responses.py |  2 ++
 3 files changed, 34 insertions(+)

diff --git a/routes/chat.py b/routes/chat.py
index be4f775..66e1e67 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -42,6 +42,7 @@ from routes.common import (
     build_responses_target,
     build_route_context,
     chat_error_chunk,
+    ensure_prompt_cache_key,
     inject_instructions_anthropic,
     inject_instructions_cc,
     inject_instructions_responses,
@@ -312,6 +313,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
     responses_payload = cc_to_responses_request(payload)
     responses_payload['model'] = ctx.upstream_model
     responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
+    responses_payload = ensure_prompt_cache_key(responses_payload)
     _dbg(
         '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
         + f' 输入项数={len(responses_payload.get("input", []))}'
diff --git a/routes/common.py b/routes/common.py
index 0ad7518..9f57233 100644
--- a/routes/common.py
+++ b/routes/common.py
@@ -7,6 +7,7 @@ SSE 消息拼装逻辑,避免 `chat.py` 和 `responses.py` 各自维护重复
 from __future__ import annotations
 
 from dataclasses import dataclass
+import hashlib
 import json
 import logging
 from typing import Any
@@ -218,6 +219,35 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po
 # ─── Body / Header 修改 ──────────────────────────
 
 
+def ensure_prompt_cache_key(payload: dict[str, Any]) -> dict[str, Any]:
+    """Ensure a Responses request carries a ``prompt_cache_key``.
+
+    Upstreams such as sub2api do not generate a ``prompt_cache_key`` for
+    native /v1/responses requests, so prompt caching never hits. When the
+    caller did not supply one, derive a stable key from the model name and
+    the instructions so conversations sharing both can share a cached prefix.
+
+    Args:
+        payload: Responses request body; modified in place.
+
+    Returns:
+        The same ``payload`` object, with ``prompt_cache_key`` set.
+    """
+    if payload.get('prompt_cache_key'):
+        # Respect a key that is already present in the request.
+        return payload
+
+    model = payload.get('model', '')
+    instructions = payload.get('instructions', '')
+    seed = f'{model}|{instructions}'
+    # JSON bodies may carry lone surrogates (e.g. "\ud800"); a strict UTF-8
+    # encode would raise UnicodeEncodeError and fail the whole request.
+    digest = hashlib.sha256(seed.encode('utf-8', 'surrogatepass')).hexdigest()
+    # Only the first 32 hex characters of the digest are used as the key.
+    payload['prompt_cache_key'] = digest[:32]
+    return payload
+
+
 def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]:
     """对转发请求体应用字段级修改。
 
diff --git a/routes/responses.py b/routes/responses.py
index 4889a40..2496a4b 100644
--- a/routes/responses.py
+++ b/routes/responses.py
@@ -27,6 +27,7 @@ from routes.common import (
     build_openai_target,
     build_responses_target,
     build_route_context,
+    ensure_prompt_cache_key,
     inject_instructions_anthropic,
     inject_instructions_cc,
     inject_instructions_responses,
@@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
     payload = dict(payload)
     payload['model'] = ctx.upstream_model
     payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
+    payload = ensure_prompt_cache_key(payload)
     url, headers = build_responses_target(ctx)
     payload = apply_body_modifications(payload, ctx.body_modifications)
     headers = apply_header_modifications(headers, ctx.header_modifications)