From 72223ef4125aa452eecb9719cb407dc8b94d7205 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:52:09 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=BC=93=E5=AD=98=E5=91=BD?= =?UTF-8?q?=E4=B8=ADbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routes/chat.py | 8 +++ routes/common.py | 162 ++++++++++++++++++++++++++++++++++++++++++++ routes/responses.py | 8 +++ 3 files changed, 178 insertions(+) diff --git a/routes/chat.py b/routes/chat.py index f8b0f54..8bea531 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -43,6 +43,8 @@ from routes.common import ( build_route_context, chat_error_chunk, ensure_responses_cache_control, + attach_previous_response_id, + remember_response_id, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -313,6 +315,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: responses_payload['model'] = ctx.upstream_model responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) responses_payload = ensure_responses_cache_control(responses_payload) + responses_payload = attach_previous_response_id(responses_payload) _dbg( '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) + f' 输入项数={len(responses_payload.get("input", []))}' @@ -347,6 +350,7 @@ def _handle_responses_non_stream( attach_upstream_response(turn, raw) _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + remember_response_id(payload, raw) data = responses_to_cc_response(raw, ctx.client_model) return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后') @@ -385,6 +389,10 @@ def _handle_responses_stream( 'completion_tokens': extracted_usage.get('output_tokens', 0), 'total_tokens': extracted_usage.get('total_tokens', 0), } + if event_type == 'response.completed': + response_obj = 
import hashlib
import json
import logging
import threading
import time
from typing import Any

logger = logging.getLogger(__name__)

# Guards every read and write of _RESPONSES_PREV_IDS.
_RESPONSES_PREV_ID_LOCK = threading.Lock()
# Seconds a remembered response id stays usable (24 hours).
_RESPONSES_PREV_ID_TTL = 86400
# conversation key -> (upstream response id, time.monotonic() value at write time)
_RESPONSES_PREV_IDS: dict[str, tuple[str, float]] = {}
# Content part types whose `text` field contributes to the conversation seed.
_RESPONSES_TEXT_PART_TYPES = ('input_text', 'output_text', 'text', 'summary_text')


def attach_previous_response_id(payload: dict[str, Any]) -> dict[str, Any]:
    """Fill in `previous_response_id` on a multi-turn Responses request.

    The id is looked up in the in-process table written by
    `remember_response_id`, using a key derived from the conversation's root
    messages. A payload that already carries a truthy `previous_response_id`,
    is not a dict, or yields no key is returned untouched.

    Args:
        payload: Responses request body; mutated in place when an id is found.

    Returns:
        The same `payload` object.
    """
    if not isinstance(payload, dict) or payload.get('previous_response_id'):
        return payload
    key = _responses_prev_id_key(payload)
    if not key:
        return payload
    previous_response_id = _get_previous_response_id(key)
    if not previous_response_id:
        # The first assistant message only appears in the history from the
        # second turn on, so the id remembered after turn one is stored under
        # the user-only key. Without this fallback turn two never matches.
        root_key = _responses_prev_id_key(payload, include_assistant=False)
        if root_key and root_key != key:
            previous_response_id = _get_previous_response_id(root_key)
    if not previous_response_id:
        return payload
    payload['previous_response_id'] = previous_response_id
    logger.info('已为 Responses 请求补齐 previous_response_id')
    return payload


def remember_response_id(payload: dict[str, Any], response_data: dict[str, Any]) -> None:
    """Record the latest upstream response id for the payload's conversation.

    Nothing is stored when either argument is not a dict, when
    `response_data['id']` is not a non-blank string, or when the payload
    yields no conversation key. Expired entries are purged on every write.

    Args:
        payload: The Responses request that produced `response_data`.
        response_data: Upstream response object; only its `id` is read.
    """
    if not isinstance(payload, dict) or not isinstance(response_data, dict):
        return
    response_id = response_data.get('id')
    if not isinstance(response_id, str) or not response_id.strip():
        return
    key = _responses_prev_id_key(payload)
    if not key:
        return
    with _RESPONSES_PREV_ID_LOCK:
        _RESPONSES_PREV_IDS[key] = (response_id.strip(), time.monotonic())
        _cleanup_previous_response_ids_locked()


def _responses_prev_id_key(payload: dict[str, Any], *, include_assistant: bool = True) -> str:
    """Derive a conversation key from a Responses request.

    The full `input` is deliberately not hashed: history grows every turn, so
    such a key would change on every request. Only `instructions` and the root
    messages of the conversation are used.

    Args:
        payload: Responses request body.
        include_assistant: When False, only the first user message is used as
            the seed; this is the key a first-turn request produces.

    Returns:
        A 24-character hex digest, or '' when the request contains no usable
        conversation seed. Instructions alone never produce a key, because
        they are typically shared by unrelated conversations.
    """
    instructions = payload.get('instructions') or ''
    if not isinstance(instructions, str):
        # Non-string instructions would break the concatenation below.
        instructions = json.dumps(instructions, ensure_ascii=False, default=str)
    input_data = payload.get('input')
    if input_data is None:
        seed_input = ''
    elif isinstance(input_data, str):
        seed_input = input_data
    elif isinstance(input_data, list):
        seed_input = _responses_root_seed_from_items(
            input_data, include_assistant=include_assistant,
        )
    else:
        seed_input = json.dumps(input_data, ensure_ascii=False, default=str)
    if not seed_input.strip():
        return ''
    raw = instructions + '|' + seed_input
    return hashlib.sha256(raw.encode('utf-8')).hexdigest()[:24]


def _responses_root_seed_from_items(items: list[Any], *, include_assistant: bool = True) -> str:
    """Serialise the root messages of a Responses `input` list.

    The seed is the first user message and, when present and requested, the
    first assistant message. A bare string item counts as a user message.
    Items are accepted both as `{'type': 'message', 'role': ...}` and as
    untyped `{'role': ...}` dicts; everything else is ignored.

    Args:
        items: The `input` list of a Responses request.
        include_assistant: When False the assistant message is left out.

    Returns:
        Compact JSON of the selected messages, or '' when none was found.
    """
    first_user = None
    first_assistant = None
    for item in items:
        if isinstance(item, str):
            if first_user is None:
                first_user = {'role': 'user', 'content': item}
        elif isinstance(item, dict):
            role = item.get('role', '')
            item_type = item.get('type', '')
            if role in ('user', 'assistant') and (item_type == 'message' or not item_type):
                normalized = {
                    'role': role,
                    'content': _responses_normalize_content(item.get('content')),
                }
                if role == 'user' and first_user is None:
                    first_user = normalized
                elif role == 'assistant' and first_assistant is None:
                    first_assistant = normalized
        if first_user is not None and (first_assistant is not None or not include_assistant):
            break
    parts = []
    if first_user is not None:
        parts.append(first_user)
    if include_assistant and first_assistant is not None:
        parts.append(first_assistant)
    if not parts:
        return ''
    return json.dumps(parts, ensure_ascii=False, separators=(',', ':'))


def _responses_normalize_content(content: Any) -> str:
    """Collapse the various Responses `content` shapes into plain text.

    The result only feeds key computation, so fidelity does not matter; the
    goal is that formatting differences do not change the key. Strings are
    stripped, lists contribute their string items and the `text` of text-like
    parts joined by newlines, and any other value is stringified.

    Args:
        content: A message's `content` value (string, list of parts, or other).

    Returns:
        The stripped text, or '' for None.
    """
    if content is None:
        return ''
    if isinstance(content, str):
        return content.strip()
    if not isinstance(content, list):
        return str(content).strip()
    texts: list[str] = []
    for part in content:
        if isinstance(part, str):
            texts.append(part)
        elif isinstance(part, dict) and part.get('type') in _RESPONSES_TEXT_PART_TYPES:
            text = part.get('text', '')
            # A null or non-string `text` must not break the join below.
            texts.append(text if isinstance(text, str) else '')
    return '\n'.join(texts).strip()


def _get_previous_response_id(key: str) -> str:
    """Return the response id stored under `key`, or '' if absent or expired.

    An expired entry is removed as a side effect of the lookup.
    """
    with _RESPONSES_PREV_ID_LOCK:
        entry = _RESPONSES_PREV_IDS.get(key)
        if not entry:
            return ''
        response_id, ts = entry
        if (time.monotonic() - ts) >= _RESPONSES_PREV_ID_TTL:
            _RESPONSES_PREV_IDS.pop(key, None)
            return ''
        return response_id


def _cleanup_previous_response_ids_locked() -> None:
    """Drop every entry older than the TTL.

    The caller must already hold `_RESPONSES_PREV_ID_LOCK`. Purging on write
    keeps a long-running process from accumulating dead conversations.
    """
    now = time.monotonic()
    expired = [
        key for key, (_, ts) in _RESPONSES_PREV_IDS.items()
        if (now - ts) >= _RESPONSES_PREV_ID_TTL
    ]
    for key in expired:
        _RESPONSES_PREV_IDS.pop(key, None)
请求注入自定义指令(写入 system 字段)。 diff --git a/routes/responses.py b/routes/responses.py index eeb65a3..dd32d5c 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -28,6 +28,8 @@ from routes.common import ( build_responses_target, build_route_context, ensure_responses_cache_control, + attach_previous_response_id, + remember_response_id, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -249,6 +251,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) payload = ensure_responses_cache_control(payload) + payload = attach_previous_response_id(payload) url, headers = build_responses_target(ctx) payload = apply_body_modifications(payload, ctx.body_modifications) headers = apply_header_modifications(headers, ctx.header_modifications) @@ -276,6 +279,7 @@ def _handle_responses_non_stream( response_data = resp.json() attach_upstream_response(turn, response_data) + remember_response_id(payload, response_data) response_data['model'] = ctx.client_model return _finalize_responses_response( response_data, @@ -315,6 +319,10 @@ def _handle_responses_stream( extracted_usage = _extract_responses_usage(event_data) if extracted_usage: last_usage = extracted_usage + if event_type == 'response.completed': + response_obj = event_data.get('response') if isinstance(event_data, dict) else None + if isinstance(response_obj, dict): + remember_response_id(payload, response_obj) if event_count < 10: _dbg( f'上游事件#{event_count} 类型={event_type} 数据='