From 2f2a3cce41bdc31157c016d37a2bf15d9fca3202 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 15 Mar 2026 14:10:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9B=9E=E6=BB=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/responses_cc_adapter.py | 45 +------- routes/chat.py | 10 -- routes/common.py | 179 ------------------------------- routes/responses.py | 28 ----- 4 files changed, 5 insertions(+), 257 deletions(-) diff --git a/adapters/responses_cc_adapter.py b/adapters/responses_cc_adapter.py index b793dbd..0f35c52 100644 --- a/adapters/responses_cc_adapter.py +++ b/adapters/responses_cc_adapter.py @@ -654,10 +654,6 @@ class ResponsesToCCStreamConverter: 'completion_tokens': self._usage.get('output_tokens', 0), 'total_tokens': self._usage.get('total_tokens', 0), } - if isinstance(self._usage.get('input_tokens_details'), dict): - chunk['usage']['prompt_tokens_details'] = dict(self._usage['input_tokens_details']) - if isinstance(self._usage.get('output_tokens_details'), dict): - chunk['usage']['completion_tokens_details'] = dict(self._usage['output_tokens_details']) return [chunk] def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: @@ -682,44 +678,20 @@ def _copy_request_options(payload: JsonDict, result: JsonDict) -> None: """将 Responses 请求中的通用选项复制到 CC 请求体。""" if 'tools' in payload: result['tools'] = _convert_tools(payload['tools']) - for key in ( - 'temperature', - 'top_p', - 'tool_choice', - 'parallel_tool_calls', - 'truncation', - 'store', - 'metadata', - 'conversation', - 'previous_response_id', - 'prompt_cache_key', - 'service_tier', - 'user', - ): + for key in ('temperature', 'top_p'): if key in payload: result[key] = payload[key] if 'max_output_tokens' in payload: result['max_tokens'] = payload['max_output_tokens'] + if 'tool_choice' in payload: + result['tool_choice'] = payload['tool_choice'] def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None: """将聊天补全请求中的通用选项复制到原生 Responses 请求体。""" if 'tools' in payload: result['tools'] = _convert_cc_tools_to_responses(payload['tools']) - for key in ( - 'temperature', - 'top_p', - 'tool_choice', - 'parallel_tool_calls', - 'truncation', - 'store', - 'metadata', - 'conversation', - 'previous_response_id', - 'prompt_cache_key', - 'service_tier', - 'user', - ): + for key in ('temperature', 'top_p', 'tool_choice'): if key in payload: result[key] = payload[key] if 'max_tokens' in payload: @@ -942,18 +914,11 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict: def _build_responses_usage(usage: JsonDict) -> JsonDict: """将 Chat Completions 的 usage 字段映射为 Responses usage 结构。""" - result = { + return { 'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'total_tokens': usage.get('total_tokens', 0), } - prompt_details = usage.get('prompt_tokens_details') - if isinstance(prompt_details, dict): - result['input_tokens_details'] = dict(prompt_details) - completion_details = usage.get('completion_tokens_details') - if isinstance(completion_details, dict): - result['output_tokens_details'] = dict(completion_details) - return result def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]: diff --git a/routes/chat.py b/routes/chat.py index 8bea531..1ca6f81 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -42,9 +42,6 @@ from routes.common import ( build_responses_target, build_route_context, chat_error_chunk, - ensure_responses_cache_control, - attach_previous_response_id, - remember_response_id, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -314,8 +311,6 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: responses_payload = cc_to_responses_request(payload) responses_payload['model'] = ctx.upstream_model responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) - responses_payload = ensure_responses_cache_control(responses_payload) - responses_payload = attach_previous_response_id(responses_payload) _dbg( '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) + f' 输入项数={len(responses_payload.get("input", []))}' @@ -350,7 +345,6 @@ def _handle_responses_non_stream( attach_upstream_response(turn, raw) _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - remember_response_id(payload, raw) data = responses_to_cc_response(raw, ctx.client_model) return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后') @@ -389,10 +383,6 @@ def _handle_responses_stream( 'completion_tokens': extracted_usage.get('output_tokens', 0), 'total_tokens': extracted_usage.get('total_tokens', 0), } - if event_type == 'response.completed': - response_obj = event_data.get('response') if isinstance(event_data, dict) else None - if isinstance(response_obj, dict): - remember_response_id(payload, response_obj) if event_count < 10: _dbg( f'上游事件#{event_count} 类型={event_type} 数据=' diff --git a/routes/common.py b/routes/common.py index f008b96..0ad7518 100644 --- a/routes/common.py +++ b/routes/common.py @@ -7,11 +7,8 @@ SSE 消息拼装逻辑,避免 `chat.py` 和 `responses.py` 各自维护重复 from __future__ import annotations from dataclasses import dataclass -import hashlib import json import logging -import threading -import time from typing import Any import settings @@ -19,10 +16,6 @@ from utils.http import build_anthropic_headers, build_gemini_headers, build_open logger = logging.getLogger(__name__) -_RESPONSES_PREV_ID_LOCK = threading.Lock() -_RESPONSES_PREV_ID_TTL = 86400 -_RESPONSES_PREV_IDS: dict[str, tuple[str, float]] = {} - @dataclass(frozen=True) class RouteContext: @@ -202,178 +195,6 @@ def inject_instructions_responses(payload: dict[str, Any], instructions: str, po return payload -def ensure_responses_cache_control(payload: dict[str, Any]) -> dict[str, Any]: - """为 Responses 请求补齐自动 prompt caching 开关。 - - 一些支持 `/v1/responses` 的上游会参考顶层 `cache_control` 来自动放置缓存断点。 - Cursor 侧通常不会主动携带这个字段,因此这里在缺失时补一个保守的默认值, - 同时允许调用方通过 body_modifications 或显式字段自行覆盖/关闭。 - """ - if not isinstance(payload, dict): - return payload - cache_control = payload.get('cache_control') - if isinstance(cache_control, dict) and cache_control.get('type'): - return payload - payload['cache_control'] = {'type': 'ephemeral'} - logger.info('已为 Responses 请求自动启用 cache_control=ephemeral') - return payload - - -def attach_previous_response_id(payload: dict[str, Any]) -> dict[str, Any]: - """为多轮 Responses 请求补齐上一轮 response_id。 - - 某些上游在 `/v1/responses` 多轮场景下,只有沿用 `previous_response_id` 才能稳定复用 - 上一轮的服务端响应链与缓存。Cursor 通常会回传完整历史,但不会主动带这个字段, - 因此代理需要基于稳定对话键做一次轻量补齐。 - """ - if not isinstance(payload, dict) or payload.get('previous_response_id'): - return payload - key = _responses_prev_id_key(payload) - if not key: - return payload - previous_response_id = _get_previous_response_id(key) - if not previous_response_id: - return payload - payload['previous_response_id'] = previous_response_id - logger.info('已为 Responses 请求补齐 previous_response_id') - return payload - - -def remember_response_id(payload: dict[str, Any], response_data: dict[str, Any]) -> None: - """记住当前对话最近一次上游 Responses response_id。""" - if not isinstance(payload, dict) or not isinstance(response_data, dict): - return - response_id = response_data.get('id') - if not isinstance(response_id, str) or not response_id.strip(): - return - key = _responses_prev_id_key(payload) - if not key: - return - with _RESPONSES_PREV_ID_LOCK: - _RESPONSES_PREV_IDS[key] = (response_id.strip(), time.time()) - _cleanup_previous_response_ids_locked() - - -def _responses_prev_id_key(payload: dict[str, Any]) -> str: - """基于 Responses 请求的“对话根信息”生成稳定键。 - - 这里故意不直接使用完整 `input` 作为键,因为多轮对话每轮都会追加历史; - 如果把整段历史都纳入哈希,键会在每一轮变化,导致无法稳定取回上一轮的 - `previous_response_id`。当前策略只取 instructions 与首轮 user/assistant 根消息。 - """ - instructions = payload.get('instructions') or '' - input_data = payload.get('input', []) - if isinstance(input_data, str): - seed_input = input_data - elif isinstance(input_data, list): - seed_input = _responses_root_seed_from_items(input_data) - else: - seed_input = json.dumps(input_data, ensure_ascii=False, default=str) - raw = instructions + '|' + seed_input - if not raw.strip('|'): - return '' - return hashlib.sha256(raw.encode('utf-8')).hexdigest()[:24] - - -def _responses_root_seed_from_items(items: list[Any]) -> str: - """从 Responses `input` 中提取足够稳定的对话根片段。 - - 目标不是完整还原会话,而是构造一个在同一段对话内尽量恒定、跨轮次可复用的 - seed。这里沿用项目里 conversation seed 的思路:优先取第一条 user 与第一条 - assistant;如果 assistant 还不存在,则只用第一条 user。 - """ - first_user = None - first_assistant = None - for item in items: - if isinstance(item, str): - if first_user is None: - first_user = {'role': 'user', 'content': item} - continue - if not isinstance(item, dict): - continue - item_type = item.get('type', '') - role = item.get('role', '') - if item_type == 'message' and role in ('user', 'assistant'): - normalized = { - 'role': role, - 'content': _responses_normalize_content(item.get('content', [])), - } - if role == 'user' and first_user is None: - first_user = normalized - elif role == 'assistant' and first_assistant is None: - first_assistant = normalized - elif role in ('user', 'assistant') and not item_type: - normalized = { - 'role': role, - 'content': _responses_normalize_content(item.get('content', '')), - } - if role == 'user' and first_user is None: - first_user = normalized - elif role == 'assistant' and first_assistant is None: - first_assistant = normalized - if first_user is not None and first_assistant is not None: - break - parts = [] - if first_user is not None: - parts.append(first_user) - if first_assistant is not None: - parts.append(first_assistant) - return json.dumps(parts, ensure_ascii=False, separators=(',', ':')) - - -def _responses_normalize_content(content: Any) -> str: - """把 Responses 各种 content 形态折叠成稳定文本。 - - 这里的目标不是保真展示,而是降低结构差异对 key 计算的影响;只抽取会影响 - 会话根语义的文本型内容,忽略无关字段,避免同一轮请求因格式细节不同而得到 - 不同的 previous_response_id 键。 - """ - if isinstance(content, str): - return content.strip() - if not isinstance(content, list): - return str(content).strip() if content is not None else '' - texts: list[str] = [] - for part in content: - if isinstance(part, str): - texts.append(part) - continue - if not isinstance(part, dict): - continue - if part.get('type') in ('input_text', 'output_text', 'text'): - texts.append(part.get('text', '')) - elif part.get('type') == 'summary_text': - texts.append(part.get('text', '')) - return '\n'.join(texts).strip() - - -def _get_previous_response_id(key: str) -> str: - """按稳定键读取上一轮 response_id,并在过期时顺手清理。""" - with _RESPONSES_PREV_ID_LOCK: - entry = _RESPONSES_PREV_IDS.get(key) - if not entry: - return '' - response_id, ts = entry - if (time.time() - ts) >= _RESPONSES_PREV_ID_TTL: - _RESPONSES_PREV_IDS.pop(key, None) - return '' - return response_id - - -def _cleanup_previous_response_ids_locked() -> None: - """清理过期的 previous_response_id 缓存项。 - - 这张表只用于短期多轮续接;一旦对话长时间不活跃,就不再需要继续保留, - 以免常驻进程运行过久后累计过多失效状态。 - """ - now = time.time() - expired = [ - key for key, (_, ts) in _RESPONSES_PREV_IDS.items() - if (now - ts) >= _RESPONSES_PREV_ID_TTL - ] - for key in expired: - _RESPONSES_PREV_IDS.pop(key, None) - - def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]: """向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。 diff --git a/routes/responses.py b/routes/responses.py index dd32d5c..4889a40 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -27,9 +27,6 @@ from routes.common import ( build_openai_target, build_responses_target, build_route_context, - ensure_responses_cache_control, - attach_previous_response_id, - remember_response_id, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -250,8 +247,6 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: payload = dict(payload) payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) - payload = ensure_responses_cache_control(payload) - payload = attach_previous_response_id(payload) url, headers = build_responses_target(ctx) payload = apply_body_modifications(payload, ctx.body_modifications) headers = apply_header_modifications(headers, ctx.header_modifications) @@ -279,7 +274,6 @@ def _handle_responses_non_stream( response_data = resp.json() attach_upstream_response(turn, response_data) - remember_response_id(payload, response_data) response_data['model'] = ctx.client_model return _finalize_responses_response( response_data, @@ -319,10 +313,6 @@ def _handle_responses_stream( extracted_usage = _extract_responses_usage(event_data) if extracted_usage: last_usage = extracted_usage - if event_type == 'response.completed': - response_obj = event_data.get('response') if isinstance(event_data, dict) else None - if isinstance(response_obj, dict): - remember_response_id(payload, response_obj) if event_count < 10: _dbg( f'上游事件#{event_count} 类型={event_type} 数据=' @@ -639,22 +629,4 @@ def _finalize_responses_response( attach_client_response(turn, response_data) finalize_turn(turn, usage=response_data.get('usage')) - output_items = response_data.get('output', []) - if isinstance(output_items, list): - for item in output_items: - if not isinstance(item, dict) or item.get('type') != 'reasoning': - continue - summary = item.get('summary', []) - if not isinstance(summary, list): - continue - reasoning_text = ''.join( - part.get('text', '') - for part in summary - if isinstance(part, dict) and part.get('type') == 'summary_text' - ) - if reasoning_text: - cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', []) - thinking_cache.store_from_response(cc_messages, reasoning_text) - break - return jsonify(response_data)