From 2f2a3cce41bdc31157c016d37a2bf15d9fca3202 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 15 Mar 2026 14:10:15 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=E5=9B=9E=E6=BB=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/responses_cc_adapter.py | 45 +------- routes/chat.py | 10 -- routes/common.py | 179 ------------------------------- routes/responses.py | 28 ----- 4 files changed, 5 insertions(+), 257 deletions(-) diff --git a/adapters/responses_cc_adapter.py b/adapters/responses_cc_adapter.py index b793dbd..0f35c52 100644 --- a/adapters/responses_cc_adapter.py +++ b/adapters/responses_cc_adapter.py @@ -654,10 +654,6 @@ class ResponsesToCCStreamConverter: 'completion_tokens': self._usage.get('output_tokens', 0), 'total_tokens': self._usage.get('total_tokens', 0), } - if isinstance(self._usage.get('input_tokens_details'), dict): - chunk['usage']['prompt_tokens_details'] = dict(self._usage['input_tokens_details']) - if isinstance(self._usage.get('output_tokens_details'), dict): - chunk['usage']['completion_tokens_details'] = dict(self._usage['output_tokens_details']) return [chunk] def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: @@ -682,44 +678,20 @@ def _copy_request_options(payload: JsonDict, result: JsonDict) -> None: """将 Responses 请求中的通用选项复制到 CC 请求体。""" if 'tools' in payload: result['tools'] = _convert_tools(payload['tools']) - for key in ( - 'temperature', - 'top_p', - 'tool_choice', - 'parallel_tool_calls', - 'truncation', - 'store', - 'metadata', - 'conversation', - 'previous_response_id', - 'prompt_cache_key', - 'service_tier', - 'user', - ): + for key in ('temperature', 'top_p'): if key in payload: result[key] = payload[key] if 'max_output_tokens' in payload: result['max_tokens'] = payload['max_output_tokens'] + if 'tool_choice' in payload: + result['tool_choice'] = payload['tool_choice'] def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None: """将聊天补全请求中的通用选项复制到原生 Responses 请求体。""" if 'tools' in payload: result['tools'] = _convert_cc_tools_to_responses(payload['tools']) - for key in ( - 'temperature', - 'top_p', - 'tool_choice', - 'parallel_tool_calls', - 'truncation', - 'store', - 'metadata', - 'conversation', - 'previous_response_id', - 'prompt_cache_key', - 'service_tier', - 'user', - ): + for key in ('temperature', 'top_p', 'tool_choice'): if key in payload: result[key] = payload[key] if 'max_tokens' in payload: @@ -942,18 +914,11 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict: def _build_responses_usage(usage: JsonDict) -> JsonDict: """将 Chat Completions 的 usage 字段映射为 Responses usage 结构。""" - result = { + return { 'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'total_tokens': usage.get('total_tokens', 0), } - prompt_details = usage.get('prompt_tokens_details') - if isinstance(prompt_details, dict): - result['input_tokens_details'] = dict(prompt_details) - completion_details = usage.get('completion_tokens_details') - if isinstance(completion_details, dict): - result['output_tokens_details'] = dict(completion_details) - return result def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]: diff --git a/routes/chat.py b/routes/chat.py index 8bea531..1ca6f81 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -42,9 +42,6 @@ from routes.common import ( build_responses_target, build_route_context, chat_error_chunk, - ensure_responses_cache_control, - attach_previous_response_id, - remember_response_id, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -314,8 +311,6 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: responses_payload = cc_to_responses_request(payload) responses_payload['model'] = ctx.upstream_model responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) - responses_payload = ensure_responses_cache_control(responses_payload) - responses_payload = attach_previous_response_id(responses_payload) _dbg( '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) + f' 输入项数={len(responses_payload.get("input", []))}' @@ -350,7 +345,6 @@ def _handle_responses_non_stream( attach_upstream_response(turn, raw) _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - remember_response_id(payload, raw) data = responses_to_cc_response(raw, ctx.client_model) return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后') @@ -389,10 +383,6 @@ def _handle_responses_stream( 'completion_tokens': extracted_usage.get('output_tokens', 0), 'total_tokens': extracted_usage.get('total_tokens', 0), } - if event_type == 'response.completed': - response_obj = event_data.get('response') if isinstance(event_data, dict) else None - if isinstance(response_obj, dict): - remember_response_id(payload, response_obj) if event_count < 10: _dbg( f'上游事件#{event_count} 类型={event_type} 数据=' diff --git a/routes/common.py b/routes/common.py index f008b96..0ad7518 100644 --- a/routes/common.py +++ b/routes/common.py @@ -7,11 +7,8 @@ SSE 消息拼装逻辑,避免 `chat.py` 和 `responses.py` 各自维护重复 from __future__ import annotations from dataclasses import dataclass -import hashlib import json import logging -import threading -import time from typing import Any import settings @@ -19,10 +16,6 @@ from utils.http import build_anthropic_headers, build_gemini_headers, build_open logger = logging.getLogger(__name__) -_RESPONSES_PREV_ID_LOCK = threading.Lock() -_RESPONSES_PREV_ID_TTL = 86400 -_RESPONSES_PREV_IDS: dict[str, tuple[str, float]] = {} - @dataclass(frozen=True) class RouteContext: @@ -202,178 +195,6 @@ def inject_instructions_responses(payload: dict[str, Any], instructions: str, po return payload -def ensure_responses_cache_control(payload: dict[str, Any]) -> dict[str, Any]: - """为 Responses 请求补齐自动 prompt caching 开关。 - - 一些支持 `/v1/responses` 的上游会参考顶层 `cache_control` 来自动放置缓存断点。 - Cursor 侧通常不会主动携带这个字段,因此这里在缺失时补一个保守的默认值, - 同时允许调用方通过 body_modifications 或显式字段自行覆盖/关闭。 - """ - if not isinstance(payload, dict): - return payload - cache_control = payload.get('cache_control') - if isinstance(cache_control, dict) and cache_control.get('type'): - return payload - payload['cache_control'] = {'type': 'ephemeral'} - logger.info('已为 Responses 请求自动启用 cache_control=ephemeral') - return payload - - -def attach_previous_response_id(payload: dict[str, Any]) -> dict[str, Any]: - """为多轮 Responses 请求补齐上一轮 response_id。 - - 某些上游在 `/v1/responses` 多轮场景下,只有沿用 `previous_response_id` 才能稳定复用 - 上一轮的服务端响应链与缓存。Cursor 通常会回传完整历史,但不会主动带这个字段, - 因此代理需要基于稳定对话键做一次轻量补齐。 - """ - if not isinstance(payload, dict) or payload.get('previous_response_id'): - return payload - key = _responses_prev_id_key(payload) - if not key: - return payload - previous_response_id = _get_previous_response_id(key) - if not previous_response_id: - return payload - payload['previous_response_id'] = previous_response_id - logger.info('已为 Responses 请求补齐 previous_response_id') - return payload - - -def remember_response_id(payload: dict[str, Any], response_data: dict[str, Any]) -> None: - """记住当前对话最近一次上游 Responses response_id。""" - if not isinstance(payload, dict) or not isinstance(response_data, dict): - return - response_id = response_data.get('id') - if not isinstance(response_id, str) or not response_id.strip(): - return - key = _responses_prev_id_key(payload) - if not key: - return - with _RESPONSES_PREV_ID_LOCK: - _RESPONSES_PREV_IDS[key] = (response_id.strip(), time.time()) - _cleanup_previous_response_ids_locked() - - -def _responses_prev_id_key(payload: dict[str, Any]) -> str: - """基于 Responses 请求的“对话根信息”生成稳定键。 - - 这里故意不直接使用完整 `input` 作为键,因为多轮对话每轮都会追加历史; - 如果把整段历史都纳入哈希,键会在每一轮变化,导致无法稳定取回上一轮的 - `previous_response_id`。当前策略只取 instructions 与首轮 user/assistant 根消息。 - """ - instructions = payload.get('instructions') or '' - input_data = payload.get('input', []) - if isinstance(input_data, str): - seed_input = input_data - elif isinstance(input_data, list): - seed_input = _responses_root_seed_from_items(input_data) - else: - seed_input = json.dumps(input_data, ensure_ascii=False, default=str) - raw = instructions + '|' + seed_input - if not raw.strip('|'): - return '' - return hashlib.sha256(raw.encode('utf-8')).hexdigest()[:24] - - -def _responses_root_seed_from_items(items: list[Any]) -> str: - """从 Responses `input` 中提取足够稳定的对话根片段。 - - 目标不是完整还原会话,而是构造一个在同一段对话内尽量恒定、跨轮次可复用的 - seed。这里沿用项目里 conversation seed 的思路:优先取第一条 user 与第一条 - assistant;如果 assistant 还不存在,则只用第一条 user。 - """ - first_user = None - first_assistant = None - for item in items: - if isinstance(item, str): - if first_user is None: - first_user = {'role': 'user', 'content': item} - continue - if not isinstance(item, dict): - continue - item_type = item.get('type', '') - role = item.get('role', '') - if item_type == 'message' and role in ('user', 'assistant'): - normalized = { - 'role': role, - 'content': _responses_normalize_content(item.get('content', [])), - } - if role == 'user' and first_user is None: - first_user = normalized - elif role == 'assistant' and first_assistant is None: - first_assistant = normalized - elif role in ('user', 'assistant') and not item_type: - normalized = { - 'role': role, - 'content': _responses_normalize_content(item.get('content', '')), - } - if role == 'user' and first_user is None: - first_user = normalized - elif role == 'assistant' and first_assistant is None: - first_assistant = normalized - if first_user is not None and first_assistant is not None: - break - parts = [] - if first_user is not None: - parts.append(first_user) - if first_assistant is not None: - parts.append(first_assistant) - return json.dumps(parts, ensure_ascii=False, separators=(',', ':')) - - -def _responses_normalize_content(content: Any) -> str: - """把 Responses 各种 content 形态折叠成稳定文本。 - - 这里的目标不是保真展示,而是降低结构差异对 key 计算的影响;只抽取会影响 - 会话根语义的文本型内容,忽略无关字段,避免同一轮请求因格式细节不同而得到 - 不同的 previous_response_id 键。 - """ - if isinstance(content, str): - return content.strip() - if not isinstance(content, list): - return str(content).strip() if content is not None else '' - texts: list[str] = [] - for part in content: - if isinstance(part, str): - texts.append(part) - continue - if not isinstance(part, dict): - continue - if part.get('type') in ('input_text', 'output_text', 'text'): - texts.append(part.get('text', '')) - elif part.get('type') == 'summary_text': - texts.append(part.get('text', '')) - return '\n'.join(texts).strip() - - -def _get_previous_response_id(key: str) -> str: - """按稳定键读取上一轮 response_id,并在过期时顺手清理。""" - with _RESPONSES_PREV_ID_LOCK: - entry = _RESPONSES_PREV_IDS.get(key) - if not entry: - return '' - response_id, ts = entry - if (time.time() - ts) >= _RESPONSES_PREV_ID_TTL: - _RESPONSES_PREV_IDS.pop(key, None) - return '' - return response_id - - -def _cleanup_previous_response_ids_locked() -> None: - """清理过期的 previous_response_id 缓存项。 - - 这张表只用于短期多轮续接;一旦对话长时间不活跃,就不再需要继续保留, - 以免常驻进程运行过久后累计过多失效状态。 - """ - now = time.time() - expired = [ - key for key, (_, ts) in _RESPONSES_PREV_IDS.items() - if (now - ts) >= _RESPONSES_PREV_ID_TTL - ] - for key in expired: - _RESPONSES_PREV_IDS.pop(key, None) - - def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]: """向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。 diff --git a/routes/responses.py b/routes/responses.py index dd32d5c..4889a40 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -27,9 +27,6 @@ from routes.common import ( build_openai_target, build_responses_target, build_route_context, - ensure_responses_cache_control, - attach_previous_response_id, - remember_response_id, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -250,8 +247,6 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: payload = dict(payload) payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) - payload = ensure_responses_cache_control(payload) - payload = attach_previous_response_id(payload) url, headers = build_responses_target(ctx) payload = apply_body_modifications(payload, ctx.body_modifications) headers = apply_header_modifications(headers, ctx.header_modifications) @@ -279,7 +274,6 @@ def _handle_responses_non_stream( response_data = resp.json() attach_upstream_response(turn, response_data) - remember_response_id(payload, response_data) response_data['model'] = ctx.client_model return _finalize_responses_response( response_data, @@ -319,10 +313,6 @@ def _handle_responses_stream( extracted_usage = _extract_responses_usage(event_data) if extracted_usage: last_usage = extracted_usage - if event_type == 'response.completed': - response_obj = event_data.get('response') if isinstance(event_data, dict) else None - if isinstance(response_obj, dict): - remember_response_id(payload, response_obj) if event_count < 10: _dbg( f'上游事件#{event_count} 类型={event_type} 数据=' @@ -639,22 +629,4 @@ def _finalize_responses_response( attach_client_response(turn, response_data) finalize_turn(turn, usage=response_data.get('usage')) - output_items = response_data.get('output', []) - if isinstance(output_items, list): - for item in output_items: - if not isinstance(item, dict) or item.get('type') != 'reasoning': - continue - summary = item.get('summary', []) - if not isinstance(summary, list): - continue - reasoning_text = ''.join( - part.get('text', '') - for part in summary - if isinstance(part, dict) and part.get('type') == 'summary_text' - ) - if reasoning_text: - cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', []) - thinking_cache.store_from_response(cc_messages, reasoning_text) - break - return jsonify(response_data) From 56faf4fcf1cd37b8144ef1bd908a1403cb465fd3 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:26:07 +0800 Subject: [PATCH 02/10] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=BC=93=E5=AD=98?= =?UTF-8?q?=E5=91=BD=E4=B8=AD=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/cc_anthropic_adapter.py | 8 ++++++ adapters/responses_cc_adapter.py | 44 ++++++++++++++++++++++++-------- routes/chat.py | 3 ++- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/adapters/cc_anthropic_adapter.py b/adapters/cc_anthropic_adapter.py index 512b3de..7848d05 100644 --- a/adapters/cc_anthropic_adapter.py +++ b/adapters/cc_anthropic_adapter.py @@ -261,6 +261,12 @@ def _convert_request_message(message: Any) -> tuple[JsonDict | None, str | None] anthropic_role = 'assistant' if role == 'assistant' else 'user' anthropic_content = _convert_content(message) + if role == 'assistant' and message.get('reasoning_content'): + thinking_block = {'type': 'thinking', 'thinking': message['reasoning_content']} + blocks = _to_blocks(anthropic_content) + blocks.insert(0, thinking_block) + anthropic_content = blocks + if role == 'assistant' and 'tool_calls' in message: anthropic_content = _append_tool_use_blocks(anthropic_content, message.get('tool_calls', [])) @@ -463,6 +469,8 @@ def _convert_content_part(part: Any) -> JsonDict | None: return {'type': 'text', 'text': part.get('text', '')} if part_type == 'image_url': return _convert_image(part) + if part_type == 'image': + return part if part_type in ('tool_use', 'tool_result'): return part return None diff --git a/adapters/responses_cc_adapter.py b/adapters/responses_cc_adapter.py index 0f35c52..e6c864a 100644 --- a/adapters/responses_cc_adapter.py +++ b/adapters/responses_cc_adapter.py @@ -703,7 +703,11 @@ def _append_responses_input_item( instructions: list[str], input_items: list[JsonDict], ) -> None: - """将单条 Chat Completions 消息追加为 Responses `input` 项。""" + """将单条 Chat Completions 消息追加为 Responses `input` 项。 + + 尽量使用 EasyInputMessage 格式({role, content})以减少 token 开销, + 提高上游 prompt caching 的前缀匹配命中率。 + """ if not isinstance(message, dict): return @@ -724,21 +728,26 @@ def _append_responses_input_item( }) return - item: JsonDict = { - 'type': 'message', - 'role': role or 'user', - 'content': _content_to_responses_parts(content, role), - } - input_items.append(item) + text = _content_to_text(content) + has_tool_calls = bool(message.get('tool_calls')) - if role == 'assistant': + if role == 'assistant' and has_tool_calls: + if text: + input_items.append({ + 'type': 'message', + 'role': 'assistant', + 'content': [{'type': 'output_text', 'text': text}], + }) for tool_call in message.get('tool_calls') or []: input_items.append(_build_responses_function_call_item(tool_call)) + else: + input_items.append({'role': role or 'user', 'content': text or ''}) def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None: """将 Responses `input` 数组重建为 Chat Completions `messages` 列表。""" index = 0 + pending_reasoning: str | None = None while index < len(items): item = items[index] @@ -754,20 +763,35 @@ def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None: item_type = item.get('type', '') role = item.get('role', '') + if item_type == 'reasoning': + pending_reasoning = _extract_reasoning_text(item) + index += 1 + continue + if role and not item_type: - messages.append({ + msg: JsonDict = { 'role': role, 'content': _normalize_simple_content(item.get('content', '')), - }) + } + if role == 'assistant' and pending_reasoning: + msg['reasoning_content'] = pending_reasoning + pending_reasoning = None + messages.append(msg) index += 1 continue if item_type == 'message': consumed = _append_message_item(items, start=index, messages=messages) + if item.get('role') == 'assistant' and pending_reasoning and messages: + messages[-1]['reasoning_content'] = pending_reasoning + pending_reasoning = None index += consumed continue if item_type == 'function_call': + if pending_reasoning and messages and messages[-1].get('role') == 'assistant': + messages[-1]['reasoning_content'] = pending_reasoning + pending_reasoning = None _append_function_call_item(item, messages) index += 1 continue diff --git a/routes/chat.py b/routes/chat.py index 1ca6f81..be4f775 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -127,7 +127,8 @@ def chat_completions(): log_route_context('聊天补全', ctx, extra=f'消息数={message_count}') _log_messages(payload) - payload['messages'] = thinking_cache.inject(payload.get('messages', [])) + if ctx.backend != 'responses': + payload['messages'] = thinking_cache.inject(payload.get('messages', [])) if ctx.backend == 'openai': return _handle_openai_backend(ctx, payload, turn) From 70361242aba72f17fc3b0b6d2d53cbab97cf353c Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 22 Mar 2026 08:24:19 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/cc_anthropic_adapter.py | 260 ++++++------ adapters/cc_gemini_adapter.py | 147 ++++--- adapters/helpers.py | 155 +++++++ adapters/openai_compat_fixer.py | 59 ++- adapters/responses_cc_adapter.py | 336 +++++++++------ adapters/unified.py | 354 ++++++++++++++++ routes/chat.py | 677 ++----------------------------- routes/common.py | 179 ++++++-- routes/responses.py | 607 ++------------------------- 9 files changed, 1195 insertions(+), 1579 deletions(-) create mode 100644 adapters/helpers.py create mode 100644 adapters/unified.py diff --git a/adapters/cc_anthropic_adapter.py b/adapters/cc_anthropic_adapter.py index 7848d05..b70fff0 100644 --- a/adapters/cc_anthropic_adapter.py +++ b/adapters/cc_anthropic_adapter.py @@ -18,13 +18,21 @@ from __future__ import annotations import json from typing import Any +from adapters.helpers import ( + build_cc_message, + build_cc_response, + build_cc_tool_call, + build_cc_usage, + extract_text, + make_cc_chunk, + parse_json_safe, + stringify_content, +) from utils.http import gen_id from utils.tool_fixer import fix_anthropic_tool_use, normalize_args, repair_str_replace_args JsonDict = dict[str, Any] - -# Anthropic stop_reason → OpenAI finish_reason _STOP_REASON_MAP = { 'end_turn': 'stop', 'max_tokens': 'length', @@ -78,23 +86,18 @@ def messages_to_cc_response(data: JsonDict, request_id: str | None = None) -> Js data = fix_anthropic_tool_use(data) content_text, reasoning_text, tool_calls = _collect_response_parts(data.get('content', [])) - message = _build_cc_message(content_text, reasoning_text, tool_calls) usage = data.get('usage', {}) - return { - 'id': request_id, - 'object': 'chat.completion', - 'model': data.get('model', 'claude'), - 'choices': [{ - 'index': 0, - 'message': message, - 'finish_reason': _STOP_REASON_MAP.get(data.get('stop_reason', 'end_turn'), 'stop'), - }], - 'usage': _build_cc_usage( + return build_cc_response( + response_id=request_id, + message=build_cc_message(content_text, reasoning_text, tool_calls), + finish_reason=_STOP_REASON_MAP.get(data.get('stop_reason', 'end_turn'), 'stop'), + usage=build_cc_usage( input_tokens=usage.get('input_tokens', 0), output_tokens=usage.get('output_tokens', 0), ), - } + model=data.get('model', 'claude'), + ) # ═══════════════════════════════════════════════════════════ @@ -124,12 +127,8 @@ class AnthropicStreamConverter: self._input_tokens = 0 self._output_tokens = 0 - def process_event(self, event_type: str, event_data: JsonDict) -> list[str]: - """处理单个 Anthropic SSE 事件。 - - 调用方会按事件顺序不断喂入 event/data,这里根据事件类型拆成一个或多个 CC chunk - 字符串,交给上层直接作为 SSE data 发送给 Cursor。 - """ + def process_event(self, event_type: str, event_data: JsonDict) -> list[JsonDict]: + """处理单个 Anthropic SSE 事件,返回 CC chunk dict 列表。""" if event_type == 'message_start': return self._handle_message_start(event_data) if event_type == 'content_block_start': @@ -140,104 +139,64 @@ class AnthropicStreamConverter: return self._handle_message_delta(event_data) return [] - def _handle_message_start(self, event_data: JsonDict) -> list[str]: - """处理消息开始事件,产出 assistant 角色起始 chunk。 - - 这个起始 chunk 很重要,因为 Cursor 侧通常会依赖首个带 role 的 chunk 来初始化 - 当前 assistant 消息。 - """ + def _handle_message_start(self, event_data: JsonDict) -> list[JsonDict]: message = event_data.get('message', {}) self._input_tokens = message.get('usage', {}).get('input_tokens', 0) - chunk = self._make_chunk(delta={'role': 'assistant', 'content': ''}) if message.get('model'): chunk['model'] = message['model'] - return [self._dump_chunk(chunk)] + return [chunk] - def _handle_content_block_start(self, event_data: JsonDict) -> list[str]: - """处理内容块开始事件。 - - 目前这里只需要显式处理 `tool_use`,因为文本和 thinking 的真正内容都在后续 delta - 事件里;而 tool_use 需要先开一个空 arguments 的 tool_call 槽位。 - """ + def _handle_content_block_start(self, event_data: JsonDict) -> list[JsonDict]: block = event_data.get('content_block', {}) if block.get('type') != 'tool_use': return [] - self._tool_index += 1 - return [self._dump_chunk(self._make_chunk(delta={ + return [self._make_chunk(delta={ 'tool_calls': [{ 'index': self._tool_index, 'id': block.get('id', gen_id('toolu_')), 'type': 'function', - 'function': { - 'name': block.get('name', ''), - 'arguments': '', - }, + 'function': {'name': block.get('name', ''), 'arguments': ''}, }] - }))] + })] - def _handle_content_block_delta(self, event_data: JsonDict) -> list[str]: - """处理内容块增量事件。 - - Anthropic 会把文本、思考内容、工具参数拆成不同 delta 类型,这里要分别映射成 - OpenAI chunk 里的 `content`、`reasoning_content` 和 `tool_calls.function.arguments`。 - """ + def _handle_content_block_delta(self, event_data: JsonDict) -> list[JsonDict]: delta = event_data.get('delta', {}) delta_type = delta.get('type', '') if delta_type == 'text_delta' and delta.get('text'): - return [self._dump_chunk(self._make_chunk(delta={'content': delta['text']}))] - + return [self._make_chunk(delta={'content': delta['text']})] if delta_type == 'thinking_delta' and delta.get('thinking'): - return [self._dump_chunk(self._make_chunk(delta={'reasoning_content': delta['thinking']}))] - + return [self._make_chunk(delta={'reasoning_content': delta['thinking']})] if delta_type == 'input_json_delta' and delta.get('partial_json'): - return [self._dump_chunk(self._make_chunk(delta={ + return [self._make_chunk(delta={ 'tool_calls': [{ 'index': self._tool_index, 'function': {'arguments': delta['partial_json']}, }] - }))] - + })] return [] - def _handle_message_delta(self, event_data: JsonDict) -> list[str]: - """处理消息收尾事件,补出 finish_reason 和 usage。 - - 当 Anthropic 发出 `message_delta` 时,说明这一轮 assistant 输出已经收束, - 这里会统一生成最后一个带 usage 的收尾 chunk。 - """ + def _handle_message_delta(self, event_data: JsonDict) -> list[JsonDict]: delta = event_data.get('delta', {}) usage = event_data.get('usage', {}) self._output_tokens = usage.get('output_tokens', 0) - - chunk = self._make_chunk( + chunk = make_cc_chunk( + self._id, delta={}, finish_reason=_STOP_REASON_MAP.get(delta.get('stop_reason', ''), 'stop'), + model='claude', ) - chunk['usage'] = _build_cc_usage( + chunk['usage'] = build_cc_usage( input_tokens=self._input_tokens, output_tokens=self._output_tokens, ) - return [self._dump_chunk(chunk)] + return [chunk] def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: """构造标准 OpenAI Chat Completions chunk 对象。""" - choice: JsonDict = {'index': 0, 'delta': delta} - if finish_reason: - choice['finish_reason'] = finish_reason - return { - 'id': self._id, - 'object': 'chat.completion.chunk', - 'model': 'claude', - 'choices': [choice], - } - - @staticmethod - def _dump_chunk(chunk: JsonDict) -> str: - """统一序列化 chunk,方便上层直接写入 SSE data。""" - return json.dumps(chunk) + return make_cc_chunk(self._id, delta, finish_reason, model='claude') # ═══════════════════════════════════════════════════════════ @@ -254,7 +213,7 @@ def _convert_request_message(message: Any) -> tuple[JsonDict | None, str | None] content = message.get('content', '') if role == 'system': - return None, _flatten_text(content) + return None, extract_text(content) if role == 'tool': return _convert_tool_role_message(message), None @@ -301,7 +260,7 @@ def _append_tool_use_blocks(content: Any, tool_calls: list[Any]) -> list[JsonDic 'type': 'tool_use', 'id': tool_call.get('id', gen_id('toolu_')), 'name': function_data.get('name', ''), - 'input': _parse_tool_arguments(function_data.get('arguments', '{}')), + 'input': parse_json_safe(function_data.get('arguments', '{}')), }) return blocks @@ -372,37 +331,12 @@ def _convert_tool_use_block(block: JsonDict, *, index: int) -> JsonDict: else: arguments_text = str(input_data) - return { - 'index': index, - 'id': block.get('id', gen_id('toolu_')), - 'type': 'function', - 'function': { - 'name': tool_name, - 'arguments': arguments_text, - }, - } - - -def _build_cc_message(content_text: str, reasoning_text: str, tool_calls: list[JsonDict]) -> JsonDict: - """构造 OpenAI CC 响应中的 assistant message。""" - message: JsonDict = { - 'role': 'assistant', - 'content': content_text or None, - } - if reasoning_text: - message['reasoning_content'] = reasoning_text - if tool_calls: - message['tool_calls'] = tool_calls - return message - - -def _build_cc_usage(*, input_tokens: int, output_tokens: int) -> JsonDict: - """将 Anthropic usage 字段映射为 OpenAI usage。""" - return { - 'prompt_tokens': input_tokens, - 'completion_tokens': output_tokens, - 'total_tokens': input_tokens + output_tokens, - } + return build_cc_tool_call( + call_id=block.get('id', gen_id('toolu_')), + name=tool_name, + arguments=arguments_text, + index=index, + ) # ═══════════════════════════════════════════════════════════ @@ -410,35 +344,6 @@ def _build_cc_usage(*, input_tokens: int, output_tokens: int) -> JsonDict: # ═══════════════════════════════════════════════════════════ -def _parse_tool_arguments(arguments: Any) -> Any: - """将 tool_call.arguments 尽量解析为对象,供 Anthropic tool_use.input 使用。 - - Anthropic 的 `tool_use.input` 天然期望对象结构;如果这里直接保留原始字符串, - 后续上游会把它当普通文本而不是工具参数对象。 - """ - if not isinstance(arguments, str): - return arguments if arguments is not None else {} - try: - return json.loads(arguments) - except json.JSONDecodeError: - return {} - - -def _flatten_text(content: Any) -> str: - """将 content 扁平化为纯文本,主要用于 system 消息上提。""" - if isinstance(content, str): - return content - if isinstance(content, list): - parts: list[str] = [] - for part in content: - if isinstance(part, str): - parts.append(part) - elif isinstance(part, dict) and part.get('type') == 'text': - parts.append(part.get('text', '')) - return '\n'.join(parts) - return str(content) - - def _convert_content(message: JsonDict) -> Any: """将 OpenAI 消息的 content 字段转换为 Anthropic 内容格式。""" content = message.get('content', '') @@ -708,3 +613,78 @@ def _pick_window_anchor(refs: list[JsonDict], target: int) -> int | None: if 'cache_control' not in refs[i]: return i return None + + +# ═══════════════════════════════════════════════════════════ +# OutboundTransformer 实现: Anthropic Messages +# ═══════════════════════════════════════════════════════════ + + +class AnthropicOutbound: + """Anthropic Messages 后端的出站转换器。 + + 将 CC 格式转换为 Anthropic Messages 格式并处理响应。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + return cc_to_messages_request(payload) + + def build_url(self, ctx) -> str: + return f'{ctx.target_url.rstrip("/")}/v1/messages' + + def build_headers(self, ctx) -> dict[str, str]: + from utils.http import build_anthropic_headers + return build_anthropic_headers(ctx.api_key) + + def parse_response(self, raw: JsonDict) -> JsonDict: + return messages_to_cc_response(raw) + + def create_stream_processor(self) -> AnthropicStreamProcessor: + return AnthropicStreamProcessor() + + +class AnthropicStreamProcessor: + """Anthropic SSE 流式处理器。 + + 包装 iter_anthropic_sse + AnthropicStreamConverter, + 将 Anthropic 事件流转换为 CC chunk。 + """ + + def __init__(self): + self._converter = AnthropicStreamConverter() + self._input_tokens = 0 + self._output_tokens = 0 + + def iter_events(self, response) -> Iterator: + from utils.http import iter_anthropic_sse + yield from iter_anthropic_sse(response) + + def process_event(self, event: tuple) -> list[JsonDict]: + event_type, event_data = event + return self._converter.process_event(event_type, event_data) + + def extract_usage(self, event: tuple) -> JsonDict | None: + event_type, event_data = event + if event_type == 'message_start': + message_usage = event_data.get('message', {}).get('usage', {}) + if isinstance(message_usage, dict): + self._input_tokens = message_usage.get('input_tokens', 0) + return { + 'prompt_tokens': self._input_tokens, + 'completion_tokens': 0, + 'total_tokens': self._input_tokens, + } + elif event_type == 'message_delta': + delta_usage = event_data.get('usage', {}) + if isinstance(delta_usage, dict): + completion = delta_usage.get('output_tokens', 0) + self._output_tokens = completion + return { + 'prompt_tokens': self._input_tokens, + 'completion_tokens': completion, + 'total_tokens': self._input_tokens + completion, + } + return None + + def finalize(self) -> list[JsonDict]: + return [] diff --git a/adapters/cc_gemini_adapter.py b/adapters/cc_gemini_adapter.py index 5e8aad0..60336c8 100644 --- a/adapters/cc_gemini_adapter.py +++ b/adapters/cc_gemini_adapter.py @@ -8,8 +8,17 @@ from __future__ import annotations import json import logging -from typing import Any +from typing import Any, Iterator +from adapters.helpers import ( + build_cc_message, + build_cc_response, + build_cc_tool_call, + build_cc_usage, + extract_text, + make_cc_chunk, + parse_json_safe, +) from utils.http import gen_id JsonDict = dict[str, Any] @@ -38,7 +47,7 @@ def cc_to_gemini_request(payload: JsonDict) -> JsonDict: for msg in messages: role = msg.get('role', '') if role in ('system', 'developer'): - system_parts.append(_flatten_text(msg.get('content', ''))) + system_parts.append(extract_text(msg.get('content', ''))) continue converted = _convert_message(msg) if converted: @@ -84,21 +93,13 @@ def gemini_to_cc_response(data: JsonDict, request_id: str | None = None) -> Json else: finish_reason = _FINISH_REASON_MAP.get(finish, 'stop') - message: JsonDict = {'role': 'assistant', 'content': content_text or None} - if reasoning_text: - message['reasoning_content'] = reasoning_text - if tool_calls: - message['tool_calls'] = tool_calls - - usage = _convert_usage(data.get('usageMetadata', {})) - - return { - 'id': request_id, - 'object': 'chat.completion', - 'model': data.get('modelVersion', 'gemini'), - 'choices': [{'index': 0, 'message': message, 'finish_reason': finish_reason}], - 'usage': usage, - } + return build_cc_response( + response_id=request_id, + message=build_cc_message(content_text, reasoning_text, tool_calls), + finish_reason=finish_reason, + usage=_convert_usage(data.get('usageMetadata', {})), + model=data.get('modelVersion', 'gemini'), + ) # ═══════════════════════════════════════════════════════════ @@ -166,15 +167,7 @@ class GeminiStreamConverter: return results def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: - choice: JsonDict = {'index': 0, 'delta': delta} - if finish_reason: - choice['finish_reason'] = finish_reason - return { - 'id': self._id, - 'object': 'chat.completion.chunk', - 'model': 'gemini', - 'choices': [choice], - } + return make_cc_chunk(self._id, delta, finish_reason, model='gemini') # ═══════════════════════════════════════════════════════════ @@ -194,7 +187,7 @@ def _convert_message(msg: JsonDict) -> JsonDict | None: 'parts': [{ 'functionResponse': { 'name': msg.get('name', msg.get('tool_call_id', '')), - 'response': _parse_json_safe(msg.get('content', '')), + 'response': parse_json_safe(msg.get('content', ''), fallback={'result': msg.get('content', '')} if msg.get('content', '') else {}), }, }], } @@ -221,7 +214,7 @@ def _convert_message(msg: JsonDict) -> JsonDict | None: parts.append({ 'functionCall': { 'name': func.get('name', ''), - 'args': _parse_json_safe(func.get('arguments', '{}')), + 'args': parse_json_safe(func.get('arguments', '{}'), fallback={}), }, }) @@ -304,15 +297,12 @@ def _extract_parts(parts: list[Any]) -> tuple[str, str, list[JsonDict]]: text += part['text'] elif 'functionCall' in part: fc = part['functionCall'] - tool_calls.append({ - 'index': len(tool_calls), - 'id': fc.get('id') or gen_id('call_'), - 'type': 'function', - 'function': { - 'name': fc.get('name', ''), - 'arguments': json.dumps(fc.get('args', {}), ensure_ascii=False), - }, - }) + tool_calls.append(build_cc_tool_call( + call_id=fc.get('id') or gen_id('call_'), + name=fc.get('name', ''), + arguments=json.dumps(fc.get('args', {}), ensure_ascii=False), + index=len(tool_calls), + )) return text, reasoning, tool_calls @@ -322,12 +312,7 @@ def _convert_usage(meta: JsonDict) -> JsonDict: prompt = meta.get('promptTokenCount', 0) candidates = meta.get('candidatesTokenCount', 0) thoughts = meta.get('thoughtsTokenCount', 0) - completion = candidates + thoughts - return { - 'prompt_tokens': prompt, - 'completion_tokens': completion, - 'total_tokens': prompt + completion, - } + return build_cc_usage(prompt, candidates + thoughts) def _merge_same_role(contents: list[JsonDict]) -> list[JsonDict]: @@ -343,21 +328,65 @@ def _merge_same_role(contents: list[JsonDict]) -> list[JsonDict]: return merged -def _flatten_text(content: Any) -> str: - if isinstance(content, str): - return content - if isinstance(content, list): - return '\n'.join( - p.get('text', '') if isinstance(p, dict) else str(p) - for p in content - ) - return str(content) -def _parse_json_safe(text: Any) -> Any: - if not isinstance(text, str): - return text if text is not None else {} - try: - return json.loads(text) - except (json.JSONDecodeError, ValueError): - return {'result': text} if text else {} +# ═══════════════════════════════════════════════════════════ +# OutboundTransformer 实现: Gemini Contents +# ═══════════════════════════════════════════════════════════ + + +class GeminiOutbound: + """Gemini Contents 后端的出站转换器。 + + 将 CC 格式转换为 Gemini generateContent 格式并处理响应。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + return cc_to_gemini_request(payload) + + def build_url(self, ctx) -> str: + base = ctx.target_url.rstrip('/') + model = ctx.upstream_model + if ctx.is_stream: + return f'{base}/v1/models/{model}:streamGenerateContent?alt=sse' + return f'{base}/v1/models/{model}:generateContent' + + def build_headers(self, ctx) -> dict[str, str]: + from utils.http import build_gemini_headers + return build_gemini_headers(ctx.api_key) + + def parse_response(self, raw: JsonDict) -> JsonDict: + return gemini_to_cc_response(raw) + + def create_stream_processor(self) -> GeminiStreamProcessor: + return GeminiStreamProcessor() + + +class GeminiStreamProcessor: + """Gemini SSE 流式处理器。 + + 包装 iter_gemini_sse + GeminiStreamConverter。 + """ + + def __init__(self): + self._converter = GeminiStreamConverter() + + def iter_events(self, response) -> Iterator: + from utils.http import iter_gemini_sse + yield from iter_gemini_sse(response) + + def process_event(self, event: JsonDict) -> list[JsonDict]: + return self._converter.process_chunk(event) + + def extract_usage(self, event: JsonDict) -> JsonDict | None: + usage_meta = event.get('usageMetadata') if isinstance(event, dict) else None + if isinstance(usage_meta, dict): + return { + 'prompt_tokens': usage_meta.get('promptTokenCount', 0), + 'completion_tokens': usage_meta.get('candidatesTokenCount', 0), + 'total_tokens': usage_meta.get('totalTokenCount', 0), + } + return None + + def finalize(self) -> list[JsonDict]: + return [] diff --git a/adapters/helpers.py b/adapters/helpers.py new file mode 100644 index 0000000..563902b --- /dev/null +++ b/adapters/helpers.py @@ -0,0 +1,155 @@ +"""适配器公共辅助函数 + +收敛多个适配器都在重复实现的 CC 格式构建逻辑: +- CC 消息/Usage/Tool Call/Stream Chunk 的标准构造 +- 内容扁平化、JSON 安全解析、工具输出序列化 +""" + +from __future__ import annotations + +import json +from typing import Any + +from utils.http import gen_id + +JsonDict = dict[str, Any] + + +# ═══════════════════════════════════════════════════════════ +# CC 格式标准构造 +# ═══════════════════════════════════════════════════════════ + + +def build_cc_message( + content_text: str, + reasoning_text: str = '', + tool_calls: list[JsonDict] | None = None, +) -> JsonDict: + """构造标准的 CC assistant 消息。""" + message: JsonDict = { + 'role': 'assistant', + 'content': content_text or None, + } + if reasoning_text: + message['reasoning_content'] = reasoning_text + if tool_calls: + message['tool_calls'] = tool_calls + return message + + +def build_cc_usage(input_tokens: int, output_tokens: int) -> JsonDict: + """构造标准的 CC usage 字典。""" + return { + 'prompt_tokens': input_tokens, + 'completion_tokens': output_tokens, + 'total_tokens': input_tokens + output_tokens, + } + + +def build_cc_tool_call( + call_id: str, + name: str, + arguments: str, + *, + index: int | None = None, +) -> JsonDict: + """构造标准的 CC tool_call 结构。""" + tc: JsonDict = { + 'id': call_id or gen_id('call_'), + 'type': 'function', + 'function': { + 'name': name, + 'arguments': arguments, + }, + } + if index is not None: + tc['index'] = index + return tc + + +def make_cc_chunk( + chunk_id: str, + delta: JsonDict, + finish_reason: str | None = None, + model: str = '', +) -> JsonDict: + """构造标准的 CC 流式 chunk。""" + choice: JsonDict = {'index': 0, 'delta': delta} + if finish_reason: + choice['finish_reason'] = finish_reason + return { + 'id': chunk_id, + 'object': 'chat.completion.chunk', + 'model': model, + 'choices': [choice], + } + + +def build_cc_response( + response_id: str, + message: JsonDict, + finish_reason: str, + usage: JsonDict, + model: str = '', +) -> JsonDict: + """构造标准的 CC 非流式响应。""" + return { + 'id': response_id, + 'object': 'chat.completion', + 'model': model, + 'choices': [{ + 'index': 0, + 'message': message, + 'finish_reason': finish_reason, + }], + 'usage': usage, + } + + +# ═══════════════════════════════════════════════════════════ +# 通用文本/JSON 处理 +# ═══════════════════════════════════════════════════════════ + + +def extract_text(content: Any) -> str: + """从多种内容格式中提取并拼接纯文本。 + + 支持字符串、内容块列表(OpenAI/Anthropic/Responses 风格)。 + """ + if isinstance(content, str): + return content + if not isinstance(content, list): + return str(content) if content is not None else '' + + parts: list[str] = [] + for part in content: + if isinstance(part, str): + parts.append(part) + elif isinstance(part, dict): + part_type = part.get('type', '') + if part_type in ('text', 'output_text', 'input_text'): + parts.append(part.get('text', '')) + elif part_type == 'refusal': + parts.append(part.get('refusal', '')) + elif 'text' in part and not part_type: + parts.append(part['text']) + return '\n'.join(parts) if parts else '' + + +def parse_json_safe(text: Any, fallback: Any = None) -> Any: + """安全解析 JSON,失败时返回 fallback。""" + if not isinstance(text, str): + return text if text is not None else (fallback if fallback is not None else {}) + try: + return json.loads(text) + except (json.JSONDecodeError, ValueError): + return fallback if fallback is not None else {} + + +def stringify_content(content: Any) -> str: + """将任意内容序列化为字符串。""" + if isinstance(content, str): + return content + if content is None: + return '' + return json.dumps(content, ensure_ascii=False) diff --git a/adapters/openai_compat_fixer.py b/adapters/openai_compat_fixer.py index 8a2d252..50348cf 100644 --- a/adapters/openai_compat_fixer.py +++ b/adapters/openai_compat_fixer.py @@ -13,7 +13,7 @@ from __future__ import annotations import json import logging -from typing import Any +from typing import Any, Iterator from utils.http import gen_id from utils.think_tag import extract_from_text @@ -423,3 +423,60 @@ def _rewrite_function_call_finish_reason(choice: JsonDict) -> None: """将旧版 finish_reason=function_call 升级为 tool_calls。""" if choice.get('finish_reason') == 'function_call': choice['finish_reason'] = 'tool_calls' + + +# ═══════════════════════════════════════════════════════════ +# OutboundTransformer 实现: OpenAI Chat +# ═══════════════════════════════════════════════════════════ + + +class OpenAIChatOutbound: + """OpenAI Chat Completions 后端的出站转换器。 + + 由于 CC 本身就是 OpenAI Chat 格式,请求/响应转换主要做兼容性修复。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + return normalize_request(payload) + + def build_url(self, ctx) -> str: + return f'{ctx.target_url.rstrip("/")}/v1/chat/completions' + + def build_headers(self, ctx) -> dict[str, str]: + from utils.http import build_openai_headers + return build_openai_headers(ctx.api_key) + + def parse_response(self, raw: JsonDict) -> JsonDict: + return fix_response(raw) + + def create_stream_processor(self) -> OpenAIChatStreamProcessor: + return OpenAIChatStreamProcessor() + + +class OpenAIChatStreamProcessor: + """OpenAI Chat SSE 流式处理器。 + + 包装 iter_openai_sse + fix_stream_chunk + ThinkTagExtractor。 + """ + + def __init__(self): + from utils.think_tag import ThinkTagExtractor + self._think_extractor = ThinkTagExtractor() + + def iter_events(self, response) -> Iterator: + from utils.http import iter_openai_sse + for chunk in iter_openai_sse(response): + if chunk is None: + return + yield chunk + + def process_event(self, event: JsonDict) -> list[JsonDict]: + chunk = fix_stream_chunk(event) + return list(self._think_extractor.process_chunk(chunk)) + + def extract_usage(self, event: JsonDict) -> JsonDict | None: + return event.get('usage') + + def finalize(self) -> list[JsonDict]: + close_chunk = self._think_extractor.finalize() + return [close_chunk] if close_chunk else [] diff --git a/adapters/responses_cc_adapter.py b/adapters/responses_cc_adapter.py index e6c864a..68acedc 100644 --- a/adapters/responses_cc_adapter.py +++ b/adapters/responses_cc_adapter.py @@ -15,8 +15,18 @@ from __future__ import annotations import json from dataclasses import dataclass -from typing import Any +from typing import Any, Iterator +from adapters.helpers import ( + build_cc_message, + build_cc_response, + build_cc_tool_call, + build_cc_usage, + extract_text, + make_cc_chunk, + stringify_content, +) +from adapters.unified import UnifiedUsage from utils.http import gen_id JsonDict = dict[str, Any] @@ -85,7 +95,7 @@ def cc_to_responses(cc_resp: JsonDict, model: str = '') -> JsonDict: 'status': _response_status_from_finish_reason(finish_reason), 'model': model or cc_resp.get('model', ''), 'output': _build_responses_output(message), - 'usage': _build_responses_usage(cc_resp.get('usage', {})), + 'usage': UnifiedUsage.from_cc_dict(cc_resp.get('usage', {})).to_responses_dict(), } @@ -94,31 +104,18 @@ def responses_to_cc_response(response_data: JsonDict, model: str = '') -> JsonDi output_items = response_data.get('output', []) content_text, reasoning_text, tool_calls = _collect_cc_parts_from_responses_output(output_items) finish_reason = _cc_finish_reason_from_responses(response_data, tool_calls) - message = { - 'role': 'assistant', - 'content': content_text or None, - } - if reasoning_text: - message['reasoning_content'] = reasoning_text - if tool_calls: - message['tool_calls'] = tool_calls - usage = response_data.get('usage', {}) - return { - 'id': response_data.get('id', gen_id('chatcmpl-')), - 'object': 'chat.completion', - 'model': model or response_data.get('model', ''), - 'choices': [{ - 'index': 0, - 'message': message, - 'finish_reason': finish_reason, - }], - 'usage': { - 'prompt_tokens': usage.get('input_tokens', 0), - 'completion_tokens': usage.get('output_tokens', 0), - 'total_tokens': usage.get('total_tokens', 0), - }, - } + + return build_cc_response( + response_id=response_data.get('id', gen_id('chatcmpl-')), + message=build_cc_message(content_text, reasoning_text, tool_calls), + finish_reason=finish_reason, + usage=build_cc_usage( + input_tokens=usage.get('input_tokens', 0), + output_tokens=usage.get('output_tokens', 0), + ), + model=model or response_data.get('model', ''), + ) # ═══════════════════════════════════════════════════════════ @@ -658,15 +655,7 @@ class ResponsesToCCStreamConverter: def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: """构造标准 Chat Completions chunk。""" - choice: JsonDict = {'index': 0, 'delta': delta} - if finish_reason: - choice['finish_reason'] = finish_reason - return { - 'id': self._id, - 'object': 'chat.completion.chunk', - 'model': self._model, - 'choices': [choice], - } + return make_cc_chunk(self._id, delta, finish_reason, model=self._model) # ═══════════════════════════════════════════════════════════ @@ -715,7 +704,7 @@ def _append_responses_input_item( content = message.get('content') if role == 'system': - text = _content_to_text(content) + text = extract_text(content) if text: instructions.append(text) return @@ -724,11 +713,11 @@ def _append_responses_input_item( input_items.append({ 'type': 'function_call_output', 'call_id': message.get('tool_call_id', ''), - 'output': _stringify_output(content), + 'output': stringify_content(content), }) return - text = _content_to_text(content) + text = extract_text(content) has_tool_calls = bool(message.get('tool_calls')) if role == 'assistant' and has_tool_calls: @@ -771,7 +760,7 @@ def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None: if role and not item_type: msg: JsonDict = { 'role': role, - 'content': _normalize_simple_content(item.get('content', '')), + 'content': extract_text(item.get('content', '')), } if role == 'assistant' and pending_reasoning: msg['reasoning_content'] = pending_reasoning @@ -810,7 +799,7 @@ def _append_message_item(items: list[Any], *, start: int, messages: list[JsonDic """将一个 message 项及其后续连续 function_call 项合并成一条消息。""" item = items[start] role = item.get('role', 'assistant') - content = _extract_text(item.get('content', [])) + content = extract_text(item.get('content', [])) message: JsonDict = {'role': role, 'content': content or ''} if role == 'assistant': @@ -828,7 +817,11 @@ def _append_message_item(items: list[Any], *, start: int, messages: list[JsonDic def _append_function_call_item(item: JsonDict, messages: list[JsonDict]) -> None: """将独立的 Responses `function_call` 项挂接到最近的 assistant 消息上。""" - tool_call = _build_cc_tool_call(item) + tool_call = build_cc_tool_call( + call_id=item.get('call_id') or gen_id('call_'), + name=item.get('name', ''), + arguments=item.get('arguments', '{}'), + ) if messages and messages[-1]['role'] == 'assistant': messages[-1].setdefault('tool_calls', []).append(tool_call) @@ -851,12 +844,6 @@ def _convert_function_call_output_item(item: JsonDict) -> JsonDict: } -def _normalize_simple_content(content: Any) -> str: - """将简单 content 载荷规范化为纯文本字符串。""" - if isinstance(content, list): - return _extract_text(content) or '' - return str(content) if content is not None else '' - def _collect_function_calls(items: list[Any], start: int) -> tuple[list[JsonDict], int]: """收集从指定位置开始连续出现的 `function_call` 项。""" @@ -865,24 +852,17 @@ def _collect_function_calls(items: list[Any], start: int) -> tuple[list[JsonDict while index < len(items): next_item = items[index] if isinstance(next_item, dict) and next_item.get('type') == 'function_call': - tool_calls.append(_build_cc_tool_call(next_item)) + tool_calls.append(build_cc_tool_call( + call_id=next_item.get('call_id') or gen_id('call_'), + name=next_item.get('name', ''), + arguments=next_item.get('arguments', '{}'), + )) index += 1 else: break return tool_calls, index - start -def _build_cc_tool_call(item: JsonDict) -> JsonDict: - """将单个 Responses `function_call` 项转换为 CC `tool_call` 结构。""" - return { - 'id': item.get('call_id') or gen_id('call_'), - 'type': 'function', - 'function': { - 'name': item.get('name', ''), - 'arguments': item.get('arguments', '{}'), - }, - } - # ═══════════════════════════════════════════════════════════ # 非流式响应转换辅助 @@ -936,14 +916,6 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict: } -def _build_responses_usage(usage: JsonDict) -> JsonDict: - """将 Chat Completions 的 usage 字段映射为 Responses usage 结构。""" - return { - 'input_tokens': usage.get('prompt_tokens', 0), - 'output_tokens': usage.get('completion_tokens', 0), - 'total_tokens': usage.get('total_tokens', 0), - } - def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]: """从 Responses `output` 中提取文本、思考摘要和工具调用。""" @@ -959,11 +931,16 @@ def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str continue item_type = item.get('type', '') if item_type == 'message': - content_text += _extract_text(item.get('content', [])) + content_text += extract_text(item.get('content', [])) elif item_type == 'reasoning': reasoning_text += _extract_reasoning_text(item) elif item_type == 'function_call': - tool_calls.append(_build_cc_tool_call_from_responses_output(item, index=len(tool_calls))) + tool_calls.append(build_cc_tool_call( + call_id=item.get('call_id') or gen_id('call_'), + name=item.get('name', ''), + arguments=item.get('arguments', '{}'), + index=len(tool_calls), + )) return content_text, reasoning_text, tool_calls @@ -980,18 +957,6 @@ def _extract_reasoning_text(item: JsonDict) -> str: return ''.join(texts) -def _build_cc_tool_call_from_responses_output(item: JsonDict, *, index: int) -> JsonDict: - """将 Responses `function_call` 输出项转换为 CC `tool_call`。""" - return { - 'index': index, - 'id': item.get('call_id') or gen_id('call_'), - 'type': 'function', - 'function': { - 'name': item.get('name', ''), - 'arguments': item.get('arguments', '{}'), - }, - } - def _cc_finish_reason_from_responses(response_data: JsonDict, tool_calls: list[JsonDict]) -> str: """根据 Responses 完成状态推断聊天补全的 finish_reason。""" @@ -1017,57 +982,7 @@ def _map_anthropic_stop_reason(stop_reason: str) -> str: # ═══════════════════════════════════════════════════════════ -def _extract_text(content: Any) -> str: - """从多种内容块结构中提取并拼接纯文本。""" - if isinstance(content, str): - return content - if not isinstance(content, list): - return str(content) if content else '' - texts: list[str] = [] - for part in content: - if isinstance(part, str): - texts.append(part) - elif isinstance(part, dict): - part_type = part.get('type', '') - if part_type in ('output_text', 'input_text', 'text'): - texts.append(part.get('text', '')) - elif part_type == 'refusal': - texts.append(part.get('refusal', '')) - return '\n'.join(texts) if texts else '' - - -def _content_to_text(content: Any) -> str: - """将任意 content 载荷转换为单个字符串。""" - if isinstance(content, str): - return content - if isinstance(content, list): - return _extract_text(content) - return str(content) if content is not None else '' - - -def _content_to_responses_parts(content: Any, role: str = 'user') -> list[JsonDict]: - """将普通消息内容转换为 Responses 内容块数组。 - - assistant 消息使用 output_text,其他角色使用 input_text。 - """ - if isinstance(content, list): - text = _extract_text(content) - else: - text = _content_to_text(content) - if not text: - return [] - part_type = 'output_text' if role == 'assistant' else 'input_text' - return [{'type': part_type, 'text': text}] - - -def _stringify_output(content: Any) -> str: - """将工具输出统一序列化为字符串,便于放入 `function_call_output`。""" - if isinstance(content, str): - return content - if content is None: - return '' - return json.dumps(content, ensure_ascii=False) if not isinstance(content, str) else content def _build_responses_function_call_item(tool_call: JsonDict) -> JsonDict: @@ -1081,6 +996,165 @@ def _build_responses_function_call_item(tool_call: JsonDict) -> JsonDict: } +# ═══════════════════════════════════════════════════════════ +# OutboundTransformer 实现: Responses +# ═══════════════════════════════════════════════════════════ + + +class ResponsesOutbound: + """OpenAI Responses 后端的出站转换器。 + + 将 CC 格式转换为 Responses 格式并处理响应。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + return cc_to_responses_request(payload) + + def build_url(self, ctx) -> str: + return f'{ctx.target_url.rstrip("/")}/v1/responses' + + def build_headers(self, ctx) -> dict[str, str]: + from utils.http import build_openai_headers + return build_openai_headers(ctx.api_key) + + def parse_response(self, raw: JsonDict) -> JsonDict: + return responses_to_cc_response(raw) + + def create_stream_processor(self) -> ResponsesStreamProcessorForCC: + return ResponsesStreamProcessorForCC() + + +class ResponsesStreamProcessorForCC: + """Responses SSE → CC chunk 流式处理器。 + + 用于 /v1/chat/completions -> /v1/responses 的桥接路径。 + """ + + def __init__(self): + self._converter = ResponsesToCCStreamConverter() + + def iter_events(self, response) -> Iterator: + from utils.http import iter_responses_sse + yield from iter_responses_sse(response) + + def process_event(self, event: tuple) -> list[JsonDict]: + event_type, event_data = event + return self._converter.process_event(event_type, event_data) + + def extract_usage(self, event: tuple) -> JsonDict | None: + from adapters.unified import extract_responses_usage + event_type, event_data = event + extracted = extract_responses_usage(event_data) + if extracted: + return { + 'prompt_tokens': extracted.get('input_tokens', 0), + 'completion_tokens': extracted.get('output_tokens', 0), + 'total_tokens': extracted.get('total_tokens', 0), + } + return None + + def finalize(self) -> list[JsonDict]: + return [] + + +class ResponsesNativeOutbound: + """Responses 后端原生透传的出站转换器。 + + 当 /v1/responses → /v1/responses 时直接透传,不经过 CC 中间格式。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + return payload + + def build_url(self, ctx) -> str: + return f'{ctx.target_url.rstrip("/")}/v1/responses' + + def build_headers(self, ctx) -> dict[str, str]: + from utils.http import build_openai_headers + return build_openai_headers(ctx.api_key) + + def parse_response(self, raw: JsonDict) -> JsonDict: + return raw + + def create_stream_processor(self) -> ResponsesNativeStreamProcessor: + return ResponsesNativeStreamProcessor() + + +class ResponsesNativeStreamProcessor: + """Responses 原生 SSE 透传流式处理器。 + + 上游就是 Responses 格式,只需透传事件并做轻量模型名改写。 + 每个事件作为 SSE 字符串直接返回。 + """ + + def iter_events(self, response) -> Iterator: + from utils.http import iter_responses_sse + yield from iter_responses_sse(response) + + def process_event(self, event: tuple) -> list[JsonDict]: + event_type, event_data = event + return [{'_sse_event_type': event_type, **event_data}] + + def extract_usage(self, event: tuple) -> JsonDict | None: + from adapters.unified import extract_responses_usage + _, event_data = event + return extract_responses_usage(event_data) + + def finalize(self) -> list[JsonDict]: + return [] + + +class AnthropicOutboundForResponses: + """Anthropic 后端的出站转换器(用于 /v1/responses 路由)。 + + 流式处理直接将 Anthropic SSE → Responses SSE, + 跳过 CC 中间态以保留原始时序。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + from adapters.cc_anthropic_adapter import cc_to_messages_request + return cc_to_messages_request(payload) + + def build_url(self, ctx) -> str: + return f'{ctx.target_url.rstrip("/")}/v1/messages' + + def build_headers(self, ctx) -> dict[str, str]: + from utils.http import build_anthropic_headers + return build_anthropic_headers(ctx.api_key) + + def parse_response(self, raw: JsonDict) -> JsonDict: + from adapters.cc_anthropic_adapter import messages_to_cc_response + return messages_to_cc_response(raw) + + def create_stream_processor(self) -> AnthropicToResponsesStreamProcessor: + return AnthropicToResponsesStreamProcessor() + + +class AnthropicToResponsesStreamProcessor: + """Anthropic SSE → Responses SSE 直接转换的流式处理器。 + + 跳过 CC 中间态,直接将 Anthropic 事件映射为 Responses 事件。 + 返回的 chunk 是 SSE 字符串。 + """ + + def __init__(self): + self._converter = ResponsesStreamConverter() + + def iter_events(self, response) -> Iterator: + from utils.http import iter_anthropic_sse + yield from iter_anthropic_sse(response) + + def process_event(self, event: tuple) -> list[str]: + event_type, event_data = event + return self._converter.process_anthropic_event(event_type, event_data) + + def extract_usage(self, event: tuple) -> JsonDict | None: + return None + + def finalize(self) -> list[str]: + return self._converter.finalize() + + def _convert_cc_tools_to_responses(tools: Any) -> list[JsonDict]: """将聊天补全风格的工具定义转换为 Responses `tools` 列表。""" if not isinstance(tools, list): diff --git a/adapters/unified.py b/adapters/unified.py new file mode 100644 index 0000000..db2e087 --- /dev/null +++ b/adapters/unified.py @@ -0,0 +1,354 @@ +"""统一中间格式与转换器接口 + +定义项目中所有 API 格式共用的中间表示和转换器协议: +- UnifiedRequest / UnifiedResponse: 统一的请求/响应数据结构 +- InboundTransformer / OutboundTransformer: 入站/出站转换器接口 +- StreamProcessor: 流式事件处理器接口 +- ClientFormatter: 客户端响应格式化接口 +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from typing import Any, Iterator, Protocol + +from flask import Response, jsonify + +import settings +from utils.http import forward_request, gen_id, sse_response +from utils.request_logger import ( + append_client_event, + append_upstream_event, + attach_client_response, + attach_error, + attach_upstream_request, + attach_upstream_response, + finalize_turn, + set_stream_summary, +) +from utils.usage_tracker import usage_tracker + +logger = logging.getLogger(__name__) + +JsonDict = dict[str, Any] + + +# ═══════════════════════════════════════════════════════════ +# 统一数据模型 +# ═══════════════════════════════════════════════════════════ + + +@dataclass +class UnifiedUsage: + """标准化的令牌用量统计。""" + + input_tokens: int = 0 + output_tokens: int = 0 + total_tokens: int = 0 + + def to_cc_dict(self) -> JsonDict: + return { + 'prompt_tokens': self.input_tokens, + 'completion_tokens': self.output_tokens, + 'total_tokens': self.total_tokens, + } + + def to_responses_dict(self) -> JsonDict: + return { + 'input_tokens': self.input_tokens, + 'output_tokens': self.output_tokens, + 'total_tokens': self.total_tokens, + } + + @classmethod + def from_cc_dict(cls, d: JsonDict) -> UnifiedUsage: + return cls( + input_tokens=d.get('prompt_tokens', 0), + output_tokens=d.get('completion_tokens', 0), + total_tokens=d.get('total_tokens', 0), + ) + + @classmethod + def from_responses_dict(cls, d: JsonDict) -> UnifiedUsage: + return cls( + input_tokens=d.get('input_tokens', 0), + output_tokens=d.get('output_tokens', 0), + total_tokens=d.get('total_tokens', 0), + ) + + +# ═══════════════════════════════════════════════════════════ +# 转换器接口 +# ═══════════════════════════════════════════════════════════ + + +class OutboundTransformer(Protocol): + """出站转换器:将 CC 中间格式转换为上游后端格式。 + + 所有后端(OpenAI Chat / Responses / Anthropic / Gemini)各实现一套, + 内部复用各自现有的适配器函数。 + """ + + def build_request(self, payload: JsonDict) -> JsonDict: + """将 CC 格式请求体转换为上游格式请求体。""" + ... + + def build_url(self, ctx: Any) -> str: + """根据路由上下文构建上游请求 URL。""" + ... + + def build_headers(self, ctx: Any) -> JsonDict: + """根据路由上下文构建上游请求头。""" + ... + + def parse_response(self, raw: JsonDict) -> JsonDict: + """将上游非流式响应转换回 CC 格式。""" + ... + + def create_stream_processor(self) -> StreamProcessor: + """创建该后端对应的流式事件处理器。""" + ... + + +class StreamProcessor(Protocol): + """流式事件处理器接口。 + + 每个后端的 SSE 格式不同,StreamProcessor 封装了具体的迭代与转换逻辑, + 让通用流式处理器不必关心后端差异。 + """ + + def iter_events(self, response: Any) -> Iterator: + """从上游 HTTP 响应中迭代原始事件。""" + ... + + def process_event(self, event: Any) -> list: + """将单个上游事件转换为输出项列表。 + + 返回值通常是 list[JsonDict](CC chunk), + 但 Anthropic→Responses 路径返回 list[str](SSE 字符串)。 + """ + ... + + def extract_usage(self, event: Any) -> JsonDict | None: + """从上游事件中提取用量信息(如果有的话)。""" + ... + + def finalize(self) -> list: + """流结束时产出的收尾项。""" + ... + + +class ClientFormatter(Protocol): + """客户端响应格式化器。 + + 根据客户端期望的 API 格式(CC 或 Responses),将通用的处理结果 + 格式化为最终返回给客户端的形态。 + """ + + def format_response(self, cc_response: JsonDict, model: str) -> JsonDict: + """格式化非流式响应。""" + ... + + def wrap_stream_item(self, item: Any) -> str: + """将单个流式输出项包装为 SSE 字符串。""" + ... + + def format_error(self, message: str) -> str: + """构造流式错误消息。""" + ... + + def format_done(self) -> str | None: + """构造流结束标记(CC 返回 [DONE],Responses 返回 None)。""" + ... + + def start_events(self) -> list[str]: + """流开始前的初始事件(Responses 返回 response.created)。""" + ... + + @property + def usage_input_key(self) -> str: + """usage 中输入令牌的字段名。""" + ... + + @property + def usage_output_key(self) -> str: + """usage 中输出令牌的字段名。""" + ... + + +# ═══════════════════════════════════════════════════════════ +# 通用请求/响应处理器 +# ═══════════════════════════════════════════════════════════ + + +def _dbg(message: str) -> None: + if settings.get_debug_mode() in ('simple', 'verbose'): + logger.info('[通用调试] %s', message) + + +def extract_responses_usage(event_data: JsonDict) -> JsonDict | None: + """从原生 Responses 事件中提取 usage(公共辅助)。""" + if not isinstance(event_data, dict): + return None + usage = event_data.get('usage') + if isinstance(usage, dict): + return usage + response_obj = event_data.get('response') + if isinstance(response_obj, dict): + nested_usage = response_obj.get('usage') + if isinstance(nested_usage, dict): + return nested_usage + return None + + +def handle_non_stream( + ctx: Any, + outbound: OutboundTransformer, + client_fmt: ClientFormatter, + payload: JsonDict, + turn: JsonDict | None, +) -> Response: + """通用非流式处理器。 + + 替代 chat.py 和 responses.py 中的 8 个 _handle_xxx_non_stream 函数。 + """ + from routes.common import apply_body_modifications, apply_header_modifications, log_usage + + upstream_payload = outbound.build_request(payload) + url = outbound.build_url(ctx) + headers = outbound.build_headers(ctx) + upstream_payload = apply_body_modifications(upstream_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + upstream_payload['stream'] = False + attach_upstream_request(turn, upstream_payload, headers) + resp, err = forward_request(url, headers, upstream_payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + cc_response = outbound.parse_response(raw) + result = client_fmt.format_response(cc_response, ctx.client_model) + + _dbg('格式化后响应=' + json.dumps(result, ensure_ascii=False, default=str)[:1000]) + usage_data = result.get('usage', {}) + log_usage('通用', usage_data, input_key=client_fmt.usage_input_key, output_key=client_fmt.usage_output_key) + usage_tracker.record( + ctx.client_model, + usage_data, + input_key=client_fmt.usage_input_key, + output_key=client_fmt.usage_output_key, + ) + attach_client_response(turn, result) + finalize_turn(turn, usage=usage_data) + return jsonify(result) + + +def handle_stream( + ctx: Any, + outbound: OutboundTransformer, + client_fmt: ClientFormatter, + payload: JsonDict, + turn: JsonDict | None, +) -> Response: + """通用流式处理器。 + + 替代 chat.py 和 responses.py 中的 8 个 _handle_xxx_stream 函数。 + """ + from routes.common import apply_body_modifications, apply_header_modifications + + upstream_payload = outbound.build_request(payload) + url = outbound.build_url(ctx) + headers = outbound.build_headers(ctx) + upstream_payload = apply_body_modifications(upstream_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + upstream_payload['stream'] = True + processor = outbound.create_stream_processor() + + def generate(): + for start_evt in client_fmt.start_events(): + yield start_evt + + attach_upstream_request(turn, upstream_payload, headers) + resp, err = forward_request(url, headers, upstream_payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield client_fmt.format_error(str(err)) + return + + event_count = 0 + client_items: list[str] = [] + last_usage: JsonDict | None = None + + for event in processor.iter_events(resp): + append_upstream_event(turn, {'type': 'upstream_event', 'data': event}) + + extracted = processor.extract_usage(event) + if extracted is not None: + last_usage = extracted + + if event_count < 10: + _dbg( + f'上游事件#{event_count}=' + + json.dumps(event, ensure_ascii=False, default=str)[:500] + ) + + for chunk in processor.process_event(event): + if isinstance(chunk, dict): + chunk['model'] = ctx.client_model + wrapped = client_fmt.wrap_stream_item(chunk) + client_items.append(wrapped) + append_client_event(turn, {'type': 'stream_item', 'data': chunk}) + if event_count < 10: + _dbg( + f'返回片段#{event_count}=' + + json.dumps(chunk, ensure_ascii=False, default=str)[:500] + ) + yield wrapped + + event_count += 1 + + for chunk in processor.finalize(): + if isinstance(chunk, dict): + chunk['model'] = ctx.client_model + wrapped = client_fmt.wrap_stream_item(chunk) + client_items.append(wrapped) + append_client_event(turn, {'type': 'stream_item', 'data': chunk}) + yield wrapped + + done = client_fmt.format_done() + if done: + append_client_event(turn, {'type': 'done'}) + yield done + + _dbg(f'流式响应结束,共 {event_count} 个事件') + usage_tracker.record( + ctx.client_model, + last_usage, + input_key=client_fmt.usage_input_key, + output_key=client_fmt.usage_output_key, + ) + set_stream_summary(turn, { + 'event_count': event_count, + 'client_item_count': len(client_items), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'stream.summary', + 'model': ctx.client_model, + 'event_count': len(client_items), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) diff --git a/routes/chat.py b/routes/chat.py index be4f775..7e01346 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -1,8 +1,7 @@ """路由: /v1/chat/completions 处理 Cursor 发来的 OpenAI Chat Completions 格式请求。 -根据模型映射的后端类型,转发到 OpenAI 兼容接口、Anthropic Messages 接口, -或原生 OpenAI Responses 接口。 +根据模型映射的后端类型,通过统一的出站转换器转发到不同后端。 """ from __future__ import annotations @@ -11,103 +10,33 @@ import json import logging from typing import Any -import settings from flask import Blueprint, jsonify, request -from adapters.cc_anthropic_adapter import ( - AnthropicStreamConverter, - cc_to_messages_request, - messages_to_cc_response, -) -from adapters.cc_gemini_adapter import ( - GeminiStreamConverter, - cc_to_gemini_request, - gemini_to_cc_response, -) -from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request -from adapters.responses_cc_adapter import ( - ResponsesToCCStreamConverter, - cc_to_responses_request, - responses_to_cc, - responses_to_cc_response, -) -from config import Config +from adapters.openai_compat_fixer import normalize_request +from adapters.responses_cc_adapter import responses_to_cc +from adapters.unified import handle_non_stream, handle_stream from routes.common import ( - RouteContext, - apply_body_modifications, - apply_header_modifications, - build_anthropic_target, - build_gemini_target, - build_openai_target, - build_responses_target, + CCClientFormatter, build_route_context, - chat_error_chunk, - inject_instructions_anthropic, + get_outbound, inject_instructions_cc, - inject_instructions_responses, log_route_context, - log_usage, - sse_data_message, ) -from utils.http import ( - forward_request, - gen_id, - iter_anthropic_sse, - iter_gemini_sse, - iter_openai_sse, - iter_responses_sse, - sse_response, -) -from utils.request_logger import ( - append_client_event, - append_upstream_event, - attach_client_response, - attach_error, - attach_upstream_request, - attach_upstream_response, - finalize_turn, - set_stream_summary, - start_turn, -) -from utils.think_tag import ThinkTagExtractor +from utils.request_logger import start_turn from utils.thinking_cache import thinking_cache -from utils.usage_tracker import usage_tracker logger = logging.getLogger(__name__) bp = Blueprint('chat', __name__) -def _dbg(message: str) -> None: - """仅在调试模式下输出详细日志。""" - if settings.get_debug_mode() in ('simple', 'verbose'): - logger.info('[聊天补全调试] %s', message) - - -def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None: - """从原生 Responses 事件中提取 usage。 - - `/v1/chat/completions -> /v1/responses` 的桥接流式路径也需要读取 usage, - 因此在本文件保留一个本地辅助函数,避免依赖其他路由模块的私有实现。 - """ - if not isinstance(event_data, dict): - return None - usage = event_data.get('usage') - if isinstance(usage, dict): - return usage - response_obj = event_data.get('response') - if isinstance(response_obj, dict): - nested_usage = response_obj.get('usage') - if isinstance(nested_usage, dict): - return nested_usage - return None - - @bp.route('/v1/chat/completions', methods=['POST']) def chat_completions(): """处理聊天补全请求并按模型映射分发到不同后端。""" original_payload = request.get_json(force=True) - payload, message_count = _normalize_chat_payload(json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))) + payload, message_count = _normalize_chat_payload( + json.loads(json.dumps(original_payload, ensure_ascii=False, default=str)) + ) client_model = payload.get('model', 'unknown') is_stream = payload.get('stream', False) @@ -127,23 +56,38 @@ def chat_completions(): log_route_context('聊天补全', ctx, extra=f'消息数={message_count}') _log_messages(payload) - if ctx.backend != 'responses': - payload['messages'] = thinking_cache.inject(payload.get('messages', [])) + payload['model'] = ctx.upstream_model + payload = normalize_request(payload) + payload['messages'] = thinking_cache.inject(payload.get('messages', [])) + payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) - if ctx.backend == 'openai': - return _handle_openai_backend(ctx, payload, turn) - if ctx.backend == 'responses': - return _handle_responses_backend(ctx, payload, turn) - if ctx.backend == 'gemini': - return _handle_gemini_backend(ctx, payload, turn) - return _handle_anthropic_backend(ctx, payload, turn) + outbound = get_outbound(ctx.backend) + client_fmt = CCClientFormatter() + + if ctx.is_stream: + result = handle_stream(ctx, outbound, client_fmt, payload, turn) + else: + result = handle_non_stream(ctx, outbound, client_fmt, payload, turn) + + if not ctx.is_stream and isinstance(result, tuple): + response_data = result + elif hasattr(result, 'json'): + try: + response_data = result.get_json(silent=True) or {} + except Exception: + response_data = {} + else: + response_data = {} + + _try_cache_thinking(response_data) + return result def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]: """整理聊天补全入口的请求体。 - 这里保留了一层兼容逻辑:当 Cursor 或调用方把 Responses 格式误发到 - `/v1/chat/completions` 时,先降级转换成 Chat Completions,再进入统一主流程。 + 当 Cursor 或调用方把 Responses 格式误发到 `/v1/chat/completions` 时, + 先降级转换成 Chat Completions,再进入统一主流程。 """ message_count = len(payload.get('messages', [])) @@ -157,548 +101,11 @@ def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], in return payload, message_count -def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any]): - """处理走 OpenAI 兼容后端的聊天补全请求。""" - _dbg( - '原始请求字段=' + str(list(payload.keys())) + ' ' - + '附加字段=' - + json.dumps( - {k: v for k, v in payload.items() if k != 'messages'}, - ensure_ascii=False, - default=str, - )[:500] - ) - - payload = normalize_request(payload, ctx.upstream_model) - payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) - _dbg( - f'标准化完成:模型={payload.get("model")} ' - f'工具数={len(payload.get("tools", []))}' - ) - - url, headers = build_openai_target(ctx) - payload = apply_body_modifications(payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - if ctx.is_stream: - return _handle_openai_stream(ctx, payload, url, headers, turn) - return _handle_openai_non_stream(ctx, payload, url, headers, turn) - - -def _handle_openai_non_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any], -): - """处理 OpenAI 兼容后端的非流式返回。""" - payload['stream'] = False - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - data = fix_response(raw) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='修复后响应') - - -def _handle_openai_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any], -): - """处理 OpenAI 兼容后端的流式返回。""" - payload['stream'] = True - - def generate(): - """消费上游 OpenAI SSE,并逐段产出给 Cursor 的聊天补全流。""" - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield chat_error_chunk(str(err)) - return - - think_extractor = ThinkTagExtractor() - chunk_count = 0 - last_usage = None - client_chunks: list[dict[str, Any]] = [] - - for chunk in iter_openai_sse(resp): - if chunk is None: - _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') - close_chunk = think_extractor.finalize() - if close_chunk: - client_chunks.append(close_chunk) - append_client_event(turn, {'type': 'chat_chunk', 'data': close_chunk}) - yield sse_data_message(close_chunk) - append_client_event(turn, {'type': 'done'}) - yield sse_data_message('[DONE]') - usage_tracker.record(ctx.client_model, last_usage) - set_stream_summary(turn, { - 'chunk_count': chunk_count, - 'client_chunk_count': len(client_chunks), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'chat.completion.stream.summary', - 'model': ctx.client_model, - 'chunk_count': len(client_chunks), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - return - - append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk}) - if chunk.get('usage'): - last_usage = chunk['usage'] - - if chunk_count < 10: - _dbg( - f'上游原始片段#{chunk_count}=' - + json.dumps(chunk, ensure_ascii=False, default=str)[:500] - ) - - chunk = fix_stream_chunk(chunk) - chunk['model'] = ctx.client_model - - for out in think_extractor.process_chunk(chunk): - client_chunks.append(out) - append_client_event(turn, {'type': 'chat_chunk', 'data': out}) - if chunk_count < 10: - _dbg( - f'返回片段#{chunk_count}=' - + json.dumps(out, ensure_ascii=False, default=str)[:500] - ) - yield sse_data_message(out) - - chunk_count += 1 - - usage_tracker.record(ctx.client_model, last_usage) - set_stream_summary(turn, { - 'chunk_count': chunk_count, - 'client_chunk_count': len(client_chunks), - 'usage': last_usage, - 'ended_without_done': True, - }) - attach_client_response(turn, { - 'type': 'chat.completion.stream.summary', - 'model': ctx.client_model, - 'chunk_count': len(client_chunks), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) - - -def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): - """处理走原生 Responses 后端的聊天补全请求。 - - 当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求, - 返回时再转换回聊天补全协议。 - """ - responses_payload = cc_to_responses_request(payload) - responses_payload['model'] = ctx.upstream_model - responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) - _dbg( - '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) - + f' 输入项数={len(responses_payload.get("input", []))}' - ) - - url, headers = build_responses_target(ctx) - responses_payload = apply_body_modifications(responses_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - if ctx.is_stream: - return _handle_responses_stream(ctx, responses_payload, url, headers, turn) - return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn) - - -def _handle_responses_non_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理原生 Responses 后端的非流式返回。""" - payload['stream'] = False - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - data = responses_to_cc_response(raw, ctx.client_model) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后') - - -def _handle_responses_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理原生 Responses 后端的流式返回。""" - payload['stream'] = True - converter = ResponsesToCCStreamConverter(model=ctx.client_model) - - def generate(): - """消费上游 Responses 事件,并实时转换成聊天补全 chunk。""" - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield chat_error_chunk(str(err)) - return - - event_count = 0 - client_chunks: list[Any] = [] - last_usage: dict[str, Any] | None = None - for event_type, event_data in iter_responses_sse(resp): - append_upstream_event(turn, {'type': event_type, 'data': event_data}) - extracted_usage = _extract_responses_usage(event_data) - if extracted_usage: - last_usage = { - 'prompt_tokens': extracted_usage.get('input_tokens', 0), - 'completion_tokens': extracted_usage.get('output_tokens', 0), - 'total_tokens': extracted_usage.get('total_tokens', 0), - } - if event_count < 10: - _dbg( - f'上游事件#{event_count} 类型={event_type} 数据=' - + json.dumps(event_data, ensure_ascii=False, default=str)[:500] - ) - - for chunk in converter.process_event(event_type, event_data): - client_chunks.append(chunk) - append_client_event(turn, {'type': 'chat_chunk', 'data': chunk}) - if isinstance(chunk, dict) and isinstance(chunk.get('usage'), dict): - last_usage = chunk['usage'] - if event_count < 10: - _dbg( - f'返回片段#{event_count}=' - + json.dumps(chunk, ensure_ascii=False, default=str)[:500] - ) - yield sse_data_message(chunk) - - event_count += 1 - - _dbg(f'流式响应结束,共 {event_count} 个事件') - append_client_event(turn, {'type': 'done'}) - yield sse_data_message('[DONE]') - usage_tracker.record(ctx.client_model, last_usage) - set_stream_summary(turn, { - 'event_count': event_count, - 'client_chunk_count': len(client_chunks), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'chat.completion.stream.summary', - 'model': ctx.client_model, - 'chunk_count': len(client_chunks), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) - - -def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): - """处理走 Gemini Contents 后端的聊天补全请求。""" - payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) - gemini_payload = cc_to_gemini_request(payload) - _dbg( - '已转换为 Gemini 请求:字段=' + str(list(gemini_payload.keys())) - + f' 内容数={len(gemini_payload.get("contents", []))}' - ) - - url, headers = build_gemini_target(ctx, stream=ctx.is_stream) - gemini_payload = apply_body_modifications(gemini_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - if ctx.is_stream: - return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn) - return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn) - - -def _handle_gemini_non_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Gemini 后端的非流式返回。""" - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - data = gemini_to_cc_response(raw) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='Gemini 转回聊天补全后') - - -def _handle_gemini_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Gemini 后端的流式返回。""" - converter = GeminiStreamConverter() - - def generate(): - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield chat_error_chunk(str(err)) - return - - chunk_count = 0 - client_chunks: list[Any] = [] - last_usage: dict[str, Any] | None = None - for gemini_chunk in iter_gemini_sse(resp): - append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk}) - usage_meta = gemini_chunk.get('usageMetadata') if isinstance(gemini_chunk, dict) else None - if isinstance(usage_meta, dict): - last_usage = { - 'prompt_tokens': usage_meta.get('promptTokenCount', 0), - 'completion_tokens': usage_meta.get('candidatesTokenCount', 0), - 'total_tokens': usage_meta.get('totalTokenCount', 0), - } - if chunk_count < 10: - _dbg( - f'上游 Gemini 片段#{chunk_count}=' - + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500] - ) - - for cc_chunk in converter.process_chunk(gemini_chunk): - cc_chunk['model'] = ctx.client_model - client_chunks.append(cc_chunk) - append_client_event(turn, {'type': 'chat_chunk', 'data': cc_chunk}) - if isinstance(cc_chunk, dict) and isinstance(cc_chunk.get('usage'), dict): - last_usage = cc_chunk['usage'] - if chunk_count < 10: - _dbg( - f'返回片段#{chunk_count}=' - + json.dumps(cc_chunk, ensure_ascii=False, default=str)[:500] - ) - yield sse_data_message(cc_chunk) - - chunk_count += 1 - - _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') - append_client_event(turn, {'type': 'done'}) - yield sse_data_message('[DONE]') - usage_tracker.record(ctx.client_model, last_usage) - set_stream_summary(turn, { - 'chunk_count': chunk_count, - 'client_chunk_count': len(client_chunks), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'chat.completion.stream.summary', - 'model': ctx.client_model, - 'chunk_count': len(client_chunks), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) - - -def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): - """处理走 Anthropic Messages 后端的聊天补全请求。""" - payload['model'] = ctx.upstream_model - anthropic_payload = cc_to_messages_request(payload) - anthropic_payload = inject_instructions_anthropic(anthropic_payload, ctx.custom_instructions, ctx.instructions_position) - _dbg( - '已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys())) - + f' 消息数={len(anthropic_payload.get("messages", []))}' - ) - - url, headers = build_anthropic_target(ctx) - anthropic_payload = apply_body_modifications(anthropic_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - if ctx.is_stream: - return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn) - return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn) - - -def _handle_anthropic_non_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Anthropic 后端的非流式返回。""" - payload['stream'] = False - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - data = messages_to_cc_response(raw) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='Messages 转回聊天补全后') - - -def _handle_anthropic_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Anthropic 后端的流式返回。 - - 这里仍然保留独立的事件级转换器,而不是先落成完整响应再回放, - 是为了尽量保持 Cursor 端的流式体验和工具调用时序。 - """ - payload['stream'] = True - converter = AnthropicStreamConverter() - - def generate(): - """消费上游 Anthropic 事件流,并逐步映射为聊天补全 SSE。""" - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield chat_error_chunk(str(err)) - return - - event_count = 0 - client_chunks: list[Any] = [] - last_usage: dict[str, Any] | None = None - for event_type, event_data in iter_anthropic_sse(resp): - append_upstream_event(turn, {'type': event_type, 'data': event_data}) - if event_type == 'message_start': - message_usage = event_data.get('message', {}).get('usage', {}) - if isinstance(message_usage, dict): - last_usage = { - 'prompt_tokens': message_usage.get('input_tokens', 0), - 'completion_tokens': 0, - 'total_tokens': message_usage.get('input_tokens', 0), - } - elif event_type == 'message_delta': - delta_usage = event_data.get('usage', {}) - if isinstance(delta_usage, dict): - prompt_tokens = 0 - if isinstance(last_usage, dict): - prompt_tokens = last_usage.get('prompt_tokens', 0) - completion_tokens = delta_usage.get('output_tokens', 0) - last_usage = { - 'prompt_tokens': prompt_tokens, - 'completion_tokens': completion_tokens, - 'total_tokens': prompt_tokens + completion_tokens, - } - if event_count < 10: - _dbg( - f'上游事件#{event_count} 类型={event_type} 数据=' - + json.dumps(event_data, ensure_ascii=False, default=str)[:500] - ) - - for chunk_str in converter.process_event(event_type, event_data): - try: - chunk_obj = json.loads(chunk_str) - chunk_obj['model'] = ctx.client_model - if isinstance(chunk_obj.get('usage'), dict): - last_usage = chunk_obj['usage'] - chunk_str = json.dumps(chunk_obj, ensure_ascii=False) - except (json.JSONDecodeError, TypeError): - pass - - client_chunks.append(chunk_str) - append_client_event(turn, {'type': 'chat_chunk', 'data': chunk_str}) - if event_count < 10: - _dbg(f'返回片段#{event_count}={chunk_str[:500]}') - yield sse_data_message(chunk_str) - - event_count += 1 - - _dbg(f'流式响应结束,共 {event_count} 个事件') - append_client_event(turn, {'type': 'done'}) - yield sse_data_message('[DONE]') - usage_tracker.record(ctx.client_model, last_usage) - set_stream_summary(turn, { - 'event_count': event_count, - 'client_chunk_count': len(client_chunks), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'chat.completion.stream.summary', - 'model': ctx.client_model, - 'chunk_count': len(client_chunks), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) - - -def _finalize_chat_response( - ctx: RouteContext, - data: dict[str, Any], - *, - turn: dict[str, Any] | None, - debug_label: str, -): - """统一收尾非流式聊天补全响应。 - - 三条后端链路最终都会回到 Chat Completions 格式,因此这里集中做: - - 回填给 Cursor 展示的模型名 - - 输出统一调试日志 - - 输出统一令牌统计日志 - """ - data['model'] = ctx.client_model - _dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000]) - log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens') - - usage_tracker.record(ctx.client_model, data.get('usage')) - attach_client_response(turn, data) - finalize_turn(turn, usage=data.get('usage')) - - for choice in data.get('choices', []): +def _try_cache_thinking(response_data: dict[str, Any]) -> None: + """尝试从非流式响应中缓存思维链内容。""" + if not isinstance(response_data, dict): + return + for choice in response_data.get('choices', []): msg = choice.get('message', {}) if msg.get('reasoning_content'): thinking_cache.store_from_response( @@ -707,8 +114,6 @@ def _finalize_chat_response( ) break - return jsonify(data) - def _log_messages(payload: dict[str, Any]) -> None: """记录消息摘要,方便排查请求形态是否符合预期。""" diff --git a/routes/common.py b/routes/common.py index 0ad7518..eba89b2 100644 --- a/routes/common.py +++ b/routes/common.py @@ -12,7 +12,6 @@ import logging from typing import Any import settings -from utils.http import build_anthropic_headers, build_gemini_headers, build_openai_headers logger = logging.getLogger(__name__) @@ -55,42 +54,6 @@ def build_route_context(client_model: str, is_stream: bool) -> RouteContext: ) -def build_openai_target(ctx: RouteContext) -> tuple[str, dict[str, str]]: - """根据路由上下文生成 OpenAI 兼容后端的地址和请求头。""" - url = f'{ctx.target_url.rstrip("/")}/v1/chat/completions' - headers = build_openai_headers(ctx.api_key) - return url, headers - - -def build_responses_target(ctx: RouteContext) -> tuple[str, dict[str, str]]: - """根据路由上下文生成 OpenAI Responses 后端的地址和请求头。""" - url = f'{ctx.target_url.rstrip("/")}/v1/responses' - headers = build_openai_headers(ctx.api_key) - return url, headers - - -def build_anthropic_target(ctx: RouteContext) -> tuple[str, dict[str, str]]: - """根据路由上下文生成 Anthropic 后端的地址和请求头。""" - url = f'{ctx.target_url.rstrip("/")}/v1/messages' - headers = build_anthropic_headers(ctx.api_key) - return url, headers - - -def build_gemini_target(ctx: RouteContext, stream: bool = False) -> tuple[str, dict[str, str]]: - """根据路由上下文生成 Gemini 后端的地址和请求头。 - - Gemini URL 格式: {base}/v1/models/{model}:generateContent - 流式: {base}/v1/models/{model}:streamGenerateContent?alt=sse - """ - base = ctx.target_url.rstrip('/') - model = ctx.upstream_model - if stream: - url = f'{base}/v1/models/{model}:streamGenerateContent?alt=sse' - else: - url = f'{base}/v1/models/{model}:generateContent' - headers = build_gemini_headers(ctx.api_key) - return url, headers - def log_route_context(route_name: str, ctx: RouteContext, *, extra: str = '') -> None: """统一输出路由级日志,避免不同入口的日志格式逐渐漂移。""" @@ -137,11 +100,6 @@ def sse_event_message(event_type: str, data: Any) -> str: return f'event: {event_type}\ndata: {payload}\n\n' -def chat_error_chunk(message: str, error_type: str = 'upstream_error') -> str: - """构造聊天补全流式接口使用的错误消息。""" - return sse_data_message({'error': {'message': message, 'type': error_type}}) - - def responses_error_event(message: str) -> str: """构造 Responses 流式接口使用的错误事件。""" return sse_event_message('error', {'error': message}) @@ -248,3 +206,140 @@ def apply_header_modifications(headers: dict[str, str], modifications: dict[str, headers[key] = str(value) logger.info('已应用 header_modifications: %s', list(modifications.keys())) return headers + + +# ═══════════════════════════════════════════════════════════ +# 后端注册表 + ClientFormatter 实现 +# ═══════════════════════════════════════════════════════════ + + +def get_outbound(backend: str): + """根据后端类型获取对应的 OutboundTransformer 实例。""" + from adapters.cc_anthropic_adapter import AnthropicOutbound + from adapters.cc_gemini_adapter import GeminiOutbound + from adapters.openai_compat_fixer import OpenAIChatOutbound + from adapters.responses_cc_adapter import ResponsesOutbound + + registry = { + 'openai': OpenAIChatOutbound, + 'anthropic': AnthropicOutbound, + 'gemini': GeminiOutbound, + 'responses': ResponsesOutbound, + } + cls = registry.get(backend, OpenAIChatOutbound) + return cls() + + +class CCClientFormatter: + """Chat Completions 客户端格式化器。 + + 将通用处理结果格式化为 OpenAI Chat Completions 格式, + 供 /v1/chat/completions 端点使用。 + """ + + def format_response(self, cc_response: dict[str, Any], model: str) -> dict[str, Any]: + cc_response['model'] = model + return cc_response + + def wrap_stream_item(self, item: Any) -> str: + payload = item if isinstance(item, str) else json.dumps(item, ensure_ascii=False) + return f'data: {payload}\n\n' + + def format_error(self, message: str) -> str: + return sse_data_message({'error': {'message': message, 'type': 'upstream_error'}}) + + def format_done(self) -> str | None: + return sse_data_message('[DONE]') + + def start_events(self) -> list[str]: + return [] + + @property + def usage_input_key(self) -> str: + return 'prompt_tokens' + + @property + def usage_output_key(self) -> str: + return 'completion_tokens' + + +class ResponsesClientFormatter: + """Responses API 客户端格式化器。 + + 将通用处理结果格式化为 OpenAI Responses 格式, + 供 /v1/responses 端点使用。 + + 流式场景使用 ResponsesStreamConverter 做 CC chunk → Responses SSE 转换。 + """ + + def __init__(self, model: str = ''): + from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses + self._model = model + self._converter = ResponsesStreamConverter(model=model) + self._cc_to_responses = cc_to_responses + + def format_response(self, cc_response: dict[str, Any], model: str) -> dict[str, Any]: + return self._cc_to_responses(cc_response, model) + + def wrap_stream_item(self, item: Any) -> str: + if isinstance(item, str): + return item + events = self._converter.process_cc_chunk(item) + return ''.join(events) + + def format_error(self, message: str) -> str: + return responses_error_event(message) + + def format_done(self) -> str | None: + events = self._converter.finalize() + return ''.join(events) if events else None + + def start_events(self) -> list[str]: + return self._converter.start_events() + + @property + def usage_input_key(self) -> str: + return 'input_tokens' + + @property + def usage_output_key(self) -> str: + return 'output_tokens' + + +class ResponsesPassthroughFormatter: + """Responses 透传格式化器。 + + 当后端本身就是 Responses 格式时使用,做轻量模型名改写。 + """ + + def __init__(self, model: str = ''): + self._model = model + + def format_response(self, response_data: dict[str, Any], model: str) -> dict[str, Any]: + response_data['model'] = model + return response_data + + def wrap_stream_item(self, item: Any) -> str: + if isinstance(item, str): + return item + event_type = item.pop('_sse_event_type', None) + if event_type: + return f'event: {event_type}\ndata: {json.dumps(item, ensure_ascii=False)}\n\n' + return f'data: {json.dumps(item, ensure_ascii=False)}\n\n' + + def format_error(self, message: str) -> str: + return responses_error_event(message) + + def format_done(self) -> str | None: + return None + + def start_events(self) -> list[str]: + return [] + + @property + def usage_input_key(self) -> str: + return 'input_tokens' + + @property + def usage_output_key(self) -> str: + return 'output_tokens' diff --git a/routes/responses.py b/routes/responses.py index 4889a40..6732660 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -1,7 +1,7 @@ """路由: /v1/responses 处理 Cursor 对 GPT、Claude-Opus 等模型发出的 Responses API 请求。 -请求会先转换为 Chat Completions 中间表示,再按后端类型分发,最后转换回 Responses 格式。 +请求先转换为 Chat Completions 中间表示,再通过统一出站转换器分发。 """ from __future__ import annotations @@ -13,62 +13,30 @@ from typing import Any import settings from flask import Blueprint, jsonify, request -from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response -from adapters.cc_gemini_adapter import GeminiStreamConverter, cc_to_gemini_request, gemini_to_cc_response -from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request -from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc -from config import Config +from adapters.openai_compat_fixer import normalize_request +from adapters.responses_cc_adapter import ( + AnthropicOutboundForResponses, + ResponsesNativeOutbound, + responses_to_cc, +) +from adapters.unified import handle_non_stream, handle_stream from routes.common import ( - RouteContext, - apply_body_modifications, - apply_header_modifications, - build_anthropic_target, - build_gemini_target, - build_openai_target, - build_responses_target, + ResponsesClientFormatter, + ResponsesPassthroughFormatter, build_route_context, - inject_instructions_anthropic, + get_outbound, inject_instructions_cc, inject_instructions_responses, log_route_context, - log_usage, - responses_error_event, ) -from utils.http import ( - forward_request, - gen_id, - iter_anthropic_sse, - iter_gemini_sse, - iter_openai_sse, - iter_responses_sse, - sse_response, -) -from utils.request_logger import ( - append_client_event, - append_upstream_event, - attach_client_response, - attach_error, - attach_upstream_request, - attach_upstream_response, - finalize_turn, - set_stream_summary, - start_turn, -) -from utils.think_tag import ThinkTagExtractor +from utils.request_logger import start_turn from utils.thinking_cache import thinking_cache -from utils.usage_tracker import usage_tracker logger = logging.getLogger(__name__) bp = Blueprint('responses', __name__) -def _dbg(message: str) -> None: - """仅在调试模式下输出详细日志。""" - if settings.get_debug_mode() in ('simple', 'verbose'): - logger.info('[响应生成调试] %s', message) - - @bp.route('/v1/responses', methods=['POST']) def responses_endpoint(): """处理 Responses 请求并按模型映射分发。""" @@ -90,543 +58,42 @@ def responses_endpoint(): ) log_route_context('响应生成', ctx) + if ctx.backend == 'responses': + return _handle_native_responses(ctx, payload, turn) + cc_payload = _build_cc_payload(payload, ctx) - if ctx.backend == 'openai': - return _handle_openai_backend(ctx, cc_payload, turn) - if ctx.backend == 'responses': - return _handle_responses_backend(ctx, payload, turn) - if ctx.backend == 'gemini': - return _handle_gemini_backend(ctx, cc_payload, turn) - return _handle_anthropic_backend(ctx, cc_payload, turn) + if ctx.backend == 'anthropic': + outbound = AnthropicOutboundForResponses() + else: + outbound = get_outbound(ctx.backend) - -def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]: - """将 Responses 请求统一降级为 Chat Completions 中间表示。 - - 这样后续无论走 OpenAI 兼容后端还是 Anthropic 后端,都能复用一套 - 中间协议,避免在路由层同时维护两套完全不同的请求编排逻辑。 - """ - cc_payload = responses_to_cc(payload) - cc_payload['model'] = ctx.upstream_model - cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) - cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position) - _dbg( - '已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys())) - + f' 消息数={len(cc_payload.get("messages", []))}' - ) - return cc_payload - - -def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]): - """处理走 OpenAI 兼容后端的 Responses 请求。""" - cc_payload = normalize_request(cc_payload) - _dbg( - f'标准化完成:模型={cc_payload.get("model")} ' - f'工具数={len(cc_payload.get("tools", []))}' - ) - - url, headers = build_openai_target(ctx) - cc_payload = apply_body_modifications(cc_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) + client_fmt = ResponsesClientFormatter(model=ctx.client_model) if ctx.is_stream: - return _handle_openai_stream(ctx, cc_payload, url, headers, turn) - return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn) + return handle_stream(ctx, outbound, client_fmt, cc_payload, turn) + return handle_non_stream(ctx, outbound, client_fmt, cc_payload, turn) -def _handle_openai_non_stream( - ctx: RouteContext, - cc_payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any], -): - """处理 OpenAI 兼容后端的非流式 Responses 返回。""" - cc_payload['stream'] = False - attach_upstream_request(turn, cc_payload, headers) - resp, err = forward_request(url, headers, cc_payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - fixed = fix_response(raw) - response_data = cc_to_responses(fixed, ctx.client_model) - return _finalize_responses_response( - response_data, - client_model=ctx.client_model, - turn=turn, - debug_label='转换为 Responses 后', - ) - - -def _handle_openai_stream( - ctx: RouteContext, - cc_payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 OpenAI 兼容后端的流式 Responses 返回。""" - cc_payload['stream'] = True - converter = ResponsesStreamConverter(model=ctx.client_model) - - def generate(): - """消费 OpenAI 聊天补全流,并实时改写为 Responses SSE。""" - yield from converter.start_events() - - attach_upstream_request(turn, cc_payload, headers) - resp, err = forward_request(url, headers, cc_payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield responses_error_event(str(err)) - return - - think_extractor = ThinkTagExtractor() - chunk_count = 0 - client_events: list[str] = [] - - for chunk in iter_openai_sse(resp): - if chunk is None: - _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') - finalized_events = converter.finalize() - for item in finalized_events: - client_events.append(item) - append_client_event(turn, {'type': 'responses_event', 'data': item}) - yield item - usage_tracker.record(ctx.client_model) - set_stream_summary(turn, { - 'chunk_count': chunk_count, - 'client_event_count': len(client_events), - }) - attach_client_response(turn, { - 'type': 'responses.stream.summary', - 'model': ctx.client_model, - 'event_count': len(client_events), - }) - finalize_turn(turn) - return - - append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk}) - if chunk_count < 10: - _dbg( - f'上游原始片段#{chunk_count}=' - + json.dumps(chunk, ensure_ascii=False, default=str)[:500] - ) - - chunk = fix_stream_chunk(chunk) - for out in think_extractor.process_chunk(chunk): - for evt in converter.process_cc_chunk(out): - client_events.append(evt) - append_client_event(turn, {'type': 'responses_event', 'data': evt}) - if chunk_count < 10: - _dbg( - f'转换后片段#{chunk_count}=' - + json.dumps(out, ensure_ascii=False, default=str)[:500] - ) - yield evt - - chunk_count += 1 - - return sse_response(generate()) - - -def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): - """处理走原生 Responses 后端的请求。 - - 当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议, - 直接转发原生 Responses 请求即可。 - """ +def _handle_native_responses(ctx, payload: dict[str, Any], turn: dict[str, Any]): + """处理走原生 Responses 后端的请求(直接透传)。""" payload = dict(payload) payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) - url, headers = build_responses_target(ctx) - payload = apply_body_modifications(payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) + + outbound = ResponsesNativeOutbound() + client_fmt = ResponsesPassthroughFormatter(model=ctx.client_model) if ctx.is_stream: - return _handle_responses_stream(ctx, payload, url, headers, turn) - return _handle_responses_non_stream(ctx, payload, url, headers, turn) + return handle_stream(ctx, outbound, client_fmt, payload, turn) + return handle_non_stream(ctx, outbound, client_fmt, payload, turn) -def _handle_responses_non_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理原生 Responses 后端的非流式返回。""" - payload['stream'] = False - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - response_data = resp.json() - attach_upstream_response(turn, response_data) - response_data['model'] = ctx.client_model - return _finalize_responses_response( - response_data, - client_model=ctx.client_model, - turn=turn, - debug_label='原生 Responses 返回后', - ) - - -def _handle_responses_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理原生 Responses 后端的流式返回。""" - payload['stream'] = True - converter = ResponsesStreamConverter(model=ctx.client_model) - - def generate(): - """透传上游原生 Responses 流,并做轻量模型名改写。""" - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield responses_error_event(str(err)) - return - - event_count = 0 - client_events: list[str] = [] - last_usage: dict[str, Any] | None = None - for event_type, event_data in iter_responses_sse(resp): - append_upstream_event(turn, {'type': event_type, 'data': event_data}) - extracted_usage = _extract_responses_usage(event_data) - if extracted_usage: - last_usage = extracted_usage - if event_count < 10: - _dbg( - f'上游事件#{event_count} 类型={event_type} 数据=' - + json.dumps(event_data, ensure_ascii=False, default=str)[:500] - ) - produced = converter.process_responses_event(event_type, event_data) - for evt in produced: - client_events.append(evt) - append_client_event(turn, {'type': 'responses_event', 'data': evt}) - yield evt - event_count += 1 - - _dbg(f'流式响应结束,共 {event_count} 个事件') - usage_tracker.record( - ctx.client_model, - last_usage, - input_key='input_tokens', - output_key='output_tokens', - ) - set_stream_summary(turn, { - 'event_count': event_count, - 'client_event_count': len(client_events), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'responses.stream.summary', - 'model': ctx.client_model, - 'event_count': len(client_events), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) - - -def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None: - """从原生 Responses 事件中提取 usage。 - - 原生 `/v1/responses` 流式通常会在 `response.completed` 事件里携带 usage, - 也可能直接挂在顶层 `usage` 字段。这里统一做兼容提取,供统计与日志复用。 - """ - if not isinstance(event_data, dict): - return None - usage = event_data.get('usage') - if isinstance(usage, dict): - return usage - response_obj = event_data.get('response') - if isinstance(response_obj, dict): - nested_usage = response_obj.get('usage') - if isinstance(nested_usage, dict): - return nested_usage - return None - - -def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None): - """处理走 Gemini Contents 后端的 Responses 请求。""" - gemini_payload = cc_to_gemini_request(cc_payload) - _dbg( - '已转换为 Gemini 请求:字段=' + str(list(gemini_payload.keys())) - + f' 内容数={len(gemini_payload.get("contents", []))}' - ) - - url, headers = build_gemini_target(ctx, stream=ctx.is_stream) - gemini_payload = apply_body_modifications(gemini_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - if ctx.is_stream: - return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn) - return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn) - - -def _handle_gemini_non_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Gemini 后端的非流式 Responses 返回。""" - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - cc_data = gemini_to_cc_response(raw) - response_data = cc_to_responses(cc_data, ctx.client_model) - return _finalize_responses_response( - response_data, - client_model=ctx.client_model, - turn=turn, - debug_label='Gemini 转回 Responses 后', - ) - - -def _handle_gemini_stream( - ctx: RouteContext, - payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Gemini 后端的流式 Responses 返回。""" - converter = ResponsesStreamConverter(model=ctx.client_model) - gemini_converter = GeminiStreamConverter() - - def generate(): - yield from converter.start_events() - - attach_upstream_request(turn, payload, headers) - resp, err = forward_request(url, headers, payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield responses_error_event(str(err)) - return - - chunk_count = 0 - client_events: list[str] = [] - last_usage: dict[str, Any] | None = None - for gemini_chunk in iter_gemini_sse(resp): - append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk}) - usage_meta = gemini_chunk.get('usageMetadata') if isinstance(gemini_chunk, dict) else None - if isinstance(usage_meta, dict): - last_usage = { - 'input_tokens': usage_meta.get('promptTokenCount', 0), - 'output_tokens': usage_meta.get('candidatesTokenCount', 0), - 'total_tokens': usage_meta.get('totalTokenCount', 0), - } - if chunk_count < 10: - _dbg( - f'上游 Gemini 片段#{chunk_count}=' - + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500] - ) - - for cc_chunk in gemini_converter.process_chunk(gemini_chunk): - for evt in converter.process_cc_chunk(cc_chunk): - client_events.append(evt) - append_client_event(turn, {'type': 'responses_event', 'data': evt}) - yield evt - - chunk_count += 1 - - _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') - finalized_events = converter.finalize() - for evt in finalized_events: - client_events.append(evt) - append_client_event(turn, {'type': 'responses_event', 'data': evt}) - yield evt - usage_tracker.record( - ctx.client_model, - last_usage, - input_key='input_tokens', - output_key='output_tokens', - ) - set_stream_summary(turn, { - 'chunk_count': chunk_count, - 'client_event_count': len(client_events), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'responses.stream.summary', - 'model': ctx.client_model, - 'event_count': len(client_events), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) - - -def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None): - """处理走 Anthropic 后端的 Responses 请求。""" - anthropic_payload = cc_to_messages_request(cc_payload) - _dbg( - '已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys())) - + f' 消息数={len(anthropic_payload.get("messages", []))}' - ) - - url, headers = build_anthropic_target(ctx) - anthropic_payload = apply_body_modifications(anthropic_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - if ctx.is_stream: - return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn) - return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn) - - -def _handle_anthropic_non_stream( - ctx: RouteContext, - anthropic_payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Anthropic 后端的非流式 Responses 返回。""" - anthropic_payload['stream'] = False - attach_upstream_request(turn, anthropic_payload, headers) - resp, err = forward_request(url, headers, anthropic_payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - cc_data = messages_to_cc_response(raw) - response_data = cc_to_responses(cc_data, ctx.client_model) - return _finalize_responses_response( - response_data, - client_model=ctx.client_model, - turn=turn, - debug_label='Messages 转回 Responses 后', - ) - - -def _handle_anthropic_stream( - ctx: RouteContext, - anthropic_payload: dict[str, Any], - url: str, - headers: dict[str, str], - turn: dict[str, Any] | None, -): - """处理 Anthropic 后端的流式 Responses 返回。 - - 这里直接将 Anthropic SSE 事件映射到 Responses SSE,故意跳过 CC 流式中间态, - 这样可以减少一次事件重组,降低流式转换复杂度,也更容易保留原始时序。 - """ - anthropic_payload['stream'] = True - converter = ResponsesStreamConverter(model=ctx.client_model) - - def generate(): - """消费 Anthropic SSE,并直接映射为 Responses 事件序列。""" - yield from converter.start_events() - - attach_upstream_request(turn, anthropic_payload, headers) - resp, err = forward_request(url, headers, anthropic_payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield responses_error_event(str(err)) - return - - event_count = 0 - client_events: list[str] = [] - for event_type, event_data in iter_anthropic_sse(resp): - append_upstream_event(turn, {'type': event_type, 'data': event_data}) - if event_count < 10: - _dbg( - f'上游事件#{event_count} 类型={event_type} 数据=' - + json.dumps(event_data, ensure_ascii=False, default=str)[:500] - ) - - produced = converter.process_anthropic_event(event_type, event_data) - for evt in produced: - client_events.append(evt) - append_client_event(turn, {'type': 'responses_event', 'data': evt}) - yield evt - event_count += 1 - - _dbg(f'流式响应结束,共 {event_count} 个事件') - finalized_events = converter.finalize() - for evt in finalized_events: - client_events.append(evt) - append_client_event(turn, {'type': 'responses_event', 'data': evt}) - yield evt - usage_tracker.record(ctx.client_model) - set_stream_summary(turn, { - 'event_count': event_count, - 'client_event_count': len(client_events), - }) - attach_client_response(turn, { - 'type': 'responses.stream.summary', - 'model': ctx.client_model, - 'event_count': len(client_events), - }) - finalize_turn(turn) - - return sse_response(generate()) - - -def _finalize_responses_response( - response_data: dict[str, Any], - *, - client_model: str, - turn: dict[str, Any], - debug_label: str, -): - """统一收尾非流式 Responses 响应。 - - 两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象,因此这里集中 - 处理调试日志、回填展示模型名以及 usage 日志。 - """ - response_data['model'] = response_data.get('model') or '' - _dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000]) - log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens') - - usage_tracker.record( - client_model, - response_data.get('usage'), - input_key='input_tokens', - output_key='output_tokens', - ) - - attach_client_response(turn, response_data) - finalize_turn(turn, usage=response_data.get('usage')) - - return jsonify(response_data) +def _build_cc_payload(payload: dict[str, Any], ctx) -> dict[str, Any]: + """将 Responses 请求统一降级为 Chat Completions 中间表示。""" + cc_payload = responses_to_cc(payload) + cc_payload['model'] = ctx.upstream_model + cc_payload = normalize_request(cc_payload) + cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) + cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position) + return cc_payload From cd577d17c31e54ce00ce3b39ba6d72717e8eed9d Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Thu, 26 Mar 2026 11:29:02 +0800 Subject: [PATCH 04/10] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routes/chat.py | 4 +++- routes/common.py | 14 ++++++++++++++ routes/responses.py | 4 +++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/routes/chat.py b/routes/chat.py index 7e01346..7c72413 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -21,6 +21,7 @@ from routes.common import ( get_outbound, inject_instructions_cc, log_route_context, + should_inject_thinking, ) from utils.request_logger import start_turn from utils.thinking_cache import thinking_cache @@ -58,7 +59,8 @@ def chat_completions(): payload['model'] = ctx.upstream_model payload = normalize_request(payload) - payload['messages'] = thinking_cache.inject(payload.get('messages', [])) + if should_inject_thinking(ctx.backend): + payload['messages'] = thinking_cache.inject(payload.get('messages', [])) payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) outbound = get_outbound(ctx.backend) diff --git a/routes/common.py b/routes/common.py index eba89b2..654900e 100644 --- a/routes/common.py +++ b/routes/common.py @@ -173,6 +173,20 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po return payload +def should_inject_thinking(backend: str) -> bool: + """判断当前后端是否需要注入历史 thinking。 + + 仅对明确能消费历史 reasoning/thinking 的后端启用: + - anthropic + - gemini + - responses + + OpenAI Chat 兼容后端通常不接受 `reasoning_content` 历史字段, + 若注入会导致上游报错,因此显式排除。 + """ + return backend in ('anthropic', 'gemini', 'responses') + + # ─── Body / Header 修改 ────────────────────────── diff --git a/routes/responses.py b/routes/responses.py index 6732660..ce1de4b 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -28,6 +28,7 @@ from routes.common import ( inject_instructions_cc, inject_instructions_responses, log_route_context, + should_inject_thinking, ) from utils.request_logger import start_turn from utils.thinking_cache import thinking_cache @@ -94,6 +95,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx) -> dict[str, Any]: cc_payload = responses_to_cc(payload) cc_payload['model'] = ctx.upstream_model cc_payload = normalize_request(cc_payload) - cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) + if should_inject_thinking(ctx.backend): + cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position) return cc_payload From a8f5ada8e1bfc86f655f4eb1398d9a22ea6d23d2 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Thu, 26 Mar 2026 11:34:27 +0800 Subject: [PATCH 05/10] =?UTF-8?q?=E5=9B=9E=E9=80=80=E6=97=A7=E7=89=88?= =?UTF-8?q?=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/cc_anthropic_adapter.py | 260 ++++++------ adapters/cc_gemini_adapter.py | 147 +++---- adapters/helpers.py | 155 ------- adapters/openai_compat_fixer.py | 59 +-- adapters/responses_cc_adapter.py | 336 ++++++--------- adapters/unified.py | 354 ---------------- routes/chat.py | 685 ++++++++++++++++++++++++++++--- routes/common.py | 193 ++------- routes/responses.py | 609 +++++++++++++++++++++++++-- 9 files changed, 1582 insertions(+), 1216 deletions(-) delete mode 100644 adapters/helpers.py delete mode 100644 adapters/unified.py diff --git a/adapters/cc_anthropic_adapter.py b/adapters/cc_anthropic_adapter.py index b70fff0..7848d05 100644 --- a/adapters/cc_anthropic_adapter.py +++ b/adapters/cc_anthropic_adapter.py @@ -18,21 +18,13 @@ from __future__ import annotations import json from typing import Any -from adapters.helpers import ( - build_cc_message, - build_cc_response, - build_cc_tool_call, - build_cc_usage, - extract_text, - make_cc_chunk, - parse_json_safe, - stringify_content, -) from utils.http import gen_id from utils.tool_fixer import fix_anthropic_tool_use, normalize_args, repair_str_replace_args JsonDict = dict[str, Any] + +# Anthropic stop_reason → OpenAI finish_reason _STOP_REASON_MAP = { 'end_turn': 'stop', 'max_tokens': 'length', @@ -86,18 +78,23 @@ def messages_to_cc_response(data: JsonDict, request_id: str | None = None) -> Js data = fix_anthropic_tool_use(data) content_text, reasoning_text, tool_calls = _collect_response_parts(data.get('content', [])) + message = _build_cc_message(content_text, reasoning_text, tool_calls) usage = data.get('usage', {}) - return build_cc_response( - response_id=request_id, - message=build_cc_message(content_text, reasoning_text, tool_calls), - finish_reason=_STOP_REASON_MAP.get(data.get('stop_reason', 'end_turn'), 'stop'), - usage=build_cc_usage( + return { + 'id': request_id, + 'object': 'chat.completion', + 'model': data.get('model', 'claude'), + 'choices': [{ + 'index': 0, + 'message': message, + 'finish_reason': _STOP_REASON_MAP.get(data.get('stop_reason', 'end_turn'), 'stop'), + }], + 'usage': _build_cc_usage( input_tokens=usage.get('input_tokens', 0), output_tokens=usage.get('output_tokens', 0), ), - model=data.get('model', 'claude'), - ) + } # ═══════════════════════════════════════════════════════════ @@ -127,8 +124,12 @@ class AnthropicStreamConverter: self._input_tokens = 0 self._output_tokens = 0 - def process_event(self, event_type: str, event_data: JsonDict) -> list[JsonDict]: - """处理单个 Anthropic SSE 事件,返回 CC chunk dict 列表。""" + def process_event(self, event_type: str, event_data: JsonDict) -> list[str]: + """处理单个 Anthropic SSE 事件。 + + 调用方会按事件顺序不断喂入 event/data,这里根据事件类型拆成一个或多个 CC chunk + 字符串,交给上层直接作为 SSE data 发送给 Cursor。 + """ if event_type == 'message_start': return self._handle_message_start(event_data) if event_type == 'content_block_start': @@ -139,64 +140,104 @@ class AnthropicStreamConverter: return self._handle_message_delta(event_data) return [] - def _handle_message_start(self, event_data: JsonDict) -> list[JsonDict]: + def _handle_message_start(self, event_data: JsonDict) -> list[str]: + """处理消息开始事件,产出 assistant 角色起始 chunk。 + + 这个起始 chunk 很重要,因为 Cursor 侧通常会依赖首个带 role 的 chunk 来初始化 + 当前 assistant 消息。 + """ message = event_data.get('message', {}) self._input_tokens = message.get('usage', {}).get('input_tokens', 0) + chunk = self._make_chunk(delta={'role': 'assistant', 'content': ''}) if message.get('model'): chunk['model'] = message['model'] - return [chunk] + return [self._dump_chunk(chunk)] - def _handle_content_block_start(self, event_data: JsonDict) -> list[JsonDict]: + def _handle_content_block_start(self, event_data: JsonDict) -> list[str]: + """处理内容块开始事件。 + + 目前这里只需要显式处理 `tool_use`,因为文本和 thinking 的真正内容都在后续 delta + 事件里;而 tool_use 需要先开一个空 arguments 的 tool_call 槽位。 + """ block = event_data.get('content_block', {}) if block.get('type') != 'tool_use': return [] + self._tool_index += 1 - return [self._make_chunk(delta={ + return [self._dump_chunk(self._make_chunk(delta={ 'tool_calls': [{ 'index': self._tool_index, 'id': block.get('id', gen_id('toolu_')), 'type': 'function', - 'function': {'name': block.get('name', ''), 'arguments': ''}, + 'function': { + 'name': block.get('name', ''), + 'arguments': '', + }, }] - })] + }))] - def _handle_content_block_delta(self, event_data: JsonDict) -> list[JsonDict]: + def _handle_content_block_delta(self, event_data: JsonDict) -> list[str]: + """处理内容块增量事件。 + + Anthropic 会把文本、思考内容、工具参数拆成不同 delta 类型,这里要分别映射成 + OpenAI chunk 里的 `content`、`reasoning_content` 和 `tool_calls.function.arguments`。 + """ delta = event_data.get('delta', {}) delta_type = delta.get('type', '') if delta_type == 'text_delta' and delta.get('text'): - return [self._make_chunk(delta={'content': delta['text']})] + return [self._dump_chunk(self._make_chunk(delta={'content': delta['text']}))] + if delta_type == 'thinking_delta' and delta.get('thinking'): - return [self._make_chunk(delta={'reasoning_content': delta['thinking']})] + return [self._dump_chunk(self._make_chunk(delta={'reasoning_content': delta['thinking']}))] + if delta_type == 'input_json_delta' and delta.get('partial_json'): - return [self._make_chunk(delta={ + return [self._dump_chunk(self._make_chunk(delta={ 'tool_calls': [{ 'index': self._tool_index, 'function': {'arguments': delta['partial_json']}, }] - })] + }))] + return [] - def _handle_message_delta(self, event_data: JsonDict) -> list[JsonDict]: + def _handle_message_delta(self, event_data: JsonDict) -> list[str]: + """处理消息收尾事件,补出 finish_reason 和 usage。 + + 当 Anthropic 发出 `message_delta` 时,说明这一轮 assistant 输出已经收束, + 这里会统一生成最后一个带 usage 的收尾 chunk。 + """ delta = event_data.get('delta', {}) usage = event_data.get('usage', {}) self._output_tokens = usage.get('output_tokens', 0) - chunk = make_cc_chunk( - self._id, + + chunk = self._make_chunk( delta={}, finish_reason=_STOP_REASON_MAP.get(delta.get('stop_reason', ''), 'stop'), - model='claude', ) - chunk['usage'] = build_cc_usage( + chunk['usage'] = _build_cc_usage( input_tokens=self._input_tokens, output_tokens=self._output_tokens, ) - return [chunk] + return [self._dump_chunk(chunk)] def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: """构造标准 OpenAI Chat Completions chunk 对象。""" - return make_cc_chunk(self._id, delta, finish_reason, model='claude') + choice: JsonDict = {'index': 0, 'delta': delta} + if finish_reason: + choice['finish_reason'] = finish_reason + return { + 'id': self._id, + 'object': 'chat.completion.chunk', + 'model': 'claude', + 'choices': [choice], + } + + @staticmethod + def _dump_chunk(chunk: JsonDict) -> str: + """统一序列化 chunk,方便上层直接写入 SSE data。""" + return json.dumps(chunk) # ═══════════════════════════════════════════════════════════ @@ -213,7 +254,7 @@ def _convert_request_message(message: Any) -> tuple[JsonDict | None, str | None] content = message.get('content', '') if role == 'system': - return None, extract_text(content) + return None, _flatten_text(content) if role == 'tool': return _convert_tool_role_message(message), None @@ -260,7 +301,7 @@ def _append_tool_use_blocks(content: Any, tool_calls: list[Any]) -> list[JsonDic 'type': 'tool_use', 'id': tool_call.get('id', gen_id('toolu_')), 'name': function_data.get('name', ''), - 'input': parse_json_safe(function_data.get('arguments', '{}')), + 'input': _parse_tool_arguments(function_data.get('arguments', '{}')), }) return blocks @@ -331,12 +372,37 @@ def _convert_tool_use_block(block: JsonDict, *, index: int) -> JsonDict: else: arguments_text = str(input_data) - return build_cc_tool_call( - call_id=block.get('id', gen_id('toolu_')), - name=tool_name, - arguments=arguments_text, - index=index, - ) + return { + 'index': index, + 'id': block.get('id', gen_id('toolu_')), + 'type': 'function', + 'function': { + 'name': tool_name, + 'arguments': arguments_text, + }, + } + + +def _build_cc_message(content_text: str, reasoning_text: str, tool_calls: list[JsonDict]) -> JsonDict: + """构造 OpenAI CC 响应中的 assistant message。""" + message: JsonDict = { + 'role': 'assistant', + 'content': content_text or None, + } + if reasoning_text: + message['reasoning_content'] = reasoning_text + if tool_calls: + message['tool_calls'] = tool_calls + return message + + +def _build_cc_usage(*, input_tokens: int, output_tokens: int) -> JsonDict: + """将 Anthropic usage 字段映射为 OpenAI usage。""" + return { + 'prompt_tokens': input_tokens, + 'completion_tokens': output_tokens, + 'total_tokens': input_tokens + output_tokens, + } # ═══════════════════════════════════════════════════════════ @@ -344,6 +410,35 @@ def _convert_tool_use_block(block: JsonDict, *, index: int) -> JsonDict: # ═══════════════════════════════════════════════════════════ +def _parse_tool_arguments(arguments: Any) -> Any: + """将 tool_call.arguments 尽量解析为对象,供 Anthropic tool_use.input 使用。 + + Anthropic 的 `tool_use.input` 天然期望对象结构;如果这里直接保留原始字符串, + 后续上游会把它当普通文本而不是工具参数对象。 + """ + if not isinstance(arguments, str): + return arguments if arguments is not None else {} + try: + return json.loads(arguments) + except json.JSONDecodeError: + return {} + + +def _flatten_text(content: Any) -> str: + """将 content 扁平化为纯文本,主要用于 system 消息上提。""" + if isinstance(content, str): + return content + if isinstance(content, list): + parts: list[str] = [] + for part in content: + if isinstance(part, str): + parts.append(part) + elif isinstance(part, dict) and part.get('type') == 'text': + parts.append(part.get('text', '')) + return '\n'.join(parts) + return str(content) + + def _convert_content(message: JsonDict) -> Any: """将 OpenAI 消息的 content 字段转换为 Anthropic 内容格式。""" content = message.get('content', '') @@ -613,78 +708,3 @@ def _pick_window_anchor(refs: list[JsonDict], target: int) -> int | None: if 'cache_control' not in refs[i]: return i return None - - -# ═══════════════════════════════════════════════════════════ -# OutboundTransformer 实现: Anthropic Messages -# ═══════════════════════════════════════════════════════════ - - -class AnthropicOutbound: - """Anthropic Messages 后端的出站转换器。 - - 将 CC 格式转换为 Anthropic Messages 格式并处理响应。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - return cc_to_messages_request(payload) - - def build_url(self, ctx) -> str: - return f'{ctx.target_url.rstrip("/")}/v1/messages' - - def build_headers(self, ctx) -> dict[str, str]: - from utils.http import build_anthropic_headers - return build_anthropic_headers(ctx.api_key) - - def parse_response(self, raw: JsonDict) -> JsonDict: - return messages_to_cc_response(raw) - - def create_stream_processor(self) -> AnthropicStreamProcessor: - return AnthropicStreamProcessor() - - -class AnthropicStreamProcessor: - """Anthropic SSE 流式处理器。 - - 包装 iter_anthropic_sse + AnthropicStreamConverter, - 将 Anthropic 事件流转换为 CC chunk。 - """ - - def __init__(self): - self._converter = AnthropicStreamConverter() - self._input_tokens = 0 - self._output_tokens = 0 - - def iter_events(self, response) -> Iterator: - from utils.http import iter_anthropic_sse - yield from iter_anthropic_sse(response) - - def process_event(self, event: tuple) -> list[JsonDict]: - event_type, event_data = event - return self._converter.process_event(event_type, event_data) - - def extract_usage(self, event: tuple) -> JsonDict | None: - event_type, event_data = event - if event_type == 'message_start': - message_usage = event_data.get('message', {}).get('usage', {}) - if isinstance(message_usage, dict): - self._input_tokens = message_usage.get('input_tokens', 0) - return { - 'prompt_tokens': self._input_tokens, - 'completion_tokens': 0, - 'total_tokens': self._input_tokens, - } - elif event_type == 'message_delta': - delta_usage = event_data.get('usage', {}) - if isinstance(delta_usage, dict): - completion = delta_usage.get('output_tokens', 0) - self._output_tokens = completion - return { - 'prompt_tokens': self._input_tokens, - 'completion_tokens': completion, - 'total_tokens': self._input_tokens + completion, - } - return None - - def finalize(self) -> list[JsonDict]: - return [] diff --git a/adapters/cc_gemini_adapter.py b/adapters/cc_gemini_adapter.py index 60336c8..5e8aad0 100644 --- a/adapters/cc_gemini_adapter.py +++ b/adapters/cc_gemini_adapter.py @@ -8,17 +8,8 @@ from __future__ import annotations import json import logging -from typing import Any, Iterator +from typing import Any -from adapters.helpers import ( - build_cc_message, - build_cc_response, - build_cc_tool_call, - build_cc_usage, - extract_text, - make_cc_chunk, - parse_json_safe, -) from utils.http import gen_id JsonDict = dict[str, Any] @@ -47,7 +38,7 @@ def cc_to_gemini_request(payload: JsonDict) -> JsonDict: for msg in messages: role = msg.get('role', '') if role in ('system', 'developer'): - system_parts.append(extract_text(msg.get('content', ''))) + system_parts.append(_flatten_text(msg.get('content', ''))) continue converted = _convert_message(msg) if converted: @@ -93,13 +84,21 @@ def gemini_to_cc_response(data: JsonDict, request_id: str | None = None) -> Json else: finish_reason = _FINISH_REASON_MAP.get(finish, 'stop') - return build_cc_response( - response_id=request_id, - message=build_cc_message(content_text, reasoning_text, tool_calls), - finish_reason=finish_reason, - usage=_convert_usage(data.get('usageMetadata', {})), - model=data.get('modelVersion', 'gemini'), - ) + message: JsonDict = {'role': 'assistant', 'content': content_text or None} + if reasoning_text: + message['reasoning_content'] = reasoning_text + if tool_calls: + message['tool_calls'] = tool_calls + + usage = _convert_usage(data.get('usageMetadata', {})) + + return { + 'id': request_id, + 'object': 'chat.completion', + 'model': data.get('modelVersion', 'gemini'), + 'choices': [{'index': 0, 'message': message, 'finish_reason': finish_reason}], + 'usage': usage, + } # ═══════════════════════════════════════════════════════════ @@ -167,7 +166,15 @@ class GeminiStreamConverter: return results def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: - return make_cc_chunk(self._id, delta, finish_reason, model='gemini') + choice: JsonDict = {'index': 0, 'delta': delta} + if finish_reason: + choice['finish_reason'] = finish_reason + return { + 'id': self._id, + 'object': 'chat.completion.chunk', + 'model': 'gemini', + 'choices': [choice], + } # ═══════════════════════════════════════════════════════════ @@ -187,7 +194,7 @@ def _convert_message(msg: JsonDict) -> JsonDict | None: 'parts': [{ 'functionResponse': { 'name': msg.get('name', msg.get('tool_call_id', '')), - 'response': parse_json_safe(msg.get('content', ''), fallback={'result': msg.get('content', '')} if msg.get('content', '') else {}), + 'response': _parse_json_safe(msg.get('content', '')), }, }], } @@ -214,7 +221,7 @@ def _convert_message(msg: JsonDict) -> JsonDict | None: parts.append({ 'functionCall': { 'name': func.get('name', ''), - 'args': parse_json_safe(func.get('arguments', '{}'), fallback={}), + 'args': _parse_json_safe(func.get('arguments', '{}')), }, }) @@ -297,12 +304,15 @@ def _extract_parts(parts: list[Any]) -> tuple[str, str, list[JsonDict]]: text += part['text'] elif 'functionCall' in part: fc = part['functionCall'] - tool_calls.append(build_cc_tool_call( - call_id=fc.get('id') or gen_id('call_'), - name=fc.get('name', ''), - arguments=json.dumps(fc.get('args', {}), ensure_ascii=False), - index=len(tool_calls), - )) + tool_calls.append({ + 'index': len(tool_calls), + 'id': fc.get('id') or gen_id('call_'), + 'type': 'function', + 'function': { + 'name': fc.get('name', ''), + 'arguments': json.dumps(fc.get('args', {}), ensure_ascii=False), + }, + }) return text, reasoning, tool_calls @@ -312,7 +322,12 @@ def _convert_usage(meta: JsonDict) -> JsonDict: prompt = meta.get('promptTokenCount', 0) candidates = meta.get('candidatesTokenCount', 0) thoughts = meta.get('thoughtsTokenCount', 0) - return build_cc_usage(prompt, candidates + thoughts) + completion = candidates + thoughts + return { + 'prompt_tokens': prompt, + 'completion_tokens': completion, + 'total_tokens': prompt + completion, + } def _merge_same_role(contents: list[JsonDict]) -> list[JsonDict]: @@ -328,65 +343,21 @@ def _merge_same_role(contents: list[JsonDict]) -> list[JsonDict]: return merged +def _flatten_text(content: Any) -> str: + if isinstance(content, str): + return content + if isinstance(content, list): + return '\n'.join( + p.get('text', '') if isinstance(p, dict) else str(p) + for p in content + ) + return str(content) -# ═══════════════════════════════════════════════════════════ -# OutboundTransformer 实现: Gemini Contents -# ═══════════════════════════════════════════════════════════ - - -class GeminiOutbound: - """Gemini Contents 后端的出站转换器。 - - 将 CC 格式转换为 Gemini generateContent 格式并处理响应。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - return cc_to_gemini_request(payload) - - def build_url(self, ctx) -> str: - base = ctx.target_url.rstrip('/') - model = ctx.upstream_model - if ctx.is_stream: - return f'{base}/v1/models/{model}:streamGenerateContent?alt=sse' - return f'{base}/v1/models/{model}:generateContent' - - def build_headers(self, ctx) -> dict[str, str]: - from utils.http import build_gemini_headers - return build_gemini_headers(ctx.api_key) - - def parse_response(self, raw: JsonDict) -> JsonDict: - return gemini_to_cc_response(raw) - - def create_stream_processor(self) -> GeminiStreamProcessor: - return GeminiStreamProcessor() - - -class GeminiStreamProcessor: - """Gemini SSE 流式处理器。 - - 包装 iter_gemini_sse + GeminiStreamConverter。 - """ - - def __init__(self): - self._converter = GeminiStreamConverter() - - def iter_events(self, response) -> Iterator: - from utils.http import iter_gemini_sse - yield from iter_gemini_sse(response) - - def process_event(self, event: JsonDict) -> list[JsonDict]: - return self._converter.process_chunk(event) - - def extract_usage(self, event: JsonDict) -> JsonDict | None: - usage_meta = event.get('usageMetadata') if isinstance(event, dict) else None - if isinstance(usage_meta, dict): - return { - 'prompt_tokens': usage_meta.get('promptTokenCount', 0), - 'completion_tokens': usage_meta.get('candidatesTokenCount', 0), - 'total_tokens': usage_meta.get('totalTokenCount', 0), - } - return None - - def finalize(self) -> list[JsonDict]: - return [] +def _parse_json_safe(text: Any) -> Any: + if not isinstance(text, str): + return text if text is not None else {} + try: + return json.loads(text) + except (json.JSONDecodeError, ValueError): + return {'result': text} if text else {} diff --git a/adapters/helpers.py b/adapters/helpers.py deleted file mode 100644 index 563902b..0000000 --- a/adapters/helpers.py +++ /dev/null @@ -1,155 +0,0 @@ -"""适配器公共辅助函数 - -收敛多个适配器都在重复实现的 CC 格式构建逻辑: -- CC 消息/Usage/Tool Call/Stream Chunk 的标准构造 -- 内容扁平化、JSON 安全解析、工具输出序列化 -""" - -from __future__ import annotations - -import json -from typing import Any - -from utils.http import gen_id - -JsonDict = dict[str, Any] - - -# ═══════════════════════════════════════════════════════════ -# CC 格式标准构造 -# ═══════════════════════════════════════════════════════════ - - -def build_cc_message( - content_text: str, - reasoning_text: str = '', - tool_calls: list[JsonDict] | None = None, -) -> JsonDict: - """构造标准的 CC assistant 消息。""" - message: JsonDict = { - 'role': 'assistant', - 'content': content_text or None, - } - if reasoning_text: - message['reasoning_content'] = reasoning_text - if tool_calls: - message['tool_calls'] = tool_calls - return message - - -def build_cc_usage(input_tokens: int, output_tokens: int) -> JsonDict: - """构造标准的 CC usage 字典。""" - return { - 'prompt_tokens': input_tokens, - 'completion_tokens': output_tokens, - 'total_tokens': input_tokens + output_tokens, - } - - -def build_cc_tool_call( - call_id: str, - name: str, - arguments: str, - *, - index: int | None = None, -) -> JsonDict: - """构造标准的 CC tool_call 结构。""" - tc: JsonDict = { - 'id': call_id or gen_id('call_'), - 'type': 'function', - 'function': { - 'name': name, - 'arguments': arguments, - }, - } - if index is not None: - tc['index'] = index - return tc - - -def make_cc_chunk( - chunk_id: str, - delta: JsonDict, - finish_reason: str | None = None, - model: str = '', -) -> JsonDict: - """构造标准的 CC 流式 chunk。""" - choice: JsonDict = {'index': 0, 'delta': delta} - if finish_reason: - choice['finish_reason'] = finish_reason - return { - 'id': chunk_id, - 'object': 'chat.completion.chunk', - 'model': model, - 'choices': [choice], - } - - -def build_cc_response( - response_id: str, - message: JsonDict, - finish_reason: str, - usage: JsonDict, - model: str = '', -) -> JsonDict: - """构造标准的 CC 非流式响应。""" - return { - 'id': response_id, - 'object': 'chat.completion', - 'model': model, - 'choices': [{ - 'index': 0, - 'message': message, - 'finish_reason': finish_reason, - }], - 'usage': usage, - } - - -# ═══════════════════════════════════════════════════════════ -# 通用文本/JSON 处理 -# ═══════════════════════════════════════════════════════════ - - -def extract_text(content: Any) -> str: - """从多种内容格式中提取并拼接纯文本。 - - 支持字符串、内容块列表(OpenAI/Anthropic/Responses 风格)。 - """ - if isinstance(content, str): - return content - if not isinstance(content, list): - return str(content) if content is not None else '' - - parts: list[str] = [] - for part in content: - if isinstance(part, str): - parts.append(part) - elif isinstance(part, dict): - part_type = part.get('type', '') - if part_type in ('text', 'output_text', 'input_text'): - parts.append(part.get('text', '')) - elif part_type == 'refusal': - parts.append(part.get('refusal', '')) - elif 'text' in part and not part_type: - parts.append(part['text']) - return '\n'.join(parts) if parts else '' - - -def parse_json_safe(text: Any, fallback: Any = None) -> Any: - """安全解析 JSON,失败时返回 fallback。""" - if not isinstance(text, str): - return text if text is not None else (fallback if fallback is not None else {}) - try: - return json.loads(text) - except (json.JSONDecodeError, ValueError): - return fallback if fallback is not None else {} - - -def stringify_content(content: Any) -> str: - """将任意内容序列化为字符串。""" - if isinstance(content, str): - return content - if content is None: - return '' - return json.dumps(content, ensure_ascii=False) diff --git a/adapters/openai_compat_fixer.py b/adapters/openai_compat_fixer.py index 50348cf..8a2d252 100644 --- a/adapters/openai_compat_fixer.py +++ b/adapters/openai_compat_fixer.py @@ -13,7 +13,7 @@ from __future__ import annotations import json import logging -from typing import Any, Iterator +from typing import Any from utils.http import gen_id from utils.think_tag import extract_from_text @@ -423,60 +423,3 @@ def _rewrite_function_call_finish_reason(choice: JsonDict) -> None: """将旧版 finish_reason=function_call 升级为 tool_calls。""" if choice.get('finish_reason') == 'function_call': choice['finish_reason'] = 'tool_calls' - - -# ═══════════════════════════════════════════════════════════ -# OutboundTransformer 实现: OpenAI Chat -# ═══════════════════════════════════════════════════════════ - - -class OpenAIChatOutbound: - """OpenAI Chat Completions 后端的出站转换器。 - - 由于 CC 本身就是 OpenAI Chat 格式,请求/响应转换主要做兼容性修复。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - return normalize_request(payload) - - def build_url(self, ctx) -> str: - return f'{ctx.target_url.rstrip("/")}/v1/chat/completions' - - def build_headers(self, ctx) -> dict[str, str]: - from utils.http import build_openai_headers - return build_openai_headers(ctx.api_key) - - def parse_response(self, raw: JsonDict) -> JsonDict: - return fix_response(raw) - - def create_stream_processor(self) -> OpenAIChatStreamProcessor: - return OpenAIChatStreamProcessor() - - -class OpenAIChatStreamProcessor: - """OpenAI Chat SSE 流式处理器。 - - 包装 iter_openai_sse + fix_stream_chunk + ThinkTagExtractor。 - """ - - def __init__(self): - from utils.think_tag import ThinkTagExtractor - self._think_extractor = ThinkTagExtractor() - - def iter_events(self, response) -> Iterator: - from utils.http import iter_openai_sse - for chunk in iter_openai_sse(response): - if chunk is None: - return - yield chunk - - def process_event(self, event: JsonDict) -> list[JsonDict]: - chunk = fix_stream_chunk(event) - return list(self._think_extractor.process_chunk(chunk)) - - def extract_usage(self, event: JsonDict) -> JsonDict | None: - return event.get('usage') - - def finalize(self) -> list[JsonDict]: - close_chunk = self._think_extractor.finalize() - return [close_chunk] if close_chunk else [] diff --git a/adapters/responses_cc_adapter.py b/adapters/responses_cc_adapter.py index 68acedc..e6c864a 100644 --- a/adapters/responses_cc_adapter.py +++ b/adapters/responses_cc_adapter.py @@ -15,18 +15,8 @@ from __future__ import annotations import json from dataclasses import dataclass -from typing import Any, Iterator +from typing import Any -from adapters.helpers import ( - build_cc_message, - build_cc_response, - build_cc_tool_call, - build_cc_usage, - extract_text, - make_cc_chunk, - stringify_content, -) -from adapters.unified import UnifiedUsage from utils.http import gen_id JsonDict = dict[str, Any] @@ -95,7 +85,7 @@ def cc_to_responses(cc_resp: JsonDict, model: str = '') -> JsonDict: 'status': _response_status_from_finish_reason(finish_reason), 'model': model or cc_resp.get('model', ''), 'output': _build_responses_output(message), - 'usage': UnifiedUsage.from_cc_dict(cc_resp.get('usage', {})).to_responses_dict(), + 'usage': _build_responses_usage(cc_resp.get('usage', {})), } @@ -104,18 +94,31 @@ def responses_to_cc_response(response_data: JsonDict, model: str = '') -> JsonDi output_items = response_data.get('output', []) content_text, reasoning_text, tool_calls = _collect_cc_parts_from_responses_output(output_items) finish_reason = _cc_finish_reason_from_responses(response_data, tool_calls) - usage = response_data.get('usage', {}) + message = { + 'role': 'assistant', + 'content': content_text or None, + } + if reasoning_text: + message['reasoning_content'] = reasoning_text + if tool_calls: + message['tool_calls'] = tool_calls - return build_cc_response( - response_id=response_data.get('id', gen_id('chatcmpl-')), - message=build_cc_message(content_text, reasoning_text, tool_calls), - finish_reason=finish_reason, - usage=build_cc_usage( - input_tokens=usage.get('input_tokens', 0), - output_tokens=usage.get('output_tokens', 0), - ), - model=model or response_data.get('model', ''), - ) + usage = response_data.get('usage', {}) + return { + 'id': response_data.get('id', gen_id('chatcmpl-')), + 'object': 'chat.completion', + 'model': model or response_data.get('model', ''), + 'choices': [{ + 'index': 0, + 'message': message, + 'finish_reason': finish_reason, + }], + 'usage': { + 'prompt_tokens': usage.get('input_tokens', 0), + 'completion_tokens': usage.get('output_tokens', 0), + 'total_tokens': usage.get('total_tokens', 0), + }, + } # ═══════════════════════════════════════════════════════════ @@ -655,7 +658,15 @@ class ResponsesToCCStreamConverter: def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: """构造标准 Chat Completions chunk。""" - return make_cc_chunk(self._id, delta, finish_reason, model=self._model) + choice: JsonDict = {'index': 0, 'delta': delta} + if finish_reason: + choice['finish_reason'] = finish_reason + return { + 'id': self._id, + 'object': 'chat.completion.chunk', + 'model': self._model, + 'choices': [choice], + } # ═══════════════════════════════════════════════════════════ @@ -704,7 +715,7 @@ def _append_responses_input_item( content = message.get('content') if role == 'system': - text = extract_text(content) + text = _content_to_text(content) if text: instructions.append(text) return @@ -713,11 +724,11 @@ def _append_responses_input_item( input_items.append({ 'type': 'function_call_output', 'call_id': message.get('tool_call_id', ''), - 'output': stringify_content(content), + 'output': _stringify_output(content), }) return - text = extract_text(content) + text = _content_to_text(content) has_tool_calls = bool(message.get('tool_calls')) if role == 'assistant' and has_tool_calls: @@ -760,7 +771,7 @@ def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None: if role and not item_type: msg: JsonDict = { 'role': role, - 'content': extract_text(item.get('content', '')), + 'content': _normalize_simple_content(item.get('content', '')), } if role == 'assistant' and pending_reasoning: msg['reasoning_content'] = pending_reasoning @@ -799,7 +810,7 @@ def _append_message_item(items: list[Any], *, start: int, messages: list[JsonDic """将一个 message 项及其后续连续 function_call 项合并成一条消息。""" item = items[start] role = item.get('role', 'assistant') - content = extract_text(item.get('content', [])) + content = _extract_text(item.get('content', [])) message: JsonDict = {'role': role, 'content': content or ''} if role == 'assistant': @@ -817,11 +828,7 @@ def _append_message_item(items: list[Any], *, start: int, messages: list[JsonDic def _append_function_call_item(item: JsonDict, messages: list[JsonDict]) -> None: """将独立的 Responses `function_call` 项挂接到最近的 assistant 消息上。""" - tool_call = build_cc_tool_call( - call_id=item.get('call_id') or gen_id('call_'), - name=item.get('name', ''), - arguments=item.get('arguments', '{}'), - ) + tool_call = _build_cc_tool_call(item) if messages and messages[-1]['role'] == 'assistant': messages[-1].setdefault('tool_calls', []).append(tool_call) @@ -844,6 +851,12 @@ def _convert_function_call_output_item(item: JsonDict) -> JsonDict: } +def _normalize_simple_content(content: Any) -> str: + """将简单 content 载荷规范化为纯文本字符串。""" + if isinstance(content, list): + return _extract_text(content) or '' + return str(content) if content is not None else '' + def _collect_function_calls(items: list[Any], start: int) -> tuple[list[JsonDict], int]: """收集从指定位置开始连续出现的 `function_call` 项。""" @@ -852,17 +865,24 @@ def _collect_function_calls(items: list[Any], start: int) -> tuple[list[JsonDict while index < len(items): next_item = items[index] if isinstance(next_item, dict) and next_item.get('type') == 'function_call': - tool_calls.append(build_cc_tool_call( - call_id=next_item.get('call_id') or gen_id('call_'), - name=next_item.get('name', ''), - arguments=next_item.get('arguments', '{}'), - )) + tool_calls.append(_build_cc_tool_call(next_item)) index += 1 else: break return tool_calls, index - start +def _build_cc_tool_call(item: JsonDict) -> JsonDict: + """将单个 Responses `function_call` 项转换为 CC `tool_call` 结构。""" + return { + 'id': item.get('call_id') or gen_id('call_'), + 'type': 'function', + 'function': { + 'name': item.get('name', ''), + 'arguments': item.get('arguments', '{}'), + }, + } + # ═══════════════════════════════════════════════════════════ # 非流式响应转换辅助 @@ -916,6 +936,14 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict: } +def _build_responses_usage(usage: JsonDict) -> JsonDict: + """将 Chat Completions 的 usage 字段映射为 Responses usage 结构。""" + return { + 'input_tokens': usage.get('prompt_tokens', 0), + 'output_tokens': usage.get('completion_tokens', 0), + 'total_tokens': usage.get('total_tokens', 0), + } + def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]: """从 Responses `output` 中提取文本、思考摘要和工具调用。""" @@ -931,16 +959,11 @@ def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str continue item_type = item.get('type', '') if item_type == 'message': - content_text += extract_text(item.get('content', [])) + content_text += _extract_text(item.get('content', [])) elif item_type == 'reasoning': reasoning_text += _extract_reasoning_text(item) elif item_type == 'function_call': - tool_calls.append(build_cc_tool_call( - call_id=item.get('call_id') or gen_id('call_'), - name=item.get('name', ''), - arguments=item.get('arguments', '{}'), - index=len(tool_calls), - )) + tool_calls.append(_build_cc_tool_call_from_responses_output(item, index=len(tool_calls))) return content_text, reasoning_text, tool_calls @@ -957,6 +980,18 @@ def _extract_reasoning_text(item: JsonDict) -> str: return ''.join(texts) +def _build_cc_tool_call_from_responses_output(item: JsonDict, *, index: int) -> JsonDict: + """将 Responses `function_call` 输出项转换为 CC `tool_call`。""" + return { + 'index': index, + 'id': item.get('call_id') or gen_id('call_'), + 'type': 'function', + 'function': { + 'name': item.get('name', ''), + 'arguments': item.get('arguments', '{}'), + }, + } + def _cc_finish_reason_from_responses(response_data: JsonDict, tool_calls: list[JsonDict]) -> str: """根据 Responses 完成状态推断聊天补全的 finish_reason。""" @@ -982,7 +1017,57 @@ def _map_anthropic_stop_reason(stop_reason: str) -> str: # ═══════════════════════════════════════════════════════════ +def _extract_text(content: Any) -> str: + """从多种内容块结构中提取并拼接纯文本。""" + if isinstance(content, str): + return content + if not isinstance(content, list): + return str(content) if content else '' + texts: list[str] = [] + for part in content: + if isinstance(part, str): + texts.append(part) + elif isinstance(part, dict): + part_type = part.get('type', '') + if part_type in ('output_text', 'input_text', 'text'): + texts.append(part.get('text', '')) + elif part_type == 'refusal': + texts.append(part.get('refusal', '')) + return '\n'.join(texts) if texts else '' + + +def _content_to_text(content: Any) -> str: + """将任意 content 载荷转换为单个字符串。""" + if isinstance(content, str): + return content + if isinstance(content, list): + return _extract_text(content) + return str(content) if content is not None else '' + + +def _content_to_responses_parts(content: Any, role: str = 'user') -> list[JsonDict]: + """将普通消息内容转换为 Responses 内容块数组。 + + assistant 消息使用 output_text,其他角色使用 input_text。 + """ + if isinstance(content, list): + text = _extract_text(content) + else: + text = _content_to_text(content) + if not text: + return [] + part_type = 'output_text' if role == 'assistant' else 'input_text' + return [{'type': part_type, 'text': text}] + + +def _stringify_output(content: Any) -> str: + """将工具输出统一序列化为字符串,便于放入 `function_call_output`。""" + if isinstance(content, str): + return content + if content is None: + return '' + return json.dumps(content, ensure_ascii=False) if not isinstance(content, str) else content def _build_responses_function_call_item(tool_call: JsonDict) -> JsonDict: @@ -996,165 +1081,6 @@ def _build_responses_function_call_item(tool_call: JsonDict) -> JsonDict: } -# ═══════════════════════════════════════════════════════════ -# OutboundTransformer 实现: Responses -# ═══════════════════════════════════════════════════════════ - - -class ResponsesOutbound: - """OpenAI Responses 后端的出站转换器。 - - 将 CC 格式转换为 Responses 格式并处理响应。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - return cc_to_responses_request(payload) - - def build_url(self, ctx) -> str: - return f'{ctx.target_url.rstrip("/")}/v1/responses' - - def build_headers(self, ctx) -> dict[str, str]: - from utils.http import build_openai_headers - return build_openai_headers(ctx.api_key) - - def parse_response(self, raw: JsonDict) -> JsonDict: - return responses_to_cc_response(raw) - - def create_stream_processor(self) -> ResponsesStreamProcessorForCC: - return ResponsesStreamProcessorForCC() - - -class ResponsesStreamProcessorForCC: - """Responses SSE → CC chunk 流式处理器。 - - 用于 /v1/chat/completions -> /v1/responses 的桥接路径。 - """ - - def __init__(self): - self._converter = ResponsesToCCStreamConverter() - - def iter_events(self, response) -> Iterator: - from utils.http import iter_responses_sse - yield from iter_responses_sse(response) - - def process_event(self, event: tuple) -> list[JsonDict]: - event_type, event_data = event - return self._converter.process_event(event_type, event_data) - - def extract_usage(self, event: tuple) -> JsonDict | None: - from adapters.unified import extract_responses_usage - event_type, event_data = event - extracted = extract_responses_usage(event_data) - if extracted: - return { - 'prompt_tokens': extracted.get('input_tokens', 0), - 'completion_tokens': extracted.get('output_tokens', 0), - 'total_tokens': extracted.get('total_tokens', 0), - } - return None - - def finalize(self) -> list[JsonDict]: - return [] - - -class ResponsesNativeOutbound: - """Responses 后端原生透传的出站转换器。 - - 当 /v1/responses → /v1/responses 时直接透传,不经过 CC 中间格式。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - return payload - - def build_url(self, ctx) -> str: - return f'{ctx.target_url.rstrip("/")}/v1/responses' - - def build_headers(self, ctx) -> dict[str, str]: - from utils.http import build_openai_headers - return build_openai_headers(ctx.api_key) - - def parse_response(self, raw: JsonDict) -> JsonDict: - return raw - - def create_stream_processor(self) -> ResponsesNativeStreamProcessor: - return ResponsesNativeStreamProcessor() - - -class ResponsesNativeStreamProcessor: - """Responses 原生 SSE 透传流式处理器。 - - 上游就是 Responses 格式,只需透传事件并做轻量模型名改写。 - 每个事件作为 SSE 字符串直接返回。 - """ - - def iter_events(self, response) -> Iterator: - from utils.http import iter_responses_sse - yield from iter_responses_sse(response) - - def process_event(self, event: tuple) -> list[JsonDict]: - event_type, event_data = event - return [{'_sse_event_type': event_type, **event_data}] - - def extract_usage(self, event: tuple) -> JsonDict | None: - from adapters.unified import extract_responses_usage - _, event_data = event - return extract_responses_usage(event_data) - - def finalize(self) -> list[JsonDict]: - return [] - - -class AnthropicOutboundForResponses: - """Anthropic 后端的出站转换器(用于 /v1/responses 路由)。 - - 流式处理直接将 Anthropic SSE → Responses SSE, - 跳过 CC 中间态以保留原始时序。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - from adapters.cc_anthropic_adapter import cc_to_messages_request - return cc_to_messages_request(payload) - - def build_url(self, ctx) -> str: - return f'{ctx.target_url.rstrip("/")}/v1/messages' - - def build_headers(self, ctx) -> dict[str, str]: - from utils.http import build_anthropic_headers - return build_anthropic_headers(ctx.api_key) - - def parse_response(self, raw: JsonDict) -> JsonDict: - from adapters.cc_anthropic_adapter import messages_to_cc_response - return messages_to_cc_response(raw) - - def create_stream_processor(self) -> AnthropicToResponsesStreamProcessor: - return AnthropicToResponsesStreamProcessor() - - -class AnthropicToResponsesStreamProcessor: - """Anthropic SSE → Responses SSE 直接转换的流式处理器。 - - 跳过 CC 中间态,直接将 Anthropic 事件映射为 Responses 事件。 - 返回的 chunk 是 SSE 字符串。 - """ - - def __init__(self): - self._converter = ResponsesStreamConverter() - - def iter_events(self, response) -> Iterator: - from utils.http import iter_anthropic_sse - yield from iter_anthropic_sse(response) - - def process_event(self, event: tuple) -> list[str]: - event_type, event_data = event - return self._converter.process_anthropic_event(event_type, event_data) - - def extract_usage(self, event: tuple) -> JsonDict | None: - return None - - def finalize(self) -> list[str]: - return self._converter.finalize() - - def _convert_cc_tools_to_responses(tools: Any) -> list[JsonDict]: """将聊天补全风格的工具定义转换为 Responses `tools` 列表。""" if not isinstance(tools, list): diff --git a/adapters/unified.py b/adapters/unified.py deleted file mode 100644 index db2e087..0000000 --- a/adapters/unified.py +++ /dev/null @@ -1,354 +0,0 @@ -"""统一中间格式与转换器接口 - -定义项目中所有 API 格式共用的中间表示和转换器协议: -- UnifiedRequest / UnifiedResponse: 统一的请求/响应数据结构 -- InboundTransformer / OutboundTransformer: 入站/出站转换器接口 -- StreamProcessor: 流式事件处理器接口 -- ClientFormatter: 客户端响应格式化接口 -""" - -from __future__ import annotations - -import json -import logging -from dataclasses import dataclass, field -from typing import Any, Iterator, Protocol - -from flask import Response, jsonify - -import settings -from utils.http import forward_request, gen_id, sse_response -from utils.request_logger import ( - append_client_event, - append_upstream_event, - attach_client_response, - attach_error, - attach_upstream_request, - attach_upstream_response, - finalize_turn, - set_stream_summary, -) -from utils.usage_tracker import usage_tracker - -logger = logging.getLogger(__name__) - -JsonDict = dict[str, Any] - - -# ═══════════════════════════════════════════════════════════ -# 统一数据模型 -# ═══════════════════════════════════════════════════════════ - - -@dataclass -class UnifiedUsage: - """标准化的令牌用量统计。""" - - input_tokens: int = 0 - output_tokens: int = 0 - total_tokens: int = 0 - - def to_cc_dict(self) -> JsonDict: - return { - 'prompt_tokens': self.input_tokens, - 'completion_tokens': self.output_tokens, - 'total_tokens': self.total_tokens, - } - - def to_responses_dict(self) -> JsonDict: - return { - 'input_tokens': self.input_tokens, - 'output_tokens': self.output_tokens, - 'total_tokens': self.total_tokens, - } - - @classmethod - def from_cc_dict(cls, d: JsonDict) -> UnifiedUsage: - return cls( - input_tokens=d.get('prompt_tokens', 0), - output_tokens=d.get('completion_tokens', 0), - total_tokens=d.get('total_tokens', 0), - ) - - @classmethod - def from_responses_dict(cls, d: JsonDict) -> UnifiedUsage: - return cls( - input_tokens=d.get('input_tokens', 0), - output_tokens=d.get('output_tokens', 0), - total_tokens=d.get('total_tokens', 0), - ) - - -# ═══════════════════════════════════════════════════════════ -# 转换器接口 -# ═══════════════════════════════════════════════════════════ - - -class OutboundTransformer(Protocol): - """出站转换器:将 CC 中间格式转换为上游后端格式。 - - 所有后端(OpenAI Chat / Responses / Anthropic / Gemini)各实现一套, - 内部复用各自现有的适配器函数。 - """ - - def build_request(self, payload: JsonDict) -> JsonDict: - """将 CC 格式请求体转换为上游格式请求体。""" - ... - - def build_url(self, ctx: Any) -> str: - """根据路由上下文构建上游请求 URL。""" - ... - - def build_headers(self, ctx: Any) -> JsonDict: - """根据路由上下文构建上游请求头。""" - ... - - def parse_response(self, raw: JsonDict) -> JsonDict: - """将上游非流式响应转换回 CC 格式。""" - ... - - def create_stream_processor(self) -> StreamProcessor: - """创建该后端对应的流式事件处理器。""" - ... - - -class StreamProcessor(Protocol): - """流式事件处理器接口。 - - 每个后端的 SSE 格式不同,StreamProcessor 封装了具体的迭代与转换逻辑, - 让通用流式处理器不必关心后端差异。 - """ - - def iter_events(self, response: Any) -> Iterator: - """从上游 HTTP 响应中迭代原始事件。""" - ... - - def process_event(self, event: Any) -> list: - """将单个上游事件转换为输出项列表。 - - 返回值通常是 list[JsonDict](CC chunk), - 但 Anthropic→Responses 路径返回 list[str](SSE 字符串)。 - """ - ... - - def extract_usage(self, event: Any) -> JsonDict | None: - """从上游事件中提取用量信息(如果有的话)。""" - ... - - def finalize(self) -> list: - """流结束时产出的收尾项。""" - ... - - -class ClientFormatter(Protocol): - """客户端响应格式化器。 - - 根据客户端期望的 API 格式(CC 或 Responses),将通用的处理结果 - 格式化为最终返回给客户端的形态。 - """ - - def format_response(self, cc_response: JsonDict, model: str) -> JsonDict: - """格式化非流式响应。""" - ... - - def wrap_stream_item(self, item: Any) -> str: - """将单个流式输出项包装为 SSE 字符串。""" - ... - - def format_error(self, message: str) -> str: - """构造流式错误消息。""" - ... - - def format_done(self) -> str | None: - """构造流结束标记(CC 返回 [DONE],Responses 返回 None)。""" - ... - - def start_events(self) -> list[str]: - """流开始前的初始事件(Responses 返回 response.created)。""" - ... - - @property - def usage_input_key(self) -> str: - """usage 中输入令牌的字段名。""" - ... - - @property - def usage_output_key(self) -> str: - """usage 中输出令牌的字段名。""" - ... - - -# ═══════════════════════════════════════════════════════════ -# 通用请求/响应处理器 -# ═══════════════════════════════════════════════════════════ - - -def _dbg(message: str) -> None: - if settings.get_debug_mode() in ('simple', 'verbose'): - logger.info('[通用调试] %s', message) - - -def extract_responses_usage(event_data: JsonDict) -> JsonDict | None: - """从原生 Responses 事件中提取 usage(公共辅助)。""" - if not isinstance(event_data, dict): - return None - usage = event_data.get('usage') - if isinstance(usage, dict): - return usage - response_obj = event_data.get('response') - if isinstance(response_obj, dict): - nested_usage = response_obj.get('usage') - if isinstance(nested_usage, dict): - return nested_usage - return None - - -def handle_non_stream( - ctx: Any, - outbound: OutboundTransformer, - client_fmt: ClientFormatter, - payload: JsonDict, - turn: JsonDict | None, -) -> Response: - """通用非流式处理器。 - - 替代 chat.py 和 responses.py 中的 8 个 _handle_xxx_non_stream 函数。 - """ - from routes.common import apply_body_modifications, apply_header_modifications, log_usage - - upstream_payload = outbound.build_request(payload) - url = outbound.build_url(ctx) - headers = outbound.build_headers(ctx) - upstream_payload = apply_body_modifications(upstream_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - upstream_payload['stream'] = False - attach_upstream_request(turn, upstream_payload, headers) - resp, err = forward_request(url, headers, upstream_payload) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) - finalize_turn(turn) - return err - - raw = resp.json() - attach_upstream_response(turn, raw) - _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) - - cc_response = outbound.parse_response(raw) - result = client_fmt.format_response(cc_response, ctx.client_model) - - _dbg('格式化后响应=' + json.dumps(result, ensure_ascii=False, default=str)[:1000]) - usage_data = result.get('usage', {}) - log_usage('通用', usage_data, input_key=client_fmt.usage_input_key, output_key=client_fmt.usage_output_key) - usage_tracker.record( - ctx.client_model, - usage_data, - input_key=client_fmt.usage_input_key, - output_key=client_fmt.usage_output_key, - ) - attach_client_response(turn, result) - finalize_turn(turn, usage=usage_data) - return jsonify(result) - - -def handle_stream( - ctx: Any, - outbound: OutboundTransformer, - client_fmt: ClientFormatter, - payload: JsonDict, - turn: JsonDict | None, -) -> Response: - """通用流式处理器。 - - 替代 chat.py 和 responses.py 中的 8 个 _handle_xxx_stream 函数。 - """ - from routes.common import apply_body_modifications, apply_header_modifications - - upstream_payload = outbound.build_request(payload) - url = outbound.build_url(ctx) - headers = outbound.build_headers(ctx) - upstream_payload = apply_body_modifications(upstream_payload, ctx.body_modifications) - headers = apply_header_modifications(headers, ctx.header_modifications) - - upstream_payload['stream'] = True - processor = outbound.create_stream_processor() - - def generate(): - for start_evt in client_fmt.start_events(): - yield start_evt - - attach_upstream_request(turn, upstream_payload, headers) - resp, err = forward_request(url, headers, upstream_payload, stream=True) - if err: - attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) - set_stream_summary(turn, {'status': 'error'}) - finalize_turn(turn) - yield client_fmt.format_error(str(err)) - return - - event_count = 0 - client_items: list[str] = [] - last_usage: JsonDict | None = None - - for event in processor.iter_events(resp): - append_upstream_event(turn, {'type': 'upstream_event', 'data': event}) - - extracted = processor.extract_usage(event) - if extracted is not None: - last_usage = extracted - - if event_count < 10: - _dbg( - f'上游事件#{event_count}=' - + json.dumps(event, ensure_ascii=False, default=str)[:500] - ) - - for chunk in processor.process_event(event): - if isinstance(chunk, dict): - chunk['model'] = ctx.client_model - wrapped = client_fmt.wrap_stream_item(chunk) - client_items.append(wrapped) - append_client_event(turn, {'type': 'stream_item', 'data': chunk}) - if event_count < 10: - _dbg( - f'返回片段#{event_count}=' - + json.dumps(chunk, ensure_ascii=False, default=str)[:500] - ) - yield wrapped - - event_count += 1 - - for chunk in processor.finalize(): - if isinstance(chunk, dict): - chunk['model'] = ctx.client_model - wrapped = client_fmt.wrap_stream_item(chunk) - client_items.append(wrapped) - append_client_event(turn, {'type': 'stream_item', 'data': chunk}) - yield wrapped - - done = client_fmt.format_done() - if done: - append_client_event(turn, {'type': 'done'}) - yield done - - _dbg(f'流式响应结束,共 {event_count} 个事件') - usage_tracker.record( - ctx.client_model, - last_usage, - input_key=client_fmt.usage_input_key, - output_key=client_fmt.usage_output_key, - ) - set_stream_summary(turn, { - 'event_count': event_count, - 'client_item_count': len(client_items), - 'usage': last_usage, - }) - attach_client_response(turn, { - 'type': 'stream.summary', - 'model': ctx.client_model, - 'event_count': len(client_items), - 'usage': last_usage, - }) - finalize_turn(turn, usage=last_usage) - - return sse_response(generate()) diff --git a/routes/chat.py b/routes/chat.py index 7c72413..be4f775 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -1,7 +1,8 @@ """路由: /v1/chat/completions 处理 Cursor 发来的 OpenAI Chat Completions 格式请求。 -根据模型映射的后端类型,通过统一的出站转换器转发到不同后端。 +根据模型映射的后端类型,转发到 OpenAI 兼容接口、Anthropic Messages 接口, +或原生 OpenAI Responses 接口。 """ from __future__ import annotations @@ -10,34 +11,103 @@ import json import logging from typing import Any +import settings from flask import Blueprint, jsonify, request -from adapters.openai_compat_fixer import normalize_request -from adapters.responses_cc_adapter import responses_to_cc -from adapters.unified import handle_non_stream, handle_stream -from routes.common import ( - CCClientFormatter, - build_route_context, - get_outbound, - inject_instructions_cc, - log_route_context, - should_inject_thinking, +from adapters.cc_anthropic_adapter import ( + AnthropicStreamConverter, + cc_to_messages_request, + messages_to_cc_response, ) -from utils.request_logger import start_turn +from adapters.cc_gemini_adapter import ( + GeminiStreamConverter, + cc_to_gemini_request, + gemini_to_cc_response, +) +from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request +from adapters.responses_cc_adapter import ( + ResponsesToCCStreamConverter, + cc_to_responses_request, + responses_to_cc, + responses_to_cc_response, +) +from config import Config +from routes.common import ( + RouteContext, + apply_body_modifications, + apply_header_modifications, + build_anthropic_target, + build_gemini_target, + build_openai_target, + build_responses_target, + build_route_context, + chat_error_chunk, + inject_instructions_anthropic, + inject_instructions_cc, + inject_instructions_responses, + log_route_context, + log_usage, + sse_data_message, +) +from utils.http import ( + forward_request, + gen_id, + iter_anthropic_sse, + iter_gemini_sse, + iter_openai_sse, + iter_responses_sse, + sse_response, +) +from utils.request_logger import ( + append_client_event, + append_upstream_event, + attach_client_response, + attach_error, + attach_upstream_request, + attach_upstream_response, + finalize_turn, + set_stream_summary, + start_turn, +) +from utils.think_tag import ThinkTagExtractor from utils.thinking_cache import thinking_cache +from utils.usage_tracker import usage_tracker logger = logging.getLogger(__name__) bp = Blueprint('chat', __name__) +def _dbg(message: str) -> None: + """仅在调试模式下输出详细日志。""" + if settings.get_debug_mode() in ('simple', 'verbose'): + logger.info('[聊天补全调试] %s', message) + + +def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None: + """从原生 Responses 事件中提取 usage。 + + `/v1/chat/completions -> /v1/responses` 的桥接流式路径也需要读取 usage, + 因此在本文件保留一个本地辅助函数,避免依赖其他路由模块的私有实现。 + """ + if not isinstance(event_data, dict): + return None + usage = event_data.get('usage') + if isinstance(usage, dict): + return usage + response_obj = event_data.get('response') + if isinstance(response_obj, dict): + nested_usage = response_obj.get('usage') + if isinstance(nested_usage, dict): + return nested_usage + return None + + @bp.route('/v1/chat/completions', methods=['POST']) def chat_completions(): """处理聊天补全请求并按模型映射分发到不同后端。""" original_payload = request.get_json(force=True) - payload, message_count = _normalize_chat_payload( - json.loads(json.dumps(original_payload, ensure_ascii=False, default=str)) - ) + payload, message_count = _normalize_chat_payload(json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))) client_model = payload.get('model', 'unknown') is_stream = payload.get('stream', False) @@ -57,39 +127,23 @@ def chat_completions(): log_route_context('聊天补全', ctx, extra=f'消息数={message_count}') _log_messages(payload) - payload['model'] = ctx.upstream_model - payload = normalize_request(payload) - if should_inject_thinking(ctx.backend): + if ctx.backend != 'responses': payload['messages'] = thinking_cache.inject(payload.get('messages', [])) - payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) - outbound = get_outbound(ctx.backend) - client_fmt = CCClientFormatter() - - if ctx.is_stream: - result = handle_stream(ctx, outbound, client_fmt, payload, turn) - else: - result = handle_non_stream(ctx, outbound, client_fmt, payload, turn) - - if not ctx.is_stream and isinstance(result, tuple): - response_data = result - elif hasattr(result, 'json'): - try: - response_data = result.get_json(silent=True) or {} - except Exception: - response_data = {} - else: - response_data = {} - - _try_cache_thinking(response_data) - return result + if ctx.backend == 'openai': + return _handle_openai_backend(ctx, payload, turn) + if ctx.backend == 'responses': + return _handle_responses_backend(ctx, payload, turn) + if ctx.backend == 'gemini': + return _handle_gemini_backend(ctx, payload, turn) + return _handle_anthropic_backend(ctx, payload, turn) def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]: """整理聊天补全入口的请求体。 - 当 Cursor 或调用方把 Responses 格式误发到 `/v1/chat/completions` 时, - 先降级转换成 Chat Completions,再进入统一主流程。 + 这里保留了一层兼容逻辑:当 Cursor 或调用方把 Responses 格式误发到 + `/v1/chat/completions` 时,先降级转换成 Chat Completions,再进入统一主流程。 """ message_count = len(payload.get('messages', [])) @@ -103,11 +157,548 @@ def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], in return payload, message_count -def _try_cache_thinking(response_data: dict[str, Any]) -> None: - """尝试从非流式响应中缓存思维链内容。""" - if not isinstance(response_data, dict): - return - for choice in response_data.get('choices', []): +def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any]): + """处理走 OpenAI 兼容后端的聊天补全请求。""" + _dbg( + '原始请求字段=' + str(list(payload.keys())) + ' ' + + '附加字段=' + + json.dumps( + {k: v for k, v in payload.items() if k != 'messages'}, + ensure_ascii=False, + default=str, + )[:500] + ) + + payload = normalize_request(payload, ctx.upstream_model) + payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) + _dbg( + f'标准化完成:模型={payload.get("model")} ' + f'工具数={len(payload.get("tools", []))}' + ) + + url, headers = build_openai_target(ctx) + payload = apply_body_modifications(payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + if ctx.is_stream: + return _handle_openai_stream(ctx, payload, url, headers, turn) + return _handle_openai_non_stream(ctx, payload, url, headers, turn) + + +def _handle_openai_non_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any], +): + """处理 OpenAI 兼容后端的非流式返回。""" + payload['stream'] = False + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + data = fix_response(raw) + return _finalize_chat_response(ctx, data, turn=turn, debug_label='修复后响应') + + +def _handle_openai_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any], +): + """处理 OpenAI 兼容后端的流式返回。""" + payload['stream'] = True + + def generate(): + """消费上游 OpenAI SSE,并逐段产出给 Cursor 的聊天补全流。""" + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield chat_error_chunk(str(err)) + return + + think_extractor = ThinkTagExtractor() + chunk_count = 0 + last_usage = None + client_chunks: list[dict[str, Any]] = [] + + for chunk in iter_openai_sse(resp): + if chunk is None: + _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') + close_chunk = think_extractor.finalize() + if close_chunk: + client_chunks.append(close_chunk) + append_client_event(turn, {'type': 'chat_chunk', 'data': close_chunk}) + yield sse_data_message(close_chunk) + append_client_event(turn, {'type': 'done'}) + yield sse_data_message('[DONE]') + usage_tracker.record(ctx.client_model, last_usage) + set_stream_summary(turn, { + 'chunk_count': chunk_count, + 'client_chunk_count': len(client_chunks), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'chat.completion.stream.summary', + 'model': ctx.client_model, + 'chunk_count': len(client_chunks), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + return + + append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk}) + if chunk.get('usage'): + last_usage = chunk['usage'] + + if chunk_count < 10: + _dbg( + f'上游原始片段#{chunk_count}=' + + json.dumps(chunk, ensure_ascii=False, default=str)[:500] + ) + + chunk = fix_stream_chunk(chunk) + chunk['model'] = ctx.client_model + + for out in think_extractor.process_chunk(chunk): + client_chunks.append(out) + append_client_event(turn, {'type': 'chat_chunk', 'data': out}) + if chunk_count < 10: + _dbg( + f'返回片段#{chunk_count}=' + + json.dumps(out, ensure_ascii=False, default=str)[:500] + ) + yield sse_data_message(out) + + chunk_count += 1 + + usage_tracker.record(ctx.client_model, last_usage) + set_stream_summary(turn, { + 'chunk_count': chunk_count, + 'client_chunk_count': len(client_chunks), + 'usage': last_usage, + 'ended_without_done': True, + }) + attach_client_response(turn, { + 'type': 'chat.completion.stream.summary', + 'model': ctx.client_model, + 'chunk_count': len(client_chunks), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) + + +def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): + """处理走原生 Responses 后端的聊天补全请求。 + + 当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求, + 返回时再转换回聊天补全协议。 + """ + responses_payload = cc_to_responses_request(payload) + responses_payload['model'] = ctx.upstream_model + responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) + _dbg( + '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) + + f' 输入项数={len(responses_payload.get("input", []))}' + ) + + url, headers = build_responses_target(ctx) + responses_payload = apply_body_modifications(responses_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + if ctx.is_stream: + return _handle_responses_stream(ctx, responses_payload, url, headers, turn) + return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn) + + +def _handle_responses_non_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理原生 Responses 后端的非流式返回。""" + payload['stream'] = False + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + data = responses_to_cc_response(raw, ctx.client_model) + return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后') + + +def _handle_responses_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理原生 Responses 后端的流式返回。""" + payload['stream'] = True + converter = ResponsesToCCStreamConverter(model=ctx.client_model) + + def generate(): + """消费上游 Responses 事件,并实时转换成聊天补全 chunk。""" + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield chat_error_chunk(str(err)) + return + + event_count = 0 + client_chunks: list[Any] = [] + last_usage: dict[str, Any] | None = None + for event_type, event_data in iter_responses_sse(resp): + append_upstream_event(turn, {'type': event_type, 'data': event_data}) + extracted_usage = _extract_responses_usage(event_data) + if extracted_usage: + last_usage = { + 'prompt_tokens': extracted_usage.get('input_tokens', 0), + 'completion_tokens': extracted_usage.get('output_tokens', 0), + 'total_tokens': extracted_usage.get('total_tokens', 0), + } + if event_count < 10: + _dbg( + f'上游事件#{event_count} 类型={event_type} 数据=' + + json.dumps(event_data, ensure_ascii=False, default=str)[:500] + ) + + for chunk in converter.process_event(event_type, event_data): + client_chunks.append(chunk) + append_client_event(turn, {'type': 'chat_chunk', 'data': chunk}) + if isinstance(chunk, dict) and isinstance(chunk.get('usage'), dict): + last_usage = chunk['usage'] + if event_count < 10: + _dbg( + f'返回片段#{event_count}=' + + json.dumps(chunk, ensure_ascii=False, default=str)[:500] + ) + yield sse_data_message(chunk) + + event_count += 1 + + _dbg(f'流式响应结束,共 {event_count} 个事件') + append_client_event(turn, {'type': 'done'}) + yield sse_data_message('[DONE]') + usage_tracker.record(ctx.client_model, last_usage) + set_stream_summary(turn, { + 'event_count': event_count, + 'client_chunk_count': len(client_chunks), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'chat.completion.stream.summary', + 'model': ctx.client_model, + 'chunk_count': len(client_chunks), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) + + +def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): + """处理走 Gemini Contents 后端的聊天补全请求。""" + payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) + gemini_payload = cc_to_gemini_request(payload) + _dbg( + '已转换为 Gemini 请求:字段=' + str(list(gemini_payload.keys())) + + f' 内容数={len(gemini_payload.get("contents", []))}' + ) + + url, headers = build_gemini_target(ctx, stream=ctx.is_stream) + gemini_payload = apply_body_modifications(gemini_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + if ctx.is_stream: + return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn) + return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn) + + +def _handle_gemini_non_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Gemini 后端的非流式返回。""" + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + data = gemini_to_cc_response(raw) + return _finalize_chat_response(ctx, data, turn=turn, debug_label='Gemini 转回聊天补全后') + + +def _handle_gemini_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Gemini 后端的流式返回。""" + converter = GeminiStreamConverter() + + def generate(): + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield chat_error_chunk(str(err)) + return + + chunk_count = 0 + client_chunks: list[Any] = [] + last_usage: dict[str, Any] | None = None + for gemini_chunk in iter_gemini_sse(resp): + append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk}) + usage_meta = gemini_chunk.get('usageMetadata') if isinstance(gemini_chunk, dict) else None + if isinstance(usage_meta, dict): + last_usage = { + 'prompt_tokens': usage_meta.get('promptTokenCount', 0), + 'completion_tokens': usage_meta.get('candidatesTokenCount', 0), + 'total_tokens': usage_meta.get('totalTokenCount', 0), + } + if chunk_count < 10: + _dbg( + f'上游 Gemini 片段#{chunk_count}=' + + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500] + ) + + for cc_chunk in converter.process_chunk(gemini_chunk): + cc_chunk['model'] = ctx.client_model + client_chunks.append(cc_chunk) + append_client_event(turn, {'type': 'chat_chunk', 'data': cc_chunk}) + if isinstance(cc_chunk, dict) and isinstance(cc_chunk.get('usage'), dict): + last_usage = cc_chunk['usage'] + if chunk_count < 10: + _dbg( + f'返回片段#{chunk_count}=' + + json.dumps(cc_chunk, ensure_ascii=False, default=str)[:500] + ) + yield sse_data_message(cc_chunk) + + chunk_count += 1 + + _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') + append_client_event(turn, {'type': 'done'}) + yield sse_data_message('[DONE]') + usage_tracker.record(ctx.client_model, last_usage) + set_stream_summary(turn, { + 'chunk_count': chunk_count, + 'client_chunk_count': len(client_chunks), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'chat.completion.stream.summary', + 'model': ctx.client_model, + 'chunk_count': len(client_chunks), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) + + +def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): + """处理走 Anthropic Messages 后端的聊天补全请求。""" + payload['model'] = ctx.upstream_model + anthropic_payload = cc_to_messages_request(payload) + anthropic_payload = inject_instructions_anthropic(anthropic_payload, ctx.custom_instructions, ctx.instructions_position) + _dbg( + '已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys())) + + f' 消息数={len(anthropic_payload.get("messages", []))}' + ) + + url, headers = build_anthropic_target(ctx) + anthropic_payload = apply_body_modifications(anthropic_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + if ctx.is_stream: + return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn) + return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn) + + +def _handle_anthropic_non_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Anthropic 后端的非流式返回。""" + payload['stream'] = False + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + data = messages_to_cc_response(raw) + return _finalize_chat_response(ctx, data, turn=turn, debug_label='Messages 转回聊天补全后') + + +def _handle_anthropic_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Anthropic 后端的流式返回。 + + 这里仍然保留独立的事件级转换器,而不是先落成完整响应再回放, + 是为了尽量保持 Cursor 端的流式体验和工具调用时序。 + """ + payload['stream'] = True + converter = AnthropicStreamConverter() + + def generate(): + """消费上游 Anthropic 事件流,并逐步映射为聊天补全 SSE。""" + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield chat_error_chunk(str(err)) + return + + event_count = 0 + client_chunks: list[Any] = [] + last_usage: dict[str, Any] | None = None + for event_type, event_data in iter_anthropic_sse(resp): + append_upstream_event(turn, {'type': event_type, 'data': event_data}) + if event_type == 'message_start': + message_usage = event_data.get('message', {}).get('usage', {}) + if isinstance(message_usage, dict): + last_usage = { + 'prompt_tokens': message_usage.get('input_tokens', 0), + 'completion_tokens': 0, + 'total_tokens': message_usage.get('input_tokens', 0), + } + elif event_type == 'message_delta': + delta_usage = event_data.get('usage', {}) + if isinstance(delta_usage, dict): + prompt_tokens = 0 + if isinstance(last_usage, dict): + prompt_tokens = last_usage.get('prompt_tokens', 0) + completion_tokens = delta_usage.get('output_tokens', 0) + last_usage = { + 'prompt_tokens': prompt_tokens, + 'completion_tokens': completion_tokens, + 'total_tokens': prompt_tokens + completion_tokens, + } + if event_count < 10: + _dbg( + f'上游事件#{event_count} 类型={event_type} 数据=' + + json.dumps(event_data, ensure_ascii=False, default=str)[:500] + ) + + for chunk_str in converter.process_event(event_type, event_data): + try: + chunk_obj = json.loads(chunk_str) + chunk_obj['model'] = ctx.client_model + if isinstance(chunk_obj.get('usage'), dict): + last_usage = chunk_obj['usage'] + chunk_str = json.dumps(chunk_obj, ensure_ascii=False) + except (json.JSONDecodeError, TypeError): + pass + + client_chunks.append(chunk_str) + append_client_event(turn, {'type': 'chat_chunk', 'data': chunk_str}) + if event_count < 10: + _dbg(f'返回片段#{event_count}={chunk_str[:500]}') + yield sse_data_message(chunk_str) + + event_count += 1 + + _dbg(f'流式响应结束,共 {event_count} 个事件') + append_client_event(turn, {'type': 'done'}) + yield sse_data_message('[DONE]') + usage_tracker.record(ctx.client_model, last_usage) + set_stream_summary(turn, { + 'event_count': event_count, + 'client_chunk_count': len(client_chunks), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'chat.completion.stream.summary', + 'model': ctx.client_model, + 'chunk_count': len(client_chunks), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) + + +def _finalize_chat_response( + ctx: RouteContext, + data: dict[str, Any], + *, + turn: dict[str, Any] | None, + debug_label: str, +): + """统一收尾非流式聊天补全响应。 + + 三条后端链路最终都会回到 Chat Completions 格式,因此这里集中做: + - 回填给 Cursor 展示的模型名 + - 输出统一调试日志 + - 输出统一令牌统计日志 + """ + data['model'] = ctx.client_model + _dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000]) + log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens') + + usage_tracker.record(ctx.client_model, data.get('usage')) + attach_client_response(turn, data) + finalize_turn(turn, usage=data.get('usage')) + + for choice in data.get('choices', []): msg = choice.get('message', {}) if msg.get('reasoning_content'): thinking_cache.store_from_response( @@ -116,6 +707,8 @@ def _try_cache_thinking(response_data: dict[str, Any]) -> None: ) break + return jsonify(data) + def _log_messages(payload: dict[str, Any]) -> None: """记录消息摘要,方便排查请求形态是否符合预期。""" diff --git a/routes/common.py b/routes/common.py index 654900e..0ad7518 100644 --- a/routes/common.py +++ b/routes/common.py @@ -12,6 +12,7 @@ import logging from typing import Any import settings +from utils.http import build_anthropic_headers, build_gemini_headers, build_openai_headers logger = logging.getLogger(__name__) @@ -54,6 +55,42 @@ def build_route_context(client_model: str, is_stream: bool) -> RouteContext: ) +def build_openai_target(ctx: RouteContext) -> tuple[str, dict[str, str]]: + """根据路由上下文生成 OpenAI 兼容后端的地址和请求头。""" + url = f'{ctx.target_url.rstrip("/")}/v1/chat/completions' + headers = build_openai_headers(ctx.api_key) + return url, headers + + +def build_responses_target(ctx: RouteContext) -> tuple[str, dict[str, str]]: + """根据路由上下文生成 OpenAI Responses 后端的地址和请求头。""" + url = f'{ctx.target_url.rstrip("/")}/v1/responses' + headers = build_openai_headers(ctx.api_key) + return url, headers + + +def build_anthropic_target(ctx: RouteContext) -> tuple[str, dict[str, str]]: + """根据路由上下文生成 Anthropic 后端的地址和请求头。""" + url = f'{ctx.target_url.rstrip("/")}/v1/messages' + headers = build_anthropic_headers(ctx.api_key) + return url, headers + + +def build_gemini_target(ctx: RouteContext, stream: bool = False) -> tuple[str, dict[str, str]]: + """根据路由上下文生成 Gemini 后端的地址和请求头。 + + Gemini URL 格式: {base}/v1/models/{model}:generateContent + 流式: {base}/v1/models/{model}:streamGenerateContent?alt=sse + """ + base = ctx.target_url.rstrip('/') + model = ctx.upstream_model + if stream: + url = f'{base}/v1/models/{model}:streamGenerateContent?alt=sse' + else: + url = f'{base}/v1/models/{model}:generateContent' + headers = build_gemini_headers(ctx.api_key) + return url, headers + def log_route_context(route_name: str, ctx: RouteContext, *, extra: str = '') -> None: """统一输出路由级日志,避免不同入口的日志格式逐渐漂移。""" @@ -100,6 +137,11 @@ def sse_event_message(event_type: str, data: Any) -> str: return f'event: {event_type}\ndata: {payload}\n\n' +def chat_error_chunk(message: str, error_type: str = 'upstream_error') -> str: + """构造聊天补全流式接口使用的错误消息。""" + return sse_data_message({'error': {'message': message, 'type': error_type}}) + + def responses_error_event(message: str) -> str: """构造 Responses 流式接口使用的错误事件。""" return sse_event_message('error', {'error': message}) @@ -173,20 +215,6 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po return payload -def should_inject_thinking(backend: str) -> bool: - """判断当前后端是否需要注入历史 thinking。 - - 仅对明确能消费历史 reasoning/thinking 的后端启用: - - anthropic - - gemini - - responses - - OpenAI Chat 兼容后端通常不接受 `reasoning_content` 历史字段, - 若注入会导致上游报错,因此显式排除。 - """ - return backend in ('anthropic', 'gemini', 'responses') - - # ─── Body / Header 修改 ────────────────────────── @@ -220,140 +248,3 @@ def apply_header_modifications(headers: dict[str, str], modifications: dict[str, headers[key] = str(value) logger.info('已应用 header_modifications: %s', list(modifications.keys())) return headers - - -# ═══════════════════════════════════════════════════════════ -# 后端注册表 + ClientFormatter 实现 -# ═══════════════════════════════════════════════════════════ - - -def get_outbound(backend: str): - """根据后端类型获取对应的 OutboundTransformer 实例。""" - from adapters.cc_anthropic_adapter import AnthropicOutbound - from adapters.cc_gemini_adapter import GeminiOutbound - from adapters.openai_compat_fixer import OpenAIChatOutbound - from adapters.responses_cc_adapter import ResponsesOutbound - - registry = { - 'openai': OpenAIChatOutbound, - 'anthropic': AnthropicOutbound, - 'gemini': GeminiOutbound, - 'responses': ResponsesOutbound, - } - cls = registry.get(backend, OpenAIChatOutbound) - return cls() - - -class CCClientFormatter: - """Chat Completions 客户端格式化器。 - - 将通用处理结果格式化为 OpenAI Chat Completions 格式, - 供 /v1/chat/completions 端点使用。 - """ - - def format_response(self, cc_response: dict[str, Any], model: str) -> dict[str, Any]: - cc_response['model'] = model - return cc_response - - def wrap_stream_item(self, item: Any) -> str: - payload = item if isinstance(item, str) else json.dumps(item, ensure_ascii=False) - return f'data: {payload}\n\n' - - def format_error(self, message: str) -> str: - return sse_data_message({'error': {'message': message, 'type': 'upstream_error'}}) - - def format_done(self) -> str | None: - return sse_data_message('[DONE]') - - def start_events(self) -> list[str]: - return [] - - @property - def usage_input_key(self) -> str: - return 'prompt_tokens' - - @property - def usage_output_key(self) -> str: - return 'completion_tokens' - - -class ResponsesClientFormatter: - """Responses API 客户端格式化器。 - - 将通用处理结果格式化为 OpenAI Responses 格式, - 供 /v1/responses 端点使用。 - - 流式场景使用 ResponsesStreamConverter 做 CC chunk → Responses SSE 转换。 - """ - - def __init__(self, model: str = ''): - from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses - self._model = model - self._converter = ResponsesStreamConverter(model=model) - self._cc_to_responses = cc_to_responses - - def format_response(self, cc_response: dict[str, Any], model: str) -> dict[str, Any]: - return self._cc_to_responses(cc_response, model) - - def wrap_stream_item(self, item: Any) -> str: - if isinstance(item, str): - return item - events = self._converter.process_cc_chunk(item) - return ''.join(events) - - def format_error(self, message: str) -> str: - return responses_error_event(message) - - def format_done(self) -> str | None: - events = self._converter.finalize() - return ''.join(events) if events else None - - def start_events(self) -> list[str]: - return self._converter.start_events() - - @property - def usage_input_key(self) -> str: - return 'input_tokens' - - @property - def usage_output_key(self) -> str: - return 'output_tokens' - - -class ResponsesPassthroughFormatter: - """Responses 透传格式化器。 - - 当后端本身就是 Responses 格式时使用,做轻量模型名改写。 - """ - - def __init__(self, model: str = ''): - self._model = model - - def format_response(self, response_data: dict[str, Any], model: str) -> dict[str, Any]: - response_data['model'] = model - return response_data - - def wrap_stream_item(self, item: Any) -> str: - if isinstance(item, str): - return item - event_type = item.pop('_sse_event_type', None) - if event_type: - return f'event: {event_type}\ndata: {json.dumps(item, ensure_ascii=False)}\n\n' - return f'data: {json.dumps(item, ensure_ascii=False)}\n\n' - - def format_error(self, message: str) -> str: - return responses_error_event(message) - - def format_done(self) -> str | None: - return None - - def start_events(self) -> list[str]: - return [] - - @property - def usage_input_key(self) -> str: - return 'input_tokens' - - @property - def usage_output_key(self) -> str: - return 'output_tokens' diff --git a/routes/responses.py b/routes/responses.py index ce1de4b..4889a40 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -1,7 +1,7 @@ """路由: /v1/responses 处理 Cursor 对 GPT、Claude-Opus 等模型发出的 Responses API 请求。 -请求先转换为 Chat Completions 中间表示,再通过统一出站转换器分发。 +请求会先转换为 Chat Completions 中间表示,再按后端类型分发,最后转换回 Responses 格式。 """ from __future__ import annotations @@ -13,31 +13,62 @@ from typing import Any import settings from flask import Blueprint, jsonify, request -from adapters.openai_compat_fixer import normalize_request -from adapters.responses_cc_adapter import ( - AnthropicOutboundForResponses, - ResponsesNativeOutbound, - responses_to_cc, -) -from adapters.unified import handle_non_stream, handle_stream +from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response +from adapters.cc_gemini_adapter import GeminiStreamConverter, cc_to_gemini_request, gemini_to_cc_response +from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request +from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc +from config import Config from routes.common import ( - ResponsesClientFormatter, - ResponsesPassthroughFormatter, + RouteContext, + apply_body_modifications, + apply_header_modifications, + build_anthropic_target, + build_gemini_target, + build_openai_target, + build_responses_target, build_route_context, - get_outbound, + inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, log_route_context, - should_inject_thinking, + log_usage, + responses_error_event, ) -from utils.request_logger import start_turn +from utils.http import ( + forward_request, + gen_id, + iter_anthropic_sse, + iter_gemini_sse, + iter_openai_sse, + iter_responses_sse, + sse_response, +) +from utils.request_logger import ( + append_client_event, + append_upstream_event, + attach_client_response, + attach_error, + attach_upstream_request, + attach_upstream_response, + finalize_turn, + set_stream_summary, + start_turn, +) +from utils.think_tag import ThinkTagExtractor from utils.thinking_cache import thinking_cache +from utils.usage_tracker import usage_tracker logger = logging.getLogger(__name__) bp = Blueprint('responses', __name__) +def _dbg(message: str) -> None: + """仅在调试模式下输出详细日志。""" + if settings.get_debug_mode() in ('simple', 'verbose'): + logger.info('[响应生成调试] %s', message) + + @bp.route('/v1/responses', methods=['POST']) def responses_endpoint(): """处理 Responses 请求并按模型映射分发。""" @@ -59,43 +90,543 @@ def responses_endpoint(): ) log_route_context('响应生成', ctx) - if ctx.backend == 'responses': - return _handle_native_responses(ctx, payload, turn) - cc_payload = _build_cc_payload(payload, ctx) - if ctx.backend == 'anthropic': - outbound = AnthropicOutboundForResponses() - else: - outbound = get_outbound(ctx.backend) + if ctx.backend == 'openai': + return _handle_openai_backend(ctx, cc_payload, turn) + if ctx.backend == 'responses': + return _handle_responses_backend(ctx, payload, turn) + if ctx.backend == 'gemini': + return _handle_gemini_backend(ctx, cc_payload, turn) + return _handle_anthropic_backend(ctx, cc_payload, turn) - client_fmt = ResponsesClientFormatter(model=ctx.client_model) + +def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]: + """将 Responses 请求统一降级为 Chat Completions 中间表示。 + + 这样后续无论走 OpenAI 兼容后端还是 Anthropic 后端,都能复用一套 + 中间协议,避免在路由层同时维护两套完全不同的请求编排逻辑。 + """ + cc_payload = responses_to_cc(payload) + cc_payload['model'] = ctx.upstream_model + cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) + cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position) + _dbg( + '已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys())) + + f' 消息数={len(cc_payload.get("messages", []))}' + ) + return cc_payload + + +def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]): + """处理走 OpenAI 兼容后端的 Responses 请求。""" + cc_payload = normalize_request(cc_payload) + _dbg( + f'标准化完成:模型={cc_payload.get("model")} ' + f'工具数={len(cc_payload.get("tools", []))}' + ) + + url, headers = build_openai_target(ctx) + cc_payload = apply_body_modifications(cc_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return handle_stream(ctx, outbound, client_fmt, cc_payload, turn) - return handle_non_stream(ctx, outbound, client_fmt, cc_payload, turn) + return _handle_openai_stream(ctx, cc_payload, url, headers, turn) + return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn) -def _handle_native_responses(ctx, payload: dict[str, Any], turn: dict[str, Any]): - """处理走原生 Responses 后端的请求(直接透传)。""" +def _handle_openai_non_stream( + ctx: RouteContext, + cc_payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any], +): + """处理 OpenAI 兼容后端的非流式 Responses 返回。""" + cc_payload['stream'] = False + attach_upstream_request(turn, cc_payload, headers) + resp, err = forward_request(url, headers, cc_payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + fixed = fix_response(raw) + response_data = cc_to_responses(fixed, ctx.client_model) + return _finalize_responses_response( + response_data, + client_model=ctx.client_model, + turn=turn, + debug_label='转换为 Responses 后', + ) + + +def _handle_openai_stream( + ctx: RouteContext, + cc_payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 OpenAI 兼容后端的流式 Responses 返回。""" + cc_payload['stream'] = True + converter = ResponsesStreamConverter(model=ctx.client_model) + + def generate(): + """消费 OpenAI 聊天补全流,并实时改写为 Responses SSE。""" + yield from converter.start_events() + + attach_upstream_request(turn, cc_payload, headers) + resp, err = forward_request(url, headers, cc_payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield responses_error_event(str(err)) + return + + think_extractor = ThinkTagExtractor() + chunk_count = 0 + client_events: list[str] = [] + + for chunk in iter_openai_sse(resp): + if chunk is None: + _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') + finalized_events = converter.finalize() + for item in finalized_events: + client_events.append(item) + append_client_event(turn, {'type': 'responses_event', 'data': item}) + yield item + usage_tracker.record(ctx.client_model) + set_stream_summary(turn, { + 'chunk_count': chunk_count, + 'client_event_count': len(client_events), + }) + attach_client_response(turn, { + 'type': 'responses.stream.summary', + 'model': ctx.client_model, + 'event_count': len(client_events), + }) + finalize_turn(turn) + return + + append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk}) + if chunk_count < 10: + _dbg( + f'上游原始片段#{chunk_count}=' + + json.dumps(chunk, ensure_ascii=False, default=str)[:500] + ) + + chunk = fix_stream_chunk(chunk) + for out in think_extractor.process_chunk(chunk): + for evt in converter.process_cc_chunk(out): + client_events.append(evt) + append_client_event(turn, {'type': 'responses_event', 'data': evt}) + if chunk_count < 10: + _dbg( + f'转换后片段#{chunk_count}=' + + json.dumps(out, ensure_ascii=False, default=str)[:500] + ) + yield evt + + chunk_count += 1 + + return sse_response(generate()) + + +def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): + """处理走原生 Responses 后端的请求。 + + 当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议, + 直接转发原生 Responses 请求即可。 + """ payload = dict(payload) payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) - - outbound = ResponsesNativeOutbound() - client_fmt = ResponsesPassthroughFormatter(model=ctx.client_model) + url, headers = build_responses_target(ctx) + payload = apply_body_modifications(payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return handle_stream(ctx, outbound, client_fmt, payload, turn) - return handle_non_stream(ctx, outbound, client_fmt, payload, turn) + return _handle_responses_stream(ctx, payload, url, headers, turn) + return _handle_responses_non_stream(ctx, payload, url, headers, turn) -def _build_cc_payload(payload: dict[str, Any], ctx) -> dict[str, Any]: - """将 Responses 请求统一降级为 Chat Completions 中间表示。""" - cc_payload = responses_to_cc(payload) - cc_payload['model'] = ctx.upstream_model - cc_payload = normalize_request(cc_payload) - if should_inject_thinking(ctx.backend): - cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', [])) - cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position) - return cc_payload +def _handle_responses_non_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理原生 Responses 后端的非流式返回。""" + payload['stream'] = False + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + response_data = resp.json() + attach_upstream_response(turn, response_data) + response_data['model'] = ctx.client_model + return _finalize_responses_response( + response_data, + client_model=ctx.client_model, + turn=turn, + debug_label='原生 Responses 返回后', + ) + + +def _handle_responses_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理原生 Responses 后端的流式返回。""" + payload['stream'] = True + converter = ResponsesStreamConverter(model=ctx.client_model) + + def generate(): + """透传上游原生 Responses 流,并做轻量模型名改写。""" + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield responses_error_event(str(err)) + return + + event_count = 0 + client_events: list[str] = [] + last_usage: dict[str, Any] | None = None + for event_type, event_data in iter_responses_sse(resp): + append_upstream_event(turn, {'type': event_type, 'data': event_data}) + extracted_usage = _extract_responses_usage(event_data) + if extracted_usage: + last_usage = extracted_usage + if event_count < 10: + _dbg( + f'上游事件#{event_count} 类型={event_type} 数据=' + + json.dumps(event_data, ensure_ascii=False, default=str)[:500] + ) + produced = converter.process_responses_event(event_type, event_data) + for evt in produced: + client_events.append(evt) + append_client_event(turn, {'type': 'responses_event', 'data': evt}) + yield evt + event_count += 1 + + _dbg(f'流式响应结束,共 {event_count} 个事件') + usage_tracker.record( + ctx.client_model, + last_usage, + input_key='input_tokens', + output_key='output_tokens', + ) + set_stream_summary(turn, { + 'event_count': event_count, + 'client_event_count': len(client_events), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'responses.stream.summary', + 'model': ctx.client_model, + 'event_count': len(client_events), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) + + +def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None: + """从原生 Responses 事件中提取 usage。 + + 原生 `/v1/responses` 流式通常会在 `response.completed` 事件里携带 usage, + 也可能直接挂在顶层 `usage` 字段。这里统一做兼容提取,供统计与日志复用。 + """ + if not isinstance(event_data, dict): + return None + usage = event_data.get('usage') + if isinstance(usage, dict): + return usage + response_obj = event_data.get('response') + if isinstance(response_obj, dict): + nested_usage = response_obj.get('usage') + if isinstance(nested_usage, dict): + return nested_usage + return None + + +def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None): + """处理走 Gemini Contents 后端的 Responses 请求。""" + gemini_payload = cc_to_gemini_request(cc_payload) + _dbg( + '已转换为 Gemini 请求:字段=' + str(list(gemini_payload.keys())) + + f' 内容数={len(gemini_payload.get("contents", []))}' + ) + + url, headers = build_gemini_target(ctx, stream=ctx.is_stream) + gemini_payload = apply_body_modifications(gemini_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + if ctx.is_stream: + return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn) + return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn) + + +def _handle_gemini_non_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Gemini 后端的非流式 Responses 返回。""" + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + cc_data = gemini_to_cc_response(raw) + response_data = cc_to_responses(cc_data, ctx.client_model) + return _finalize_responses_response( + response_data, + client_model=ctx.client_model, + turn=turn, + debug_label='Gemini 转回 Responses 后', + ) + + +def _handle_gemini_stream( + ctx: RouteContext, + payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Gemini 后端的流式 Responses 返回。""" + converter = ResponsesStreamConverter(model=ctx.client_model) + gemini_converter = GeminiStreamConverter() + + def generate(): + yield from converter.start_events() + + attach_upstream_request(turn, payload, headers) + resp, err = forward_request(url, headers, payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield responses_error_event(str(err)) + return + + chunk_count = 0 + client_events: list[str] = [] + last_usage: dict[str, Any] | None = None + for gemini_chunk in iter_gemini_sse(resp): + append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk}) + usage_meta = gemini_chunk.get('usageMetadata') if isinstance(gemini_chunk, dict) else None + if isinstance(usage_meta, dict): + last_usage = { + 'input_tokens': usage_meta.get('promptTokenCount', 0), + 'output_tokens': usage_meta.get('candidatesTokenCount', 0), + 'total_tokens': usage_meta.get('totalTokenCount', 0), + } + if chunk_count < 10: + _dbg( + f'上游 Gemini 片段#{chunk_count}=' + + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500] + ) + + for cc_chunk in gemini_converter.process_chunk(gemini_chunk): + for evt in converter.process_cc_chunk(cc_chunk): + client_events.append(evt) + append_client_event(turn, {'type': 'responses_event', 'data': evt}) + yield evt + + chunk_count += 1 + + _dbg(f'流式响应结束,共 {chunk_count} 个数据片段') + finalized_events = converter.finalize() + for evt in finalized_events: + client_events.append(evt) + append_client_event(turn, {'type': 'responses_event', 'data': evt}) + yield evt + usage_tracker.record( + ctx.client_model, + last_usage, + input_key='input_tokens', + output_key='output_tokens', + ) + set_stream_summary(turn, { + 'chunk_count': chunk_count, + 'client_event_count': len(client_events), + 'usage': last_usage, + }) + attach_client_response(turn, { + 'type': 'responses.stream.summary', + 'model': ctx.client_model, + 'event_count': len(client_events), + 'usage': last_usage, + }) + finalize_turn(turn, usage=last_usage) + + return sse_response(generate()) + + +def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None): + """处理走 Anthropic 后端的 Responses 请求。""" + anthropic_payload = cc_to_messages_request(cc_payload) + _dbg( + '已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys())) + + f' 消息数={len(anthropic_payload.get("messages", []))}' + ) + + url, headers = build_anthropic_target(ctx) + anthropic_payload = apply_body_modifications(anthropic_payload, ctx.body_modifications) + headers = apply_header_modifications(headers, ctx.header_modifications) + + if ctx.is_stream: + return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn) + return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn) + + +def _handle_anthropic_non_stream( + ctx: RouteContext, + anthropic_payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Anthropic 后端的非流式 Responses 返回。""" + anthropic_payload['stream'] = False + attach_upstream_request(turn, anthropic_payload, headers) + resp, err = forward_request(url, headers, anthropic_payload) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'}) + finalize_turn(turn) + return err + + raw = resp.json() + attach_upstream_response(turn, raw) + _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) + + cc_data = messages_to_cc_response(raw) + response_data = cc_to_responses(cc_data, ctx.client_model) + return _finalize_responses_response( + response_data, + client_model=ctx.client_model, + turn=turn, + debug_label='Messages 转回 Responses 后', + ) + + +def _handle_anthropic_stream( + ctx: RouteContext, + anthropic_payload: dict[str, Any], + url: str, + headers: dict[str, str], + turn: dict[str, Any] | None, +): + """处理 Anthropic 后端的流式 Responses 返回。 + + 这里直接将 Anthropic SSE 事件映射到 Responses SSE,故意跳过 CC 流式中间态, + 这样可以减少一次事件重组,降低流式转换复杂度,也更容易保留原始时序。 + """ + anthropic_payload['stream'] = True + converter = ResponsesStreamConverter(model=ctx.client_model) + + def generate(): + """消费 Anthropic SSE,并直接映射为 Responses 事件序列。""" + yield from converter.start_events() + + attach_upstream_request(turn, anthropic_payload, headers) + resp, err = forward_request(url, headers, anthropic_payload, stream=True) + if err: + attach_error(turn, {'stage': 'forward_request', 'message': str(err)}) + set_stream_summary(turn, {'status': 'error'}) + finalize_turn(turn) + yield responses_error_event(str(err)) + return + + event_count = 0 + client_events: list[str] = [] + for event_type, event_data in iter_anthropic_sse(resp): + append_upstream_event(turn, {'type': event_type, 'data': event_data}) + if event_count < 10: + _dbg( + f'上游事件#{event_count} 类型={event_type} 数据=' + + json.dumps(event_data, ensure_ascii=False, default=str)[:500] + ) + + produced = converter.process_anthropic_event(event_type, event_data) + for evt in produced: + client_events.append(evt) + append_client_event(turn, {'type': 'responses_event', 'data': evt}) + yield evt + event_count += 1 + + _dbg(f'流式响应结束,共 {event_count} 个事件') + finalized_events = converter.finalize() + for evt in finalized_events: + client_events.append(evt) + append_client_event(turn, {'type': 'responses_event', 'data': evt}) + yield evt + usage_tracker.record(ctx.client_model) + set_stream_summary(turn, { + 'event_count': event_count, + 'client_event_count': len(client_events), + }) + attach_client_response(turn, { + 'type': 'responses.stream.summary', + 'model': ctx.client_model, + 'event_count': len(client_events), + }) + finalize_turn(turn) + + return sse_response(generate()) + + +def _finalize_responses_response( + response_data: dict[str, Any], + *, + client_model: str, + turn: dict[str, Any], + debug_label: str, +): + """统一收尾非流式 Responses 响应。 + + 两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象,因此这里集中 + 处理调试日志、回填展示模型名以及 usage 日志。 + """ + response_data['model'] = response_data.get('model') or '' + _dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000]) + log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens') + + usage_tracker.record( + client_model, + response_data.get('usage'), + input_key='input_tokens', + output_key='output_tokens', + ) + + attach_client_response(turn, response_data) + finalize_turn(turn, usage=response_data.get('usage')) + + return jsonify(response_data) From cb7350b100b220d726d2b7342a64cbbee4800aa0 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 5 Apr 2026 22:13:30 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=E6=9B=B4=E6=96=B0docker-compose=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E5=90=8D=E7=A7=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- compose.yml => docker-compose.yml | 1 + 1 file changed, 1 insertion(+) rename compose.yml => docker-compose.yml (92%) diff --git a/compose.yml b/docker-compose.yml similarity index 92% rename from compose.yml rename to docker-compose.yml index 944b836..3d657de 100644 --- a/compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ services: api2cursor: build: . + container_name: api2cursor ports: - "${PROXY_PORT:-3029}:${PROXY_PORT:-3029}" env_file: From 251437a76085591f53f0f77c774b8fa009722d8d Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Tue, 14 Apr 2026 16:14:06 +0800 Subject: [PATCH 07/10] =?UTF-8?q?=E5=B0=9D=E8=AF=95=E4=BF=AE=E5=A4=8D/v1/r?= =?UTF-8?q?esponses=E5=90=8E=E7=AB=AF=E6=B2=A1=E6=9C=89=E5=91=BD=E4=B8=AD?= =?UTF-8?q?=E7=BC=93=E5=AD=98=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- routes/chat.py | 2 ++ routes/common.py | 18 ++++++++++++++++++ routes/responses.py | 2 ++ 3 files changed, 22 insertions(+) diff --git a/routes/chat.py b/routes/chat.py index be4f775..66e1e67 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -42,6 +42,7 @@ from routes.common import ( build_responses_target, build_route_context, chat_error_chunk, + ensure_prompt_cache_key, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -312,6 +313,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: responses_payload = cc_to_responses_request(payload) responses_payload['model'] = ctx.upstream_model responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) + responses_payload = ensure_prompt_cache_key(responses_payload) _dbg( '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) + f' 输入项数={len(responses_payload.get("input", []))}' diff --git a/routes/common.py b/routes/common.py index 0ad7518..9f57233 100644 --- a/routes/common.py +++ b/routes/common.py @@ -7,6 +7,7 @@ SSE 消息拼装逻辑,避免 `chat.py` 和 `responses.py` 各自维护重复 from __future__ import annotations from dataclasses import dataclass +import hashlib import json import logging from typing import Any @@ -218,6 +219,23 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po # ─── Body / Header 修改 ────────────────────────── +def ensure_prompt_cache_key(payload: dict[str, Any]) -> dict[str, Any]: + """确保 Responses 请求携带 prompt_cache_key 以启用上游提示缓存。 + + 上游(如 sub2api)对原生 /v1/responses 请求不会自动生成 prompt_cache_key, + 导致提示缓存无法命中。这里根据模型名 + instructions 生成稳定的 cache key, + 使得相同模型和系统提示的对话可以共享缓存前缀。 + """ + if payload.get('prompt_cache_key'): + return payload + + model = payload.get('model', '') + instructions = payload.get('instructions', '') + seed = f'{model}|{instructions}' + payload['prompt_cache_key'] = hashlib.sha256(seed.encode()).hexdigest()[:32] + return payload + + def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]: """对转发请求体应用字段级修改。 diff --git a/routes/responses.py b/routes/responses.py index 4889a40..2496a4b 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -27,6 +27,7 @@ from routes.common import ( build_openai_target, build_responses_target, build_route_context, + ensure_prompt_cache_key, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: payload = dict(payload) payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) + payload = ensure_prompt_cache_key(payload) url, headers = build_responses_target(ctx) payload = apply_body_modifications(payload, ctx.body_modifications) headers = apply_header_modifications(headers, ctx.header_modifications) From bec7b3e5ef973c7f6fe27eeaf3d16c81ae72592b Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Wed, 29 Apr 2026 11:14:00 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=BC=93=E5=AD=98?= =?UTF-8?q?=E5=91=BD=E4=B8=AD=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/cc_anthropic_adapter.py | 35 +++++++------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/adapters/cc_anthropic_adapter.py b/adapters/cc_anthropic_adapter.py index 7848d05..95aced6 100644 --- a/adapters/cc_anthropic_adapter.py +++ b/adapters/cc_anthropic_adapter.py @@ -582,38 +582,16 @@ _EPHEMERAL = {'type': 'ephemeral'} def optimize_cache_control(request: JsonDict) -> None: - """自动设置最优的 Anthropic cache_control 断点。 + """为 Anthropic Messages 请求启用顶层自动 prompt caching。 - 算法移植自 CursorProxy 的 ensure_cache_control.go: - 1. 归一化所有消息 content 为数组格式 - 2. 清空所有已有 cache_control - 3. 注入结构锚点(tools 末尾 + system 末尾) - 4. 注入消息锚点(最后一个可缓存块 + 窗口边界) - 5. 总断点数不超过 4 个 + 2026 版 Claude API 已支持在请求顶层使用 `cache_control` 开启自动缓存, + 由上游自动把断点放到最后一个可缓存块并随多轮对话前移。相比手动在嵌套 + content blocks 上打断点,这种方式对 Anthropic 兼容中转站更稳定,也更接近 + `/v1/responses` 通过顶层字段启用缓存的思路。 """ _normalize_message_contents(request) _clear_all_cache_controls(request) - - structural = _inject_structural_anchors(request) - remaining = _MAX_BREAKPOINTS - structural - if remaining <= 0: - return - - refs = _collect_cacheable_block_refs(request) - if not refs: - return - - desired = 1 if len(refs) < _BLOCK_WINDOW else 2 - anchors = min(desired, remaining) - - if anchors >= 1 and refs: - refs[-1]['cache_control'] = _EPHEMERAL - - if anchors >= 2 and len(refs) > 1: - target = len(refs) - _BLOCK_WINDOW - idx = _pick_window_anchor(refs, target) - if idx is not None and idx != len(refs) - 1: - refs[idx]['cache_control'] = _EPHEMERAL + request['cache_control'] = dict(_EPHEMERAL) def _normalize_message_contents(request: JsonDict) -> None: @@ -628,6 +606,7 @@ def _normalize_message_contents(request: JsonDict) -> None: def _clear_all_cache_controls(request: JsonDict) -> None: """清空所有已有的 cache_control 字段。""" + request.pop('cache_control', None) for tool in request.get('tools', []): tool.pop('cache_control', None) From e373295cf5d44715e829dc8e4d872afb189243a8 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 5 May 2026 13:42:35 +0800 Subject: [PATCH 09/10] add admin log --- routes/admin.py | 10 +++ routes/chat.py | 182 +++++++++++++++++++++++++++++++++------ routes/messages.py | 29 ++++++- routes/responses.py | 146 ++++++++++++++++++++++++++----- static/admin.css | 8 ++ static/admin.html | 10 +++ static/admin.js | 50 +++++++++++ utils/request_history.py | 111 ++++++++++++++++++++++++ 8 files changed, 495 insertions(+), 51 deletions(-) create mode 100644 utils/request_history.py diff --git a/routes/admin.py b/routes/admin.py index e8a9e77..612ee89 100644 --- a/routes/admin.py +++ b/routes/admin.py @@ -13,6 +13,7 @@ from flask import Blueprint, request, jsonify, send_from_directory import settings from config import Config +from utils.request_history import request_history logger = logging.getLogger(__name__) @@ -202,6 +203,15 @@ def get_stats(): return jsonify(usage_tracker.get_stats()) +@bp.route('/api/admin/request-logs', methods=['GET']) +def get_request_logs(): + """返回最近 500 条请求日志。""" + err = _check_auth() + if err: + return err + return jsonify({'items': request_history.get_recent(500)}) + + # ─── 内部辅助 ───────────────────────────────────── diff --git a/routes/chat.py b/routes/chat.py index 66e1e67..9532ed0 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -9,6 +9,7 @@ from __future__ import annotations import json import logging +from time import perf_counter from typing import Any import settings @@ -59,6 +60,7 @@ from utils.http import ( iter_responses_sse, sse_response, ) +from utils.request_history import request_history from utils.request_logger import ( append_client_event, append_upstream_event, @@ -113,6 +115,7 @@ def chat_completions(): client_model = payload.get('model', 'unknown') is_stream = payload.get('stream', False) ctx = build_route_context(client_model, is_stream) + request_started_at = perf_counter() turn = start_turn( route='chat', client_model=client_model, @@ -132,12 +135,12 @@ def chat_completions(): payload['messages'] = thinking_cache.inject(payload.get('messages', [])) if ctx.backend == 'openai': - return _handle_openai_backend(ctx, payload, turn) + return _handle_openai_backend(ctx, payload, turn, request_started_at) if ctx.backend == 'responses': - return _handle_responses_backend(ctx, payload, turn) + return _handle_responses_backend(ctx, payload, turn, request_started_at) if ctx.backend == 'gemini': - return _handle_gemini_backend(ctx, payload, turn) - return _handle_anthropic_backend(ctx, payload, turn) + return _handle_gemini_backend(ctx, payload, turn, request_started_at) + return _handle_anthropic_backend(ctx, payload, turn, request_started_at) def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]: @@ -158,7 +161,12 @@ def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], in return payload, message_count -def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any]): +def _handle_openai_backend( + ctx: RouteContext, + payload: dict[str, Any], + turn: dict[str, Any], + request_started_at: float, +): """处理走 OpenAI 兼容后端的聊天补全请求。""" _dbg( '原始请求字段=' + str(list(payload.keys())) + ' ' @@ -182,8 +190,8 @@ def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dic headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_openai_stream(ctx, payload, url, headers, turn) - return _handle_openai_non_stream(ctx, payload, url, headers, turn) + return _handle_openai_stream(ctx, payload, url, headers, turn, request_started_at) + return _handle_openai_non_stream(ctx, payload, url, headers, turn, request_started_at) def _handle_openai_non_stream( @@ -192,6 +200,7 @@ def _handle_openai_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any], + request_started_at: float, ): """处理 OpenAI 兼容后端的非流式返回。""" payload['stream'] = False @@ -207,7 +216,14 @@ def _handle_openai_non_stream( _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) data = fix_response(raw) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='修复后响应') + return _finalize_chat_response( + ctx, + data, + turn=turn, + debug_label='修复后响应', + request_started_at=request_started_at, + upstream_url=url, + ) def _handle_openai_stream( @@ -216,6 +232,7 @@ def _handle_openai_stream( url: str, headers: dict[str, str], turn: dict[str, Any], + request_started_at: float, ): """处理 OpenAI 兼容后端的流式返回。""" payload['stream'] = True @@ -258,7 +275,18 @@ def _handle_openai_stream( 'chunk_count': len(client_chunks), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='chat', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk}) @@ -299,12 +327,28 @@ def _handle_openai_stream( 'chunk_count': len(client_chunks), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='chat', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return sse_response(generate()) -def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): +def _handle_responses_backend( + ctx: RouteContext, + payload: dict[str, Any], + turn: dict[str, Any] | None, + request_started_at: float, +): """处理走原生 Responses 后端的聊天补全请求。 当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求, @@ -324,8 +368,8 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_responses_stream(ctx, responses_payload, url, headers, turn) - return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn) + return _handle_responses_stream(ctx, responses_payload, url, headers, turn, request_started_at) + return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn, request_started_at) def _handle_responses_non_stream( @@ -334,6 +378,7 @@ def _handle_responses_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理原生 Responses 后端的非流式返回。""" payload['stream'] = False @@ -349,7 +394,14 @@ def _handle_responses_non_stream( _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) data = responses_to_cc_response(raw, ctx.client_model) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后') + return _finalize_chat_response( + ctx, + data, + turn=turn, + debug_label='Responses 转回聊天补全后', + request_started_at=request_started_at, + upstream_url=url, + ) def _handle_responses_stream( @@ -358,6 +410,7 @@ def _handle_responses_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理原生 Responses 后端的流式返回。""" payload['stream'] = True @@ -421,12 +474,28 @@ def _handle_responses_stream( 'chunk_count': len(client_chunks), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='chat', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return sse_response(generate()) -def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): +def _handle_gemini_backend( + ctx: RouteContext, + payload: dict[str, Any], + turn: dict[str, Any] | None, + request_started_at: float, +): """处理走 Gemini Contents 后端的聊天补全请求。""" payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position) gemini_payload = cc_to_gemini_request(payload) @@ -440,8 +509,8 @@ def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dic headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn) - return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn) + return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn, request_started_at) + return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn, request_started_at) def _handle_gemini_non_stream( @@ -450,6 +519,7 @@ def _handle_gemini_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Gemini 后端的非流式返回。""" attach_upstream_request(turn, payload, headers) @@ -464,7 +534,14 @@ def _handle_gemini_non_stream( _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) data = gemini_to_cc_response(raw) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='Gemini 转回聊天补全后') + return _finalize_chat_response( + ctx, + data, + turn=turn, + debug_label='Gemini 转回聊天补全后', + request_started_at=request_started_at, + upstream_url=url, + ) def _handle_gemini_stream( @@ -473,6 +550,7 @@ def _handle_gemini_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Gemini 后端的流式返回。""" converter = GeminiStreamConverter() @@ -535,12 +613,28 @@ def _handle_gemini_stream( 'chunk_count': len(client_chunks), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='chat', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return sse_response(generate()) -def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): +def _handle_anthropic_backend( + ctx: RouteContext, + payload: dict[str, Any], + turn: dict[str, Any] | None, + request_started_at: float, +): """处理走 Anthropic Messages 后端的聊天补全请求。""" payload['model'] = ctx.upstream_model anthropic_payload = cc_to_messages_request(payload) @@ -555,8 +649,8 @@ def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn) - return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn) + return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn, request_started_at) + return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn, request_started_at) def _handle_anthropic_non_stream( @@ -565,6 +659,7 @@ def _handle_anthropic_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Anthropic 后端的非流式返回。""" payload['stream'] = False @@ -580,7 +675,14 @@ def _handle_anthropic_non_stream( _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000]) data = messages_to_cc_response(raw) - return _finalize_chat_response(ctx, data, turn=turn, debug_label='Messages 转回聊天补全后') + return _finalize_chat_response( + ctx, + data, + turn=turn, + debug_label='Messages 转回聊天补全后', + request_started_at=request_started_at, + upstream_url=url, + ) def _handle_anthropic_stream( @@ -589,6 +691,7 @@ def _handle_anthropic_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Anthropic 后端的流式返回。 @@ -673,7 +776,18 @@ def _handle_anthropic_stream( 'chunk_count': len(client_chunks), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='chat', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return sse_response(generate()) @@ -684,6 +798,8 @@ def _finalize_chat_response( *, turn: dict[str, Any] | None, debug_label: str, + request_started_at: float, + upstream_url: str, ): """统一收尾非流式聊天补全响应。 @@ -696,9 +812,21 @@ def _finalize_chat_response( _dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000]) log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens') - usage_tracker.record(ctx.client_model, data.get('usage')) + usage = data.get('usage') + duration_ms = int((perf_counter() - request_started_at) * 1000) + usage_tracker.record(ctx.client_model, usage) + request_history.record( + route='chat', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=upstream_url, + usage=usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) attach_client_response(turn, data) - finalize_turn(turn, usage=data.get('usage')) + finalize_turn(turn, usage=usage, duration_ms=duration_ms) for choice in data.get('choices', []): msg = choice.get('message', {}) diff --git a/routes/messages.py b/routes/messages.py index 0d9faa5..a320081 100644 --- a/routes/messages.py +++ b/routes/messages.py @@ -7,6 +7,7 @@ Anthropic Messages API 透传。当 Cursor 直接发送 Anthropic 格式请求 import json import logging +from time import perf_counter import requests as req_lib from flask import Blueprint, request, jsonify @@ -15,6 +16,7 @@ import settings from config import Config from routes.common import apply_body_modifications, apply_header_modifications, inject_instructions_anthropic from utils.http import build_anthropic_headers, forward_request, sse_response +from utils.request_history import request_history from utils.request_logger import ( append_client_event, append_upstream_event, @@ -40,6 +42,7 @@ def messages_passthrough(): model = payload.get('model', 'unknown') is_stream = payload.get('stream', False) + request_started_at = perf_counter() logger.info(f'[透传] model={model} 流式={is_stream}') mapping = settings.resolve_model(model) @@ -78,7 +81,18 @@ def messages_passthrough(): attach_upstream_response(turn, data) _inject_thinking(data) attach_client_response(turn, data) - finalize_turn(turn) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='messages', + client_model=model, + actual_model=model, + backend='anthropic', + upstream_url=url, + usage=data.get('usage'), + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=data.get('usage'), duration_ms=duration_ms) return jsonify(data) def generate(): @@ -108,7 +122,18 @@ def messages_passthrough(): 'type': 'messages.stream.summary', 'event_count': len(client_events), }) - finalize_turn(turn) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='messages', + client_model=model, + actual_model=model, + backend='anthropic', + upstream_url=url, + usage=None, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, duration_ms=duration_ms) except req_lib.RequestException as e: logger.error(f'请求上游失败: {e}') attach_error(turn, {'stage': 'request_exception', 'message': str(e)}) diff --git a/routes/responses.py b/routes/responses.py index 2496a4b..271c30f 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -8,6 +8,7 @@ from __future__ import annotations import json import logging +from time import perf_counter from typing import Any import settings @@ -44,6 +45,7 @@ from utils.http import ( iter_responses_sse, sse_response, ) +from utils.request_history import request_history from utils.request_logger import ( append_client_event, append_upstream_event, @@ -78,6 +80,7 @@ def responses_endpoint(): client_model = payload.get('model', 'unknown') is_stream = payload.get('stream', False) + request_started_at = perf_counter() ctx = build_route_context(client_model, is_stream) turn = start_turn( route='responses', @@ -94,12 +97,12 @@ def responses_endpoint(): cc_payload = _build_cc_payload(payload, ctx) if ctx.backend == 'openai': - return _handle_openai_backend(ctx, cc_payload, turn) + return _handle_openai_backend(ctx, cc_payload, turn, request_started_at) if ctx.backend == 'responses': - return _handle_responses_backend(ctx, payload, turn) + return _handle_responses_backend(ctx, payload, turn, request_started_at) if ctx.backend == 'gemini': - return _handle_gemini_backend(ctx, cc_payload, turn) - return _handle_anthropic_backend(ctx, cc_payload, turn) + return _handle_gemini_backend(ctx, cc_payload, turn, request_started_at) + return _handle_anthropic_backend(ctx, cc_payload, turn, request_started_at) def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]: @@ -119,7 +122,12 @@ def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, A return cc_payload -def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]): +def _handle_openai_backend( + ctx: RouteContext, + cc_payload: dict[str, Any], + turn: dict[str, Any], + request_started_at: float, +): """处理走 OpenAI 兼容后端的 Responses 请求。""" cc_payload = normalize_request(cc_payload) _dbg( @@ -132,8 +140,8 @@ def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_openai_stream(ctx, cc_payload, url, headers, turn) - return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn) + return _handle_openai_stream(ctx, cc_payload, url, headers, turn, request_started_at) + return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn, request_started_at) def _handle_openai_non_stream( @@ -142,6 +150,7 @@ def _handle_openai_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any], + request_started_at: float, ): """处理 OpenAI 兼容后端的非流式 Responses 返回。""" cc_payload['stream'] = False @@ -163,6 +172,9 @@ def _handle_openai_non_stream( client_model=ctx.client_model, turn=turn, debug_label='转换为 Responses 后', + ctx=ctx, + request_started_at=request_started_at, + upstream_url=url, ) @@ -172,6 +184,7 @@ def _handle_openai_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 OpenAI 兼容后端的流式 Responses 返回。""" cc_payload['stream'] = True @@ -212,7 +225,18 @@ def _handle_openai_stream( 'model': ctx.client_model, 'event_count': len(client_events), }) - finalize_turn(turn) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='responses', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=None, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, duration_ms=duration_ms) return append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk}) @@ -239,7 +263,12 @@ def _handle_openai_stream( return sse_response(generate()) -def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None): +def _handle_responses_backend( + ctx: RouteContext, + payload: dict[str, Any], + turn: dict[str, Any] | None, + request_started_at: float, +): """处理走原生 Responses 后端的请求。 当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议, @@ -254,8 +283,8 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_responses_stream(ctx, payload, url, headers, turn) - return _handle_responses_non_stream(ctx, payload, url, headers, turn) + return _handle_responses_stream(ctx, payload, url, headers, turn, request_started_at) + return _handle_responses_non_stream(ctx, payload, url, headers, turn, request_started_at) def _handle_responses_non_stream( @@ -264,6 +293,7 @@ def _handle_responses_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理原生 Responses 后端的非流式返回。""" payload['stream'] = False @@ -282,6 +312,9 @@ def _handle_responses_non_stream( client_model=ctx.client_model, turn=turn, debug_label='原生 Responses 返回后', + ctx=ctx, + request_started_at=request_started_at, + upstream_url=url, ) @@ -291,6 +324,7 @@ def _handle_responses_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理原生 Responses 后端的流式返回。""" payload['stream'] = True @@ -345,7 +379,18 @@ def _handle_responses_stream( 'event_count': len(client_events), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='responses', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return sse_response(generate()) @@ -369,7 +414,12 @@ def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | Non return None -def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None): +def _handle_gemini_backend( + ctx: RouteContext, + cc_payload: dict[str, Any], + turn: dict[str, Any] | None, + request_started_at: float, +): """处理走 Gemini Contents 后端的 Responses 请求。""" gemini_payload = cc_to_gemini_request(cc_payload) _dbg( @@ -382,8 +432,8 @@ def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn) - return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn) + return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn, request_started_at) + return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn, request_started_at) def _handle_gemini_non_stream( @@ -392,6 +442,7 @@ def _handle_gemini_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Gemini 后端的非流式 Responses 返回。""" attach_upstream_request(turn, payload, headers) @@ -412,6 +463,9 @@ def _handle_gemini_non_stream( client_model=ctx.client_model, turn=turn, debug_label='Gemini 转回 Responses 后', + ctx=ctx, + request_started_at=request_started_at, + upstream_url=url, ) @@ -421,6 +475,7 @@ def _handle_gemini_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Gemini 后端的流式 Responses 返回。""" converter = ResponsesStreamConverter(model=ctx.client_model) @@ -487,12 +542,28 @@ def _handle_gemini_stream( 'event_count': len(client_events), 'usage': last_usage, }) - finalize_turn(turn, usage=last_usage) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='responses', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=last_usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, usage=last_usage, duration_ms=duration_ms) return sse_response(generate()) -def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None): +def _handle_anthropic_backend( + ctx: RouteContext, + cc_payload: dict[str, Any], + turn: dict[str, Any] | None, + request_started_at: float, +): """处理走 Anthropic 后端的 Responses 请求。""" anthropic_payload = cc_to_messages_request(cc_payload) _dbg( @@ -505,8 +576,8 @@ def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], tur headers = apply_header_modifications(headers, ctx.header_modifications) if ctx.is_stream: - return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn) - return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn) + return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn, request_started_at) + return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn, request_started_at) def _handle_anthropic_non_stream( @@ -515,6 +586,7 @@ def _handle_anthropic_non_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Anthropic 后端的非流式 Responses 返回。""" anthropic_payload['stream'] = False @@ -536,6 +608,9 @@ def _handle_anthropic_non_stream( client_model=ctx.client_model, turn=turn, debug_label='Messages 转回 Responses 后', + ctx=ctx, + request_started_at=request_started_at, + upstream_url=url, ) @@ -545,6 +620,7 @@ def _handle_anthropic_stream( url: str, headers: dict[str, str], turn: dict[str, Any] | None, + request_started_at: float, ): """处理 Anthropic 后端的流式 Responses 返回。 @@ -600,7 +676,18 @@ def _handle_anthropic_stream( 'model': ctx.client_model, 'event_count': len(client_events), }) - finalize_turn(turn) + duration_ms = int((perf_counter() - request_started_at) * 1000) + request_history.record( + route='responses', + client_model=ctx.client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=url, + usage=None, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) + finalize_turn(turn, duration_ms=duration_ms) return sse_response(generate()) @@ -611,6 +698,9 @@ def _finalize_responses_response( client_model: str, turn: dict[str, Any], debug_label: str, + ctx: RouteContext, + request_started_at: float, + upstream_url: str, ): """统一收尾非流式 Responses 响应。 @@ -621,14 +711,26 @@ def _finalize_responses_response( _dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000]) log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens') + usage = response_data.get('usage') + duration_ms = int((perf_counter() - request_started_at) * 1000) usage_tracker.record( client_model, - response_data.get('usage'), + usage, input_key='input_tokens', output_key='output_tokens', ) + request_history.record( + route='responses', + client_model=client_model, + actual_model=ctx.upstream_model, + backend=ctx.backend, + upstream_url=upstream_url, + usage=usage, + duration_ms=duration_ms, + started_at=(turn or {}).get('started_at'), + ) attach_client_response(turn, response_data) - finalize_turn(turn, usage=response_data.get('usage')) + finalize_turn(turn, usage=usage, duration_ms=duration_ms) return jsonify(response_data) diff --git a/static/admin.css b/static/admin.css index 875bbcb..1824b17 100644 --- a/static/admin.css +++ b/static/admin.css @@ -83,3 +83,11 @@ main{padding:28px 0 60px} .toast-ok{background:#065f46;color:#a7f3d0} .toast-err{background:#7f1d1d;color:#fca5a5} @keyframes slideIn{from{transform:translateX(100px);opacity:0}to{transform:none;opacity:1}} + +.request-logs-wrap{overflow:auto} +.request-logs-table{min-width:1100px} +.request-logs-table td{vertical-align:top} +.log-url{max-width:320px;word-break:break-all;color:var(--muted)} +.log-status{display:inline-flex;align-items:center;padding:2px 8px;border-radius:999px;font-size:12px;font-weight:600} +.status-ok{background:rgba(34,197,94,.15);color:var(--green)} +.status-error{background:rgba(239,68,68,.15);color:var(--red)} diff --git a/static/admin.html b/static/admin.html index 5d382ad..bf79a2e 100644 --- a/static/admin.html +++ b/static/admin.html @@ -90,6 +90,16 @@
加载中…
+ + +
+
+

最近 500 条请求日志

+ +
+
显示请求时间、请求模型、实际上游模型、上游 URL、Token 统计、耗时和状态。
+
加载中…
+
diff --git a/static/admin.js b/static/admin.js index a5d5e6c..67a4679 100644 --- a/static/admin.js +++ b/static/admin.js @@ -72,6 +72,7 @@ async function loadDashboard() { await loadMappings(); checkHealth(); loadStats(); + loadRequestLogs(); } catch (e) { toast('加载设置失败: ' + e.message, false); } @@ -104,6 +105,55 @@ async function loadStats() { } } +async function loadRequestLogs() { + const el = document.getElementById('requestLogsContent'); + try { + const data = await api('/api/admin/request-logs'); + const items = data.items || []; + if (!items.length) { + el.innerHTML = '
暂无请求日志
'; + return; + } + let html = '
'; + for (const item of items) { + const usage = item.usage || {}; + const tokens = '输 ' + fmtNum(usage.input_tokens) + ' / 出 ' + fmtNum(usage.output_tokens) + ' / 总 ' + fmtNum(usage.total_tokens); + const statusClass = item.status === 'ok' ? 'status-ok' : 'status-error'; + const statusText = item.status === 'ok' ? '成功' : '异常'; + html += '' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + ''; + } + html += '
请求时间请求模型实际模型上游 URLTokens耗时状态
' + esc(fmtTime(item.requested_at)) + '' + esc(item.requested_model || '-') + '' + esc(item.actual_model || '-') + '' + esc(item.upstream_url || '-') + '' + esc(tokens) + '' + fmtNum(item.duration_ms) + ' ms' + statusText + '
'; + el.innerHTML = html; + } catch (e) { + el.innerHTML = '
加载请求日志失败
'; + } +} + +function fmtNum(value) { + return Number(value || 0).toLocaleString(); +} + +function fmtTime(value) { + if (!value) return '-'; + const d = new Date(value); + if (Number.isNaN(d.getTime())) return String(value); + const pad = n => String(n).padStart(2, '0'); + return d.getFullYear() + '-' + + pad(d.getMonth() + 1) + '-' + + pad(d.getDate()) + ' ' + + pad(d.getHours()) + ':' + + pad(d.getMinutes()) + ':' + + pad(d.getSeconds()); +} + async function checkHealth() { try { const r = await fetch(API + '/health'); diff --git a/utils/request_history.py b/utils/request_history.py new file mode 100644 index 0000000..e8dde7f --- /dev/null +++ b/utils/request_history.py @@ -0,0 +1,111 @@ +"""请求历史记录。 + +为管理后台提供最近请求查询能力,默认仅保留最近 500 条, +重启后会从磁盘恢复最近一次快照。 +""" + +from __future__ import annotations + +import json +import os +import threading +from collections import deque +from datetime import datetime, timezone +from typing import Any + +from settings import DATA_DIR + +_MAX_RECORDS = 500 +_FILE_PATH = os.path.join(DATA_DIR, 'request_logs.json') + + +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + + +def _safe_int(value: Any) -> int: + try: + return int(value or 0) + except (TypeError, ValueError): + return 0 + + +def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, int]: + usage = usage or {} + input_tokens = _safe_int( + usage.get('prompt_tokens', usage.get('input_tokens', 0)) + ) + output_tokens = _safe_int( + usage.get('completion_tokens', usage.get('output_tokens', 0)) + ) + total_tokens = _safe_int(usage.get('total_tokens', input_tokens + output_tokens)) + return { + 'input_tokens': input_tokens, + 'output_tokens': output_tokens, + 'total_tokens': total_tokens, + } + + +class RequestHistory: + def __init__(self) -> None: + self._lock = threading.Lock() + self._records: deque[dict[str, Any]] = deque(maxlen=_MAX_RECORDS) + self._load() + + def record( + self, + *, + route: str, + client_model: str, + actual_model: str, + backend: str, + upstream_url: str, + usage: dict[str, Any] | None, + duration_ms: int, + started_at: str | None = None, + status: str = 'ok', + error_message: str = '', + ) -> None: + record = { + 'requested_at': started_at or _now_iso(), + 'route': route, + 'requested_model': client_model or '', + 'actual_model': actual_model or '', + 'backend': backend or '', + 'upstream_url': upstream_url or '', + 'duration_ms': max(_safe_int(duration_ms), 0), + 'status': status or 'ok', + 'error_message': error_message or '', + 'usage': _normalize_usage(usage), + 'recorded_at': _now_iso(), + } + with self._lock: + self._records.appendleft(record) + self._persist_locked() + + def get_recent(self, limit: int = _MAX_RECORDS) -> list[dict[str, Any]]: + size = max(1, min(_safe_int(limit), _MAX_RECORDS)) + with self._lock: + return list(self._records)[:size] + + def _load(self) -> None: + if not os.path.exists(_FILE_PATH): + return + try: + with open(_FILE_PATH, 'r', encoding='utf-8') as f: + data = json.load(f) + if not isinstance(data, list): + return + for item in data[:_MAX_RECORDS]: + if isinstance(item, dict): + self._records.append(item) + except (OSError, json.JSONDecodeError): + self._records.clear() + + def _persist_locked(self) -> None: + os.makedirs(DATA_DIR, exist_ok=True) + with open(_FILE_PATH, 'w', encoding='utf-8') as f: + json.dump(list(self._records), f, ensure_ascii=False, indent=2) + + +request_history = RequestHistory() From 4c6bede153457d2ceaf97be9c8c3243d56b4df58 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 5 May 2026 14:30:31 +0800 Subject: [PATCH 10/10] you hua le rizhi de xianshi --- README.md | 12 ++++++++++++ static/admin.css | 2 +- static/admin.html | 4 ++-- static/admin.js | 5 ++++- utils/request_history.py | 14 ++++++++++++++ 5 files changed, 33 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b7b6d4c..6dcea52 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,18 @@ api2cursor/ - `file_path` → `path` 字段映射 - `finish_reason` 修正 + +============================ +增加缓存,在api2cursor里面的body修改中加个你喜欢的随意字段: +{ + "prompt_cache_key": "GPT5-4-xxx-xxx" +} +openai 开 fast 模式 +{ + "service_tier": "priority" +} + + ## 许可证 [MIT](LICENSE) diff --git a/static/admin.css b/static/admin.css index 1824b17..e52b5e9 100644 --- a/static/admin.css +++ b/static/admin.css @@ -9,7 +9,7 @@ body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI','PingFang SC','Micr input,select,button,textarea{font-family:inherit;font-size:inherit} a{color:var(--primary);text-decoration:none} code{background:var(--input);padding:1px 5px;border-radius:4px;font-size:12px;font-family:Consolas,Monaco,monospace} -.container{max-width:960px;margin:0 auto;padding:0 20px} +.container{width:min(100%,1680px);margin:0 auto;padding:0 20px} #login{display:flex;align-items:center;justify-content:center;min-height:100vh;background:linear-gradient(145deg,#0b1120 0%,#121a2e 50%,#0b1120 100%)} .login-card{background:var(--card);border:1px solid var(--border);border-radius:16px;padding:40px;width:380px;box-shadow:0 20px 60px rgba(0,0,0,.4)} diff --git a/static/admin.html b/static/admin.html index bf79a2e..b4a0b85 100644 --- a/static/admin.html +++ b/static/admin.html @@ -4,7 +4,7 @@ API 2 Cursor - 管理面板 - + @@ -190,6 +190,6 @@
- + diff --git a/static/admin.js b/static/admin.js index 67a4679..236c33a 100644 --- a/static/admin.js +++ b/static/admin.js @@ -117,7 +117,10 @@ async function loadRequestLogs() { let html = '
'; for (const item of items) { const usage = item.usage || {}; - const tokens = '输 ' + fmtNum(usage.input_tokens) + ' / 出 ' + fmtNum(usage.output_tokens) + ' / 总 ' + fmtNum(usage.total_tokens); + let tokens = '输 ' + fmtNum(usage.input_tokens) + ' / 出 ' + fmtNum(usage.output_tokens) + ' / 总 ' + fmtNum(usage.total_tokens); + if (Number(usage.cache_read_tokens || 0) > 0 || Number(usage.cache_write_tokens || 0) > 0) { + tokens += ' / 缓存读 ' + fmtNum(usage.cache_read_tokens) + ' / 缓存写 ' + fmtNum(usage.cache_write_tokens); + } const statusClass = item.status === 'ok' ? 'status-ok' : 'status-error'; const statusText = item.status === 'ok' ? '成功' : '异常'; html += '' diff --git a/utils/request_history.py b/utils/request_history.py index e8dde7f..18162c8 100644 --- a/utils/request_history.py +++ b/utils/request_history.py @@ -39,10 +39,24 @@ def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, int]: usage.get('completion_tokens', usage.get('output_tokens', 0)) ) total_tokens = _safe_int(usage.get('total_tokens', input_tokens + output_tokens)) + + prompt_details = usage.get('prompt_tokens_details') + input_details = usage.get('input_tokens_details') + + cache_read_tokens = _safe_int(usage.get('cache_read_input_tokens', 0)) + cache_write_tokens = _safe_int(usage.get('cache_creation_input_tokens', 0)) + + if isinstance(prompt_details, dict): + cache_read_tokens = max(cache_read_tokens, _safe_int(prompt_details.get('cached_tokens', 0))) + if isinstance(input_details, dict): + cache_read_tokens = max(cache_read_tokens, _safe_int(input_details.get('cached_tokens', 0))) + return { 'input_tokens': input_tokens, 'output_tokens': output_tokens, 'total_tokens': total_tokens, + 'cache_read_tokens': cache_read_tokens, + 'cache_write_tokens': cache_write_tokens, }
请求时间请求模型实际模型上游 URLTokens耗时状态