From 98f3ae24a02caf46e6b14094966ee69b65d4a093 Mon Sep 17 00:00:00 2001 From: h88782481 <54714341+h88782481@users.noreply.github.com> Date: Sun, 15 Mar 2026 13:25:23 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=BC=93=E5=AD=98=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adapters/responses_cc_adapter.py | 45 ++++++++++++++++++++++++++++---- routes/chat.py | 2 ++ routes/common.py | 17 ++++++++++++ routes/responses.py | 20 ++++++++++++++ 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/adapters/responses_cc_adapter.py b/adapters/responses_cc_adapter.py index 0f35c52..b793dbd 100644 --- a/adapters/responses_cc_adapter.py +++ b/adapters/responses_cc_adapter.py @@ -654,6 +654,10 @@ class ResponsesToCCStreamConverter: 'completion_tokens': self._usage.get('output_tokens', 0), 'total_tokens': self._usage.get('total_tokens', 0), } + if isinstance(self._usage.get('input_tokens_details'), dict): + chunk['usage']['prompt_tokens_details'] = dict(self._usage['input_tokens_details']) + if isinstance(self._usage.get('output_tokens_details'), dict): + chunk['usage']['completion_tokens_details'] = dict(self._usage['output_tokens_details']) return [chunk] def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict: @@ -678,20 +682,44 @@ def _copy_request_options(payload: JsonDict, result: JsonDict) -> None: """将 Responses 请求中的通用选项复制到 CC 请求体。""" if 'tools' in payload: result['tools'] = _convert_tools(payload['tools']) - for key in ('temperature', 'top_p'): + for key in ( + 'temperature', + 'top_p', + 'tool_choice', + 'parallel_tool_calls', + 'truncation', + 'store', + 'metadata', + 'conversation', + 'previous_response_id', + 'prompt_cache_key', + 'service_tier', + 'user', + ): if key in payload: result[key] = payload[key] if 'max_output_tokens' in payload: result['max_tokens'] = payload['max_output_tokens'] - if 'tool_choice' in payload: - result['tool_choice'] = payload['tool_choice'] def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None: """将聊天补全请求中的通用选项复制到原生 Responses 请求体。""" if 'tools' in payload: result['tools'] = _convert_cc_tools_to_responses(payload['tools']) - for key in ('temperature', 'top_p', 'tool_choice'): + for key in ( + 'temperature', + 'top_p', + 'tool_choice', + 'parallel_tool_calls', + 'truncation', + 'store', + 'metadata', + 'conversation', + 'previous_response_id', + 'prompt_cache_key', + 'service_tier', + 'user', + ): if key in payload: result[key] = payload[key] if 'max_tokens' in payload: @@ -914,11 +942,18 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict: def _build_responses_usage(usage: JsonDict) -> JsonDict: """将 Chat Completions 的 usage 字段映射为 Responses usage 结构。""" - return { + result = { 'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'total_tokens': usage.get('total_tokens', 0), } + prompt_details = usage.get('prompt_tokens_details') + if isinstance(prompt_details, dict): + result['input_tokens_details'] = dict(prompt_details) + completion_details = usage.get('completion_tokens_details') + if isinstance(completion_details, dict): + result['output_tokens_details'] = dict(completion_details) + return result def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]: diff --git a/routes/chat.py b/routes/chat.py index 1ca6f81..f8b0f54 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -42,6 +42,7 @@ from routes.common import ( build_responses_target, build_route_context, chat_error_chunk, + ensure_responses_cache_control, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -311,6 +312,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: responses_payload = cc_to_responses_request(payload) responses_payload['model'] = ctx.upstream_model responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position) + responses_payload = ensure_responses_cache_control(responses_payload) _dbg( '已转换为 Responses 请求:字段=' + str(list(responses_payload.keys())) + f' 输入项数={len(responses_payload.get("input", []))}' diff --git a/routes/common.py b/routes/common.py index 0ad7518..3277347 100644 --- a/routes/common.py +++ b/routes/common.py @@ -195,6 +195,23 @@ def inject_instructions_responses(payload: dict[str, Any], instructions: str, po return payload +def ensure_responses_cache_control(payload: dict[str, Any]) -> dict[str, Any]: + """为 Responses 请求补齐自动 prompt caching 开关。 + + 一些支持 `/v1/responses` 的上游会参考顶层 `cache_control` 来自动放置缓存断点。 + Cursor 侧通常不会主动携带这个字段,因此这里在缺失时补一个保守的默认值, + 同时允许调用方通过 body_modifications 或显式字段自行覆盖/关闭。 + """ + if not isinstance(payload, dict): + return payload + cache_control = payload.get('cache_control') + if isinstance(cache_control, dict) and cache_control.get('type'): + return payload + payload['cache_control'] = {'type': 'ephemeral'} + logger.info('已为 Responses 请求自动启用 cache_control=ephemeral') + return payload + + def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]: """向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。 diff --git a/routes/responses.py b/routes/responses.py index 4889a40..eeb65a3 100644 --- a/routes/responses.py +++ b/routes/responses.py @@ -27,6 +27,7 @@ from routes.common import ( build_openai_target, build_responses_target, build_route_context, + ensure_responses_cache_control, inject_instructions_anthropic, inject_instructions_cc, inject_instructions_responses, @@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: payload = dict(payload) payload['model'] = ctx.upstream_model payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position) + payload = ensure_responses_cache_control(payload) url, headers = build_responses_target(ctx) payload = apply_body_modifications(payload, ctx.body_modifications) headers = apply_header_modifications(headers, ctx.header_modifications) @@ -629,4 +631,22 @@ def _finalize_responses_response( attach_client_response(turn, response_data) finalize_turn(turn, usage=response_data.get('usage')) + output_items = response_data.get('output', []) + if isinstance(output_items, list): + for item in output_items: + if not isinstance(item, dict) or item.get('type') != 'reasoning': + continue + summary = item.get('summary', []) + if not isinstance(summary, list): + continue + reasoning_text = ''.join( + part.get('text', '') + for part in summary + if isinstance(part, dict) and part.get('type') == 'summary_text' + ) + if reasoning_text: + cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', []) + thinking_cache.store_from_response(cc_messages, reasoning_text) + break + return jsonify(response_data)