修复缓存问题
This commit is contained in:
parent
049f91e549
commit
98f3ae24a0
4 changed files with 79 additions and 5 deletions
|
|
@ -654,6 +654,10 @@ class ResponsesToCCStreamConverter:
|
||||||
'completion_tokens': self._usage.get('output_tokens', 0),
|
'completion_tokens': self._usage.get('output_tokens', 0),
|
||||||
'total_tokens': self._usage.get('total_tokens', 0),
|
'total_tokens': self._usage.get('total_tokens', 0),
|
||||||
}
|
}
|
||||||
|
if isinstance(self._usage.get('input_tokens_details'), dict):
|
||||||
|
chunk['usage']['prompt_tokens_details'] = dict(self._usage['input_tokens_details'])
|
||||||
|
if isinstance(self._usage.get('output_tokens_details'), dict):
|
||||||
|
chunk['usage']['completion_tokens_details'] = dict(self._usage['output_tokens_details'])
|
||||||
return [chunk]
|
return [chunk]
|
||||||
|
|
||||||
def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict:
|
def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict:
|
||||||
|
|
@ -678,20 +682,44 @@ def _copy_request_options(payload: JsonDict, result: JsonDict) -> None:
|
||||||
"""将 Responses 请求中的通用选项复制到 CC 请求体。"""
|
"""将 Responses 请求中的通用选项复制到 CC 请求体。"""
|
||||||
if 'tools' in payload:
|
if 'tools' in payload:
|
||||||
result['tools'] = _convert_tools(payload['tools'])
|
result['tools'] = _convert_tools(payload['tools'])
|
||||||
for key in ('temperature', 'top_p'):
|
for key in (
|
||||||
|
'temperature',
|
||||||
|
'top_p',
|
||||||
|
'tool_choice',
|
||||||
|
'parallel_tool_calls',
|
||||||
|
'truncation',
|
||||||
|
'store',
|
||||||
|
'metadata',
|
||||||
|
'conversation',
|
||||||
|
'previous_response_id',
|
||||||
|
'prompt_cache_key',
|
||||||
|
'service_tier',
|
||||||
|
'user',
|
||||||
|
):
|
||||||
if key in payload:
|
if key in payload:
|
||||||
result[key] = payload[key]
|
result[key] = payload[key]
|
||||||
if 'max_output_tokens' in payload:
|
if 'max_output_tokens' in payload:
|
||||||
result['max_tokens'] = payload['max_output_tokens']
|
result['max_tokens'] = payload['max_output_tokens']
|
||||||
if 'tool_choice' in payload:
|
|
||||||
result['tool_choice'] = payload['tool_choice']
|
|
||||||
|
|
||||||
|
|
||||||
def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None:
|
def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None:
|
||||||
"""将聊天补全请求中的通用选项复制到原生 Responses 请求体。"""
|
"""将聊天补全请求中的通用选项复制到原生 Responses 请求体。"""
|
||||||
if 'tools' in payload:
|
if 'tools' in payload:
|
||||||
result['tools'] = _convert_cc_tools_to_responses(payload['tools'])
|
result['tools'] = _convert_cc_tools_to_responses(payload['tools'])
|
||||||
for key in ('temperature', 'top_p', 'tool_choice'):
|
for key in (
|
||||||
|
'temperature',
|
||||||
|
'top_p',
|
||||||
|
'tool_choice',
|
||||||
|
'parallel_tool_calls',
|
||||||
|
'truncation',
|
||||||
|
'store',
|
||||||
|
'metadata',
|
||||||
|
'conversation',
|
||||||
|
'previous_response_id',
|
||||||
|
'prompt_cache_key',
|
||||||
|
'service_tier',
|
||||||
|
'user',
|
||||||
|
):
|
||||||
if key in payload:
|
if key in payload:
|
||||||
result[key] = payload[key]
|
result[key] = payload[key]
|
||||||
if 'max_tokens' in payload:
|
if 'max_tokens' in payload:
|
||||||
|
|
@ -914,11 +942,18 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict:
|
||||||
|
|
||||||
def _build_responses_usage(usage: JsonDict) -> JsonDict:
|
def _build_responses_usage(usage: JsonDict) -> JsonDict:
|
||||||
"""将 Chat Completions 的 usage 字段映射为 Responses usage 结构。"""
|
"""将 Chat Completions 的 usage 字段映射为 Responses usage 结构。"""
|
||||||
return {
|
result = {
|
||||||
'input_tokens': usage.get('prompt_tokens', 0),
|
'input_tokens': usage.get('prompt_tokens', 0),
|
||||||
'output_tokens': usage.get('completion_tokens', 0),
|
'output_tokens': usage.get('completion_tokens', 0),
|
||||||
'total_tokens': usage.get('total_tokens', 0),
|
'total_tokens': usage.get('total_tokens', 0),
|
||||||
}
|
}
|
||||||
|
prompt_details = usage.get('prompt_tokens_details')
|
||||||
|
if isinstance(prompt_details, dict):
|
||||||
|
result['input_tokens_details'] = dict(prompt_details)
|
||||||
|
completion_details = usage.get('completion_tokens_details')
|
||||||
|
if isinstance(completion_details, dict):
|
||||||
|
result['output_tokens_details'] = dict(completion_details)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]:
|
def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]:
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ from routes.common import (
|
||||||
build_responses_target,
|
build_responses_target,
|
||||||
build_route_context,
|
build_route_context,
|
||||||
chat_error_chunk,
|
chat_error_chunk,
|
||||||
|
ensure_responses_cache_control,
|
||||||
inject_instructions_anthropic,
|
inject_instructions_anthropic,
|
||||||
inject_instructions_cc,
|
inject_instructions_cc,
|
||||||
inject_instructions_responses,
|
inject_instructions_responses,
|
||||||
|
|
@ -311,6 +312,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
|
||||||
responses_payload = cc_to_responses_request(payload)
|
responses_payload = cc_to_responses_request(payload)
|
||||||
responses_payload['model'] = ctx.upstream_model
|
responses_payload['model'] = ctx.upstream_model
|
||||||
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
|
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
|
responses_payload = ensure_responses_cache_control(responses_payload)
|
||||||
_dbg(
|
_dbg(
|
||||||
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
||||||
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
||||||
|
|
|
||||||
|
|
@ -195,6 +195,23 @@ def inject_instructions_responses(payload: dict[str, Any], instructions: str, po
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_responses_cache_control(payload: dict[str, Any]) -> dict[str, Any]:
    """Fill in the automatic prompt-caching switch for a Responses request.

    Some upstreams that support ``/v1/responses`` consult a top-level
    ``cache_control`` field to place cache breakpoints automatically. Cursor
    usually does not send this field, so a conservative default is added when
    it is missing. Callers can still override or disable the default through
    ``body_modifications`` or by sending the field explicitly.

    Args:
        payload: Responses request body. It is modified in place when the
            default is added.

    Returns:
        The same ``payload`` object. Non-dict inputs are returned untouched.
    """
    if not isinstance(payload, dict):
        return payload
    if 'cache_control' in payload:
        cache_control = payload['cache_control']
        # An explicit null/false is the caller opting out; keep it as sent
        # rather than silently re-enabling caching.
        if cache_control is None or cache_control is False:
            return payload
        # A well-formed explicit value wins over the default.
        if isinstance(cache_control, dict) and cache_control.get('type'):
            return payload
    # Missing or malformed value: fall back to the conservative default.
    payload['cache_control'] = {'type': 'ephemeral'}
    logger.info('已为 Responses 请求自动启用 cache_control=ephemeral')
    return payload
|
||||||
|
|
||||||
|
|
||||||
def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]:
|
def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]:
|
||||||
"""向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。
|
"""向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ from routes.common import (
|
||||||
build_openai_target,
|
build_openai_target,
|
||||||
build_responses_target,
|
build_responses_target,
|
||||||
build_route_context,
|
build_route_context,
|
||||||
|
ensure_responses_cache_control,
|
||||||
inject_instructions_anthropic,
|
inject_instructions_anthropic,
|
||||||
inject_instructions_cc,
|
inject_instructions_cc,
|
||||||
inject_instructions_responses,
|
inject_instructions_responses,
|
||||||
|
|
@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
|
||||||
payload = dict(payload)
|
payload = dict(payload)
|
||||||
payload['model'] = ctx.upstream_model
|
payload['model'] = ctx.upstream_model
|
||||||
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
|
payload = ensure_responses_cache_control(payload)
|
||||||
url, headers = build_responses_target(ctx)
|
url, headers = build_responses_target(ctx)
|
||||||
payload = apply_body_modifications(payload, ctx.body_modifications)
|
payload = apply_body_modifications(payload, ctx.body_modifications)
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
@ -629,4 +631,22 @@ def _finalize_responses_response(
|
||||||
attach_client_response(turn, response_data)
|
attach_client_response(turn, response_data)
|
||||||
finalize_turn(turn, usage=response_data.get('usage'))
|
finalize_turn(turn, usage=response_data.get('usage'))
|
||||||
|
|
||||||
|
output_items = response_data.get('output', [])
|
||||||
|
if isinstance(output_items, list):
|
||||||
|
for item in output_items:
|
||||||
|
if not isinstance(item, dict) or item.get('type') != 'reasoning':
|
||||||
|
continue
|
||||||
|
summary = item.get('summary', [])
|
||||||
|
if not isinstance(summary, list):
|
||||||
|
continue
|
||||||
|
reasoning_text = ''.join(
|
||||||
|
part.get('text', '')
|
||||||
|
for part in summary
|
||||||
|
if isinstance(part, dict) and part.get('type') == 'summary_text'
|
||||||
|
)
|
||||||
|
if reasoning_text:
|
||||||
|
cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', [])
|
||||||
|
thinking_cache.store_from_response(cc_messages, reasoning_text)
|
||||||
|
break
|
||||||
|
|
||||||
return jsonify(response_data)
|
return jsonify(response_data)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue