重构代码

This commit is contained in:
h88782481 2026-03-22 08:24:19 +08:00
parent 56faf4fcf1
commit 70361242ab
9 changed files with 1195 additions and 1579 deletions

View file

@ -1,8 +1,7 @@
"""路由: /v1/chat/completions
处理 Cursor 发来的 OpenAI Chat Completions 格式请求
根据模型映射的后端类型转发到 OpenAI 兼容接口Anthropic Messages 接口
或原生 OpenAI Responses 接口
根据模型映射的后端类型通过统一的出站转换器转发到不同后端
"""
from __future__ import annotations
@ -11,103 +10,33 @@ import json
import logging
from typing import Any
import settings
from flask import Blueprint, jsonify, request
from adapters.cc_anthropic_adapter import (
AnthropicStreamConverter,
cc_to_messages_request,
messages_to_cc_response,
)
from adapters.cc_gemini_adapter import (
GeminiStreamConverter,
cc_to_gemini_request,
gemini_to_cc_response,
)
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
from adapters.responses_cc_adapter import (
ResponsesToCCStreamConverter,
cc_to_responses_request,
responses_to_cc,
responses_to_cc_response,
)
from config import Config
from adapters.openai_compat_fixer import normalize_request
from adapters.responses_cc_adapter import responses_to_cc
from adapters.unified import handle_non_stream, handle_stream
from routes.common import (
RouteContext,
apply_body_modifications,
apply_header_modifications,
build_anthropic_target,
build_gemini_target,
build_openai_target,
build_responses_target,
CCClientFormatter,
build_route_context,
chat_error_chunk,
inject_instructions_anthropic,
get_outbound,
inject_instructions_cc,
inject_instructions_responses,
log_route_context,
log_usage,
sse_data_message,
)
from utils.http import (
forward_request,
gen_id,
iter_anthropic_sse,
iter_gemini_sse,
iter_openai_sse,
iter_responses_sse,
sse_response,
)
from utils.request_logger import (
append_client_event,
append_upstream_event,
attach_client_response,
attach_error,
attach_upstream_request,
attach_upstream_response,
finalize_turn,
set_stream_summary,
start_turn,
)
from utils.think_tag import ThinkTagExtractor
from utils.request_logger import start_turn
from utils.thinking_cache import thinking_cache
from utils.usage_tracker import usage_tracker
logger = logging.getLogger(__name__)
bp = Blueprint('chat', __name__)
def _dbg(message: str) -> None:
    """Log a detailed message, but only while a debug mode is enabled.

    The message is written at INFO level when ``settings.get_debug_mode()``
    reports ``'simple'`` or ``'verbose'``; otherwise nothing is logged.
    """
    active_mode = settings.get_debug_mode()
    if active_mode not in ('simple', 'verbose'):
        return
    logger.info('[聊天补全调试] %s', message)
def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None:
"""从原生 Responses 事件中提取 usage。
`/v1/chat/completions -> /v1/responses` 的桥接流式路径也需要读取 usage
因此在本文件保留一个本地辅助函数避免依赖其他路由模块的私有实现
"""
if not isinstance(event_data, dict):
return None
usage = event_data.get('usage')
if isinstance(usage, dict):
return usage
response_obj = event_data.get('response')
if isinstance(response_obj, dict):
nested_usage = response_obj.get('usage')
if isinstance(nested_usage, dict):
return nested_usage
return None
@bp.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
"""处理聊天补全请求并按模型映射分发到不同后端。"""
original_payload = request.get_json(force=True)
payload, message_count = _normalize_chat_payload(json.loads(json.dumps(original_payload, ensure_ascii=False, default=str)))
payload, message_count = _normalize_chat_payload(
json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))
)
client_model = payload.get('model', 'unknown')
is_stream = payload.get('stream', False)
@ -127,23 +56,38 @@ def chat_completions():
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
_log_messages(payload)
if ctx.backend != 'responses':
payload['messages'] = thinking_cache.inject(payload.get('messages', []))
payload['model'] = ctx.upstream_model
payload = normalize_request(payload)
payload['messages'] = thinking_cache.inject(payload.get('messages', []))
payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position)
if ctx.backend == 'openai':
return _handle_openai_backend(ctx, payload, turn)
if ctx.backend == 'responses':
return _handle_responses_backend(ctx, payload, turn)
if ctx.backend == 'gemini':
return _handle_gemini_backend(ctx, payload, turn)
return _handle_anthropic_backend(ctx, payload, turn)
outbound = get_outbound(ctx.backend)
client_fmt = CCClientFormatter()
if ctx.is_stream:
result = handle_stream(ctx, outbound, client_fmt, payload, turn)
else:
result = handle_non_stream(ctx, outbound, client_fmt, payload, turn)
if not ctx.is_stream and isinstance(result, tuple):
response_data = result
elif hasattr(result, 'json'):
try:
response_data = result.get_json(silent=True) or {}
except Exception:
response_data = {}
else:
response_data = {}
_try_cache_thinking(response_data)
return result
def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
"""整理聊天补全入口的请求体。
这里保留了一层兼容逻辑 Cursor 或调用方把 Responses 格式误发到
`/v1/chat/completions` 先降级转换成 Chat Completions再进入统一主流程
Cursor 或调用方把 Responses 格式误发到 `/v1/chat/completions`
先降级转换成 Chat Completions再进入统一主流程
"""
message_count = len(payload.get('messages', []))
@ -157,548 +101,11 @@ def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], in
return payload, message_count
def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any]):
    """Serve a chat-completions request through an OpenAI-compatible backend.

    Normalizes the payload for the upstream model, injects custom
    instructions, builds the target URL/headers, applies the configured body
    and header modifications, then dispatches to the streaming or
    non-streaming handler depending on ``ctx.is_stream``.
    """
    non_message_fields = {name: value for name, value in payload.items() if name != 'messages'}
    field_names = str(list(payload.keys()))
    extras_preview = json.dumps(non_message_fields, ensure_ascii=False, default=str)[:500]
    _dbg('原始请求字段=' + field_names + ' ' + '附加字段=' + extras_preview)

    payload = normalize_request(payload, ctx.upstream_model)
    payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position)
    model_name = payload.get("model")
    tool_count = len(payload.get("tools", []))
    _dbg(f'标准化完成:模型={model_name} 工具数={tool_count}')

    url, headers = build_openai_target(ctx)
    payload = apply_body_modifications(payload, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_openai_stream if ctx.is_stream else _handle_openai_non_stream
    return handler(ctx, payload, url, headers, turn)
def _handle_openai_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any],
):
    """Forward a non-streaming request to an OpenAI-compatible upstream.

    Forces ``stream`` off, records the upstream request and raw response on
    the turn, repairs the response with ``fix_response`` and passes it to
    ``_finalize_chat_response``. If ``forward_request`` reports an error, the
    error is recorded, the turn is finalized and that error value is returned
    as-is.
    """
    payload['stream'] = False
    attach_upstream_request(turn, payload, headers)

    upstream_resp, failure = forward_request(url, headers, payload)
    if failure:
        attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
        finalize_turn(turn)
        return failure

    upstream_json = upstream_resp.json()
    attach_upstream_response(turn, upstream_json)
    raw_preview = json.dumps(upstream_json, ensure_ascii=False, default=str)[:1000]
    _dbg('上游原始响应=' + raw_preview)

    return _finalize_chat_response(
        ctx,
        fix_response(upstream_json),
        turn=turn,
        debug_label='修复后响应',
    )
def _handle_openai_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any],
):
    """Handle the streaming reply of an OpenAI-compatible backend.

    Forces ``stream`` on and returns an SSE response whose body is produced
    lazily by the nested ``generate`` generator.
    """
    payload['stream'] = True

    def generate():
        """Consume the upstream OpenAI SSE stream and yield chat-completion SSE messages."""
        attach_upstream_request(turn, payload, headers)
        resp, err = forward_request(url, headers, payload, stream=True)
        if err:
            # Upstream could not be reached: record the failure, close the
            # turn and emit a single error chunk to the client.
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield chat_error_chunk(str(err))
            return
        think_extractor = ThinkTagExtractor()
        chunk_count = 0  # number of upstream chunks processed so far
        last_usage = None  # most recent truthy ``usage`` seen on an upstream chunk
        client_chunks: list[dict[str, Any]] = []
        for chunk in iter_openai_sse(resp):
            if chunk is None:
                # A ``None`` chunk is treated as the end-of-stream marker.
                _dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
                # Flush whatever the think-tag extractor still holds.
                close_chunk = think_extractor.finalize()
                if close_chunk:
                    client_chunks.append(close_chunk)
                    append_client_event(turn, {'type': 'chat_chunk', 'data': close_chunk})
                    yield sse_data_message(close_chunk)
                append_client_event(turn, {'type': 'done'})
                yield sse_data_message('[DONE]')
                usage_tracker.record(ctx.client_model, last_usage)
                set_stream_summary(turn, {
                    'chunk_count': chunk_count,
                    'client_chunk_count': len(client_chunks),
                    'usage': last_usage,
                })
                attach_client_response(turn, {
                    'type': 'chat.completion.stream.summary',
                    'model': ctx.client_model,
                    'chunk_count': len(client_chunks),
                    'usage': last_usage,
                })
                finalize_turn(turn, usage=last_usage)
                return
            append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
            if chunk.get('usage'):
                last_usage = chunk['usage']
            # Only the first ten chunks are dumped to the debug log.
            if chunk_count < 10:
                _dbg(
                    f'上游原始片段#{chunk_count}='
                    + json.dumps(chunk, ensure_ascii=False, default=str)[:500]
                )
            chunk = fix_stream_chunk(chunk)
            # The client is shown the model name it asked for.
            chunk['model'] = ctx.client_model
            for out in think_extractor.process_chunk(chunk):
                client_chunks.append(out)
                append_client_event(turn, {'type': 'chat_chunk', 'data': out})
                if chunk_count < 10:
                    _dbg(
                        f'返回片段#{chunk_count}='
                        + json.dumps(out, ensure_ascii=False, default=str)[:500]
                    )
                yield sse_data_message(out)
            chunk_count += 1
        # Reached only when the iterator is exhausted without a ``None``
        # marker; no '[DONE]' is emitted here and the summary flags the case.
        usage_tracker.record(ctx.client_model, last_usage)
        set_stream_summary(turn, {
            'chunk_count': chunk_count,
            'client_chunk_count': len(client_chunks),
            'usage': last_usage,
            'ended_without_done': True,
        })
        attach_client_response(turn, {
            'type': 'chat.completion.stream.summary',
            'model': ctx.client_model,
            'chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        finalize_turn(turn, usage=last_usage)

    return sse_response(generate())
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
    """Serve a chat-completions request through a native Responses backend.

    When the upstream only offers `/v1/responses`, the chat-completions
    request is converted into a Responses request first; the reply is
    converted back to the chat-completions protocol by the stream /
    non-stream handlers this function dispatches to.
    """
    converted = cc_to_responses_request(payload)
    converted['model'] = ctx.upstream_model
    converted = inject_instructions_responses(
        converted,
        ctx.custom_instructions,
        ctx.instructions_position,
    )

    field_names = str(list(converted.keys()))
    input_count = len(converted.get("input", []))
    _dbg('已转换为 Responses 请求:字段=' + field_names + f' 输入项数={input_count}')

    url, headers = build_responses_target(ctx)
    converted = apply_body_modifications(converted, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_responses_stream if ctx.is_stream else _handle_responses_non_stream
    return handler(ctx, converted, url, headers, turn)
def _handle_responses_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Forward a non-streaming request to a native Responses upstream.

    Forces ``stream`` off, records request and raw response on the turn,
    converts the Responses reply back to chat-completions form and finalizes
    it. On a forwarding error the error is recorded, the turn is finalized and
    the error value from ``forward_request`` is returned unchanged.
    """
    payload['stream'] = False
    attach_upstream_request(turn, payload, headers)

    upstream_resp, failure = forward_request(url, headers, payload)
    if failure:
        attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
        finalize_turn(turn)
        return failure

    upstream_json = upstream_resp.json()
    attach_upstream_response(turn, upstream_json)
    raw_preview = json.dumps(upstream_json, ensure_ascii=False, default=str)[:1000]
    _dbg('上游原始响应=' + raw_preview)

    return _finalize_chat_response(
        ctx,
        responses_to_cc_response(upstream_json, ctx.client_model),
        turn=turn,
        debug_label='Responses 转回聊天补全后',
    )
def _handle_responses_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle the streaming reply of a native Responses backend.

    Forces ``stream`` on, creates a ``ResponsesToCCStreamConverter`` bound to
    the client-facing model name, and returns an SSE response driven by the
    nested ``generate`` generator.
    """
    payload['stream'] = True
    converter = ResponsesToCCStreamConverter(model=ctx.client_model)

    def generate():
        """Consume upstream Responses events and convert them to chat-completion chunks on the fly."""
        attach_upstream_request(turn, payload, headers)
        resp, err = forward_request(url, headers, payload, stream=True)
        if err:
            # Upstream could not be reached: record, close the turn, emit one error chunk.
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield chat_error_chunk(str(err))
            return
        event_count = 0  # number of upstream events processed so far
        client_chunks: list[Any] = []
        last_usage: dict[str, Any] | None = None
        for event_type, event_data in iter_responses_sse(resp):
            append_upstream_event(turn, {'type': event_type, 'data': event_data})
            extracted_usage = _extract_responses_usage(event_data)
            if extracted_usage:
                # Re-key Responses-style usage into chat-completions field names.
                last_usage = {
                    'prompt_tokens': extracted_usage.get('input_tokens', 0),
                    'completion_tokens': extracted_usage.get('output_tokens', 0),
                    'total_tokens': extracted_usage.get('total_tokens', 0),
                }
            # Only the first ten events are dumped to the debug log.
            if event_count < 10:
                _dbg(
                    f'上游事件#{event_count} 类型={event_type} 数据='
                    + json.dumps(event_data, ensure_ascii=False, default=str)[:500]
                )
            for chunk in converter.process_event(event_type, event_data):
                client_chunks.append(chunk)
                append_client_event(turn, {'type': 'chat_chunk', 'data': chunk})
                # A usage dict on a converted chunk overrides the value taken
                # from the raw event above.
                if isinstance(chunk, dict) and isinstance(chunk.get('usage'), dict):
                    last_usage = chunk['usage']
                if event_count < 10:
                    _dbg(
                        f'返回片段#{event_count}='
                        + json.dumps(chunk, ensure_ascii=False, default=str)[:500]
                    )
                yield sse_data_message(chunk)
            event_count += 1
        _dbg(f'流式响应结束,共 {event_count} 个事件')
        # The '[DONE]' terminator is always sent once the upstream iterator ends.
        append_client_event(turn, {'type': 'done'})
        yield sse_data_message('[DONE]')
        usage_tracker.record(ctx.client_model, last_usage)
        set_stream_summary(turn, {
            'event_count': event_count,
            'client_chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        attach_client_response(turn, {
            'type': 'chat.completion.stream.summary',
            'model': ctx.client_model,
            'chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        finalize_turn(turn, usage=last_usage)

    return sse_response(generate())
def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
    """Serve a chat-completions request through a Gemini Contents backend.

    Custom instructions are injected while the payload is still in
    chat-completions form; the result is then converted to a Gemini request,
    the body/header modifications are applied, and the request is dispatched
    to the streaming or non-streaming handler.
    """
    payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position)
    converted = cc_to_gemini_request(payload)

    field_names = str(list(converted.keys()))
    content_count = len(converted.get("contents", []))
    _dbg('已转换为 Gemini 请求:字段=' + field_names + f' 内容数={content_count}')

    # The Gemini target URL depends on whether the call is streamed.
    url, headers = build_gemini_target(ctx, stream=ctx.is_stream)
    converted = apply_body_modifications(converted, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_gemini_stream if ctx.is_stream else _handle_gemini_non_stream
    return handler(ctx, converted, url, headers, turn)
def _handle_gemini_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Forward a non-streaming request to a Gemini upstream.

    Records request and raw response on the turn, converts the Gemini reply
    back to chat-completions form and finalizes it. On a forwarding error the
    error is recorded, the turn is finalized and the error value from
    ``forward_request`` is returned unchanged.
    """
    attach_upstream_request(turn, payload, headers)

    upstream_resp, failure = forward_request(url, headers, payload)
    if failure:
        attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
        finalize_turn(turn)
        return failure

    upstream_json = upstream_resp.json()
    attach_upstream_response(turn, upstream_json)
    raw_preview = json.dumps(upstream_json, ensure_ascii=False, default=str)[:1000]
    _dbg('上游原始响应=' + raw_preview)

    return _finalize_chat_response(
        ctx,
        gemini_to_cc_response(upstream_json),
        turn=turn,
        debug_label='Gemini 转回聊天补全后',
    )
def _handle_gemini_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle the streaming reply of a Gemini backend.

    Creates a ``GeminiStreamConverter`` and returns an SSE response driven by
    the nested ``generate`` generator.
    """
    converter = GeminiStreamConverter()

    def generate():
        """Consume upstream Gemini chunks and yield chat-completion SSE messages."""
        attach_upstream_request(turn, payload, headers)
        resp, err = forward_request(url, headers, payload, stream=True)
        if err:
            # Upstream could not be reached: record, close the turn, emit one error chunk.
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield chat_error_chunk(str(err))
            return
        chunk_count = 0  # number of upstream chunks processed so far
        client_chunks: list[Any] = []
        last_usage: dict[str, Any] | None = None
        for gemini_chunk in iter_gemini_sse(resp):
            append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk})
            usage_meta = gemini_chunk.get('usageMetadata') if isinstance(gemini_chunk, dict) else None
            if isinstance(usage_meta, dict):
                # Re-key Gemini usage metadata into chat-completions field names.
                last_usage = {
                    'prompt_tokens': usage_meta.get('promptTokenCount', 0),
                    'completion_tokens': usage_meta.get('candidatesTokenCount', 0),
                    'total_tokens': usage_meta.get('totalTokenCount', 0),
                }
            # Only the first ten chunks are dumped to the debug log.
            if chunk_count < 10:
                _dbg(
                    f'上游 Gemini 片段#{chunk_count}='
                    + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500]
                )
            for cc_chunk in converter.process_chunk(gemini_chunk):
                # The client is shown the model name it asked for.
                cc_chunk['model'] = ctx.client_model
                client_chunks.append(cc_chunk)
                append_client_event(turn, {'type': 'chat_chunk', 'data': cc_chunk})
                # A usage dict on a converted chunk overrides the value taken
                # from the raw usage metadata above.
                if isinstance(cc_chunk, dict) and isinstance(cc_chunk.get('usage'), dict):
                    last_usage = cc_chunk['usage']
                if chunk_count < 10:
                    _dbg(
                        f'返回片段#{chunk_count}='
                        + json.dumps(cc_chunk, ensure_ascii=False, default=str)[:500]
                    )
                yield sse_data_message(cc_chunk)
            chunk_count += 1
        _dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
        # The '[DONE]' terminator is always sent once the upstream iterator ends.
        append_client_event(turn, {'type': 'done'})
        yield sse_data_message('[DONE]')
        usage_tracker.record(ctx.client_model, last_usage)
        set_stream_summary(turn, {
            'chunk_count': chunk_count,
            'client_chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        attach_client_response(turn, {
            'type': 'chat.completion.stream.summary',
            'model': ctx.client_model,
            'chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        finalize_turn(turn, usage=last_usage)

    return sse_response(generate())
def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
    """Serve a chat-completions request through an Anthropic Messages backend.

    Sets the upstream model name, converts the payload to a Messages request,
    injects custom instructions, applies the configured body and header
    modifications, then dispatches to the streaming or non-streaming handler.
    """
    payload['model'] = ctx.upstream_model
    converted = cc_to_messages_request(payload)
    converted = inject_instructions_anthropic(
        converted,
        ctx.custom_instructions,
        ctx.instructions_position,
    )

    field_names = str(list(converted.keys()))
    message_count = len(converted.get("messages", []))
    _dbg('已转换为 Messages 请求:字段=' + field_names + f' 消息数={message_count}')

    url, headers = build_anthropic_target(ctx)
    converted = apply_body_modifications(converted, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_anthropic_stream if ctx.is_stream else _handle_anthropic_non_stream
    return handler(ctx, converted, url, headers, turn)
def _handle_anthropic_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Forward a non-streaming request to an Anthropic Messages upstream.

    Forces ``stream`` off, records request and raw response on the turn,
    converts the Messages reply back to chat-completions form and finalizes
    it. On a forwarding error the error is recorded, the turn is finalized and
    the error value from ``forward_request`` is returned unchanged.
    """
    payload['stream'] = False
    attach_upstream_request(turn, payload, headers)

    upstream_resp, failure = forward_request(url, headers, payload)
    if failure:
        attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
        finalize_turn(turn)
        return failure

    upstream_json = upstream_resp.json()
    attach_upstream_response(turn, upstream_json)
    raw_preview = json.dumps(upstream_json, ensure_ascii=False, default=str)[:1000]
    _dbg('上游原始响应=' + raw_preview)

    return _finalize_chat_response(
        ctx,
        messages_to_cc_response(upstream_json),
        turn=turn,
        debug_label='Messages 转回聊天补全后',
    )
def _handle_anthropic_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle the streaming reply of an Anthropic backend.

    A dedicated event-level converter is kept here, rather than first
    materializing a complete response and replaying it, in order to preserve
    the streaming experience and tool-call ordering on the Cursor side as far
    as possible.
    """
    payload['stream'] = True
    converter = AnthropicStreamConverter()

    def generate():
        """Consume the upstream Anthropic event stream and map it step by step to chat-completion SSE."""
        attach_upstream_request(turn, payload, headers)
        resp, err = forward_request(url, headers, payload, stream=True)
        if err:
            # Upstream could not be reached: record, close the turn, emit one error chunk.
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield chat_error_chunk(str(err))
            return
        event_count = 0  # number of upstream events processed so far
        client_chunks: list[Any] = []
        last_usage: dict[str, Any] | None = None
        for event_type, event_data in iter_anthropic_sse(resp):
            append_upstream_event(turn, {'type': event_type, 'data': event_data})
            if event_type == 'message_start':
                # Input token count is read from ``message.usage`` on this event.
                message_usage = event_data.get('message', {}).get('usage', {})
                if isinstance(message_usage, dict):
                    last_usage = {
                        'prompt_tokens': message_usage.get('input_tokens', 0),
                        'completion_tokens': 0,
                        'total_tokens': message_usage.get('input_tokens', 0),
                    }
            elif event_type == 'message_delta':
                # Output token count is read from ``usage`` on this event and
                # combined with the prompt count remembered so far.
                delta_usage = event_data.get('usage', {})
                if isinstance(delta_usage, dict):
                    prompt_tokens = 0
                    if isinstance(last_usage, dict):
                        prompt_tokens = last_usage.get('prompt_tokens', 0)
                    completion_tokens = delta_usage.get('output_tokens', 0)
                    last_usage = {
                        'prompt_tokens': prompt_tokens,
                        'completion_tokens': completion_tokens,
                        'total_tokens': prompt_tokens + completion_tokens,
                    }
            # Only the first ten events are dumped to the debug log.
            if event_count < 10:
                _dbg(
                    f'上游事件#{event_count} 类型={event_type} 数据='
                    + json.dumps(event_data, ensure_ascii=False, default=str)[:500]
                )
            for chunk_str in converter.process_event(event_type, event_data):
                # Best effort: when the converter output parses as JSON, stamp
                # the client-facing model name and pick up any usage dict;
                # output that does not parse is forwarded untouched.
                try:
                    chunk_obj = json.loads(chunk_str)
                    chunk_obj['model'] = ctx.client_model
                    if isinstance(chunk_obj.get('usage'), dict):
                        last_usage = chunk_obj['usage']
                    chunk_str = json.dumps(chunk_obj, ensure_ascii=False)
                except (json.JSONDecodeError, TypeError):
                    pass
                client_chunks.append(chunk_str)
                append_client_event(turn, {'type': 'chat_chunk', 'data': chunk_str})
                if event_count < 10:
                    _dbg(f'返回片段#{event_count}={chunk_str[:500]}')
                yield sse_data_message(chunk_str)
            event_count += 1
        _dbg(f'流式响应结束,共 {event_count} 个事件')
        # The '[DONE]' terminator is always sent once the upstream iterator ends.
        append_client_event(turn, {'type': 'done'})
        yield sse_data_message('[DONE]')
        usage_tracker.record(ctx.client_model, last_usage)
        set_stream_summary(turn, {
            'event_count': event_count,
            'client_chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        attach_client_response(turn, {
            'type': 'chat.completion.stream.summary',
            'model': ctx.client_model,
            'chunk_count': len(client_chunks),
            'usage': last_usage,
        })
        finalize_turn(turn, usage=last_usage)

    return sse_response(generate())
def _finalize_chat_response(
ctx: RouteContext,
data: dict[str, Any],
*,
turn: dict[str, Any] | None,
debug_label: str,
):
"""统一收尾非流式聊天补全响应。
三条后端链路最终都会回到 Chat Completions 格式因此这里集中做
- 回填给 Cursor 展示的模型名
- 输出统一调试日志
- 输出统一令牌统计日志
"""
data['model'] = ctx.client_model
_dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000])
log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens')
usage_tracker.record(ctx.client_model, data.get('usage'))
attach_client_response(turn, data)
finalize_turn(turn, usage=data.get('usage'))
for choice in data.get('choices', []):
def _try_cache_thinking(response_data: dict[str, Any]) -> None:
"""尝试从非流式响应中缓存思维链内容。"""
if not isinstance(response_data, dict):
return
for choice in response_data.get('choices', []):
msg = choice.get('message', {})
if msg.get('reasoning_content'):
thinking_cache.store_from_response(
@ -707,8 +114,6 @@ def _finalize_chat_response(
)
break
return jsonify(data)
def _log_messages(payload: dict[str, Any]) -> None:
"""记录消息摘要,方便排查请求形态是否符合预期。"""

View file

@ -12,7 +12,6 @@ import logging
from typing import Any
import settings
from utils.http import build_anthropic_headers, build_gemini_headers, build_openai_headers
logger = logging.getLogger(__name__)
@ -55,42 +54,6 @@ def build_route_context(client_model: str, is_stream: bool) -> RouteContext:
)
def build_openai_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
    """Return the chat-completions URL and request headers for an OpenAI-compatible backend.

    The URL is ``ctx.target_url`` (trailing slashes removed) followed by
    ``/v1/chat/completions``; headers come from ``build_openai_headers``.
    """
    base_url = ctx.target_url.rstrip("/")
    endpoint = f'{base_url}/v1/chat/completions'
    return endpoint, build_openai_headers(ctx.api_key)
def build_responses_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
    """Return the Responses URL and request headers for an OpenAI Responses backend.

    The URL is ``ctx.target_url`` (trailing slashes removed) followed by
    ``/v1/responses``; headers come from ``build_openai_headers``.
    """
    base_url = ctx.target_url.rstrip("/")
    endpoint = f'{base_url}/v1/responses'
    return endpoint, build_openai_headers(ctx.api_key)
def build_anthropic_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
    """Return the Messages URL and request headers for an Anthropic backend.

    The URL is ``ctx.target_url`` (trailing slashes removed) followed by
    ``/v1/messages``; headers come from ``build_anthropic_headers``.
    """
    base_url = ctx.target_url.rstrip("/")
    endpoint = f'{base_url}/v1/messages'
    return endpoint, build_anthropic_headers(ctx.api_key)
def build_gemini_target(ctx: RouteContext, stream: bool = False) -> tuple[str, dict[str, str]]:
    """Return the URL and request headers for a Gemini backend.

    URL shape: ``{base}/v1/models/{model}:generateContent``
    Streaming: ``{base}/v1/models/{model}:streamGenerateContent?alt=sse``

    ``base`` is ``ctx.target_url`` with trailing slashes removed and
    ``model`` is ``ctx.upstream_model``.
    """
    base = ctx.target_url.rstrip('/')
    model = ctx.upstream_model
    action = 'streamGenerateContent?alt=sse' if stream else 'generateContent'
    url = f'{base}/v1/models/{model}:{action}'
    return url, build_gemini_headers(ctx.api_key)
def log_route_context(route_name: str, ctx: RouteContext, *, extra: str = '') -> None:
"""统一输出路由级日志,避免不同入口的日志格式逐渐漂移。"""
@ -137,11 +100,6 @@ def sse_event_message(event_type: str, data: Any) -> str:
return f'event: {event_type}\ndata: {payload}\n\n'
def chat_error_chunk(message: str, error_type: str = 'upstream_error') -> str:
    """Build the error message used by the chat-completions streaming endpoint."""
    error_body = {'message': message, 'type': error_type}
    return sse_data_message({'error': error_body})
def responses_error_event(message: str) -> str:
    """Build the ``error`` event used by the Responses streaming endpoint."""
    error_body = {'error': message}
    return sse_event_message('error', error_body)
@ -248,3 +206,140 @@ def apply_header_modifications(headers: dict[str, str], modifications: dict[str,
headers[key] = str(value)
logger.info('已应用 header_modifications: %s', list(modifications.keys()))
return headers
# ═══════════════════════════════════════════════════════════
# 后端注册表 + ClientFormatter 实现
# ═══════════════════════════════════════════════════════════
def get_outbound(backend: str):
    """Return a fresh OutboundTransformer instance for the given backend type.

    Known backends are ``'openai'``, ``'anthropic'``, ``'gemini'`` and
    ``'responses'``. Any other value falls back to ``OpenAIChatOutbound``.
    The adapter modules are imported inside the function body.
    """
    from adapters.cc_anthropic_adapter import AnthropicOutbound
    from adapters.cc_gemini_adapter import GeminiOutbound
    from adapters.openai_compat_fixer import OpenAIChatOutbound
    from adapters.responses_cc_adapter import ResponsesOutbound

    outbound_classes = dict(
        openai=OpenAIChatOutbound,
        anthropic=AnthropicOutbound,
        gemini=GeminiOutbound,
        responses=ResponsesOutbound,
    )
    try:
        chosen = outbound_classes[backend]
    except KeyError:
        # Unrecognized backend names use the OpenAI chat transformer.
        chosen = OpenAIChatOutbound
    return chosen()
class CCClientFormatter:
    """Client formatter for the Chat Completions protocol.

    Formats generic handler results as OpenAI Chat Completions output; used
    by the /v1/chat/completions endpoint.
    """

    def format_response(self, cc_response: dict[str, Any], model: str) -> dict[str, Any]:
        """Stamp ``model`` onto the response in place and return the same dict."""
        cc_response['model'] = model
        return cc_response

    def wrap_stream_item(self, item: Any) -> str:
        """Wrap one stream item as an SSE ``data:`` message.

        Strings are passed through verbatim; anything else is JSON-encoded
        with non-ASCII characters preserved.
        """
        if isinstance(item, str):
            payload = item
        else:
            payload = json.dumps(item, ensure_ascii=False)
        return f'data: {payload}\n\n'

    def format_error(self, message: str) -> str:
        """Build the SSE error message for an upstream failure."""
        error_body = {'message': message, 'type': 'upstream_error'}
        return sse_data_message({'error': error_body})

    def format_done(self) -> str | None:
        """Return the ``[DONE]`` stream terminator message."""
        return sse_data_message('[DONE]')

    def start_events(self) -> list[str]:
        """Return the events to emit before the stream starts (none here)."""
        return list()

    @property
    def usage_input_key(self) -> str:
        """Name of the input-token field in this protocol's usage dict."""
        return 'prompt_tokens'

    @property
    def usage_output_key(self) -> str:
        """Name of the output-token field in this protocol's usage dict."""
        return 'completion_tokens'
class ResponsesClientFormatter:
    """Client formatter for the Responses API.

    Formats generic handler results as OpenAI Responses output; used by the
    /v1/responses endpoint. For streaming it relies on a
    ``ResponsesStreamConverter`` to turn CC chunks into Responses SSE events.
    """

    def __init__(self, model: str = ''):
        """Create the formatter and its stream converter for ``model``."""
        from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses

        self._model = model
        self._converter = ResponsesStreamConverter(model=model)
        self._cc_to_responses = cc_to_responses

    def format_response(self, cc_response: dict[str, Any], model: str) -> dict[str, Any]:
        """Convert a complete CC response into a Responses-format response."""
        convert = self._cc_to_responses
        return convert(cc_response, model)

    def wrap_stream_item(self, item: Any) -> str:
        """Return the SSE text for one stream item.

        Strings are returned unchanged; other items are fed to the converter
        and the resulting events are concatenated.
        """
        if not isinstance(item, str):
            return ''.join(self._converter.process_cc_chunk(item))
        return item

    def format_error(self, message: str) -> str:
        """Build the Responses ``error`` event for an upstream failure."""
        return responses_error_event(message)

    def format_done(self) -> str | None:
        """Return the converter's closing events joined, or ``None`` if there are none."""
        closing_events = self._converter.finalize()
        if not closing_events:
            return None
        return ''.join(closing_events)

    def start_events(self) -> list[str]:
        """Return the converter's opening events."""
        return self._converter.start_events()

    @property
    def usage_input_key(self) -> str:
        """Name of the input-token field in this protocol's usage dict."""
        return 'input_tokens'

    @property
    def usage_output_key(self) -> str:
        """Name of the output-token field in this protocol's usage dict."""
        return 'output_tokens'
class ResponsesPassthroughFormatter:
    """Pass-through formatter for the Responses API.

    Used when the backend already speaks the Responses format; only a light
    model-name rewrite is applied.
    """

    def __init__(self, model: str = ''):
        """Remember the client-facing model name."""
        # Stored for parity with the other formatters; no method shown in
        # this class reads it.
        self._model = model

    def format_response(self, response_data: dict[str, Any], model: str) -> dict[str, Any]:
        """Stamp ``model`` onto the response in place and return the same dict."""
        response_data['model'] = model
        return response_data

    def wrap_stream_item(self, item: Any) -> str:
        """Serialize one stream item as an SSE message.

        Strings are returned unchanged. For mapping items the internal
        ``_sse_event_type`` marker is left out of the JSON payload; when the
        marker is truthy it becomes the ``event:`` line.

        The caller's ``item`` is not modified, so the same object can be
        logged or wrapped again and still produce the same output.
        """
        if isinstance(item, str):
            return item

        event_type = item.get('_sse_event_type')
        # Build the payload from a filtered copy instead of popping the
        # marker out of the caller's dict.
        payload = {key: value for key, value in item.items() if key != '_sse_event_type'}
        data_line = f'data: {json.dumps(payload, ensure_ascii=False)}\n\n'
        if event_type:
            return f'event: {event_type}\n{data_line}'
        return data_line

    def format_error(self, message: str) -> str:
        """Build the Responses ``error`` event for an upstream failure."""
        return responses_error_event(message)

    def format_done(self) -> str | None:
        """Return ``None``: this formatter adds no terminator of its own."""
        return None

    def start_events(self) -> list[str]:
        """Return the events to emit before the stream starts (none here)."""
        return []

    @property
    def usage_input_key(self) -> str:
        """Name of the input-token field in this protocol's usage dict."""
        return 'input_tokens'

    @property
    def usage_output_key(self) -> str:
        """Name of the output-token field in this protocol's usage dict."""
        return 'output_tokens'

View file

@ -1,7 +1,7 @@
"""路由: /v1/responses
处理 Cursor GPT、Claude-Opus 等模型发出的 Responses API 请求
请求先转换为 Chat Completions 中间表示按后端类型分发最后转换回 Responses 格式
请求先转换为 Chat Completions 中间表示通过统一出站转换器分发
"""
from __future__ import annotations
@ -13,62 +13,30 @@ from typing import Any
import settings
from flask import Blueprint, jsonify, request
from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response
from adapters.cc_gemini_adapter import GeminiStreamConverter, cc_to_gemini_request, gemini_to_cc_response
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc
from config import Config
from adapters.openai_compat_fixer import normalize_request
from adapters.responses_cc_adapter import (
AnthropicOutboundForResponses,
ResponsesNativeOutbound,
responses_to_cc,
)
from adapters.unified import handle_non_stream, handle_stream
from routes.common import (
RouteContext,
apply_body_modifications,
apply_header_modifications,
build_anthropic_target,
build_gemini_target,
build_openai_target,
build_responses_target,
ResponsesClientFormatter,
ResponsesPassthroughFormatter,
build_route_context,
inject_instructions_anthropic,
get_outbound,
inject_instructions_cc,
inject_instructions_responses,
log_route_context,
log_usage,
responses_error_event,
)
from utils.http import (
forward_request,
gen_id,
iter_anthropic_sse,
iter_gemini_sse,
iter_openai_sse,
iter_responses_sse,
sse_response,
)
from utils.request_logger import (
append_client_event,
append_upstream_event,
attach_client_response,
attach_error,
attach_upstream_request,
attach_upstream_response,
finalize_turn,
set_stream_summary,
start_turn,
)
from utils.think_tag import ThinkTagExtractor
from utils.request_logger import start_turn
from utils.thinking_cache import thinking_cache
from utils.usage_tracker import usage_tracker
logger = logging.getLogger(__name__)
bp = Blueprint('responses', __name__)
def _dbg(message: str) -> None:
    """Log a detailed message, but only while a debug mode is enabled.

    The message is written at INFO level when ``settings.get_debug_mode()``
    reports ``'simple'`` or ``'verbose'``; otherwise nothing is logged.
    """
    active_mode = settings.get_debug_mode()
    if active_mode not in ('simple', 'verbose'):
        return
    logger.info('[响应生成调试] %s', message)
@bp.route('/v1/responses', methods=['POST'])
def responses_endpoint():
"""处理 Responses 请求并按模型映射分发。"""
@ -90,543 +58,42 @@ def responses_endpoint():
)
log_route_context('响应生成', ctx)
if ctx.backend == 'responses':
return _handle_native_responses(ctx, payload, turn)
cc_payload = _build_cc_payload(payload, ctx)
if ctx.backend == 'openai':
return _handle_openai_backend(ctx, cc_payload, turn)
if ctx.backend == 'responses':
return _handle_responses_backend(ctx, payload, turn)
if ctx.backend == 'gemini':
return _handle_gemini_backend(ctx, cc_payload, turn)
return _handle_anthropic_backend(ctx, cc_payload, turn)
if ctx.backend == 'anthropic':
outbound = AnthropicOutboundForResponses()
else:
outbound = get_outbound(ctx.backend)
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
    """Downgrade a Responses request to the Chat Completions intermediate form.

    With a single intermediate protocol, both the OpenAI-compatible and the
    Anthropic backends can share one request pipeline, so the route layer
    does not have to maintain two entirely different orchestration paths.

    The converted payload gets the upstream model name, cached thinking
    content injected into its messages, and the custom instructions applied.
    """
    converted = responses_to_cc(payload)
    converted['model'] = ctx.upstream_model
    converted['messages'] = thinking_cache.inject(converted.get('messages', []))
    converted = inject_instructions_cc(
        converted,
        ctx.custom_instructions,
        ctx.instructions_position,
    )

    field_names = str(list(converted.keys()))
    message_count = len(converted.get("messages", []))
    _dbg('已转换为聊天补全中间表示:字段=' + field_names + f' 消息数={message_count}')
    return converted
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]):
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
cc_payload = normalize_request(cc_payload)
_dbg(
f'标准化完成:模型={cc_payload.get("model")} '
f'工具数={len(cc_payload.get("tools", []))}'
)
url, headers = build_openai_target(ctx)
cc_payload = apply_body_modifications(cc_payload, ctx.body_modifications)
headers = apply_header_modifications(headers, ctx.header_modifications)
client_fmt = ResponsesClientFormatter(model=ctx.client_model)
if ctx.is_stream:
return _handle_openai_stream(ctx, cc_payload, url, headers, turn)
return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn)
return handle_stream(ctx, outbound, client_fmt, cc_payload, turn)
return handle_non_stream(ctx, outbound, client_fmt, cc_payload, turn)
def _handle_openai_non_stream(
    ctx: RouteContext,
    cc_payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any],
):
    """Serve a non-streaming Responses reply from an OpenAI-compatible backend.

    The Chat Completions payload is forwarded with streaming disabled. The
    upstream JSON is passed through ``fix_response``, converted with
    ``cc_to_responses`` and handed to ``_finalize_responses_response`` for the
    shared wrap-up. If ``forward_request`` reports an error, it is recorded on
    the turn and returned unchanged.
    """
    cc_payload['stream'] = False
    attach_upstream_request(turn, cc_payload, headers)

    upstream, failure = forward_request(url, headers, cc_payload)
    if failure:
        failure_record = {'stage': 'forward_request', 'message': 'upstream request failed'}
        attach_error(turn, failure_record)
        finalize_turn(turn)
        return failure

    upstream_json = upstream.json()
    attach_upstream_response(turn, upstream_json)

    # The debug dump is capped at 1000 characters.
    serialized = json.dumps(upstream_json, ensure_ascii=False, default=str)
    _dbg('上游原始响应=' + serialized[:1000])

    converted = cc_to_responses(fix_response(upstream_json), ctx.client_model)
    return _finalize_responses_response(
        converted,
        client_model=ctx.client_model,
        turn=turn,
        debug_label='转换为 Responses 后',
    )
def _handle_openai_stream(
    ctx: RouteContext,
    cc_payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a streaming Responses reply from an OpenAI-compatible backend.

    The Chat Completions payload is forwarded with streaming enabled and every
    upstream chunk is rewritten into Responses SSE events on the fly.

    Args:
        ctx: Route context (client model name is used for the converter and
            for usage accounting).
        cc_payload: Chat Completions payload; its ``stream`` key is set to True.
        url: Upstream endpoint.
        headers: Upstream request headers.
        turn: Request-log record the upstream/client events are attached to.

    Returns:
        Whatever ``sse_response`` builds around the event generator.
    """
    cc_payload['stream'] = True
    converter = ResponsesStreamConverter(model=ctx.client_model)

    def generate():
        """Consume the OpenAI chat-completion stream and re-emit Responses SSE."""
        yield from converter.start_events()
        attach_upstream_request(turn, cc_payload, headers)
        resp, err = forward_request(url, headers, cc_payload, stream=True)
        if err:
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield responses_error_event(str(err))
            return

        think_extractor = ThinkTagExtractor()
        chunk_count = 0
        client_events: list[str] = []
        for chunk in iter_openai_sse(resp):
            if chunk is None:
                # A None chunk is treated as the end-of-stream sentinel.
                break
            append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
            # Only the first ten chunks are dumped to the debug log.
            if chunk_count < 10:
                _dbg(
                    f'上游原始片段#{chunk_count}='
                    + json.dumps(chunk, ensure_ascii=False, default=str)[:500]
                )
            chunk = fix_stream_chunk(chunk)
            for out in think_extractor.process_chunk(chunk):
                for evt in converter.process_cc_chunk(out):
                    client_events.append(evt)
                    append_client_event(turn, {'type': 'responses_event', 'data': evt})
                    if chunk_count < 10:
                        _dbg(
                            f'转换后片段#{chunk_count}='
                            + json.dumps(out, ensure_ascii=False, default=str)[:500]
                        )
                    yield evt
            chunk_count += 1

        # Wrap up here rather than inside the sentinel branch: if the upstream
        # iterator simply runs out without yielding None, the closing Responses
        # events must still be emitted and the turn finalized, matching the
        # other streaming handlers, which finalize after their loops.
        _dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
        finalized_events = converter.finalize()
        for item in finalized_events:
            client_events.append(item)
            append_client_event(turn, {'type': 'responses_event', 'data': item})
            yield item
        usage_tracker.record(ctx.client_model)
        set_stream_summary(turn, {
            'chunk_count': chunk_count,
            'client_event_count': len(client_events),
        })
        attach_client_response(turn, {
            'type': 'responses.stream.summary',
            'model': ctx.client_model,
            'event_count': len(client_events),
        })
        finalize_turn(turn)

    return sse_response(generate())
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
    """Handle a request that is routed to a native Responses backend.

    When the relay itself only supports `/v1/responses`, there is no need to
    detour through the Chat Completions intermediate protocol; the native
    Responses request is forwarded directly.

    NOTE(review): in the visible text no statements follow this docstring
    before the next definition starts -- confirm whether this definition is
    still needed.
    """
def _handle_native_responses(ctx, payload: dict[str, Any], turn: dict[str, Any]):
    """Handle a request routed to a native Responses backend (direct passthrough).

    The payload is copied, pointed at the upstream model, given the configured
    custom instructions and body/header modifications, and then dispatched to
    the streaming or non-streaming handler depending on ``ctx.is_stream``.

    NOTE(review): the visible text interleaves two dispatch variants (the
    ``_handle_responses_*`` helpers and the unified ``handle_stream`` /
    ``handle_non_stream`` helpers). Confirm which one is meant to survive.
    """
    # Shallow copy so that overriding 'model' does not touch the caller's dict.
    payload = dict(payload)
    payload['model'] = ctx.upstream_model
    payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
    url, headers = build_responses_target(ctx)
    payload = apply_body_modifications(payload, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)
    outbound = ResponsesNativeOutbound()
    client_fmt = ResponsesPassthroughFormatter(model=ctx.client_model)
    if ctx.is_stream:
        return _handle_responses_stream(ctx, payload, url, headers, turn)
    return _handle_responses_non_stream(ctx, payload, url, headers, turn)
    # NOTE(review): the two returns below can never execute because a return
    # always precedes them -- looks like diff residue; confirm.
    return handle_stream(ctx, outbound, client_fmt, payload, turn)
    return handle_non_stream(ctx, outbound, client_fmt, payload, turn)
def _handle_responses_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a non-streaming reply from a native Responses backend.

    The request is forwarded with streaming disabled. The upstream JSON is
    attached to the turn, its ``model`` field is overwritten with the client
    model name, and the shared wrap-up in ``_finalize_responses_response``
    produces the result. A ``forward_request`` error is recorded on the turn
    and returned unchanged.
    """
    payload['stream'] = False
    attach_upstream_request(turn, payload, headers)

    upstream, failure = forward_request(url, headers, payload)
    if failure:
        failure_record = {'stage': 'forward_request', 'message': 'upstream request failed'}
        attach_error(turn, failure_record)
        finalize_turn(turn)
        return failure

    body = upstream.json()
    attach_upstream_response(turn, body)
    # The client sees the model name it asked for.
    body['model'] = ctx.client_model
    return _finalize_responses_response(
        body,
        client_model=ctx.client_model,
        turn=turn,
        debug_label='原生 Responses 返回后',
    )
def _handle_responses_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a streaming reply from a native Responses backend.

    Upstream events are fed through
    ``ResponsesStreamConverter.process_responses_event`` and the produced
    events are relayed to the client. The most recent usage found in any event
    is kept for usage accounting and the turn summary.
    """
    payload['stream'] = True
    converter = ResponsesStreamConverter(model=ctx.client_model)

    def generate():
        """Relay the upstream native Responses stream event by event."""
        attach_upstream_request(turn, payload, headers)
        upstream, failure = forward_request(url, headers, payload, stream=True)
        if failure:
            attach_error(turn, {'stage': 'forward_request', 'message': str(failure)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield responses_error_event(str(failure))
            return

        seen = 0
        client_events: list[str] = []
        last_usage: dict[str, Any] | None = None

        def remember(event: str) -> None:
            # Track the outgoing event both locally and in the request log.
            client_events.append(event)
            append_client_event(turn, {'type': 'responses_event', 'data': event})

        for event_type, event_data in iter_responses_sse(upstream):
            append_upstream_event(turn, {'type': event_type, 'data': event_data})
            # Keep the previous usage unless this event carries a non-empty one.
            last_usage = _extract_responses_usage(event_data) or last_usage
            # Only the first ten events are dumped to the debug log.
            if seen < 10:
                dumped = json.dumps(event_data, ensure_ascii=False, default=str)
                _dbg(f'上游事件#{seen} 类型={event_type} 数据=' + dumped[:500])
            for outgoing in converter.process_responses_event(event_type, event_data):
                remember(outgoing)
                yield outgoing
            seen += 1

        _dbg(f'流式响应结束,共 {seen} 个事件')
        usage_tracker.record(
            ctx.client_model,
            last_usage,
            input_key='input_tokens',
            output_key='output_tokens',
        )
        emitted = len(client_events)
        set_stream_summary(turn, {
            'event_count': seen,
            'client_event_count': emitted,
            'usage': last_usage,
        })
        attach_client_response(turn, {
            'type': 'responses.stream.summary',
            'model': ctx.client_model,
            'event_count': emitted,
            'usage': last_usage,
        })
        finalize_turn(turn, usage=last_usage)

    return sse_response(generate())
def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None:
"""从原生 Responses 事件中提取 usage。
原生 `/v1/responses` 流式通常会在 `response.completed` 事件里携带 usage
也可能直接挂在顶层 `usage` 字段这里统一做兼容提取供统计与日志复用
"""
if not isinstance(event_data, dict):
return None
usage = event_data.get('usage')
if isinstance(usage, dict):
return usage
response_obj = event_data.get('response')
if isinstance(response_obj, dict):
nested_usage = response_obj.get('usage')
if isinstance(nested_usage, dict):
return nested_usage
return None
def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
    """Handle a Responses request that is routed to a Gemini Contents backend.

    The Chat Completions payload is converted with ``cc_to_gemini_request``,
    the target URL and headers are built for the requested streaming mode, the
    configured body/header modifications are applied, and the request is
    handed to the streaming or non-streaming Gemini handler.
    """
    gemini_request = cc_to_gemini_request(cc_payload)

    # Only the payload shape (top-level keys, contents count) goes to the debug log.
    field_names = str(list(gemini_request.keys()))
    contents_total = len(gemini_request.get("contents", []))
    _dbg('已转换为 Gemini 请求:字段=' + field_names + f' 内容数={contents_total}')

    url, headers = build_gemini_target(ctx, stream=ctx.is_stream)
    gemini_request = apply_body_modifications(gemini_request, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_gemini_stream if ctx.is_stream else _handle_gemini_non_stream
    return handler(ctx, gemini_request, url, headers, turn)
def _handle_gemini_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a non-streaming Responses reply from a Gemini backend.

    The upstream JSON is converted to Chat Completions with
    ``gemini_to_cc_response``, then to a Responses object with
    ``cc_to_responses``, and finally passed to the shared wrap-up in
    ``_finalize_responses_response``. A ``forward_request`` error is recorded
    on the turn and returned unchanged.
    """
    attach_upstream_request(turn, payload, headers)

    upstream, failure = forward_request(url, headers, payload)
    if failure:
        failure_record = {'stage': 'forward_request', 'message': 'upstream request failed'}
        attach_error(turn, failure_record)
        finalize_turn(turn)
        return failure

    upstream_json = upstream.json()
    attach_upstream_response(turn, upstream_json)

    # The debug dump is capped at 1000 characters.
    serialized = json.dumps(upstream_json, ensure_ascii=False, default=str)
    _dbg('上游原始响应=' + serialized[:1000])

    converted = cc_to_responses(gemini_to_cc_response(upstream_json), ctx.client_model)
    return _finalize_responses_response(
        converted,
        client_model=ctx.client_model,
        turn=turn,
        debug_label='Gemini 转回 Responses 后',
    )
def _handle_gemini_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a streaming Responses reply from a Gemini backend.

    Each Gemini chunk is first turned into Chat Completions chunks by
    ``GeminiStreamConverter`` and those are turned into Responses SSE events
    by ``ResponsesStreamConverter``. The latest ``usageMetadata`` seen is
    mapped to input/output/total token counts for usage accounting.
    """
    converter = ResponsesStreamConverter(model=ctx.client_model)
    gemini_converter = GeminiStreamConverter()

    def generate():
        """Consume the Gemini stream and re-emit it as Responses SSE events."""
        yield from converter.start_events()
        attach_upstream_request(turn, payload, headers)
        upstream, failure = forward_request(url, headers, payload, stream=True)
        if failure:
            attach_error(turn, {'stage': 'forward_request', 'message': str(failure)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield responses_error_event(str(failure))
            return

        seen = 0
        client_events: list[str] = []
        last_usage: dict[str, Any] | None = None

        def remember(event: str) -> None:
            # Track the outgoing event both locally and in the request log.
            client_events.append(event)
            append_client_event(turn, {'type': 'responses_event', 'data': event})

        for gemini_chunk in iter_gemini_sse(upstream):
            append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk})
            if isinstance(gemini_chunk, dict):
                meta = gemini_chunk.get('usageMetadata')
                if isinstance(meta, dict):
                    # Rename Gemini's counters to the input/output/total keys used here.
                    last_usage = {
                        'input_tokens': meta.get('promptTokenCount', 0),
                        'output_tokens': meta.get('candidatesTokenCount', 0),
                        'total_tokens': meta.get('totalTokenCount', 0),
                    }
            # Only the first ten chunks are dumped to the debug log.
            if seen < 10:
                dumped = json.dumps(gemini_chunk, ensure_ascii=False, default=str)
                _dbg(f'上游 Gemini 片段#{seen}=' + dumped[:500])
            for cc_chunk in gemini_converter.process_chunk(gemini_chunk):
                for outgoing in converter.process_cc_chunk(cc_chunk):
                    remember(outgoing)
                    yield outgoing
            seen += 1

        _dbg(f'流式响应结束,共 {seen} 个数据片段')
        for closing in converter.finalize():
            remember(closing)
            yield closing

        usage_tracker.record(
            ctx.client_model,
            last_usage,
            input_key='input_tokens',
            output_key='output_tokens',
        )
        emitted = len(client_events)
        set_stream_summary(turn, {
            'chunk_count': seen,
            'client_event_count': emitted,
            'usage': last_usage,
        })
        attach_client_response(turn, {
            'type': 'responses.stream.summary',
            'model': ctx.client_model,
            'event_count': emitted,
            'usage': last_usage,
        })
        finalize_turn(turn, usage=last_usage)

    return sse_response(generate())
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
    """Handle a Responses request that is routed to an Anthropic backend.

    The Chat Completions payload is converted with ``cc_to_messages_request``,
    the target URL and headers are built, the configured body/header
    modifications are applied, and the request is handed to the streaming or
    non-streaming Anthropic handler.
    """
    messages_request = cc_to_messages_request(cc_payload)

    # Only the payload shape (top-level keys, message count) goes to the debug log.
    field_names = str(list(messages_request.keys()))
    message_total = len(messages_request.get("messages", []))
    _dbg('已转换为 Messages 请求:字段=' + field_names + f' 消息数={message_total}')

    url, headers = build_anthropic_target(ctx)
    messages_request = apply_body_modifications(messages_request, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_anthropic_stream if ctx.is_stream else _handle_anthropic_non_stream
    return handler(ctx, messages_request, url, headers, turn)
def _handle_anthropic_non_stream(
    ctx: RouteContext,
    anthropic_payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a non-streaming Responses reply from an Anthropic backend.

    The request is forwarded with streaming disabled. The upstream JSON is
    converted to Chat Completions with ``messages_to_cc_response``, then to a
    Responses object with ``cc_to_responses``, and finally passed to the
    shared wrap-up in ``_finalize_responses_response``. A ``forward_request``
    error is recorded on the turn and returned unchanged.
    """
    anthropic_payload['stream'] = False
    attach_upstream_request(turn, anthropic_payload, headers)

    upstream, failure = forward_request(url, headers, anthropic_payload)
    if failure:
        failure_record = {'stage': 'forward_request', 'message': 'upstream request failed'}
        attach_error(turn, failure_record)
        finalize_turn(turn)
        return failure

    upstream_json = upstream.json()
    attach_upstream_response(turn, upstream_json)

    # The debug dump is capped at 1000 characters.
    serialized = json.dumps(upstream_json, ensure_ascii=False, default=str)
    _dbg('上游原始响应=' + serialized[:1000])

    converted = cc_to_responses(messages_to_cc_response(upstream_json), ctx.client_model)
    return _finalize_responses_response(
        converted,
        client_model=ctx.client_model,
        turn=turn,
        debug_label='Messages 转回 Responses 后',
    )
def _handle_anthropic_stream(
    ctx: RouteContext,
    anthropic_payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Serve a streaming Responses reply from an Anthropic backend.

    Anthropic SSE events are mapped straight to Responses SSE events,
    deliberately skipping the Chat Completions streaming intermediate state.
    That saves one round of event re-assembly, keeps the stream conversion
    simpler and makes it easier to preserve the original event ordering.
    """
    anthropic_payload['stream'] = True
    converter = ResponsesStreamConverter(model=ctx.client_model)

    def generate():
        """Consume Anthropic SSE and map it directly to a Responses event sequence."""
        yield from converter.start_events()
        attach_upstream_request(turn, anthropic_payload, headers)
        upstream, failure = forward_request(url, headers, anthropic_payload, stream=True)
        if failure:
            attach_error(turn, {'stage': 'forward_request', 'message': str(failure)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield responses_error_event(str(failure))
            return

        seen = 0
        client_events: list[str] = []

        def remember(event: str) -> None:
            # Track the outgoing event both locally and in the request log.
            client_events.append(event)
            append_client_event(turn, {'type': 'responses_event', 'data': event})

        for event_type, event_data in iter_anthropic_sse(upstream):
            append_upstream_event(turn, {'type': event_type, 'data': event_data})
            # Only the first ten events are dumped to the debug log.
            if seen < 10:
                dumped = json.dumps(event_data, ensure_ascii=False, default=str)
                _dbg(f'上游事件#{seen} 类型={event_type} 数据=' + dumped[:500])
            for outgoing in converter.process_anthropic_event(event_type, event_data):
                remember(outgoing)
                yield outgoing
            seen += 1

        _dbg(f'流式响应结束,共 {seen} 个事件')
        for closing in converter.finalize():
            remember(closing)
            yield closing

        usage_tracker.record(ctx.client_model)
        emitted = len(client_events)
        set_stream_summary(turn, {
            'event_count': seen,
            'client_event_count': emitted,
        })
        attach_client_response(turn, {
            'type': 'responses.stream.summary',
            'model': ctx.client_model,
            'event_count': emitted,
        })
        finalize_turn(turn)

    return sse_response(generate())
def _finalize_responses_response(
    response_data: dict[str, Any],
    *,
    client_model: str,
    turn: dict[str, Any],
    debug_label: str,
):
    """Shared wrap-up for every non-streaming Responses reply.

    Both conversion pipelines and the native Responses pipeline end up with a
    Responses object, so debug logging, backfilling the display model name and
    usage logging are handled here in one place.

    Args:
        response_data: The Responses object to return; its ``model`` key is
            updated in place.
        client_model: Model name the client asked for. Used for usage
            accounting and as the fallback display model name.
        turn: Request-log record that receives the client response.
        debug_label: Prefix for the debug dump of the response.

    Returns:
        The result of ``jsonify(response_data)``.
    """
    # Backfill a missing/empty model with the client-facing name; the empty
    # string remains the last resort so the key is always a string.
    response_data['model'] = response_data.get('model') or client_model or ''
    # The debug dump is capped at 1000 characters.
    _dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
    log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
    usage_tracker.record(
        client_model,
        response_data.get('usage'),
        input_key='input_tokens',
        output_key='output_tokens',
    )
    attach_client_response(turn, response_data)
    finalize_turn(turn, usage=response_data.get('usage'))
    return jsonify(response_data)
def _build_cc_payload(payload: dict[str, Any], ctx) -> dict[str, Any]:
    """Downgrade a Responses request to the Chat Completions intermediate form.

    Steps, in order: convert with ``responses_to_cc``, override the model with
    ``ctx.upstream_model``, run ``normalize_request``, pass the messages
    through ``thinking_cache.inject`` and inject the custom instructions.

    Args:
        payload: The incoming Responses request body.
        ctx: Route context supplying the upstream model name and the custom
            instruction settings.

    Returns:
        The resulting Chat Completions payload.
    """
    chat_request = responses_to_cc(payload)
    chat_request['model'] = ctx.upstream_model
    chat_request = normalize_request(chat_request)

    existing_messages = chat_request.get('messages', [])
    chat_request['messages'] = thinking_cache.inject(existing_messages)

    return inject_instructions_cc(
        chat_request,
        ctx.custom_instructions,
        ctx.instructions_position,
    )