优化代码

This commit is contained in:
h88782481 2026-03-09 19:43:51 +08:00
parent 202731df74
commit 96fbc4da80
15 changed files with 2768 additions and 1383 deletions

View file

@ -1,216 +1,388 @@
"""路由: /v1/chat/completions
处理 Cursor 发来的 OpenAI Chat Completions 格式请求
根据模型映射的 backend 字段分发到 OpenAI Anthropic 后端
根据模型映射的后端类型转发到 OpenAI 兼容接口Anthropic Messages 接口
或原生 OpenAI Responses 接口
"""
from __future__ import annotations
import json
import logging
from typing import Any
from flask import Blueprint, request, jsonify
from flask import Blueprint, jsonify, request
import settings
from config import Config
from adapters.openai_fixer import normalize_request, fix_response, fix_stream_chunk
from adapters.openai_anthropic import (
cc_to_messages_request, messages_to_cc_response, AnthropicStreamConverter,
from adapters.cc_anthropic_adapter import (
AnthropicStreamConverter,
cc_to_messages_request,
messages_to_cc_response,
)
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
from adapters.responses_cc_adapter import (
ResponsesToCCStreamConverter,
cc_to_responses_request,
responses_to_cc,
responses_to_cc_response,
)
from config import Config
from routes.common import (
RouteContext,
build_anthropic_target,
build_openai_target,
build_responses_target,
build_route_context,
chat_error_chunk,
log_route_context,
log_usage,
sse_data_message,
)
from adapters.responses_adapter import responses_to_cc
from utils.http import (
build_openai_headers, build_anthropic_headers,
forward_request, sse_response,
iter_openai_sse, iter_anthropic_sse,
forward_request,
iter_anthropic_sse,
iter_openai_sse,
iter_responses_sse,
sse_response,
)
from utils.think_tag import ThinkTagExtractor
logger = logging.getLogger(__name__)
def _dbg(msg):
"""DEBUG 模式下输出详细日志"""
if Config.DEBUG:
logger.info(f'[调试] {msg}')
bp = Blueprint('chat', __name__)
def _dbg(message: str) -> None:
"""仅在调试模式下输出详细日志。"""
if Config.DEBUG:
logger.info('[聊天补全调试] %s', message)
@bp.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
"""处理聊天补全请求并按模型映射分发到不同后端。"""
payload = request.get_json(force=True)
payload, message_count = _normalize_chat_payload(payload)
client_model = payload.get('model', 'unknown')
is_stream = payload.get('stream', False)
# 保留 Cursor 发送的原始模型名,响应时需要回填
cursor_model = payload.get('model', 'unknown')
msg_count = len(payload.get('messages', []))
ctx = build_route_context(client_model, is_stream)
# 容错Responses 格式误入 CC 端点
if msg_count == 0 and 'input' in payload:
logger.info('检测到 Responses 格式(有 input 无 messages自动转换')
payload = responses_to_cc(payload)
msg_count = len(payload.get('messages', []))
elif msg_count == 0:
logger.warning(f'messages 为空, payload keys: {list(payload.keys())}')
mapping = settings.resolve_model(cursor_model)
backend = mapping['backend']
upstream = mapping['upstream_model']
url_base = mapping['target_url']
api_key = mapping['api_key']
logger.info(
f'[CC] {cursor_model}{upstream} '
f'后端={backend} 流式={is_stream} 消息数={msg_count}'
)
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
_log_messages(payload)
if backend == 'openai':
return _via_openai(payload, upstream, url_base, api_key, is_stream, cursor_model)
else:
return _via_anthropic(payload, upstream, url_base, api_key, is_stream, cursor_model)
if ctx.backend == 'openai':
return _handle_openai_backend(ctx, payload)
if ctx.backend == 'responses':
return _handle_responses_backend(ctx, payload)
return _handle_anthropic_backend(ctx, payload)
# ─── OpenAI 后端 ──────────────────────────────────
def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
"""整理聊天补全入口的请求体。
这里保留了一层兼容逻辑 Cursor 或调用方把 Responses 格式误发到
`/v1/chat/completions` 先降级转换成 Chat Completions再进入统一主流程
"""
message_count = len(payload.get('messages', []))
if message_count == 0 and 'input' in payload:
logger.info('检测到 Responses 格式误入聊天补全接口,已自动转换为 Chat Completions 格式')
payload = responses_to_cc(payload)
message_count = len(payload.get('messages', []))
elif message_count == 0:
logger.warning('消息列表为空,请求字段=%s', list(payload.keys()))
return payload, message_count
def _via_openai(payload, upstream, url_base, api_key, is_stream, cursor_model):
"""通过 OpenAI 兼容后端转发"""
_dbg(f'Cursor 原始请求 keys={list(payload.keys())} '
f'其他字段={json.dumps({k: v for k, v in payload.items() if k != "messages"}, ensure_ascii=False, default=str)[:500]}')
def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any]):
"""处理走 OpenAI 兼容后端的聊天补全请求。"""
_dbg(
'原始请求字段=' + str(list(payload.keys())) + ' '
+ '附加字段='
+ json.dumps(
{k: v for k, v in payload.items() if k != 'messages'},
ensure_ascii=False,
default=str,
)[:500]
)
payload = normalize_request(payload, upstream)
_dbg(f'normalize 后 model={payload.get("model")} tools数={len(payload.get("tools", []))}')
payload = normalize_request(payload, ctx.upstream_model)
_dbg(
f'标准化完成:模型={payload.get("model")} '
f'工具数={len(payload.get("tools", []))}'
)
headers = build_openai_headers(api_key)
url = f'{url_base.rstrip("/")}/v1/chat/completions'
url, headers = build_openai_target(ctx)
if not is_stream:
payload['stream'] = False
resp, err = forward_request(url, headers, payload)
if err:
return err
raw = resp.json()
_dbg(f'上游原始响应={json.dumps(raw, ensure_ascii=False, default=str)[:1000]}')
data = fix_response(raw)
data['model'] = cursor_model
_dbg(f'修复后响应={json.dumps(data, ensure_ascii=False, default=str)[:1000]}')
usage = data.get('usage', {})
logger.info(
f'[CC] 完成 prompt={usage.get("prompt_tokens", 0)} '
f'completion={usage.get("completion_tokens", 0)}'
)
return jsonify(data)
if ctx.is_stream:
return _handle_openai_stream(ctx, payload, url, headers)
return _handle_openai_non_stream(ctx, payload, url, headers)
# 流式处理
def _handle_openai_non_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 OpenAI 兼容后端的非流式返回。"""
payload['stream'] = False
resp, err = forward_request(url, headers, payload)
if err:
return err
raw = resp.json()
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
data = fix_response(raw)
return _finalize_chat_response(ctx, data, debug_label='修复后响应')
def _handle_openai_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 OpenAI 兼容后端的流式返回。"""
payload['stream'] = True
_n = [0]
def generate():
resp, err = forward_request(url, headers, payload, stream=True)
if err:
yield f'data: {json.dumps({"error": {"message": err, "type": "upstream_error"}})}\n\n'
yield chat_error_chunk(str(err))
return
think_ext = ThinkTagExtractor()
think_extractor = ThinkTagExtractor()
chunk_count = 0
for chunk in iter_openai_sse(resp):
if chunk is None: # [DONE]
_dbg(f'流结束,共 {_n[0]} 个 chunk')
yield 'data: [DONE]\n\n'
if chunk is None:
_dbg(f'式响应结束,共 {chunk_count} 个数据片段')
yield sse_data_message('[DONE]')
return
if _n[0] < 10:
_dbg(f'上游原始 chunk#{_n[0]}={json.dumps(chunk, ensure_ascii=False, default=str)[:500]}')
if chunk_count < 10:
_dbg(
f'上游原始片段#{chunk_count}='
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
)
chunk = fix_stream_chunk(chunk)
chunk['model'] = cursor_model
chunk['model'] = ctx.client_model
for out in think_ext.process_chunk(chunk):
if _n[0] < 10:
_dbg(f'发给Cursor chunk#{_n[0]}={json.dumps(out, ensure_ascii=False, default=str)[:500]}')
yield f'data: {json.dumps(out)}\n\n'
for out in think_extractor.process_chunk(chunk):
if chunk_count < 10:
_dbg(
f'返回片段#{chunk_count}='
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
)
yield sse_data_message(out)
_n[0] += 1
chunk_count += 1
return sse_response(generate())
# ─── Anthropic 后端 ───────────────────────────────
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
"""处理走原生 Responses 后端的聊天补全请求。
当上游只支持 `/v1/responses` 需要先把聊天补全请求转换为 Responses 请求
返回时再转换回聊天补全协议
"""
responses_payload = cc_to_responses_request(payload)
responses_payload['model'] = ctx.upstream_model
_dbg(
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
+ f' 输入项数={len(responses_payload.get("input", []))}'
)
url, headers = build_responses_target(ctx)
if ctx.is_stream:
return _handle_responses_stream(ctx, responses_payload, url, headers)
return _handle_responses_non_stream(ctx, responses_payload, url, headers)
def _via_anthropic(payload, upstream, url_base, api_key, is_stream, cursor_model):
"""通过 Anthropic 后端转发CC → Messages → CC"""
payload['model'] = upstream
anthropic_payload = cc_to_messages_request(payload)
_dbg(f'CC→Messages 转换后 keys={list(anthropic_payload.keys())} '
f'messages数={len(anthropic_payload.get("messages", []))}')
def _handle_responses_non_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理原生 Responses 后端的非流式返回。"""
payload['stream'] = False
resp, err = forward_request(url, headers, payload)
if err:
return err
headers = build_anthropic_headers(api_key)
url = f'{url_base.rstrip("/")}/v1/messages'
raw = resp.json()
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
if not is_stream:
anthropic_payload['stream'] = False
resp, err = forward_request(url, headers, anthropic_payload)
if err:
return err
raw = resp.json()
_dbg(f'上游原始响应={json.dumps(raw, ensure_ascii=False, default=str)[:1000]}')
data = messages_to_cc_response(raw)
data['model'] = cursor_model
_dbg(f'Messages→CC 转换后={json.dumps(data, ensure_ascii=False, default=str)[:1000]}')
usage = data.get('usage', {})
logger.info(
f'[CC] 完成 prompt={usage.get("prompt_tokens", 0)} '
f'completion={usage.get("completion_tokens", 0)}'
)
return jsonify(data)
data = responses_to_cc_response(raw, ctx.client_model)
return _finalize_chat_response(ctx, data, debug_label='Responses 转回聊天补全后')
# 流式处理
anthropic_payload['stream'] = True
converter = AnthropicStreamConverter()
_n = [0]
def _handle_responses_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理原生 Responses 后端的流式返回。"""
payload['stream'] = True
converter = ResponsesToCCStreamConverter(model=ctx.client_model)
def generate():
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
resp, err = forward_request(url, headers, payload, stream=True)
if err:
yield f'data: {json.dumps({"error": {"message": err, "type": "upstream_error"}})}\n\n'
yield chat_error_chunk(str(err))
return
event_count = 0
for event_type, event_data in iter_responses_sse(resp):
if event_count < 10:
_dbg(
f'上游事件#{event_count} 类型={event_type} 数据='
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
)
for chunk in converter.process_event(event_type, event_data):
if event_count < 10:
_dbg(
f'返回片段#{event_count}='
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
)
yield sse_data_message(chunk)
event_count += 1
_dbg(f'流式响应结束,共 {event_count} 个事件')
yield sse_data_message('[DONE]')
return sse_response(generate())
def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any]):
"""处理走 Anthropic Messages 后端的聊天补全请求。"""
payload['model'] = ctx.upstream_model
anthropic_payload = cc_to_messages_request(payload)
_dbg(
'已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys()))
+ f' 消息数={len(anthropic_payload.get("messages", []))}'
)
url, headers = build_anthropic_target(ctx)
if ctx.is_stream:
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
def _handle_anthropic_non_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 Anthropic 后端的非流式返回。"""
payload['stream'] = False
resp, err = forward_request(url, headers, payload)
if err:
return err
raw = resp.json()
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
data = messages_to_cc_response(raw)
return _finalize_chat_response(ctx, data, debug_label='Messages 转回聊天补全后')
def _handle_anthropic_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 Anthropic 后端的流式返回。
这里仍然保留独立的事件级转换器而不是先落成完整响应再回放
是为了尽量保持 Cursor 端的流式体验和工具调用时序
"""
payload['stream'] = True
converter = AnthropicStreamConverter()
def generate():
resp, err = forward_request(url, headers, payload, stream=True)
if err:
yield chat_error_chunk(str(err))
return
event_count = 0
for event_type, event_data in iter_anthropic_sse(resp):
if _n[0] < 10:
_dbg(f'上游事件#{_n[0]} {event_type}={json.dumps(event_data, ensure_ascii=False, default=str)[:500]}')
if event_count < 10:
_dbg(
f'上游事件#{event_count} 类型={event_type} 数据='
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
)
for chunk_str in converter.process_event(event_type, event_data):
try:
chunk_obj = json.loads(chunk_str)
chunk_obj['model'] = cursor_model
chunk_str = json.dumps(chunk_obj)
chunk_obj['model'] = ctx.client_model
chunk_str = json.dumps(chunk_obj, ensure_ascii=False)
except (json.JSONDecodeError, TypeError):
pass
if _n[0] < 10:
_dbg(f'发给Cursor chunk#{_n[0]}={chunk_str[:500]}')
yield f'data: {chunk_str}\n\n'
_n[0] += 1
if event_count < 10:
_dbg(f'返回片段#{event_count}={chunk_str[:500]}')
yield sse_data_message(chunk_str)
_dbg(f'流结束,共 {_n[0]} 个事件')
yield 'data: [DONE]\n\n'
event_count += 1
_dbg(f'流式响应结束,共 {event_count} 个事件')
yield sse_data_message('[DONE]')
return sse_response(generate())
def _log_messages(payload):
"""记录请求中的消息摘要"""
for i, msg in enumerate(payload.get('messages', [])):
role = msg.get('role', '?')
content = msg.get('content')
def _finalize_chat_response(
ctx: RouteContext,
data: dict[str, Any],
*,
debug_label: str,
):
"""统一收尾非流式聊天补全响应。
三条后端链路最终都会回到 Chat Completions 格式因此这里集中做
- 回填给 Cursor 展示的模型名
- 输出统一调试日志
- 输出统一令牌统计日志
"""
data['model'] = ctx.client_model
_dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000])
log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens')
return jsonify(data)
def _log_messages(payload: dict[str, Any]) -> None:
"""记录消息摘要,方便排查请求形态是否符合预期。"""
for index, message in enumerate(payload.get('messages', [])):
role = message.get('role', '?')
content = message.get('content')
extra = ''
if 'tool_calls' in msg:
extra += f' tool_calls={len(msg["tool_calls"])}'
if msg.get('tool_call_id'):
extra += f' tool_call_id={msg["tool_call_id"]}'
if 'tool_calls' in message:
extra += f' 工具调用数={len(message["tool_calls"])}'
if message.get('tool_call_id'):
extra += f' 工具调用ID={message["tool_call_id"]}'
if isinstance(content, list):
info = f'list[{len(content)}]'
content_info = f'列表[{len(content)}]'
elif isinstance(content, str):
info = f'str[{len(content)}]'
content_info = f'文本[{len(content)}]'
else:
info = type(content).__name__
logger.info(f' 消息[{i}] {role} {info}{extra}')
content_info = type(content).__name__
logger.info(' 消息[%s] 角色=%s 内容=%s%s', index, role, content_info, extra)

118
routes/common.py Normal file
View file

@ -0,0 +1,118 @@
"""路由层公共辅助
收敛多个数据面路由都会用到的上下文解析上游目标构造日志输出和
SSE 消息拼装逻辑避免 `chat.py` `responses.py` 各自维护重复实现
"""
from __future__ import annotations
from dataclasses import dataclass
import json
import logging
from typing import Any
import settings
from utils.http import build_anthropic_headers, build_openai_headers
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class RouteContext:
"""数据面路由使用的标准请求上下文。"""
client_model: str
upstream_model: str
backend: str
target_url: str
api_key: str
is_stream: bool
def build_route_context(client_model: str, is_stream: bool) -> RouteContext:
"""解析模型映射,得到当前请求的统一路由上下文。"""
mapping = settings.resolve_model(client_model)
return RouteContext(
client_model=client_model,
upstream_model=mapping['upstream_model'],
backend=mapping['backend'],
target_url=mapping['target_url'],
api_key=mapping['api_key'],
is_stream=is_stream,
)
def build_openai_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
"""根据路由上下文生成 OpenAI 兼容后端的地址和请求头。"""
url = f'{ctx.target_url.rstrip("/")}/v1/chat/completions'
headers = build_openai_headers(ctx.api_key)
return url, headers
def build_responses_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
"""根据路由上下文生成 OpenAI Responses 后端的地址和请求头。"""
url = f'{ctx.target_url.rstrip("/")}/v1/responses'
headers = build_openai_headers(ctx.api_key)
return url, headers
def build_anthropic_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
"""根据路由上下文生成 Anthropic 后端的地址和请求头。"""
url = f'{ctx.target_url.rstrip("/")}/v1/messages'
headers = build_anthropic_headers(ctx.api_key)
return url, headers
def log_route_context(route_name: str, ctx: RouteContext, *, extra: str = '') -> None:
"""统一输出路由级日志,避免不同入口的日志格式逐渐漂移。"""
parts = [
f'[{route_name}]',
f'模型={ctx.client_model}',
f'上游模型={ctx.upstream_model}',
f'后端={ctx.backend}',
f'流式={ctx.is_stream}',
]
if extra:
parts.append(extra)
logger.info(' '.join(parts))
def log_usage(
route_name: str,
usage: dict[str, Any],
*,
input_key: str,
output_key: str,
) -> None:
"""统一输出令牌统计日志。
不同协议对 usage 字段命名不一致这里只接收字段名不在调用方重复拼接日志文案
"""
logger.info(
'[%s] 请求完成 输入令牌=%s 输出令牌=%s',
route_name,
usage.get(input_key, 0),
usage.get(output_key, 0),
)
def sse_data_message(data: Any) -> str:
"""构造仅包含 data 的 SSE 消息。"""
payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
return f'data: {payload}\n\n'
def sse_event_message(event_type: str, data: Any) -> str:
"""构造带 event 名称的 SSE 消息。"""
payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
return f'event: {event_type}\ndata: {payload}\n\n'
def chat_error_chunk(message: str, error_type: str = 'upstream_error') -> str:
"""构造聊天补全流式接口使用的错误消息。"""
return sse_data_message({'error': {'message': message, 'type': error_type}})
def responses_error_event(message: str) -> str:
"""构造 Responses 流式接口使用的错误事件。"""
return sse_event_message('error', {'error': message})

View file

@ -1,22 +1,37 @@
"""路由: /v1/responses
处理 Cursor GPT/Claude-Opus 等模型发出的 Responses API 格式请求
转换为 CC 格式后分发到对应后端响应再转Responses 格式
处理 Cursor GPTClaude-Opus 等模型发出的 Responses API 请求
请求会先转换为 Chat Completions 中间表示再按后端类型分发最后转换Responses 格式
"""
from __future__ import annotations
import json
import logging
from typing import Any
from flask import Blueprint, request, jsonify
from flask import Blueprint, jsonify, request
import settings
from adapters.responses_adapter import responses_to_cc, cc_to_responses, ResponsesStreamConverter
from adapters.openai_fixer import normalize_request, fix_response, fix_stream_chunk
from adapters.openai_anthropic import cc_to_messages_request, messages_to_cc_response
from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc
from config import Config
from routes.common import (
RouteContext,
build_anthropic_target,
build_openai_target,
build_responses_target,
build_route_context,
log_route_context,
log_usage,
responses_error_event,
)
from utils.http import (
build_openai_headers, build_anthropic_headers,
forward_request, sse_response,
iter_openai_sse, iter_anthropic_sse,
forward_request,
iter_anthropic_sse,
iter_openai_sse,
iter_responses_sse,
sse_response,
)
from utils.think_tag import ThinkTagExtractor
@ -25,102 +40,272 @@ logger = logging.getLogger(__name__)
bp = Blueprint('responses', __name__)
def _dbg(message: str) -> None:
"""仅在调试模式下输出详细日志。"""
if Config.DEBUG:
logger.info('[响应生成调试] %s', message)
@bp.route('/v1/responses', methods=['POST'])
def responses_endpoint():
"""处理 Responses 请求并按模型映射分发。"""
payload = request.get_json(force=True)
model = payload.get('model', 'unknown')
client_model = payload.get('model', 'unknown')
is_stream = payload.get('stream', False)
mapping = settings.resolve_model(model)
backend = mapping['backend']
upstream = mapping['upstream_model']
url_base = mapping['target_url']
api_key = mapping['api_key']
ctx = build_route_context(client_model, is_stream)
log_route_context('响应生成', ctx)
logger.info(f'[Responses] {model}{upstream} 后端={backend} 流式={is_stream}')
cc_payload = _build_cc_payload(payload, ctx)
# Responses → CC
if ctx.backend == 'openai':
return _handle_openai_backend(ctx, cc_payload)
if ctx.backend == 'responses':
return _handle_responses_backend(ctx, payload)
return _handle_anthropic_backend(ctx, cc_payload)
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
"""将 Responses 请求统一降级为 Chat Completions 中间表示。
这样后续无论走 OpenAI 兼容后端还是 Anthropic 后端都能复用一套
中间协议避免在路由层同时维护两套完全不同的请求编排逻辑
"""
cc_payload = responses_to_cc(payload)
cc_payload['model'] = upstream
if backend == 'openai':
return _via_openai(cc_payload, url_base, api_key, is_stream, model)
else:
return _via_anthropic(cc_payload, url_base, api_key, is_stream, model)
cc_payload['model'] = ctx.upstream_model
_dbg(
'已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys()))
+ f' 消息数={len(cc_payload.get("messages", []))}'
)
return cc_payload
# ─── OpenAI 后端 ──────────────────────────────────
def _via_openai(cc_payload, url_base, api_key, is_stream, display_model):
"""通过 OpenAI 后端处理"""
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
cc_payload = normalize_request(cc_payload)
headers = build_openai_headers(api_key)
url = f'{url_base.rstrip("/")}/v1/chat/completions'
_dbg(
f'标准化完成:模型={cc_payload.get("model")} '
f'工具数={len(cc_payload.get("tools", []))}'
)
if not is_stream:
cc_payload['stream'] = False
resp, err = forward_request(url, headers, cc_payload)
if err:
return err
return jsonify(cc_to_responses(fix_response(resp.json()), display_model))
url, headers = build_openai_target(ctx)
# 流式处理
if ctx.is_stream:
return _handle_openai_stream(ctx, cc_payload, url, headers)
return _handle_openai_non_stream(ctx, cc_payload, url, headers)
def _handle_openai_non_stream(
ctx: RouteContext,
cc_payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
cc_payload['stream'] = False
resp, err = forward_request(url, headers, cc_payload)
if err:
return err
raw = resp.json()
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
fixed = fix_response(raw)
response_data = cc_to_responses(fixed, ctx.client_model)
return _finalize_responses_response(response_data, debug_label='转换为 Responses 后')
def _handle_openai_stream(
ctx: RouteContext,
cc_payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
cc_payload['stream'] = True
converter = ResponsesStreamConverter(model=display_model)
converter = ResponsesStreamConverter(model=ctx.client_model)
def generate():
yield from converter.start_events()
resp, err = forward_request(url, headers, cc_payload, stream=True)
if err:
yield f'event: error\ndata: {json.dumps({"error": err})}\n\n'
yield responses_error_event(str(err))
return
think_ext = ThinkTagExtractor()
think_extractor = ThinkTagExtractor()
chunk_count = 0
for chunk in iter_openai_sse(resp):
if chunk is None:
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
yield from converter.finalize()
return
if chunk_count < 10:
_dbg(
f'上游原始片段#{chunk_count}='
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
)
chunk = fix_stream_chunk(chunk)
for out in think_ext.process_chunk(chunk):
for out in think_extractor.process_chunk(chunk):
if chunk_count < 10:
_dbg(
f'转换后片段#{chunk_count}='
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
)
yield from converter.process_cc_chunk(out)
chunk_count += 1
return sse_response(generate())
# ─── Anthropic 后端 ───────────────────────────────
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
"""处理走原生 Responses 后端的请求。
当中转站本身就只支持 `/v1/responses` 不需要再绕到聊天补全中间协议
直接转发原生 Responses 请求即可
"""
payload = dict(payload)
payload['model'] = ctx.upstream_model
url, headers = build_responses_target(ctx)
if ctx.is_stream:
return _handle_responses_stream(ctx, payload, url, headers)
return _handle_responses_non_stream(ctx, payload, url, headers)
def _via_anthropic(cc_payload, url_base, api_key, is_stream, display_model):
"""通过 Anthropic 后端处理"""
anthropic_payload = cc_to_messages_request(cc_payload)
headers = build_anthropic_headers(api_key)
url = f'{url_base.rstrip("/")}/v1/messages'
def _handle_responses_non_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理原生 Responses 后端的非流式返回。"""
payload['stream'] = False
resp, err = forward_request(url, headers, payload)
if err:
return err
if not is_stream:
anthropic_payload['stream'] = False
resp, err = forward_request(url, headers, anthropic_payload)
response_data = resp.json()
response_data['model'] = ctx.client_model
return _finalize_responses_response(response_data, debug_label='原生 Responses 返回后')
def _handle_responses_stream(
ctx: RouteContext,
payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理原生 Responses 后端的流式返回。"""
payload['stream'] = True
converter = ResponsesStreamConverter(model=ctx.client_model)
def generate():
resp, err = forward_request(url, headers, payload, stream=True)
if err:
return err
cc_data = messages_to_cc_response(resp.json())
return jsonify(cc_to_responses(cc_data, display_model))
yield responses_error_event(str(err))
return
# 流式处理Anthropic SSE → Responses SSE跳过 CC 中间态)
event_count = 0
for event_type, event_data in iter_responses_sse(resp):
if event_count < 10:
_dbg(
f'上游事件#{event_count} 类型={event_type} 数据='
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
)
yield from converter.process_responses_event(event_type, event_data)
event_count += 1
_dbg(f'流式响应结束,共 {event_count} 个事件')
return sse_response(generate())
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
"""处理走 Anthropic 后端的 Responses 请求。"""
anthropic_payload = cc_to_messages_request(cc_payload)
_dbg(
'已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys()))
+ f' 消息数={len(anthropic_payload.get("messages", []))}'
)
url, headers = build_anthropic_target(ctx)
if ctx.is_stream:
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
def _handle_anthropic_non_stream(
ctx: RouteContext,
anthropic_payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 Anthropic 后端的非流式 Responses 返回。"""
anthropic_payload['stream'] = False
resp, err = forward_request(url, headers, anthropic_payload)
if err:
return err
raw = resp.json()
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
cc_data = messages_to_cc_response(raw)
response_data = cc_to_responses(cc_data, ctx.client_model)
return _finalize_responses_response(response_data, debug_label='Messages 转回 Responses 后')
def _handle_anthropic_stream(
ctx: RouteContext,
anthropic_payload: dict[str, Any],
url: str,
headers: dict[str, str],
):
"""处理 Anthropic 后端的流式 Responses 返回。
这里直接将 Anthropic SSE 事件映射到 Responses SSE故意跳过 CC 流式中间态
这样可以减少一次事件重组降低流式转换复杂度也更容易保留原始时序
"""
anthropic_payload['stream'] = True
converter = ResponsesStreamConverter(model=display_model)
converter = ResponsesStreamConverter(model=ctx.client_model)
def generate():
yield from converter.start_events()
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
if err:
yield f'event: error\ndata: {json.dumps({"error": err})}\n\n'
yield responses_error_event(str(err))
return
event_count = 0
for event_type, event_data in iter_anthropic_sse(resp):
yield from converter.process_anthropic_event(event_type, event_data)
if event_count < 10:
_dbg(
f'上游事件#{event_count} 类型={event_type} 数据='
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
)
yield from converter.process_anthropic_event(event_type, event_data)
event_count += 1
_dbg(f'流式响应结束,共 {event_count} 个事件')
yield from converter.finalize()
return sse_response(generate())
def _finalize_responses_response(response_data: dict[str, Any], *, debug_label: str):
"""统一收尾非流式 Responses 响应。
两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象因此这里集中
处理调试日志回填展示模型名以及 usage 日志
"""
response_data['model'] = response_data.get('model') or ''
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
return jsonify(response_data)