优化代码
This commit is contained in:
parent
202731df74
commit
96fbc4da80
15 changed files with 2768 additions and 1383 deletions
444
routes/chat.py
444
routes/chat.py
|
|
@ -1,216 +1,388 @@
|
|||
"""路由: /v1/chat/completions
|
||||
|
||||
处理 Cursor 发来的 OpenAI Chat Completions 格式请求。
|
||||
根据模型映射的 backend 字段分发到 OpenAI 或 Anthropic 后端。
|
||||
根据模型映射的后端类型,转发到 OpenAI 兼容接口、Anthropic Messages 接口,
|
||||
或原生 OpenAI Responses 接口。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from flask import Blueprint, request, jsonify
|
||||
from flask import Blueprint, jsonify, request
|
||||
|
||||
import settings
|
||||
from config import Config
|
||||
from adapters.openai_fixer import normalize_request, fix_response, fix_stream_chunk
|
||||
from adapters.openai_anthropic import (
|
||||
cc_to_messages_request, messages_to_cc_response, AnthropicStreamConverter,
|
||||
from adapters.cc_anthropic_adapter import (
|
||||
AnthropicStreamConverter,
|
||||
cc_to_messages_request,
|
||||
messages_to_cc_response,
|
||||
)
|
||||
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
|
||||
from adapters.responses_cc_adapter import (
|
||||
ResponsesToCCStreamConverter,
|
||||
cc_to_responses_request,
|
||||
responses_to_cc,
|
||||
responses_to_cc_response,
|
||||
)
|
||||
from config import Config
|
||||
from routes.common import (
|
||||
RouteContext,
|
||||
build_anthropic_target,
|
||||
build_openai_target,
|
||||
build_responses_target,
|
||||
build_route_context,
|
||||
chat_error_chunk,
|
||||
log_route_context,
|
||||
log_usage,
|
||||
sse_data_message,
|
||||
)
|
||||
from adapters.responses_adapter import responses_to_cc
|
||||
from utils.http import (
|
||||
build_openai_headers, build_anthropic_headers,
|
||||
forward_request, sse_response,
|
||||
iter_openai_sse, iter_anthropic_sse,
|
||||
forward_request,
|
||||
iter_anthropic_sse,
|
||||
iter_openai_sse,
|
||||
iter_responses_sse,
|
||||
sse_response,
|
||||
)
|
||||
from utils.think_tag import ThinkTagExtractor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _dbg(msg):
|
||||
"""DEBUG 模式下输出详细日志"""
|
||||
if Config.DEBUG:
|
||||
logger.info(f'[调试] {msg}')
|
||||
|
||||
bp = Blueprint('chat', __name__)
|
||||
|
||||
|
||||
def _dbg(message: str) -> None:
|
||||
"""仅在调试模式下输出详细日志。"""
|
||||
if Config.DEBUG:
|
||||
logger.info('[聊天补全调试] %s', message)
|
||||
|
||||
|
||||
@bp.route('/v1/chat/completions', methods=['POST'])
|
||||
def chat_completions():
|
||||
"""处理聊天补全请求并按模型映射分发到不同后端。"""
|
||||
payload = request.get_json(force=True)
|
||||
payload, message_count = _normalize_chat_payload(payload)
|
||||
|
||||
client_model = payload.get('model', 'unknown')
|
||||
is_stream = payload.get('stream', False)
|
||||
# 保留 Cursor 发送的原始模型名,响应时需要回填
|
||||
cursor_model = payload.get('model', 'unknown')
|
||||
msg_count = len(payload.get('messages', []))
|
||||
ctx = build_route_context(client_model, is_stream)
|
||||
|
||||
# 容错:Responses 格式误入 CC 端点
|
||||
if msg_count == 0 and 'input' in payload:
|
||||
logger.info('检测到 Responses 格式(有 input 无 messages),自动转换')
|
||||
payload = responses_to_cc(payload)
|
||||
msg_count = len(payload.get('messages', []))
|
||||
elif msg_count == 0:
|
||||
logger.warning(f'messages 为空, payload keys: {list(payload.keys())}')
|
||||
|
||||
mapping = settings.resolve_model(cursor_model)
|
||||
backend = mapping['backend']
|
||||
upstream = mapping['upstream_model']
|
||||
url_base = mapping['target_url']
|
||||
api_key = mapping['api_key']
|
||||
|
||||
logger.info(
|
||||
f'[CC] {cursor_model} → {upstream} '
|
||||
f'后端={backend} 流式={is_stream} 消息数={msg_count}'
|
||||
)
|
||||
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
|
||||
_log_messages(payload)
|
||||
|
||||
if backend == 'openai':
|
||||
return _via_openai(payload, upstream, url_base, api_key, is_stream, cursor_model)
|
||||
else:
|
||||
return _via_anthropic(payload, upstream, url_base, api_key, is_stream, cursor_model)
|
||||
if ctx.backend == 'openai':
|
||||
return _handle_openai_backend(ctx, payload)
|
||||
if ctx.backend == 'responses':
|
||||
return _handle_responses_backend(ctx, payload)
|
||||
return _handle_anthropic_backend(ctx, payload)
|
||||
|
||||
|
||||
# ─── OpenAI 后端 ──────────────────────────────────
|
||||
def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
|
||||
"""整理聊天补全入口的请求体。
|
||||
|
||||
这里保留了一层兼容逻辑:当 Cursor 或调用方把 Responses 格式误发到
|
||||
`/v1/chat/completions` 时,先降级转换成 Chat Completions,再进入统一主流程。
|
||||
"""
|
||||
message_count = len(payload.get('messages', []))
|
||||
|
||||
if message_count == 0 and 'input' in payload:
|
||||
logger.info('检测到 Responses 格式误入聊天补全接口,已自动转换为 Chat Completions 格式')
|
||||
payload = responses_to_cc(payload)
|
||||
message_count = len(payload.get('messages', []))
|
||||
elif message_count == 0:
|
||||
logger.warning('消息列表为空,请求字段=%s', list(payload.keys()))
|
||||
|
||||
return payload, message_count
|
||||
|
||||
|
||||
def _via_openai(payload, upstream, url_base, api_key, is_stream, cursor_model):
|
||||
"""通过 OpenAI 兼容后端转发"""
|
||||
_dbg(f'Cursor 原始请求 keys={list(payload.keys())} '
|
||||
f'其他字段={json.dumps({k: v for k, v in payload.items() if k != "messages"}, ensure_ascii=False, default=str)[:500]}')
|
||||
def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
"""处理走 OpenAI 兼容后端的聊天补全请求。"""
|
||||
_dbg(
|
||||
'原始请求字段=' + str(list(payload.keys())) + ' '
|
||||
+ '附加字段='
|
||||
+ json.dumps(
|
||||
{k: v for k, v in payload.items() if k != 'messages'},
|
||||
ensure_ascii=False,
|
||||
default=str,
|
||||
)[:500]
|
||||
)
|
||||
|
||||
payload = normalize_request(payload, upstream)
|
||||
_dbg(f'normalize 后 model={payload.get("model")} tools数={len(payload.get("tools", []))}')
|
||||
payload = normalize_request(payload, ctx.upstream_model)
|
||||
_dbg(
|
||||
f'标准化完成:模型={payload.get("model")} '
|
||||
f'工具数={len(payload.get("tools", []))}'
|
||||
)
|
||||
|
||||
headers = build_openai_headers(api_key)
|
||||
url = f'{url_base.rstrip("/")}/v1/chat/completions'
|
||||
url, headers = build_openai_target(ctx)
|
||||
|
||||
if not is_stream:
|
||||
payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
return err
|
||||
raw = resp.json()
|
||||
_dbg(f'上游原始响应={json.dumps(raw, ensure_ascii=False, default=str)[:1000]}')
|
||||
data = fix_response(raw)
|
||||
data['model'] = cursor_model
|
||||
_dbg(f'修复后响应={json.dumps(data, ensure_ascii=False, default=str)[:1000]}')
|
||||
usage = data.get('usage', {})
|
||||
logger.info(
|
||||
f'[CC] 完成 prompt={usage.get("prompt_tokens", 0)} '
|
||||
f'completion={usage.get("completion_tokens", 0)}'
|
||||
)
|
||||
return jsonify(data)
|
||||
if ctx.is_stream:
|
||||
return _handle_openai_stream(ctx, payload, url, headers)
|
||||
return _handle_openai_non_stream(ctx, payload, url, headers)
|
||||
|
||||
# 流式处理
|
||||
|
||||
def _handle_openai_non_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
data = fix_response(raw)
|
||||
return _finalize_chat_response(ctx, data, debug_label='修复后响应')
|
||||
|
||||
|
||||
def _handle_openai_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的流式返回。"""
|
||||
payload['stream'] = True
|
||||
_n = [0]
|
||||
|
||||
def generate():
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
yield f'data: {json.dumps({"error": {"message": err, "type": "upstream_error"}})}\n\n'
|
||||
yield chat_error_chunk(str(err))
|
||||
return
|
||||
|
||||
think_ext = ThinkTagExtractor()
|
||||
think_extractor = ThinkTagExtractor()
|
||||
chunk_count = 0
|
||||
|
||||
for chunk in iter_openai_sse(resp):
|
||||
if chunk is None: # [DONE]
|
||||
_dbg(f'流结束,共 {_n[0]} 个 chunk')
|
||||
yield 'data: [DONE]\n\n'
|
||||
if chunk is None:
|
||||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||||
yield sse_data_message('[DONE]')
|
||||
return
|
||||
|
||||
if _n[0] < 10:
|
||||
_dbg(f'上游原始 chunk#{_n[0]}={json.dumps(chunk, ensure_ascii=False, default=str)[:500]}')
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'上游原始片段#{chunk_count}='
|
||||
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
|
||||
chunk = fix_stream_chunk(chunk)
|
||||
chunk['model'] = cursor_model
|
||||
chunk['model'] = ctx.client_model
|
||||
|
||||
for out in think_ext.process_chunk(chunk):
|
||||
if _n[0] < 10:
|
||||
_dbg(f'发给Cursor chunk#{_n[0]}={json.dumps(out, ensure_ascii=False, default=str)[:500]}')
|
||||
yield f'data: {json.dumps(out)}\n\n'
|
||||
for out in think_extractor.process_chunk(chunk):
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'返回片段#{chunk_count}='
|
||||
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield sse_data_message(out)
|
||||
|
||||
_n[0] += 1
|
||||
chunk_count += 1
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
# ─── Anthropic 后端 ───────────────────────────────
|
||||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
"""处理走原生 Responses 后端的聊天补全请求。
|
||||
|
||||
当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求,
|
||||
返回时再转换回聊天补全协议。
|
||||
"""
|
||||
responses_payload = cc_to_responses_request(payload)
|
||||
responses_payload['model'] = ctx.upstream_model
|
||||
_dbg(
|
||||
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
||||
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
||||
)
|
||||
|
||||
url, headers = build_responses_target(ctx)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_responses_stream(ctx, responses_payload, url, headers)
|
||||
return _handle_responses_non_stream(ctx, responses_payload, url, headers)
|
||||
|
||||
|
||||
def _via_anthropic(payload, upstream, url_base, api_key, is_stream, cursor_model):
|
||||
"""通过 Anthropic 后端转发(CC → Messages → CC)"""
|
||||
payload['model'] = upstream
|
||||
anthropic_payload = cc_to_messages_request(payload)
|
||||
_dbg(f'CC→Messages 转换后 keys={list(anthropic_payload.keys())} '
|
||||
f'messages数={len(anthropic_payload.get("messages", []))}')
|
||||
def _handle_responses_non_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理原生 Responses 后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
return err
|
||||
|
||||
headers = build_anthropic_headers(api_key)
|
||||
url = f'{url_base.rstrip("/")}/v1/messages'
|
||||
raw = resp.json()
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
if not is_stream:
|
||||
anthropic_payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, anthropic_payload)
|
||||
if err:
|
||||
return err
|
||||
raw = resp.json()
|
||||
_dbg(f'上游原始响应={json.dumps(raw, ensure_ascii=False, default=str)[:1000]}')
|
||||
data = messages_to_cc_response(raw)
|
||||
data['model'] = cursor_model
|
||||
_dbg(f'Messages→CC 转换后={json.dumps(data, ensure_ascii=False, default=str)[:1000]}')
|
||||
usage = data.get('usage', {})
|
||||
logger.info(
|
||||
f'[CC] 完成 prompt={usage.get("prompt_tokens", 0)} '
|
||||
f'completion={usage.get("completion_tokens", 0)}'
|
||||
)
|
||||
return jsonify(data)
|
||||
data = responses_to_cc_response(raw, ctx.client_model)
|
||||
return _finalize_chat_response(ctx, data, debug_label='Responses 转回聊天补全后')
|
||||
|
||||
# 流式处理
|
||||
anthropic_payload['stream'] = True
|
||||
converter = AnthropicStreamConverter()
|
||||
_n = [0]
|
||||
|
||||
def _handle_responses_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理原生 Responses 后端的流式返回。"""
|
||||
payload['stream'] = True
|
||||
converter = ResponsesToCCStreamConverter(model=ctx.client_model)
|
||||
|
||||
def generate():
|
||||
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
yield f'data: {json.dumps({"error": {"message": err, "type": "upstream_error"}})}\n\n'
|
||||
yield chat_error_chunk(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
for event_type, event_data in iter_responses_sse(resp):
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
|
||||
for chunk in converter.process_event(event_type, event_data):
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'返回片段#{event_count}='
|
||||
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield sse_data_message(chunk)
|
||||
|
||||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
yield sse_data_message('[DONE]')
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
"""处理走 Anthropic Messages 后端的聊天补全请求。"""
|
||||
payload['model'] = ctx.upstream_model
|
||||
anthropic_payload = cc_to_messages_request(payload)
|
||||
_dbg(
|
||||
'已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys()))
|
||||
+ f' 消息数={len(anthropic_payload.get("messages", []))}'
|
||||
)
|
||||
|
||||
url, headers = build_anthropic_target(ctx)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
|
||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
|
||||
|
||||
|
||||
def _handle_anthropic_non_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 Anthropic 后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
data = messages_to_cc_response(raw)
|
||||
return _finalize_chat_response(ctx, data, debug_label='Messages 转回聊天补全后')
|
||||
|
||||
|
||||
def _handle_anthropic_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 Anthropic 后端的流式返回。
|
||||
|
||||
这里仍然保留独立的事件级转换器,而不是先落成完整响应再回放,
|
||||
是为了尽量保持 Cursor 端的流式体验和工具调用时序。
|
||||
"""
|
||||
payload['stream'] = True
|
||||
converter = AnthropicStreamConverter()
|
||||
|
||||
def generate():
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
yield chat_error_chunk(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
for event_type, event_data in iter_anthropic_sse(resp):
|
||||
if _n[0] < 10:
|
||||
_dbg(f'上游事件#{_n[0]} {event_type}={json.dumps(event_data, ensure_ascii=False, default=str)[:500]}')
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
|
||||
for chunk_str in converter.process_event(event_type, event_data):
|
||||
try:
|
||||
chunk_obj = json.loads(chunk_str)
|
||||
chunk_obj['model'] = cursor_model
|
||||
chunk_str = json.dumps(chunk_obj)
|
||||
chunk_obj['model'] = ctx.client_model
|
||||
chunk_str = json.dumps(chunk_obj, ensure_ascii=False)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if _n[0] < 10:
|
||||
_dbg(f'发给Cursor chunk#{_n[0]}={chunk_str[:500]}')
|
||||
yield f'data: {chunk_str}\n\n'
|
||||
|
||||
_n[0] += 1
|
||||
if event_count < 10:
|
||||
_dbg(f'返回片段#{event_count}={chunk_str[:500]}')
|
||||
yield sse_data_message(chunk_str)
|
||||
|
||||
_dbg(f'流结束,共 {_n[0]} 个事件')
|
||||
yield 'data: [DONE]\n\n'
|
||||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
yield sse_data_message('[DONE]')
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _log_messages(payload):
|
||||
"""记录请求中的消息摘要"""
|
||||
for i, msg in enumerate(payload.get('messages', [])):
|
||||
role = msg.get('role', '?')
|
||||
content = msg.get('content')
|
||||
def _finalize_chat_response(
|
||||
ctx: RouteContext,
|
||||
data: dict[str, Any],
|
||||
*,
|
||||
debug_label: str,
|
||||
):
|
||||
"""统一收尾非流式聊天补全响应。
|
||||
|
||||
三条后端链路最终都会回到 Chat Completions 格式,因此这里集中做:
|
||||
- 回填给 Cursor 展示的模型名
|
||||
- 输出统一调试日志
|
||||
- 输出统一令牌统计日志
|
||||
"""
|
||||
data['model'] = ctx.client_model
|
||||
_dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000])
|
||||
log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens')
|
||||
return jsonify(data)
|
||||
|
||||
|
||||
def _log_messages(payload: dict[str, Any]) -> None:
|
||||
"""记录消息摘要,方便排查请求形态是否符合预期。"""
|
||||
for index, message in enumerate(payload.get('messages', [])):
|
||||
role = message.get('role', '?')
|
||||
content = message.get('content')
|
||||
extra = ''
|
||||
if 'tool_calls' in msg:
|
||||
extra += f' tool_calls={len(msg["tool_calls"])}'
|
||||
if msg.get('tool_call_id'):
|
||||
extra += f' tool_call_id={msg["tool_call_id"]}'
|
||||
|
||||
if 'tool_calls' in message:
|
||||
extra += f' 工具调用数={len(message["tool_calls"])}'
|
||||
if message.get('tool_call_id'):
|
||||
extra += f' 工具调用ID={message["tool_call_id"]}'
|
||||
|
||||
if isinstance(content, list):
|
||||
info = f'list[{len(content)}]'
|
||||
content_info = f'列表[{len(content)}]'
|
||||
elif isinstance(content, str):
|
||||
info = f'str[{len(content)}]'
|
||||
content_info = f'文本[{len(content)}]'
|
||||
else:
|
||||
info = type(content).__name__
|
||||
logger.info(f' 消息[{i}] {role} {info}{extra}')
|
||||
content_info = type(content).__name__
|
||||
|
||||
logger.info(' 消息[%s] 角色=%s 内容=%s%s', index, role, content_info, extra)
|
||||
|
|
|
|||
118
routes/common.py
Normal file
118
routes/common.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
"""路由层公共辅助
|
||||
|
||||
收敛多个数据面路由都会用到的上下文解析、上游目标构造、日志输出和
|
||||
SSE 消息拼装逻辑,避免 `chat.py` 和 `responses.py` 各自维护重复实现。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import settings
|
||||
from utils.http import build_anthropic_headers, build_openai_headers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class RouteContext:
|
||||
"""数据面路由使用的标准请求上下文。"""
|
||||
|
||||
client_model: str
|
||||
upstream_model: str
|
||||
backend: str
|
||||
target_url: str
|
||||
api_key: str
|
||||
is_stream: bool
|
||||
|
||||
|
||||
def build_route_context(client_model: str, is_stream: bool) -> RouteContext:
|
||||
"""解析模型映射,得到当前请求的统一路由上下文。"""
|
||||
mapping = settings.resolve_model(client_model)
|
||||
return RouteContext(
|
||||
client_model=client_model,
|
||||
upstream_model=mapping['upstream_model'],
|
||||
backend=mapping['backend'],
|
||||
target_url=mapping['target_url'],
|
||||
api_key=mapping['api_key'],
|
||||
is_stream=is_stream,
|
||||
)
|
||||
|
||||
|
||||
def build_openai_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
|
||||
"""根据路由上下文生成 OpenAI 兼容后端的地址和请求头。"""
|
||||
url = f'{ctx.target_url.rstrip("/")}/v1/chat/completions'
|
||||
headers = build_openai_headers(ctx.api_key)
|
||||
return url, headers
|
||||
|
||||
|
||||
def build_responses_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
|
||||
"""根据路由上下文生成 OpenAI Responses 后端的地址和请求头。"""
|
||||
url = f'{ctx.target_url.rstrip("/")}/v1/responses'
|
||||
headers = build_openai_headers(ctx.api_key)
|
||||
return url, headers
|
||||
|
||||
|
||||
def build_anthropic_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
|
||||
"""根据路由上下文生成 Anthropic 后端的地址和请求头。"""
|
||||
url = f'{ctx.target_url.rstrip("/")}/v1/messages'
|
||||
headers = build_anthropic_headers(ctx.api_key)
|
||||
return url, headers
|
||||
|
||||
|
||||
def log_route_context(route_name: str, ctx: RouteContext, *, extra: str = '') -> None:
|
||||
"""统一输出路由级日志,避免不同入口的日志格式逐渐漂移。"""
|
||||
parts = [
|
||||
f'[{route_name}]',
|
||||
f'模型={ctx.client_model}',
|
||||
f'上游模型={ctx.upstream_model}',
|
||||
f'后端={ctx.backend}',
|
||||
f'流式={ctx.is_stream}',
|
||||
]
|
||||
if extra:
|
||||
parts.append(extra)
|
||||
logger.info(' '.join(parts))
|
||||
|
||||
|
||||
def log_usage(
|
||||
route_name: str,
|
||||
usage: dict[str, Any],
|
||||
*,
|
||||
input_key: str,
|
||||
output_key: str,
|
||||
) -> None:
|
||||
"""统一输出令牌统计日志。
|
||||
|
||||
不同协议对 usage 字段命名不一致,这里只接收字段名,不在调用方重复拼接日志文案。
|
||||
"""
|
||||
logger.info(
|
||||
'[%s] 请求完成 输入令牌=%s 输出令牌=%s',
|
||||
route_name,
|
||||
usage.get(input_key, 0),
|
||||
usage.get(output_key, 0),
|
||||
)
|
||||
|
||||
|
||||
def sse_data_message(data: Any) -> str:
|
||||
"""构造仅包含 data 的 SSE 消息。"""
|
||||
payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
|
||||
return f'data: {payload}\n\n'
|
||||
|
||||
|
||||
def sse_event_message(event_type: str, data: Any) -> str:
|
||||
"""构造带 event 名称的 SSE 消息。"""
|
||||
payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
|
||||
return f'event: {event_type}\ndata: {payload}\n\n'
|
||||
|
||||
|
||||
def chat_error_chunk(message: str, error_type: str = 'upstream_error') -> str:
|
||||
"""构造聊天补全流式接口使用的错误消息。"""
|
||||
return sse_data_message({'error': {'message': message, 'type': error_type}})
|
||||
|
||||
|
||||
def responses_error_event(message: str) -> str:
|
||||
"""构造 Responses 流式接口使用的错误事件。"""
|
||||
return sse_event_message('error', {'error': message})
|
||||
|
|
@ -1,22 +1,37 @@
|
|||
"""路由: /v1/responses
|
||||
|
||||
处理 Cursor 对 GPT/Claude-Opus 等模型发出的 Responses API 格式请求。
|
||||
转换为 CC 格式后分发到对应后端,响应再转回 Responses 格式。
|
||||
处理 Cursor 对 GPT、Claude-Opus 等模型发出的 Responses API 请求。
|
||||
请求会先转换为 Chat Completions 中间表示,再按后端类型分发,最后转换回 Responses 格式。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from flask import Blueprint, request, jsonify
|
||||
from flask import Blueprint, jsonify, request
|
||||
|
||||
import settings
|
||||
from adapters.responses_adapter import responses_to_cc, cc_to_responses, ResponsesStreamConverter
|
||||
from adapters.openai_fixer import normalize_request, fix_response, fix_stream_chunk
|
||||
from adapters.openai_anthropic import cc_to_messages_request, messages_to_cc_response
|
||||
from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response
|
||||
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
|
||||
from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc
|
||||
from config import Config
|
||||
from routes.common import (
|
||||
RouteContext,
|
||||
build_anthropic_target,
|
||||
build_openai_target,
|
||||
build_responses_target,
|
||||
build_route_context,
|
||||
log_route_context,
|
||||
log_usage,
|
||||
responses_error_event,
|
||||
)
|
||||
from utils.http import (
|
||||
build_openai_headers, build_anthropic_headers,
|
||||
forward_request, sse_response,
|
||||
iter_openai_sse, iter_anthropic_sse,
|
||||
forward_request,
|
||||
iter_anthropic_sse,
|
||||
iter_openai_sse,
|
||||
iter_responses_sse,
|
||||
sse_response,
|
||||
)
|
||||
from utils.think_tag import ThinkTagExtractor
|
||||
|
||||
|
|
@ -25,102 +40,272 @@ logger = logging.getLogger(__name__)
|
|||
bp = Blueprint('responses', __name__)
|
||||
|
||||
|
||||
def _dbg(message: str) -> None:
|
||||
"""仅在调试模式下输出详细日志。"""
|
||||
if Config.DEBUG:
|
||||
logger.info('[响应生成调试] %s', message)
|
||||
|
||||
|
||||
@bp.route('/v1/responses', methods=['POST'])
|
||||
def responses_endpoint():
|
||||
"""处理 Responses 请求并按模型映射分发。"""
|
||||
payload = request.get_json(force=True)
|
||||
model = payload.get('model', 'unknown')
|
||||
client_model = payload.get('model', 'unknown')
|
||||
is_stream = payload.get('stream', False)
|
||||
|
||||
mapping = settings.resolve_model(model)
|
||||
backend = mapping['backend']
|
||||
upstream = mapping['upstream_model']
|
||||
url_base = mapping['target_url']
|
||||
api_key = mapping['api_key']
|
||||
ctx = build_route_context(client_model, is_stream)
|
||||
log_route_context('响应生成', ctx)
|
||||
|
||||
logger.info(f'[Responses] {model} → {upstream} 后端={backend} 流式={is_stream}')
|
||||
cc_payload = _build_cc_payload(payload, ctx)
|
||||
|
||||
# Responses → CC
|
||||
if ctx.backend == 'openai':
|
||||
return _handle_openai_backend(ctx, cc_payload)
|
||||
if ctx.backend == 'responses':
|
||||
return _handle_responses_backend(ctx, payload)
|
||||
return _handle_anthropic_backend(ctx, cc_payload)
|
||||
|
||||
|
||||
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
|
||||
"""将 Responses 请求统一降级为 Chat Completions 中间表示。
|
||||
|
||||
这样后续无论走 OpenAI 兼容后端还是 Anthropic 后端,都能复用一套
|
||||
中间协议,避免在路由层同时维护两套完全不同的请求编排逻辑。
|
||||
"""
|
||||
cc_payload = responses_to_cc(payload)
|
||||
cc_payload['model'] = upstream
|
||||
|
||||
if backend == 'openai':
|
||||
return _via_openai(cc_payload, url_base, api_key, is_stream, model)
|
||||
else:
|
||||
return _via_anthropic(cc_payload, url_base, api_key, is_stream, model)
|
||||
cc_payload['model'] = ctx.upstream_model
|
||||
_dbg(
|
||||
'已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys()))
|
||||
+ f' 消息数={len(cc_payload.get("messages", []))}'
|
||||
)
|
||||
return cc_payload
|
||||
|
||||
|
||||
# ─── OpenAI 后端 ──────────────────────────────────
|
||||
|
||||
|
||||
def _via_openai(cc_payload, url_base, api_key, is_stream, display_model):
|
||||
"""通过 OpenAI 后端处理"""
|
||||
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
|
||||
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
|
||||
cc_payload = normalize_request(cc_payload)
|
||||
headers = build_openai_headers(api_key)
|
||||
url = f'{url_base.rstrip("/")}/v1/chat/completions'
|
||||
_dbg(
|
||||
f'标准化完成:模型={cc_payload.get("model")} '
|
||||
f'工具数={len(cc_payload.get("tools", []))}'
|
||||
)
|
||||
|
||||
if not is_stream:
|
||||
cc_payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, cc_payload)
|
||||
if err:
|
||||
return err
|
||||
return jsonify(cc_to_responses(fix_response(resp.json()), display_model))
|
||||
url, headers = build_openai_target(ctx)
|
||||
|
||||
# 流式处理
|
||||
if ctx.is_stream:
|
||||
return _handle_openai_stream(ctx, cc_payload, url, headers)
|
||||
return _handle_openai_non_stream(ctx, cc_payload, url, headers)
|
||||
|
||||
|
||||
def _handle_openai_non_stream(
|
||||
ctx: RouteContext,
|
||||
cc_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
|
||||
cc_payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, cc_payload)
|
||||
if err:
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
fixed = fix_response(raw)
|
||||
response_data = cc_to_responses(fixed, ctx.client_model)
|
||||
return _finalize_responses_response(response_data, debug_label='转换为 Responses 后')
|
||||
|
||||
|
||||
def _handle_openai_stream(
|
||||
ctx: RouteContext,
|
||||
cc_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
|
||||
cc_payload['stream'] = True
|
||||
converter = ResponsesStreamConverter(model=display_model)
|
||||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||||
|
||||
def generate():
|
||||
yield from converter.start_events()
|
||||
|
||||
resp, err = forward_request(url, headers, cc_payload, stream=True)
|
||||
if err:
|
||||
yield f'event: error\ndata: {json.dumps({"error": err})}\n\n'
|
||||
yield responses_error_event(str(err))
|
||||
return
|
||||
|
||||
think_ext = ThinkTagExtractor()
|
||||
think_extractor = ThinkTagExtractor()
|
||||
chunk_count = 0
|
||||
|
||||
for chunk in iter_openai_sse(resp):
|
||||
if chunk is None:
|
||||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||||
yield from converter.finalize()
|
||||
return
|
||||
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'上游原始片段#{chunk_count}='
|
||||
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
|
||||
chunk = fix_stream_chunk(chunk)
|
||||
for out in think_ext.process_chunk(chunk):
|
||||
for out in think_extractor.process_chunk(chunk):
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'转换后片段#{chunk_count}='
|
||||
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield from converter.process_cc_chunk(out)
|
||||
|
||||
chunk_count += 1
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
# ─── Anthropic 后端 ───────────────────────────────
|
||||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
"""处理走原生 Responses 后端的请求。
|
||||
|
||||
当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议,
|
||||
直接转发原生 Responses 请求即可。
|
||||
"""
|
||||
payload = dict(payload)
|
||||
payload['model'] = ctx.upstream_model
|
||||
url, headers = build_responses_target(ctx)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_responses_stream(ctx, payload, url, headers)
|
||||
return _handle_responses_non_stream(ctx, payload, url, headers)
|
||||
|
||||
|
||||
def _via_anthropic(cc_payload, url_base, api_key, is_stream, display_model):
|
||||
"""通过 Anthropic 后端处理"""
|
||||
anthropic_payload = cc_to_messages_request(cc_payload)
|
||||
headers = build_anthropic_headers(api_key)
|
||||
url = f'{url_base.rstrip("/")}/v1/messages'
|
||||
def _handle_responses_non_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理原生 Responses 后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
return err
|
||||
|
||||
if not is_stream:
|
||||
anthropic_payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, anthropic_payload)
|
||||
response_data = resp.json()
|
||||
response_data['model'] = ctx.client_model
|
||||
return _finalize_responses_response(response_data, debug_label='原生 Responses 返回后')
|
||||
|
||||
|
||||
def _handle_responses_stream(
|
||||
ctx: RouteContext,
|
||||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理原生 Responses 后端的流式返回。"""
|
||||
payload['stream'] = True
|
||||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||||
|
||||
def generate():
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
return err
|
||||
cc_data = messages_to_cc_response(resp.json())
|
||||
return jsonify(cc_to_responses(cc_data, display_model))
|
||||
yield responses_error_event(str(err))
|
||||
return
|
||||
|
||||
# 流式处理:Anthropic SSE → Responses SSE(跳过 CC 中间态)
|
||||
event_count = 0
|
||||
for event_type, event_data in iter_responses_sse(resp):
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield from converter.process_responses_event(event_type, event_data)
|
||||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
|
||||
"""处理走 Anthropic 后端的 Responses 请求。"""
|
||||
anthropic_payload = cc_to_messages_request(cc_payload)
|
||||
_dbg(
|
||||
'已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys()))
|
||||
+ f' 消息数={len(anthropic_payload.get("messages", []))}'
|
||||
)
|
||||
|
||||
url, headers = build_anthropic_target(ctx)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
|
||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
|
||||
|
||||
|
||||
def _handle_anthropic_non_stream(
|
||||
ctx: RouteContext,
|
||||
anthropic_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 Anthropic 后端的非流式 Responses 返回。"""
|
||||
anthropic_payload['stream'] = False
|
||||
resp, err = forward_request(url, headers, anthropic_payload)
|
||||
if err:
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
cc_data = messages_to_cc_response(raw)
|
||||
response_data = cc_to_responses(cc_data, ctx.client_model)
|
||||
return _finalize_responses_response(response_data, debug_label='Messages 转回 Responses 后')
|
||||
|
||||
|
||||
def _handle_anthropic_stream(
|
||||
ctx: RouteContext,
|
||||
anthropic_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
):
|
||||
"""处理 Anthropic 后端的流式 Responses 返回。
|
||||
|
||||
这里直接将 Anthropic SSE 事件映射到 Responses SSE,故意跳过 CC 流式中间态,
|
||||
这样可以减少一次事件重组,降低流式转换复杂度,也更容易保留原始时序。
|
||||
"""
|
||||
anthropic_payload['stream'] = True
|
||||
converter = ResponsesStreamConverter(model=display_model)
|
||||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||||
|
||||
def generate():
|
||||
yield from converter.start_events()
|
||||
|
||||
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
|
||||
if err:
|
||||
yield f'event: error\ndata: {json.dumps({"error": err})}\n\n'
|
||||
yield responses_error_event(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
for event_type, event_data in iter_anthropic_sse(resp):
|
||||
yield from converter.process_anthropic_event(event_type, event_data)
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
|
||||
yield from converter.process_anthropic_event(event_type, event_data)
|
||||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
yield from converter.finalize()
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _finalize_responses_response(response_data: dict[str, Any], *, debug_label: str):
|
||||
"""统一收尾非流式 Responses 响应。
|
||||
|
||||
两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象,因此这里集中
|
||||
处理调试日志、回填展示模型名以及 usage 日志。
|
||||
"""
|
||||
response_data['model'] = response_data.get('model') or ''
|
||||
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
|
||||
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
|
||||
return jsonify(response_data)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue