支持 Gemini 格式,优化 debug 日志

This commit is contained in:
h88782481 2026-03-14 09:27:15 +08:00
parent e726f11bad
commit 4de6db13f9
16 changed files with 1783 additions and 55 deletions

View file

@ -13,6 +13,7 @@ from typing import Any
from flask import Blueprint, jsonify, request
from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response
from adapters.cc_gemini_adapter import GeminiStreamConverter, cc_to_gemini_request, gemini_to_cc_response
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc
from config import Config
@ -21,6 +22,7 @@ from routes.common import (
apply_body_modifications,
apply_header_modifications,
build_anthropic_target,
build_gemini_target,
build_openai_target,
build_responses_target,
build_route_context,
@ -33,12 +35,27 @@ from routes.common import (
)
from utils.http import (
forward_request,
gen_id,
iter_anthropic_sse,
iter_gemini_sse,
iter_openai_sse,
iter_responses_sse,
sse_response,
)
from utils.request_logger import (
append_client_event,
append_upstream_event,
attach_client_response,
attach_error,
attach_upstream_request,
attach_upstream_response,
finalize_turn,
set_stream_summary,
start_turn,
)
from utils.think_tag import ThinkTagExtractor
from utils.thinking_cache import thinking_cache
from utils.usage_tracker import usage_tracker
logger = logging.getLogger(__name__)
@ -54,20 +71,33 @@ def _dbg(message: str) -> None:
@bp.route('/v1/responses', methods=['POST'])
def responses_endpoint():
"""处理 Responses 请求并按模型映射分发。"""
payload = request.get_json(force=True)
original_payload = request.get_json(force=True)
payload = json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))
client_model = payload.get('model', 'unknown')
is_stream = payload.get('stream', False)
ctx = build_route_context(client_model, is_stream)
turn = start_turn(
route='responses',
client_model=client_model,
backend=ctx.backend,
stream=is_stream,
client_request=original_payload,
request_headers=dict(request.headers),
target_url=ctx.target_url,
upstream_model=ctx.upstream_model,
)
log_route_context('响应生成', ctx)
cc_payload = _build_cc_payload(payload, ctx)
if ctx.backend == 'openai':
return _handle_openai_backend(ctx, cc_payload)
return _handle_openai_backend(ctx, cc_payload, turn)
if ctx.backend == 'responses':
return _handle_responses_backend(ctx, payload)
return _handle_anthropic_backend(ctx, cc_payload)
return _handle_responses_backend(ctx, payload, turn)
if ctx.backend == 'gemini':
return _handle_gemini_backend(ctx, cc_payload, turn)
return _handle_anthropic_backend(ctx, cc_payload, turn)
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
@ -78,6 +108,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, A
"""
cc_payload = responses_to_cc(payload)
cc_payload['model'] = ctx.upstream_model
cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', []))
cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position)
_dbg(
'已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys()))
@ -86,7 +117,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, A
return cc_payload
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]):
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
cc_payload = normalize_request(cc_payload)
_dbg(
@ -99,8 +130,8 @@ def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
headers = apply_header_modifications(headers, ctx.header_modifications)
if ctx.is_stream:
return _handle_openai_stream(ctx, cc_payload, url, headers)
return _handle_openai_non_stream(ctx, cc_payload, url, headers)
return _handle_openai_stream(ctx, cc_payload, url, headers, turn)
return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn)
def _handle_openai_non_stream(
@ -108,19 +139,24 @@ def _handle_openai_non_stream(
cc_payload: dict[str, Any],
url: str,
headers: dict[str, str],
turn: dict[str, Any],
):
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
cc_payload['stream'] = False
attach_upstream_request(turn, cc_payload, headers)
resp, err = forward_request(url, headers, cc_payload)
if err:
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
finalize_turn(turn)
return err
raw = resp.json()
attach_upstream_response(turn, raw)
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
fixed = fix_response(raw)
response_data = cc_to_responses(fixed, ctx.client_model)
return _finalize_responses_response(response_data, debug_label='转换为 Responses 后')
return _finalize_responses_response(response_data, turn=turn, debug_label='转换为 Responses 后')
def _handle_openai_stream(
@ -128,6 +164,7 @@ def _handle_openai_stream(
cc_payload: dict[str, Any],
url: str,
headers: dict[str, str],
turn: dict[str, Any] | None,
):
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
cc_payload['stream'] = True
@ -137,20 +174,41 @@ def _handle_openai_stream(
"""消费 OpenAI 聊天补全流,并实时改写为 Responses SSE。"""
yield from converter.start_events()
attach_upstream_request(turn, cc_payload, headers)
resp, err = forward_request(url, headers, cc_payload, stream=True)
if err:
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
set_stream_summary(turn, {'status': 'error'})
finalize_turn(turn)
yield responses_error_event(str(err))
return
think_extractor = ThinkTagExtractor()
chunk_count = 0
client_events: list[str] = []
for chunk in iter_openai_sse(resp):
if chunk is None:
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
yield from converter.finalize()
finalized_events = converter.finalize()
for item in finalized_events:
client_events.append(item)
append_client_event(turn, {'type': 'responses_event', 'data': item})
yield item
usage_tracker.record(ctx.client_model)
set_stream_summary(turn, {
'chunk_count': chunk_count,
'client_event_count': len(client_events),
})
attach_client_response(turn, {
'type': 'responses.stream.summary',
'model': ctx.client_model,
'events': client_events,
})
finalize_turn(turn)
return
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
if chunk_count < 10:
_dbg(
f'上游原始片段#{chunk_count}='
@ -159,19 +217,22 @@ def _handle_openai_stream(
chunk = fix_stream_chunk(chunk)
for out in think_extractor.process_chunk(chunk):
if chunk_count < 10:
_dbg(
f'转换后片段#{chunk_count}='
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
)
yield from converter.process_cc_chunk(out)
for evt in converter.process_cc_chunk(out):
client_events.append(evt)
append_client_event(turn, {'type': 'responses_event', 'data': evt})
if chunk_count < 10:
_dbg(
f'转换后片段#{chunk_count}='
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
)
yield evt
chunk_count += 1
return sse_response(generate())
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
"""处理走原生 Responses 后端的请求。
当中转站本身就只支持 `/v1/responses` 不需要再绕到聊天补全中间协议
@ -185,8 +246,8 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
headers = apply_header_modifications(headers, ctx.header_modifications)
if ctx.is_stream:
return _handle_responses_stream(ctx, payload, url, headers)
return _handle_responses_non_stream(ctx, payload, url, headers)
return _handle_responses_stream(ctx, payload, url, headers, turn)
return _handle_responses_non_stream(ctx, payload, url, headers, turn)
def _handle_responses_non_stream(
@ -194,16 +255,21 @@ def _handle_responses_non_stream(
payload: dict[str, Any],
url: str,
headers: dict[str, str],
turn: dict[str, Any] | None,
):
"""处理原生 Responses 后端的非流式返回。"""
payload['stream'] = False
attach_upstream_request(turn, payload, headers)
resp, err = forward_request(url, headers, payload)
if err:
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
finalize_turn(turn)
return err
response_data = resp.json()
attach_upstream_response(turn, response_data)
response_data['model'] = ctx.client_model
return _finalize_responses_response(response_data, debug_label='原生 Responses 返回后')
return _finalize_responses_response(response_data, turn=turn, debug_label='原生 Responses 返回后')
def _handle_responses_stream(
@ -211,6 +277,7 @@ def _handle_responses_stream(
payload: dict[str, Any],
url: str,
headers: dict[str, str],
turn: dict[str, Any] | None,
):
"""处理原生 Responses 后端的流式返回。"""
payload['stream'] = True
@ -218,27 +285,151 @@ def _handle_responses_stream(
def generate():
"""透传上游原生 Responses 流,并做轻量模型名改写。"""
attach_upstream_request(turn, payload, headers)
resp, err = forward_request(url, headers, payload, stream=True)
if err:
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
set_stream_summary(turn, {'status': 'error'})
finalize_turn(turn)
yield responses_error_event(str(err))
return
event_count = 0
client_events: list[str] = []
for event_type, event_data in iter_responses_sse(resp):
append_upstream_event(turn, {'type': event_type, 'data': event_data})
if event_count < 10:
_dbg(
f'上游事件#{event_count} 类型={event_type} 数据='
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
)
yield from converter.process_responses_event(event_type, event_data)
produced = converter.process_responses_event(event_type, event_data)
for evt in produced:
client_events.append(evt)
append_client_event(turn, {'type': 'responses_event', 'data': evt})
yield evt
event_count += 1
_dbg(f'流式响应结束,共 {event_count} 个事件')
usage_tracker.record(ctx.client_model)
set_stream_summary(turn, {
'event_count': event_count,
'client_event_count': len(client_events),
})
attach_client_response(turn, {
'type': 'responses.stream.summary',
'model': ctx.client_model,
'events': client_events,
})
finalize_turn(turn)
return sse_response(generate())
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
    """Route a Responses request through a Gemini Contents backend.

    The chat-completions intermediate payload is converted into a Gemini
    request, the body/header modifications carried by ``ctx`` are applied,
    and the call is handed to the streaming or non-streaming handler
    depending on ``ctx.is_stream``.

    Args:
        ctx: Route context for this request; supplies the stream flag and
            the body/header modifications.
        cc_payload: Request in the chat-completions intermediate form.
        turn: Request-log record for this turn, forwarded to the handler.

    Returns:
        Whatever the selected Gemini handler returns.
    """
    gemini_request = cc_to_gemini_request(cc_payload)

    # Summarise the converted request for the debug log.
    field_names = list(gemini_request.keys())
    content_total = len(gemini_request.get("contents", []))
    _dbg('已转换为 Gemini 请求:字段=' + str(field_names) + f' 内容数={content_total}')

    # The target is built with the stream flag; modifications are applied afterwards.
    target_url, target_headers = build_gemini_target(ctx, stream=ctx.is_stream)
    gemini_request = apply_body_modifications(gemini_request, ctx.body_modifications)
    target_headers = apply_header_modifications(target_headers, ctx.header_modifications)

    handler = _handle_gemini_stream if ctx.is_stream else _handle_gemini_non_stream
    return handler(ctx, gemini_request, target_url, target_headers, turn)
def _handle_gemini_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle a non-streaming Responses call served by a Gemini backend.

    The Gemini payload is forwarded upstream, the reply is converted
    Gemini -> chat-completions -> Responses, and the result is returned
    through ``_finalize_responses_response``.

    Args:
        ctx: Route context; ``ctx.client_model`` is used for the response.
        payload: Gemini-format request body.
        url: Upstream URL to call.
        headers: Headers to send upstream.
        turn: Request-log record for this turn.

    Returns:
        The error value produced by ``forward_request`` when forwarding
        fails, otherwise the result of ``_finalize_responses_response``.

    Raises:
        Exception: Any error raised while decoding or converting the
            upstream body is re-raised unchanged, after the turn has been
            given an error record and finalized.
    """
    attach_upstream_request(turn, payload, headers)
    resp, err = forward_request(url, headers, payload)
    if err:
        attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
        finalize_turn(turn)
        return err

    # A non-JSON upstream body makes resp.json() raise; close the turn
    # before propagating so the failure is not lost from the request log.
    try:
        raw = resp.json()
    except Exception as exc:
        attach_error(turn, {'stage': 'parse_upstream_response', 'message': str(exc)})
        finalize_turn(turn)
        raise

    attach_upstream_response(turn, raw)
    _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])

    # Same reasoning for an unexpected response shape breaking the converters.
    try:
        cc_data = gemini_to_cc_response(raw)
        response_data = cc_to_responses(cc_data, ctx.client_model)
    except Exception as exc:
        attach_error(turn, {'stage': 'convert_response', 'message': str(exc)})
        finalize_turn(turn)
        raise

    # _finalize_responses_response attaches the client response and finalizes the turn.
    return _finalize_responses_response(response_data, turn=turn, debug_label='Gemini 转回 Responses 后')
def _handle_gemini_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle a streaming Responses call served by a Gemini backend.

    Upstream Gemini chunks are converted to chat-completions chunks by a
    ``GeminiStreamConverter`` and then to Responses events by a
    ``ResponsesStreamConverter``. Every event sent to the client after the
    start events is also appended to the request-log turn.

    Args:
        ctx: Route context; ``ctx.client_model`` is used for the converter,
            the usage tracker and the stream summary.
        payload: Gemini-format request body.
        url: Upstream URL to call.
        headers: Headers to send upstream.
        turn: Request-log record for this turn.

    Returns:
        The value of ``sse_response`` wrapping the event generator.
    """
    converter = ResponsesStreamConverter(model=ctx.client_model)
    gemini_converter = GeminiStreamConverter()

    def generate():
        """Yield Responses events converted from the upstream Gemini stream."""
        yield from converter.start_events()

        attach_upstream_request(turn, payload, headers)
        resp, err = forward_request(url, headers, payload, stream=True)
        if err:
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield responses_error_event(str(err))
            return

        chunk_count = 0
        client_events: list[str] = []

        def record(evt):
            """Log one client-bound event and hand it back for yielding."""
            client_events.append(evt)
            append_client_event(turn, {'type': 'responses_event', 'data': evt})
            return evt

        def close_turn_as_failed(stage: str, message: str) -> None:
            """Record an aborted stream and finalize the turn."""
            attach_error(turn, {'stage': stage, 'message': message})
            set_stream_summary(turn, {
                'status': 'error',
                'chunk_count': chunk_count,
                'client_event_count': len(client_events),
            })
            finalize_turn(turn)

        try:
            for gemini_chunk in iter_gemini_sse(resp):
                append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk})
                # Only the first few chunks are dumped to the debug log.
                if chunk_count < 10:
                    _dbg(
                        f'上游 Gemini 片段#{chunk_count}='
                        + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500]
                    )
                for cc_chunk in gemini_converter.process_chunk(gemini_chunk):
                    for evt in converter.process_cc_chunk(cc_chunk):
                        yield record(evt)
                chunk_count += 1

            _dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
            for evt in converter.finalize():
                yield record(evt)
        except GeneratorExit:
            # Raised at a yield when the consumer closes the generator
            # (e.g. the client went away); finalize the turn so it is not
            # left open, then let the close proceed.
            close_turn_as_failed('client_disconnect', 'client closed the stream before completion')
            raise
        except Exception as exc:
            # Upstream read or conversion failure mid-stream: record it and
            # re-raise so the client-visible behaviour is unchanged.
            close_turn_as_failed('stream', str(exc))
            raise

        usage_tracker.record(ctx.client_model)
        set_stream_summary(turn, {
            'chunk_count': chunk_count,
            'client_event_count': len(client_events),
        })
        attach_client_response(turn, {
            'type': 'responses.stream.summary',
            'model': ctx.client_model,
            'events': client_events,
        })
        finalize_turn(turn)

    return sse_response(generate())
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
"""处理走 Anthropic 后端的 Responses 请求。"""
anthropic_payload = cc_to_messages_request(cc_payload)
_dbg(
@ -251,8 +442,8 @@ def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
headers = apply_header_modifications(headers, ctx.header_modifications)
if ctx.is_stream:
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn)
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn)
def _handle_anthropic_non_stream(
@ -260,19 +451,24 @@ def _handle_anthropic_non_stream(
anthropic_payload: dict[str, Any],
url: str,
headers: dict[str, str],
turn: dict[str, Any] | None,
):
"""处理 Anthropic 后端的非流式 Responses 返回。"""
anthropic_payload['stream'] = False
attach_upstream_request(turn, anthropic_payload, headers)
resp, err = forward_request(url, headers, anthropic_payload)
if err:
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
finalize_turn(turn)
return err
raw = resp.json()
attach_upstream_response(turn, raw)
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
cc_data = messages_to_cc_response(raw)
response_data = cc_to_responses(cc_data, ctx.client_model)
return _finalize_responses_response(response_data, debug_label='Messages 转回 Responses 后')
return _finalize_responses_response(response_data, turn=turn, debug_label='Messages 转回 Responses 后')
def _handle_anthropic_stream(
@ -280,6 +476,7 @@ def _handle_anthropic_stream(
anthropic_payload: dict[str, Any],
url: str,
headers: dict[str, str],
turn: dict[str, Any] | None,
):
"""处理 Anthropic 后端的流式 Responses 返回。
@ -293,29 +490,54 @@ def _handle_anthropic_stream(
"""消费 Anthropic SSE并直接映射为 Responses 事件序列。"""
yield from converter.start_events()
attach_upstream_request(turn, anthropic_payload, headers)
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
if err:
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
set_stream_summary(turn, {'status': 'error'})
finalize_turn(turn)
yield responses_error_event(str(err))
return
event_count = 0
client_events: list[str] = []
for event_type, event_data in iter_anthropic_sse(resp):
append_upstream_event(turn, {'type': event_type, 'data': event_data})
if event_count < 10:
_dbg(
f'上游事件#{event_count} 类型={event_type} 数据='
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
)
yield from converter.process_anthropic_event(event_type, event_data)
produced = converter.process_anthropic_event(event_type, event_data)
for evt in produced:
client_events.append(evt)
append_client_event(turn, {'type': 'responses_event', 'data': evt})
yield evt
event_count += 1
_dbg(f'流式响应结束,共 {event_count} 个事件')
yield from converter.finalize()
finalized_events = converter.finalize()
for evt in finalized_events:
client_events.append(evt)
append_client_event(turn, {'type': 'responses_event', 'data': evt})
yield evt
usage_tracker.record(ctx.client_model)
set_stream_summary(turn, {
'event_count': event_count,
'client_event_count': len(client_events),
})
attach_client_response(turn, {
'type': 'responses.stream.summary',
'model': ctx.client_model,
'events': client_events,
})
finalize_turn(turn)
return sse_response(generate())
def _finalize_responses_response(response_data: dict[str, Any], *, debug_label: str):
def _finalize_responses_response(response_data: dict[str, Any], *, turn: dict[str, Any], debug_label: str):
"""统一收尾非流式 Responses 响应。
两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象因此这里集中
@ -324,4 +546,15 @@ def _finalize_responses_response(response_data: dict[str, Any], *, debug_label:
response_data['model'] = response_data.get('model') or ''
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
usage_tracker.record(
response_data.get('model', ''),
response_data.get('usage'),
input_key='input_tokens',
output_key='output_tokens',
)
attach_client_response(turn, response_data)
finalize_turn(turn, usage=response_data.get('usage'))
return jsonify(response_data)