660 lines
24 KiB
Python
660 lines
24 KiB
Python
"""路由: /v1/responses
|
||
|
||
处理 Cursor 对 GPT、Claude-Opus 等模型发出的 Responses API 请求。
|
||
请求会先转换为 Chat Completions 中间表示,再按后端类型分发,最后转换回 Responses 格式。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import logging
|
||
from typing import Any
|
||
|
||
import settings
|
||
from flask import Blueprint, jsonify, request
|
||
|
||
from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response
|
||
from adapters.cc_gemini_adapter import GeminiStreamConverter, cc_to_gemini_request, gemini_to_cc_response
|
||
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
|
||
from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc
|
||
from config import Config
|
||
from routes.common import (
|
||
RouteContext,
|
||
apply_body_modifications,
|
||
apply_header_modifications,
|
||
build_anthropic_target,
|
||
build_gemini_target,
|
||
build_openai_target,
|
||
build_responses_target,
|
||
build_route_context,
|
||
ensure_responses_cache_control,
|
||
attach_previous_response_id,
|
||
remember_response_id,
|
||
inject_instructions_anthropic,
|
||
inject_instructions_cc,
|
||
inject_instructions_responses,
|
||
log_route_context,
|
||
log_usage,
|
||
responses_error_event,
|
||
)
|
||
from utils.http import (
|
||
forward_request,
|
||
gen_id,
|
||
iter_anthropic_sse,
|
||
iter_gemini_sse,
|
||
iter_openai_sse,
|
||
iter_responses_sse,
|
||
sse_response,
|
||
)
|
||
from utils.request_logger import (
|
||
append_client_event,
|
||
append_upstream_event,
|
||
attach_client_response,
|
||
attach_error,
|
||
attach_upstream_request,
|
||
attach_upstream_response,
|
||
finalize_turn,
|
||
set_stream_summary,
|
||
start_turn,
|
||
)
|
||
from utils.think_tag import ThinkTagExtractor
|
||
from utils.thinking_cache import thinking_cache
|
||
from utils.usage_tracker import usage_tracker
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
bp = Blueprint('responses', __name__)
|
||
|
||
|
||
def _dbg(message: str) -> None:
|
||
"""仅在调试模式下输出详细日志。"""
|
||
if settings.get_debug_mode() in ('simple', 'verbose'):
|
||
logger.info('[响应生成调试] %s', message)
|
||
|
||
|
||
@bp.route('/v1/responses', methods=['POST'])
|
||
def responses_endpoint():
|
||
"""处理 Responses 请求并按模型映射分发。"""
|
||
original_payload = request.get_json(force=True)
|
||
payload = json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))
|
||
client_model = payload.get('model', 'unknown')
|
||
is_stream = payload.get('stream', False)
|
||
|
||
ctx = build_route_context(client_model, is_stream)
|
||
turn = start_turn(
|
||
route='responses',
|
||
client_model=client_model,
|
||
backend=ctx.backend,
|
||
stream=is_stream,
|
||
client_request=original_payload,
|
||
request_headers=dict(request.headers),
|
||
target_url=ctx.target_url,
|
||
upstream_model=ctx.upstream_model,
|
||
)
|
||
log_route_context('响应生成', ctx)
|
||
|
||
cc_payload = _build_cc_payload(payload, ctx)
|
||
|
||
if ctx.backend == 'openai':
|
||
return _handle_openai_backend(ctx, cc_payload, turn)
|
||
if ctx.backend == 'responses':
|
||
return _handle_responses_backend(ctx, payload, turn)
|
||
if ctx.backend == 'gemini':
|
||
return _handle_gemini_backend(ctx, cc_payload, turn)
|
||
return _handle_anthropic_backend(ctx, cc_payload, turn)
|
||
|
||
|
||
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
|
||
"""将 Responses 请求统一降级为 Chat Completions 中间表示。
|
||
|
||
这样后续无论走 OpenAI 兼容后端还是 Anthropic 后端,都能复用一套
|
||
中间协议,避免在路由层同时维护两套完全不同的请求编排逻辑。
|
||
"""
|
||
cc_payload = responses_to_cc(payload)
|
||
cc_payload['model'] = ctx.upstream_model
|
||
cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', []))
|
||
cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position)
|
||
_dbg(
|
||
'已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys()))
|
||
+ f' 消息数={len(cc_payload.get("messages", []))}'
|
||
)
|
||
return cc_payload
|
||
|
||
|
||
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]):
|
||
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
|
||
cc_payload = normalize_request(cc_payload)
|
||
_dbg(
|
||
f'标准化完成:模型={cc_payload.get("model")} '
|
||
f'工具数={len(cc_payload.get("tools", []))}'
|
||
)
|
||
|
||
url, headers = build_openai_target(ctx)
|
||
cc_payload = apply_body_modifications(cc_payload, ctx.body_modifications)
|
||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||
|
||
if ctx.is_stream:
|
||
return _handle_openai_stream(ctx, cc_payload, url, headers, turn)
|
||
return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn)
|
||
|
||
|
||
def _handle_openai_non_stream(
|
||
ctx: RouteContext,
|
||
cc_payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any],
|
||
):
|
||
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
|
||
cc_payload['stream'] = False
|
||
attach_upstream_request(turn, cc_payload, headers)
|
||
resp, err = forward_request(url, headers, cc_payload)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||
finalize_turn(turn)
|
||
return err
|
||
|
||
raw = resp.json()
|
||
attach_upstream_response(turn, raw)
|
||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||
|
||
fixed = fix_response(raw)
|
||
response_data = cc_to_responses(fixed, ctx.client_model)
|
||
return _finalize_responses_response(
|
||
response_data,
|
||
client_model=ctx.client_model,
|
||
turn=turn,
|
||
debug_label='转换为 Responses 后',
|
||
)
|
||
|
||
|
||
def _handle_openai_stream(
|
||
ctx: RouteContext,
|
||
cc_payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
|
||
cc_payload['stream'] = True
|
||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||
|
||
def generate():
|
||
"""消费 OpenAI 聊天补全流,并实时改写为 Responses SSE。"""
|
||
yield from converter.start_events()
|
||
|
||
attach_upstream_request(turn, cc_payload, headers)
|
||
resp, err = forward_request(url, headers, cc_payload, stream=True)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||
set_stream_summary(turn, {'status': 'error'})
|
||
finalize_turn(turn)
|
||
yield responses_error_event(str(err))
|
||
return
|
||
|
||
think_extractor = ThinkTagExtractor()
|
||
chunk_count = 0
|
||
client_events: list[str] = []
|
||
|
||
for chunk in iter_openai_sse(resp):
|
||
if chunk is None:
|
||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||
finalized_events = converter.finalize()
|
||
for item in finalized_events:
|
||
client_events.append(item)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': item})
|
||
yield item
|
||
usage_tracker.record(ctx.client_model)
|
||
set_stream_summary(turn, {
|
||
'chunk_count': chunk_count,
|
||
'client_event_count': len(client_events),
|
||
})
|
||
attach_client_response(turn, {
|
||
'type': 'responses.stream.summary',
|
||
'model': ctx.client_model,
|
||
'event_count': len(client_events),
|
||
})
|
||
finalize_turn(turn)
|
||
return
|
||
|
||
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
||
if chunk_count < 10:
|
||
_dbg(
|
||
f'上游原始片段#{chunk_count}='
|
||
+ json.dumps(chunk, ensure_ascii=False, default=str)[:500]
|
||
)
|
||
|
||
chunk = fix_stream_chunk(chunk)
|
||
for out in think_extractor.process_chunk(chunk):
|
||
for evt in converter.process_cc_chunk(out):
|
||
client_events.append(evt)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||
if chunk_count < 10:
|
||
_dbg(
|
||
f'转换后片段#{chunk_count}='
|
||
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
|
||
)
|
||
yield evt
|
||
|
||
chunk_count += 1
|
||
|
||
return sse_response(generate())
|
||
|
||
|
||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||
"""处理走原生 Responses 后端的请求。
|
||
|
||
当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议,
|
||
直接转发原生 Responses 请求即可。
|
||
"""
|
||
payload = dict(payload)
|
||
payload['model'] = ctx.upstream_model
|
||
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
||
payload = ensure_responses_cache_control(payload)
|
||
payload = attach_previous_response_id(payload)
|
||
url, headers = build_responses_target(ctx)
|
||
payload = apply_body_modifications(payload, ctx.body_modifications)
|
||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||
|
||
if ctx.is_stream:
|
||
return _handle_responses_stream(ctx, payload, url, headers, turn)
|
||
return _handle_responses_non_stream(ctx, payload, url, headers, turn)
|
||
|
||
|
||
def _handle_responses_non_stream(
|
||
ctx: RouteContext,
|
||
payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理原生 Responses 后端的非流式返回。"""
|
||
payload['stream'] = False
|
||
attach_upstream_request(turn, payload, headers)
|
||
resp, err = forward_request(url, headers, payload)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||
finalize_turn(turn)
|
||
return err
|
||
|
||
response_data = resp.json()
|
||
attach_upstream_response(turn, response_data)
|
||
remember_response_id(payload, response_data)
|
||
response_data['model'] = ctx.client_model
|
||
return _finalize_responses_response(
|
||
response_data,
|
||
client_model=ctx.client_model,
|
||
turn=turn,
|
||
debug_label='原生 Responses 返回后',
|
||
)
|
||
|
||
|
||
def _handle_responses_stream(
|
||
ctx: RouteContext,
|
||
payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理原生 Responses 后端的流式返回。"""
|
||
payload['stream'] = True
|
||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||
|
||
def generate():
|
||
"""透传上游原生 Responses 流,并做轻量模型名改写。"""
|
||
attach_upstream_request(turn, payload, headers)
|
||
resp, err = forward_request(url, headers, payload, stream=True)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||
set_stream_summary(turn, {'status': 'error'})
|
||
finalize_turn(turn)
|
||
yield responses_error_event(str(err))
|
||
return
|
||
|
||
event_count = 0
|
||
client_events: list[str] = []
|
||
last_usage: dict[str, Any] | None = None
|
||
for event_type, event_data in iter_responses_sse(resp):
|
||
append_upstream_event(turn, {'type': event_type, 'data': event_data})
|
||
extracted_usage = _extract_responses_usage(event_data)
|
||
if extracted_usage:
|
||
last_usage = extracted_usage
|
||
if event_type == 'response.completed':
|
||
response_obj = event_data.get('response') if isinstance(event_data, dict) else None
|
||
if isinstance(response_obj, dict):
|
||
remember_response_id(payload, response_obj)
|
||
if event_count < 10:
|
||
_dbg(
|
||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||
)
|
||
produced = converter.process_responses_event(event_type, event_data)
|
||
for evt in produced:
|
||
client_events.append(evt)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||
yield evt
|
||
event_count += 1
|
||
|
||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||
usage_tracker.record(
|
||
ctx.client_model,
|
||
last_usage,
|
||
input_key='input_tokens',
|
||
output_key='output_tokens',
|
||
)
|
||
set_stream_summary(turn, {
|
||
'event_count': event_count,
|
||
'client_event_count': len(client_events),
|
||
'usage': last_usage,
|
||
})
|
||
attach_client_response(turn, {
|
||
'type': 'responses.stream.summary',
|
||
'model': ctx.client_model,
|
||
'event_count': len(client_events),
|
||
'usage': last_usage,
|
||
})
|
||
finalize_turn(turn, usage=last_usage)
|
||
|
||
return sse_response(generate())
|
||
|
||
|
||
def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | None:
|
||
"""从原生 Responses 事件中提取 usage。
|
||
|
||
原生 `/v1/responses` 流式通常会在 `response.completed` 事件里携带 usage,
|
||
也可能直接挂在顶层 `usage` 字段。这里统一做兼容提取,供统计与日志复用。
|
||
"""
|
||
if not isinstance(event_data, dict):
|
||
return None
|
||
usage = event_data.get('usage')
|
||
if isinstance(usage, dict):
|
||
return usage
|
||
response_obj = event_data.get('response')
|
||
if isinstance(response_obj, dict):
|
||
nested_usage = response_obj.get('usage')
|
||
if isinstance(nested_usage, dict):
|
||
return nested_usage
|
||
return None
|
||
|
||
|
||
def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
|
||
"""处理走 Gemini Contents 后端的 Responses 请求。"""
|
||
gemini_payload = cc_to_gemini_request(cc_payload)
|
||
_dbg(
|
||
'已转换为 Gemini 请求:字段=' + str(list(gemini_payload.keys()))
|
||
+ f' 内容数={len(gemini_payload.get("contents", []))}'
|
||
)
|
||
|
||
url, headers = build_gemini_target(ctx, stream=ctx.is_stream)
|
||
gemini_payload = apply_body_modifications(gemini_payload, ctx.body_modifications)
|
||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||
|
||
if ctx.is_stream:
|
||
return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn)
|
||
return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn)
|
||
|
||
|
||
def _handle_gemini_non_stream(
|
||
ctx: RouteContext,
|
||
payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理 Gemini 后端的非流式 Responses 返回。"""
|
||
attach_upstream_request(turn, payload, headers)
|
||
resp, err = forward_request(url, headers, payload)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||
finalize_turn(turn)
|
||
return err
|
||
|
||
raw = resp.json()
|
||
attach_upstream_response(turn, raw)
|
||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||
|
||
cc_data = gemini_to_cc_response(raw)
|
||
response_data = cc_to_responses(cc_data, ctx.client_model)
|
||
return _finalize_responses_response(
|
||
response_data,
|
||
client_model=ctx.client_model,
|
||
turn=turn,
|
||
debug_label='Gemini 转回 Responses 后',
|
||
)
|
||
|
||
|
||
def _handle_gemini_stream(
|
||
ctx: RouteContext,
|
||
payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理 Gemini 后端的流式 Responses 返回。"""
|
||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||
gemini_converter = GeminiStreamConverter()
|
||
|
||
def generate():
|
||
yield from converter.start_events()
|
||
|
||
attach_upstream_request(turn, payload, headers)
|
||
resp, err = forward_request(url, headers, payload, stream=True)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||
set_stream_summary(turn, {'status': 'error'})
|
||
finalize_turn(turn)
|
||
yield responses_error_event(str(err))
|
||
return
|
||
|
||
chunk_count = 0
|
||
client_events: list[str] = []
|
||
last_usage: dict[str, Any] | None = None
|
||
for gemini_chunk in iter_gemini_sse(resp):
|
||
append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk})
|
||
usage_meta = gemini_chunk.get('usageMetadata') if isinstance(gemini_chunk, dict) else None
|
||
if isinstance(usage_meta, dict):
|
||
last_usage = {
|
||
'input_tokens': usage_meta.get('promptTokenCount', 0),
|
||
'output_tokens': usage_meta.get('candidatesTokenCount', 0),
|
||
'total_tokens': usage_meta.get('totalTokenCount', 0),
|
||
}
|
||
if chunk_count < 10:
|
||
_dbg(
|
||
f'上游 Gemini 片段#{chunk_count}='
|
||
+ json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500]
|
||
)
|
||
|
||
for cc_chunk in gemini_converter.process_chunk(gemini_chunk):
|
||
for evt in converter.process_cc_chunk(cc_chunk):
|
||
client_events.append(evt)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||
yield evt
|
||
|
||
chunk_count += 1
|
||
|
||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||
finalized_events = converter.finalize()
|
||
for evt in finalized_events:
|
||
client_events.append(evt)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||
yield evt
|
||
usage_tracker.record(
|
||
ctx.client_model,
|
||
last_usage,
|
||
input_key='input_tokens',
|
||
output_key='output_tokens',
|
||
)
|
||
set_stream_summary(turn, {
|
||
'chunk_count': chunk_count,
|
||
'client_event_count': len(client_events),
|
||
'usage': last_usage,
|
||
})
|
||
attach_client_response(turn, {
|
||
'type': 'responses.stream.summary',
|
||
'model': ctx.client_model,
|
||
'event_count': len(client_events),
|
||
'usage': last_usage,
|
||
})
|
||
finalize_turn(turn, usage=last_usage)
|
||
|
||
return sse_response(generate())
|
||
|
||
|
||
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
|
||
"""处理走 Anthropic 后端的 Responses 请求。"""
|
||
anthropic_payload = cc_to_messages_request(cc_payload)
|
||
_dbg(
|
||
'已转换为 Messages 请求:字段=' + str(list(anthropic_payload.keys()))
|
||
+ f' 消息数={len(anthropic_payload.get("messages", []))}'
|
||
)
|
||
|
||
url, headers = build_anthropic_target(ctx)
|
||
anthropic_payload = apply_body_modifications(anthropic_payload, ctx.body_modifications)
|
||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||
|
||
if ctx.is_stream:
|
||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn)
|
||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn)
|
||
|
||
|
||
def _handle_anthropic_non_stream(
|
||
ctx: RouteContext,
|
||
anthropic_payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理 Anthropic 后端的非流式 Responses 返回。"""
|
||
anthropic_payload['stream'] = False
|
||
attach_upstream_request(turn, anthropic_payload, headers)
|
||
resp, err = forward_request(url, headers, anthropic_payload)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||
finalize_turn(turn)
|
||
return err
|
||
|
||
raw = resp.json()
|
||
attach_upstream_response(turn, raw)
|
||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||
|
||
cc_data = messages_to_cc_response(raw)
|
||
response_data = cc_to_responses(cc_data, ctx.client_model)
|
||
return _finalize_responses_response(
|
||
response_data,
|
||
client_model=ctx.client_model,
|
||
turn=turn,
|
||
debug_label='Messages 转回 Responses 后',
|
||
)
|
||
|
||
|
||
def _handle_anthropic_stream(
|
||
ctx: RouteContext,
|
||
anthropic_payload: dict[str, Any],
|
||
url: str,
|
||
headers: dict[str, str],
|
||
turn: dict[str, Any] | None,
|
||
):
|
||
"""处理 Anthropic 后端的流式 Responses 返回。
|
||
|
||
这里直接将 Anthropic SSE 事件映射到 Responses SSE,故意跳过 CC 流式中间态,
|
||
这样可以减少一次事件重组,降低流式转换复杂度,也更容易保留原始时序。
|
||
"""
|
||
anthropic_payload['stream'] = True
|
||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||
|
||
def generate():
|
||
"""消费 Anthropic SSE,并直接映射为 Responses 事件序列。"""
|
||
yield from converter.start_events()
|
||
|
||
attach_upstream_request(turn, anthropic_payload, headers)
|
||
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
|
||
if err:
|
||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||
set_stream_summary(turn, {'status': 'error'})
|
||
finalize_turn(turn)
|
||
yield responses_error_event(str(err))
|
||
return
|
||
|
||
event_count = 0
|
||
client_events: list[str] = []
|
||
for event_type, event_data in iter_anthropic_sse(resp):
|
||
append_upstream_event(turn, {'type': event_type, 'data': event_data})
|
||
if event_count < 10:
|
||
_dbg(
|
||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||
)
|
||
|
||
produced = converter.process_anthropic_event(event_type, event_data)
|
||
for evt in produced:
|
||
client_events.append(evt)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||
yield evt
|
||
event_count += 1
|
||
|
||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||
finalized_events = converter.finalize()
|
||
for evt in finalized_events:
|
||
client_events.append(evt)
|
||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||
yield evt
|
||
usage_tracker.record(ctx.client_model)
|
||
set_stream_summary(turn, {
|
||
'event_count': event_count,
|
||
'client_event_count': len(client_events),
|
||
})
|
||
attach_client_response(turn, {
|
||
'type': 'responses.stream.summary',
|
||
'model': ctx.client_model,
|
||
'event_count': len(client_events),
|
||
})
|
||
finalize_turn(turn)
|
||
|
||
return sse_response(generate())
|
||
|
||
|
||
def _finalize_responses_response(
|
||
response_data: dict[str, Any],
|
||
*,
|
||
client_model: str,
|
||
turn: dict[str, Any],
|
||
debug_label: str,
|
||
):
|
||
"""统一收尾非流式 Responses 响应。
|
||
|
||
两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象,因此这里集中
|
||
处理调试日志、回填展示模型名以及 usage 日志。
|
||
"""
|
||
response_data['model'] = response_data.get('model') or ''
|
||
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
|
||
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
|
||
|
||
usage_tracker.record(
|
||
client_model,
|
||
response_data.get('usage'),
|
||
input_key='input_tokens',
|
||
output_key='output_tokens',
|
||
)
|
||
|
||
attach_client_response(turn, response_data)
|
||
finalize_turn(turn, usage=response_data.get('usage'))
|
||
|
||
output_items = response_data.get('output', [])
|
||
if isinstance(output_items, list):
|
||
for item in output_items:
|
||
if not isinstance(item, dict) or item.get('type') != 'reasoning':
|
||
continue
|
||
summary = item.get('summary', [])
|
||
if not isinstance(summary, list):
|
||
continue
|
||
reasoning_text = ''.join(
|
||
part.get('text', '')
|
||
for part in summary
|
||
if isinstance(part, dict) and part.get('type') == 'summary_text'
|
||
)
|
||
if reasoning_text:
|
||
cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', [])
|
||
thinking_cache.store_from_response(cc_messages, reasoning_text)
|
||
break
|
||
|
||
return jsonify(response_data)
|