支持 Gemini 格式,优化 debug 日志
This commit is contained in:
parent
e726f11bad
commit
4de6db13f9
16 changed files with 1783 additions and 55 deletions
|
|
@ -188,6 +188,19 @@ def delete_mapping(name):
|
|||
return jsonify({'ok': True})
|
||||
|
||||
|
||||
# ─── 用量统计 ─────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route('/api/admin/stats', methods=['GET'])
def get_stats():
    """Return the runtime usage statistics as a JSON response.

    The request is first passed through ``_check_auth()``; when that helper
    returns a truthy value it is handed back to the client unchanged.
    """
    auth_failure = _check_auth()
    if not auth_failure:
        # Imported at call time, exactly where the original resolved it.
        from utils.usage_tracker import usage_tracker

        stats = usage_tracker.get_stats()
        return jsonify(stats)
    return auth_failure
|
||||
|
||||
|
||||
# ─── 内部辅助 ─────────────────────────────────────
|
||||
|
||||
|
||||
|
|
|
|||
288
routes/chat.py
288
routes/chat.py
|
|
@ -18,6 +18,11 @@ from adapters.cc_anthropic_adapter import (
|
|||
cc_to_messages_request,
|
||||
messages_to_cc_response,
|
||||
)
|
||||
from adapters.cc_gemini_adapter import (
|
||||
GeminiStreamConverter,
|
||||
cc_to_gemini_request,
|
||||
gemini_to_cc_response,
|
||||
)
|
||||
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
|
||||
from adapters.responses_cc_adapter import (
|
||||
ResponsesToCCStreamConverter,
|
||||
|
|
@ -31,6 +36,7 @@ from routes.common import (
|
|||
apply_body_modifications,
|
||||
apply_header_modifications,
|
||||
build_anthropic_target,
|
||||
build_gemini_target,
|
||||
build_openai_target,
|
||||
build_responses_target,
|
||||
build_route_context,
|
||||
|
|
@ -44,12 +50,27 @@ from routes.common import (
|
|||
)
|
||||
from utils.http import (
|
||||
forward_request,
|
||||
gen_id,
|
||||
iter_anthropic_sse,
|
||||
iter_gemini_sse,
|
||||
iter_openai_sse,
|
||||
iter_responses_sse,
|
||||
sse_response,
|
||||
)
|
||||
from utils.request_logger import (
|
||||
append_client_event,
|
||||
append_upstream_event,
|
||||
attach_client_response,
|
||||
attach_error,
|
||||
attach_upstream_request,
|
||||
attach_upstream_response,
|
||||
finalize_turn,
|
||||
set_stream_summary,
|
||||
start_turn,
|
||||
)
|
||||
from utils.think_tag import ThinkTagExtractor
|
||||
from utils.thinking_cache import thinking_cache
|
||||
from utils.usage_tracker import usage_tracker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -65,21 +86,36 @@ def _dbg(message: str) -> None:
|
|||
@bp.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Handle a chat-completions request and dispatch it to the mapped backend.

    The client body is parsed once and kept untouched as ``original_payload``
    for the request log; all further processing works on a JSON round-trip
    copy. A logging turn is opened with ``start_turn`` and handed to the
    backend handler selected by ``ctx.backend`` ('openai', 'responses',
    'gemini', otherwise Anthropic).
    """
    original_payload = request.get_json(force=True)
    # Work on a copy (JSON round-trip) -- presumably so that normalisation
    # cannot alter the payload recorded as ``client_request`` below.
    working_copy = json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))
    payload, message_count = _normalize_chat_payload(working_copy)

    client_model = payload.get('model', 'unknown')
    is_stream = payload.get('stream', False)
    ctx = build_route_context(client_model, is_stream)

    # NOTE(review): the full request headers are passed to the request logger;
    # confirm that it redacts credentials such as Authorization / API keys.
    turn = start_turn(
        route='chat',
        client_model=client_model,
        backend=ctx.backend,
        stream=is_stream,
        client_request=original_payload,
        request_headers=dict(request.headers),
        target_url=ctx.target_url,
        upstream_model=ctx.upstream_model,
        metadata={'message_count': message_count},
    )

    log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
    _log_messages(payload)

    payload['messages'] = thinking_cache.inject(payload.get('messages', []))

    # Every backend handler receives the turn so it can record upstream and
    # client traffic and finalize the log entry itself.
    if ctx.backend == 'openai':
        return _handle_openai_backend(ctx, payload, turn)
    if ctx.backend == 'responses':
        return _handle_responses_backend(ctx, payload, turn)
    if ctx.backend == 'gemini':
        return _handle_gemini_backend(ctx, payload, turn)
    return _handle_anthropic_backend(ctx, payload, turn)
|
||||
|
||||
|
||||
def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
|
||||
|
|
@ -100,7 +136,7 @@ def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], in
|
|||
return payload, message_count
|
||||
|
||||
|
||||
def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any]):
|
||||
"""处理走 OpenAI 兼容后端的聊天补全请求。"""
|
||||
_dbg(
|
||||
'原始请求字段=' + str(list(payload.keys())) + ' '
|
||||
|
|
@ -124,8 +160,8 @@ def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any]):
|
|||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_openai_stream(ctx, payload, url, headers)
|
||||
return _handle_openai_non_stream(ctx, payload, url, headers)
|
||||
return _handle_openai_stream(ctx, payload, url, headers, turn)
|
||||
return _handle_openai_non_stream(ctx, payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_openai_non_stream(
|
||||
|
|
@ -133,18 +169,23 @@ def _handle_openai_non_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
attach_upstream_response(turn, raw)
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
data = fix_response(raw)
|
||||
return _finalize_chat_response(ctx, data, debug_label='修复后响应')
|
||||
return _finalize_chat_response(ctx, data, turn=turn, debug_label='修复后响应')
|
||||
|
||||
|
||||
def _handle_openai_stream(
|
||||
|
|
@ -152,29 +193,56 @@ def _handle_openai_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的流式返回。"""
|
||||
payload['stream'] = True
|
||||
|
||||
def generate():
|
||||
"""消费上游 OpenAI SSE,并逐段产出给 Cursor 的聊天补全流。"""
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield chat_error_chunk(str(err))
|
||||
return
|
||||
|
||||
think_extractor = ThinkTagExtractor()
|
||||
chunk_count = 0
|
||||
last_usage = None
|
||||
client_chunks: list[dict[str, Any]] = []
|
||||
|
||||
for chunk in iter_openai_sse(resp):
|
||||
if chunk is None:
|
||||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||||
close_chunk = think_extractor.finalize()
|
||||
if close_chunk:
|
||||
client_chunks.append(close_chunk)
|
||||
append_client_event(turn, {'type': 'chat_chunk', 'data': close_chunk})
|
||||
yield sse_data_message(close_chunk)
|
||||
append_client_event(turn, {'type': 'done'})
|
||||
yield sse_data_message('[DONE]')
|
||||
usage_tracker.record(ctx.client_model, last_usage)
|
||||
set_stream_summary(turn, {
|
||||
'chunk_count': chunk_count,
|
||||
'client_chunk_count': len(client_chunks),
|
||||
'usage': last_usage,
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'chat.completion.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'chunks': client_chunks,
|
||||
'usage': last_usage,
|
||||
})
|
||||
finalize_turn(turn, usage=last_usage)
|
||||
return
|
||||
|
||||
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
||||
if chunk.get('usage'):
|
||||
last_usage = chunk['usage']
|
||||
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'上游原始片段#{chunk_count}='
|
||||
|
|
@ -185,6 +253,8 @@ def _handle_openai_stream(
|
|||
chunk['model'] = ctx.client_model
|
||||
|
||||
for out in think_extractor.process_chunk(chunk):
|
||||
client_chunks.append(out)
|
||||
append_client_event(turn, {'type': 'chat_chunk', 'data': out})
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'返回片段#{chunk_count}='
|
||||
|
|
@ -194,10 +264,25 @@ def _handle_openai_stream(
|
|||
|
||||
chunk_count += 1
|
||||
|
||||
usage_tracker.record(ctx.client_model, last_usage)
|
||||
set_stream_summary(turn, {
|
||||
'chunk_count': chunk_count,
|
||||
'client_chunk_count': len(client_chunks),
|
||||
'usage': last_usage,
|
||||
'ended_without_done': True,
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'chat.completion.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'chunks': client_chunks,
|
||||
'usage': last_usage,
|
||||
})
|
||||
finalize_turn(turn, usage=last_usage)
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||
"""处理走原生 Responses 后端的聊天补全请求。
|
||||
|
||||
当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求,
|
||||
|
|
@ -216,8 +301,8 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
|
|||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_responses_stream(ctx, responses_payload, url, headers)
|
||||
return _handle_responses_non_stream(ctx, responses_payload, url, headers)
|
||||
return _handle_responses_stream(ctx, responses_payload, url, headers, turn)
|
||||
return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_responses_non_stream(
|
||||
|
|
@ -225,18 +310,23 @@ def _handle_responses_non_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理原生 Responses 后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
attach_upstream_response(turn, raw)
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
data = responses_to_cc_response(raw, ctx.client_model)
|
||||
return _finalize_chat_response(ctx, data, debug_label='Responses 转回聊天补全后')
|
||||
return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后')
|
||||
|
||||
|
||||
def _handle_responses_stream(
|
||||
|
|
@ -244,6 +334,7 @@ def _handle_responses_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理原生 Responses 后端的流式返回。"""
|
||||
payload['stream'] = True
|
||||
|
|
@ -251,13 +342,19 @@ def _handle_responses_stream(
|
|||
|
||||
def generate():
|
||||
"""消费上游 Responses 事件,并实时转换成聊天补全 chunk。"""
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield chat_error_chunk(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
client_chunks: list[Any] = []
|
||||
for event_type, event_data in iter_responses_sse(resp):
|
||||
append_upstream_event(turn, {'type': event_type, 'data': event_data})
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
|
|
@ -265,6 +362,8 @@ def _handle_responses_stream(
|
|||
)
|
||||
|
||||
for chunk in converter.process_event(event_type, event_data):
|
||||
client_chunks.append(chunk)
|
||||
append_client_event(turn, {'type': 'chat_chunk', 'data': chunk})
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'返回片段#{event_count}='
|
||||
|
|
@ -275,12 +374,126 @@ def _handle_responses_stream(
|
|||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
append_client_event(turn, {'type': 'done'})
|
||||
yield sse_data_message('[DONE]')
|
||||
usage_tracker.record(ctx.client_model)
|
||||
set_stream_summary(turn, {
|
||||
'event_count': event_count,
|
||||
'client_chunk_count': len(client_chunks),
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'chat.completion.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'chunks': client_chunks,
|
||||
})
|
||||
finalize_turn(turn)
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
    """Serve a chat-completions request through a Gemini Contents backend.

    Custom instructions are injected into the chat-completions payload, the
    result is converted with ``cc_to_gemini_request``, and the configured body
    and header modifications are applied before the request is handed to the
    streaming or non-streaming Gemini handler (chosen by ``ctx.is_stream``).
    """
    with_instructions = inject_instructions_cc(
        payload,
        ctx.custom_instructions,
        ctx.instructions_position,
    )
    gemini_payload = cc_to_gemini_request(with_instructions)

    debug_line = '已转换为 Gemini 请求:字段=' + str(list(gemini_payload.keys()))
    debug_line = debug_line + f' 内容数={len(gemini_payload.get("contents", []))}'
    _dbg(debug_line)

    # The target is resolved before the modifications are applied.
    url, headers = build_gemini_target(ctx, stream=ctx.is_stream)
    gemini_payload = apply_body_modifications(gemini_payload, ctx.body_modifications)
    headers = apply_header_modifications(headers, ctx.header_modifications)

    handler = _handle_gemini_stream if ctx.is_stream else _handle_gemini_non_stream
    return handler(ctx, gemini_payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_gemini_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Forward a non-streaming request to the Gemini backend.

    The upstream request and response are recorded on ``turn``. When
    ``forward_request`` reports an error, the error is noted, the turn is
    finalized and the error value is returned as-is. Otherwise the upstream
    JSON is converted with ``gemini_to_cc_response`` and passed to
    ``_finalize_chat_response``.
    """
    attach_upstream_request(turn, payload, headers)
    upstream, failure = forward_request(url, headers, payload)
    if failure:
        failure_info = {'stage': 'forward_request', 'message': 'upstream request failed'}
        attach_error(turn, failure_info)
        finalize_turn(turn)
        return failure

    upstream_json = upstream.json()
    attach_upstream_response(turn, upstream_json)
    # Only the first 1000 characters of the serialized response are logged.
    preview = json.dumps(upstream_json, ensure_ascii=False, default=str)[:1000]
    _dbg('上游原始响应=' + preview)

    converted = gemini_to_cc_response(upstream_json)
    return _finalize_chat_response(
        ctx,
        converted,
        turn=turn,
        debug_label='Gemini 转回聊天补全后',
    )
|
||||
|
||||
|
||||
def _handle_gemini_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Forward a streaming request to the Gemini backend.

    Returns an SSE response whose generator reads upstream chunks via
    ``iter_gemini_sse``, converts each one with a ``GeminiStreamConverter``
    and emits the resulting chat-completions chunks, followed by ``[DONE]``.
    Upstream and client events are recorded on ``turn`` along the way.
    """
    converter = GeminiStreamConverter()

    def generate():
        """Yield chat-completions SSE messages converted from the Gemini stream."""
        attach_upstream_request(turn, payload, headers)
        upstream, failure = forward_request(url, headers, payload, stream=True)
        if failure:
            attach_error(turn, {'stage': 'forward_request', 'message': str(failure)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield chat_error_chunk(str(failure))
            return

        chunk_count = 0
        emitted: list[Any] = []
        for gemini_chunk in iter_gemini_sse(upstream):
            append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk})
            # Debug output is limited to the first ten upstream chunks.
            verbose = chunk_count < 10
            if verbose:
                _dbg(
                    f'上游 Gemini 片段#{chunk_count}='
                    + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500]
                )

            for cc_chunk in converter.process_chunk(gemini_chunk):
                # The client always sees the model name it asked for.
                cc_chunk['model'] = ctx.client_model
                emitted.append(cc_chunk)
                append_client_event(turn, {'type': 'chat_chunk', 'data': cc_chunk})
                if verbose:
                    _dbg(
                        f'返回片段#{chunk_count}='
                        + json.dumps(cc_chunk, ensure_ascii=False, default=str)[:500]
                    )
                yield sse_data_message(cc_chunk)

            chunk_count += 1

        _dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
        append_client_event(turn, {'type': 'done'})
        yield sse_data_message('[DONE]')

        # Bookkeeping runs once the terminating message has been handed out.
        usage_tracker.record(ctx.client_model)
        stream_summary = {
            'chunk_count': chunk_count,
            'client_chunk_count': len(emitted),
        }
        set_stream_summary(turn, stream_summary)
        client_summary = {
            'type': 'chat.completion.stream.summary',
            'model': ctx.client_model,
            'chunks': emitted,
        }
        attach_client_response(turn, client_summary)
        finalize_turn(turn)

    return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||
"""处理走 Anthropic Messages 后端的聊天补全请求。"""
|
||||
payload['model'] = ctx.upstream_model
|
||||
anthropic_payload = cc_to_messages_request(payload)
|
||||
|
|
@ -295,8 +508,8 @@ def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any]):
|
|||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
|
||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
|
||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn)
|
||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_anthropic_non_stream(
|
||||
|
|
@ -304,18 +517,23 @@ def _handle_anthropic_non_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理 Anthropic 后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
attach_upstream_response(turn, raw)
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
data = messages_to_cc_response(raw)
|
||||
return _finalize_chat_response(ctx, data, debug_label='Messages 转回聊天补全后')
|
||||
return _finalize_chat_response(ctx, data, turn=turn, debug_label='Messages 转回聊天补全后')
|
||||
|
||||
|
||||
def _handle_anthropic_stream(
|
||||
|
|
@ -323,6 +541,7 @@ def _handle_anthropic_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理 Anthropic 后端的流式返回。
|
||||
|
||||
|
|
@ -334,13 +553,19 @@ def _handle_anthropic_stream(
|
|||
|
||||
def generate():
|
||||
"""消费上游 Anthropic 事件流,并逐步映射为聊天补全 SSE。"""
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield chat_error_chunk(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
client_chunks: list[Any] = []
|
||||
for event_type, event_data in iter_anthropic_sse(resp):
|
||||
append_upstream_event(turn, {'type': event_type, 'data': event_data})
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
|
|
@ -355,6 +580,8 @@ def _handle_anthropic_stream(
|
|||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
client_chunks.append(chunk_str)
|
||||
append_client_event(turn, {'type': 'chat_chunk', 'data': chunk_str})
|
||||
if event_count < 10:
|
||||
_dbg(f'返回片段#{event_count}={chunk_str[:500]}')
|
||||
yield sse_data_message(chunk_str)
|
||||
|
|
@ -362,7 +589,19 @@ def _handle_anthropic_stream(
|
|||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
append_client_event(turn, {'type': 'done'})
|
||||
yield sse_data_message('[DONE]')
|
||||
usage_tracker.record(ctx.client_model)
|
||||
set_stream_summary(turn, {
|
||||
'event_count': event_count,
|
||||
'client_chunk_count': len(client_chunks),
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'chat.completion.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'chunks': client_chunks,
|
||||
})
|
||||
finalize_turn(turn)
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
|
@ -371,6 +610,7 @@ def _finalize_chat_response(
|
|||
ctx: RouteContext,
|
||||
data: dict[str, Any],
|
||||
*,
|
||||
turn: dict[str, Any] | None,
|
||||
debug_label: str,
|
||||
):
|
||||
"""统一收尾非流式聊天补全响应。
|
||||
|
|
@ -383,6 +623,20 @@ def _finalize_chat_response(
|
|||
data['model'] = ctx.client_model
|
||||
_dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000])
|
||||
log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens')
|
||||
|
||||
usage_tracker.record(ctx.client_model, data.get('usage'))
|
||||
attach_client_response(turn, data)
|
||||
finalize_turn(turn, usage=data.get('usage'))
|
||||
|
||||
for choice in data.get('choices', []):
|
||||
msg = choice.get('message', {})
|
||||
if msg.get('reasoning_content'):
|
||||
thinking_cache.store_from_response(
|
||||
request.get_json(silent=True, force=True).get('messages', []),
|
||||
msg['reasoning_content'],
|
||||
)
|
||||
break
|
||||
|
||||
return jsonify(data)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import logging
|
|||
from typing import Any
|
||||
|
||||
import settings
|
||||
from utils.http import build_anthropic_headers, build_openai_headers
|
||||
from utils.http import build_anthropic_headers, build_gemini_headers, build_openai_headers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -76,6 +76,22 @@ def build_anthropic_target(ctx: RouteContext) -> tuple[str, dict[str, str]]:
|
|||
return url, headers
|
||||
|
||||
|
||||
def build_gemini_target(ctx: RouteContext, stream: bool = False) -> tuple[str, dict[str, str]]:
    """Build the request URL and headers for a Gemini backend.

    URL layout (trailing slashes on ``ctx.target_url`` are stripped first):

    * non-streaming: ``{base}/v1/models/{model}:generateContent``
    * streaming: ``{base}/v1/models/{model}:streamGenerateContent?alt=sse``

    Returns:
        A ``(url, headers)`` tuple; the headers come from
        ``build_gemini_headers(ctx.api_key)``.
    """
    base = ctx.target_url.rstrip('/')
    model = ctx.upstream_model
    if not stream:
        endpoint = f'{base}/v1/models/{model}:generateContent'
    else:
        endpoint = f'{base}/v1/models/{model}:streamGenerateContent?alt=sse'
    return endpoint, build_gemini_headers(ctx.api_key)
|
||||
|
||||
|
||||
def log_route_context(route_name: str, ctx: RouteContext, *, extra: str = '') -> None:
|
||||
"""统一输出路由级日志,避免不同入口的日志格式逐渐漂移。"""
|
||||
parts = [
|
||||
|
|
|
|||
|
|
@ -15,6 +15,17 @@ import settings
|
|||
from config import Config
|
||||
from routes.common import apply_body_modifications, apply_header_modifications, inject_instructions_anthropic
|
||||
from utils.http import build_anthropic_headers, forward_request, sse_response
|
||||
from utils.request_logger import (
|
||||
append_client_event,
|
||||
append_upstream_event,
|
||||
attach_client_response,
|
||||
attach_error,
|
||||
attach_upstream_request,
|
||||
attach_upstream_response,
|
||||
finalize_turn,
|
||||
set_stream_summary,
|
||||
start_turn,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -24,7 +35,8 @@ bp = Blueprint('messages', __name__)
|
|||
@bp.route('/v1/messages', methods=['POST'])
|
||||
def messages_passthrough():
|
||||
"""透传 Anthropic Messages 请求,并在必要时补齐 thinking 兼容层。"""
|
||||
payload = request.get_json(force=True)
|
||||
original_payload = request.get_json(force=True)
|
||||
payload = json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))
|
||||
model = payload.get('model', 'unknown')
|
||||
is_stream = payload.get('stream', False)
|
||||
|
||||
|
|
@ -41,20 +53,37 @@ def messages_passthrough():
|
|||
headers = apply_header_modifications(headers, header_mods)
|
||||
url = f'{url_base.rstrip("/")}/v1/messages'
|
||||
|
||||
turn = start_turn(
|
||||
route='messages',
|
||||
client_model=model,
|
||||
backend='anthropic',
|
||||
stream=is_stream,
|
||||
client_request=original_payload,
|
||||
request_headers=dict(request.headers),
|
||||
target_url=url_base,
|
||||
upstream_model=model,
|
||||
)
|
||||
|
||||
payload = inject_instructions_anthropic(payload, custom_instructions, instructions_position)
|
||||
payload = apply_body_modifications(payload, body_mods)
|
||||
|
||||
if not is_stream:
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
data = resp.json()
|
||||
attach_upstream_response(turn, data)
|
||||
_inject_thinking(data)
|
||||
attach_client_response(turn, data)
|
||||
finalize_turn(turn)
|
||||
return jsonify(data)
|
||||
|
||||
# 流式透传
|
||||
def generate():
|
||||
"""建立上游流式连接并逐段回传处理后的 SSE 数据。"""
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
try:
|
||||
resp = req_lib.post(
|
||||
url, headers=headers, json=payload,
|
||||
|
|
@ -63,12 +92,28 @@ def messages_passthrough():
|
|||
if resp.status_code != 200:
|
||||
body = resp.content.decode('utf-8', errors='replace')
|
||||
logger.warning(f'上游返回 {resp.status_code}: {body[:300]}')
|
||||
attach_error(turn, {'stage': 'upstream_status', 'status_code': resp.status_code, 'message': body})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield f'data: {json.dumps({"error": {"message": body, "type": "upstream_error"}})}\n\n'
|
||||
return
|
||||
|
||||
yield from _process_stream(resp)
|
||||
summary = {'upstream_event_count': 0, 'client_event_count': 0}
|
||||
client_events = []
|
||||
for out in _process_stream(resp, turn=turn, summary=summary):
|
||||
client_events.append(out)
|
||||
yield out
|
||||
set_stream_summary(turn, summary)
|
||||
attach_client_response(turn, {
|
||||
'type': 'messages.stream.summary',
|
||||
'events': client_events,
|
||||
})
|
||||
finalize_turn(turn)
|
||||
except req_lib.RequestException as e:
|
||||
logger.error(f'请求上游失败: {e}')
|
||||
attach_error(turn, {'stage': 'request_exception', 'message': str(e)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield f'data: {json.dumps({"error": {"message": str(e), "type": "proxy_error"}})}\n\n'
|
||||
|
||||
return sse_response(generate())
|
||||
|
|
@ -96,7 +141,7 @@ def _inject_thinking(data):
|
|||
logger.info(f'已注入 thinking block ({len(rc)} 字符)')
|
||||
|
||||
|
||||
def _process_stream(resp):
|
||||
def _process_stream(resp, *, turn=None, summary: dict[str, int] | None = None):
|
||||
"""处理 /v1/messages 流式响应,检测并注入 thinking 事件
|
||||
|
||||
追踪上游 content block 的 index,在注入 thinking blocks 时使用独立的 index,
|
||||
|
|
@ -105,11 +150,15 @@ def _process_stream(resp):
|
|||
reasoning_buf = ''
|
||||
injected = False
|
||||
index_offset = 0
|
||||
if summary is None:
|
||||
summary = {'upstream_event_count': 0, 'client_event_count': 0}
|
||||
|
||||
for line in resp.iter_lines():
|
||||
if not line:
|
||||
continue
|
||||
decoded = line.decode('utf-8', errors='replace')
|
||||
append_upstream_event(turn, {'raw': decoded})
|
||||
summary['upstream_event_count'] += 1
|
||||
|
||||
if not decoded.startswith('data:'):
|
||||
yield decoded + '\n\n'
|
||||
|
|
@ -140,7 +189,10 @@ def _process_stream(resp):
|
|||
if reasoning_buf and not injected:
|
||||
if event_data.get('delta', {}).get('type') == 'text_delta':
|
||||
injected = True
|
||||
yield from _emit_thinking_blocks(reasoning_buf)
|
||||
for injected_event in _emit_thinking_blocks(reasoning_buf):
|
||||
append_client_event(turn, {'raw': injected_event})
|
||||
summary['client_event_count'] += 1
|
||||
yield injected_event
|
||||
index_offset = 1
|
||||
reasoning_buf = ''
|
||||
|
||||
|
|
@ -148,7 +200,10 @@ def _process_stream(resp):
|
|||
event_data['index'] = event_data['index'] + index_offset
|
||||
modified = True
|
||||
|
||||
yield f'data: {json.dumps(event_data)}\n\n' if modified else decoded + '\n\n'
|
||||
output = f'data: {json.dumps(event_data)}\n\n' if modified else decoded + '\n\n'
|
||||
append_client_event(turn, {'raw': output})
|
||||
summary['client_event_count'] += 1
|
||||
yield output
|
||||
|
||||
|
||||
def _emit_thinking_blocks(text):
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from typing import Any
|
|||
from flask import Blueprint, jsonify, request
|
||||
|
||||
from adapters.cc_anthropic_adapter import cc_to_messages_request, messages_to_cc_response
|
||||
from adapters.cc_gemini_adapter import GeminiStreamConverter, cc_to_gemini_request, gemini_to_cc_response
|
||||
from adapters.openai_compat_fixer import fix_response, fix_stream_chunk, normalize_request
|
||||
from adapters.responses_cc_adapter import ResponsesStreamConverter, cc_to_responses, responses_to_cc
|
||||
from config import Config
|
||||
|
|
@ -21,6 +22,7 @@ from routes.common import (
|
|||
apply_body_modifications,
|
||||
apply_header_modifications,
|
||||
build_anthropic_target,
|
||||
build_gemini_target,
|
||||
build_openai_target,
|
||||
build_responses_target,
|
||||
build_route_context,
|
||||
|
|
@ -33,12 +35,27 @@ from routes.common import (
|
|||
)
|
||||
from utils.http import (
|
||||
forward_request,
|
||||
gen_id,
|
||||
iter_anthropic_sse,
|
||||
iter_gemini_sse,
|
||||
iter_openai_sse,
|
||||
iter_responses_sse,
|
||||
sse_response,
|
||||
)
|
||||
from utils.request_logger import (
|
||||
append_client_event,
|
||||
append_upstream_event,
|
||||
attach_client_response,
|
||||
attach_error,
|
||||
attach_upstream_request,
|
||||
attach_upstream_response,
|
||||
finalize_turn,
|
||||
set_stream_summary,
|
||||
start_turn,
|
||||
)
|
||||
from utils.think_tag import ThinkTagExtractor
|
||||
from utils.thinking_cache import thinking_cache
|
||||
from utils.usage_tracker import usage_tracker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -54,20 +71,33 @@ def _dbg(message: str) -> None:
|
|||
@bp.route('/v1/responses', methods=['POST'])
def responses_endpoint():
    """Handle a Responses request and dispatch it according to the model mapping.

    The client body is parsed once and kept untouched as ``original_payload``
    for the request log; processing works on a JSON round-trip copy. A logging
    turn is opened with ``start_turn`` and passed to the backend handler
    selected by ``ctx.backend``. The 'responses' backend receives the
    Responses-format payload; every other backend receives the
    chat-completions intermediate payload built by ``_build_cc_payload``.
    """
    original_payload = request.get_json(force=True)
    # JSON round-trip copy -- presumably so later processing cannot alter the
    # payload recorded as ``client_request`` below.
    payload = json.loads(json.dumps(original_payload, ensure_ascii=False, default=str))
    client_model = payload.get('model', 'unknown')
    is_stream = payload.get('stream', False)

    ctx = build_route_context(client_model, is_stream)
    # NOTE(review): the full request headers are passed to the request logger;
    # confirm that it redacts credentials such as Authorization / API keys.
    turn = start_turn(
        route='responses',
        client_model=client_model,
        backend=ctx.backend,
        stream=is_stream,
        client_request=original_payload,
        request_headers=dict(request.headers),
        target_url=ctx.target_url,
        upstream_model=ctx.upstream_model,
    )
    log_route_context('响应生成', ctx)

    cc_payload = _build_cc_payload(payload, ctx)

    # Every backend handler receives the turn so it can record upstream and
    # client traffic and finalize the log entry itself.
    if ctx.backend == 'openai':
        return _handle_openai_backend(ctx, cc_payload, turn)
    if ctx.backend == 'responses':
        return _handle_responses_backend(ctx, payload, turn)
    if ctx.backend == 'gemini':
        return _handle_gemini_backend(ctx, cc_payload, turn)
    return _handle_anthropic_backend(ctx, cc_payload, turn)
|
||||
|
||||
|
||||
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
|
||||
|
|
@ -78,6 +108,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, A
|
|||
"""
|
||||
cc_payload = responses_to_cc(payload)
|
||||
cc_payload['model'] = ctx.upstream_model
|
||||
cc_payload['messages'] = thinking_cache.inject(cc_payload.get('messages', []))
|
||||
cc_payload = inject_instructions_cc(cc_payload, ctx.custom_instructions, ctx.instructions_position)
|
||||
_dbg(
|
||||
'已转换为聊天补全中间表示:字段=' + str(list(cc_payload.keys()))
|
||||
|
|
@ -86,7 +117,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, A
|
|||
return cc_payload
|
||||
|
||||
|
||||
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
|
||||
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]):
|
||||
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
|
||||
cc_payload = normalize_request(cc_payload)
|
||||
_dbg(
|
||||
|
|
@ -99,8 +130,8 @@ def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
|
|||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_openai_stream(ctx, cc_payload, url, headers)
|
||||
return _handle_openai_non_stream(ctx, cc_payload, url, headers)
|
||||
return _handle_openai_stream(ctx, cc_payload, url, headers, turn)
|
||||
return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_openai_non_stream(
|
||||
|
|
@ -108,19 +139,24 @@ def _handle_openai_non_stream(
|
|||
cc_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any],
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
|
||||
cc_payload['stream'] = False
|
||||
attach_upstream_request(turn, cc_payload, headers)
|
||||
resp, err = forward_request(url, headers, cc_payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
attach_upstream_response(turn, raw)
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
fixed = fix_response(raw)
|
||||
response_data = cc_to_responses(fixed, ctx.client_model)
|
||||
return _finalize_responses_response(response_data, debug_label='转换为 Responses 后')
|
||||
return _finalize_responses_response(response_data, turn=turn, debug_label='转换为 Responses 后')
|
||||
|
||||
|
||||
def _handle_openai_stream(
|
||||
|
|
@ -128,6 +164,7 @@ def _handle_openai_stream(
|
|||
cc_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
|
||||
cc_payload['stream'] = True
|
||||
|
|
@ -137,20 +174,41 @@ def _handle_openai_stream(
|
|||
"""消费 OpenAI 聊天补全流,并实时改写为 Responses SSE。"""
|
||||
yield from converter.start_events()
|
||||
|
||||
attach_upstream_request(turn, cc_payload, headers)
|
||||
resp, err = forward_request(url, headers, cc_payload, stream=True)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield responses_error_event(str(err))
|
||||
return
|
||||
|
||||
think_extractor = ThinkTagExtractor()
|
||||
chunk_count = 0
|
||||
client_events: list[str] = []
|
||||
|
||||
for chunk in iter_openai_sse(resp):
|
||||
if chunk is None:
|
||||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||||
yield from converter.finalize()
|
||||
finalized_events = converter.finalize()
|
||||
for item in finalized_events:
|
||||
client_events.append(item)
|
||||
append_client_event(turn, {'type': 'responses_event', 'data': item})
|
||||
yield item
|
||||
usage_tracker.record(ctx.client_model)
|
||||
set_stream_summary(turn, {
|
||||
'chunk_count': chunk_count,
|
||||
'client_event_count': len(client_events),
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'responses.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'events': client_events,
|
||||
})
|
||||
finalize_turn(turn)
|
||||
return
|
||||
|
||||
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'上游原始片段#{chunk_count}='
|
||||
|
|
@ -159,19 +217,22 @@ def _handle_openai_stream(
|
|||
|
||||
chunk = fix_stream_chunk(chunk)
|
||||
for out in think_extractor.process_chunk(chunk):
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'转换后片段#{chunk_count}='
|
||||
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield from converter.process_cc_chunk(out)
|
||||
for evt in converter.process_cc_chunk(out):
|
||||
client_events.append(evt)
|
||||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||||
if chunk_count < 10:
|
||||
_dbg(
|
||||
f'转换后片段#{chunk_count}='
|
||||
+ json.dumps(out, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield evt
|
||||
|
||||
chunk_count += 1
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
|
||||
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||
"""处理走原生 Responses 后端的请求。
|
||||
|
||||
当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议,
|
||||
|
|
@ -185,8 +246,8 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any]):
|
|||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_responses_stream(ctx, payload, url, headers)
|
||||
return _handle_responses_non_stream(ctx, payload, url, headers)
|
||||
return _handle_responses_stream(ctx, payload, url, headers, turn)
|
||||
return _handle_responses_non_stream(ctx, payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_responses_non_stream(
|
||||
|
|
@ -194,16 +255,21 @@ def _handle_responses_non_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理原生 Responses 后端的非流式返回。"""
|
||||
payload['stream'] = False
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
|
||||
response_data = resp.json()
|
||||
attach_upstream_response(turn, response_data)
|
||||
response_data['model'] = ctx.client_model
|
||||
return _finalize_responses_response(response_data, debug_label='原生 Responses 返回后')
|
||||
return _finalize_responses_response(response_data, turn=turn, debug_label='原生 Responses 返回后')
|
||||
|
||||
|
||||
def _handle_responses_stream(
|
||||
|
|
@ -211,6 +277,7 @@ def _handle_responses_stream(
|
|||
payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理原生 Responses 后端的流式返回。"""
|
||||
payload['stream'] = True
|
||||
|
|
@ -218,27 +285,151 @@ def _handle_responses_stream(
|
|||
|
||||
def generate():
|
||||
"""透传上游原生 Responses 流,并做轻量模型名改写。"""
|
||||
attach_upstream_request(turn, payload, headers)
|
||||
resp, err = forward_request(url, headers, payload, stream=True)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield responses_error_event(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
client_events: list[str] = []
|
||||
for event_type, event_data in iter_responses_sse(resp):
|
||||
append_upstream_event(turn, {'type': event_type, 'data': event_data})
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
yield from converter.process_responses_event(event_type, event_data)
|
||||
produced = converter.process_responses_event(event_type, event_data)
|
||||
for evt in produced:
|
||||
client_events.append(evt)
|
||||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||||
yield evt
|
||||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
usage_tracker.record(ctx.client_model)
|
||||
set_stream_summary(turn, {
|
||||
'event_count': event_count,
|
||||
'client_event_count': len(client_events),
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'responses.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'events': client_events,
|
||||
})
|
||||
finalize_turn(turn)
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
|
||||
def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
    """Serve a Responses request through the Gemini Contents backend.

    Converts the chat-completions intermediate payload into a Gemini request,
    builds the upstream URL and headers, applies the configured body/header
    modifications, and hands off to the streaming or non-streaming handler
    depending on ``ctx.is_stream``.
    """
    request_body = cc_to_gemini_request(cc_payload)
    field_names = str(list(request_body.keys()))
    content_total = len(request_body.get("contents", []))
    _dbg('已转换为 Gemini 请求:字段=' + field_names + f' 内容数={content_total}')

    target_url, target_headers = build_gemini_target(ctx, stream=ctx.is_stream)
    request_body = apply_body_modifications(request_body, ctx.body_modifications)
    target_headers = apply_header_modifications(target_headers, ctx.header_modifications)

    # Pick the handler once, then call it with the shared argument list.
    handler = _handle_gemini_stream if ctx.is_stream else _handle_gemini_non_stream
    return handler(ctx, request_body, target_url, target_headers, turn)
|
||||
|
||||
|
||||
def _handle_gemini_non_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle a non-streaming Responses request served by the Gemini backend.

    Sends the Gemini payload upstream, converts the reply first to the
    chat-completions form and then to a Responses object, and finishes through
    ``_finalize_responses_response``.

    Returns:
        The error value from ``forward_request`` when the upstream call fails,
        otherwise the finalized Responses reply.

    Raises:
        ValueError: re-raised when the upstream body cannot be parsed as JSON,
            after the failure has been recorded on the turn.
    """
    attach_upstream_request(turn, payload, headers)
    resp, err = forward_request(url, headers, payload)
    if err:
        attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
        finalize_turn(turn)
        return err

    try:
        raw = resp.json()
    except ValueError as exc:
        # JSON decode errors are ValueError subclasses. Record the failure and
        # close the turn so the request log is not left open, then propagate
        # exactly as before.
        attach_error(turn, {
            'stage': 'parse_upstream_response',
            'message': f'upstream body is not valid JSON: {exc}',
        })
        finalize_turn(turn)
        raise

    attach_upstream_response(turn, raw)
    _dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])

    cc_data = gemini_to_cc_response(raw)
    response_data = cc_to_responses(cc_data, ctx.client_model)
    return _finalize_responses_response(response_data, turn=turn, debug_label='Gemini 转回 Responses 后')
|
||||
|
||||
|
||||
def _handle_gemini_stream(
    ctx: RouteContext,
    payload: dict[str, Any],
    url: str,
    headers: dict[str, str],
    turn: dict[str, Any] | None,
):
    """Handle a streaming Responses request served by the Gemini backend.

    Each upstream Gemini chunk is converted to chat-completions chunks by
    ``GeminiStreamConverter`` and then to Responses SSE events by
    ``ResponsesStreamConverter``. Upstream chunks and emitted events are
    recorded on ``turn`` as they pass through.

    Returns:
        An SSE response wrapping the event generator.
    """
    converter = ResponsesStreamConverter(model=ctx.client_model)
    gemini_converter = GeminiStreamConverter()

    def generate():
        """Consume the Gemini SSE stream and yield Responses SSE events."""
        yield from converter.start_events()

        attach_upstream_request(turn, payload, headers)
        resp, err = forward_request(url, headers, payload, stream=True)
        if err:
            attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
            set_stream_summary(turn, {'status': 'error'})
            finalize_turn(turn)
            yield responses_error_event(str(err))
            return

        chunk_count = 0
        client_events: list[str] = []

        def record(evt):
            """Log one outgoing event on the turn and return it unchanged."""
            client_events.append(evt)
            append_client_event(turn, {'type': 'responses_event', 'data': evt})
            return evt

        try:
            for gemini_chunk in iter_gemini_sse(resp):
                append_upstream_event(turn, {'type': 'gemini_chunk', 'data': gemini_chunk})
                # Only the first few chunks are dumped to the debug log.
                if chunk_count < 10:
                    _dbg(
                        f'上游 Gemini 片段#{chunk_count}='
                        + json.dumps(gemini_chunk, ensure_ascii=False, default=str)[:500]
                    )

                for cc_chunk in gemini_converter.process_chunk(gemini_chunk):
                    for evt in converter.process_cc_chunk(cc_chunk):
                        yield record(evt)

                chunk_count += 1

            _dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
            for evt in converter.finalize():
                yield record(evt)
        except BaseException as exc:
            # Covers upstream/conversion failures and GeneratorExit, which is
            # raised at the yield when the generator is closed early (e.g. the
            # client disconnects). Without this the turn would never be
            # finalized. The exception is always re-raised.
            attach_error(turn, {'stage': 'stream', 'message': f'{type(exc).__name__}: {exc}'})
            set_stream_summary(turn, {
                'status': 'error',
                'chunk_count': chunk_count,
                'client_event_count': len(client_events),
            })
            finalize_turn(turn)
            raise

        usage_tracker.record(ctx.client_model)
        set_stream_summary(turn, {
            'chunk_count': chunk_count,
            'client_event_count': len(client_events),
        })
        attach_client_response(turn, {
            'type': 'responses.stream.summary',
            'model': ctx.client_model,
            'events': client_events,
        })
        finalize_turn(turn)

    return sse_response(generate())
|
||||
|
||||
|
||||
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||
"""处理走 Anthropic 后端的 Responses 请求。"""
|
||||
anthropic_payload = cc_to_messages_request(cc_payload)
|
||||
_dbg(
|
||||
|
|
@ -251,8 +442,8 @@ def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any]):
|
|||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||
|
||||
if ctx.is_stream:
|
||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers)
|
||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers)
|
||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn)
|
||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn)
|
||||
|
||||
|
||||
def _handle_anthropic_non_stream(
|
||||
|
|
@ -260,19 +451,24 @@ def _handle_anthropic_non_stream(
|
|||
anthropic_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理 Anthropic 后端的非流式 Responses 返回。"""
|
||||
anthropic_payload['stream'] = False
|
||||
attach_upstream_request(turn, anthropic_payload, headers)
|
||||
resp, err = forward_request(url, headers, anthropic_payload)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': 'upstream request failed'})
|
||||
finalize_turn(turn)
|
||||
return err
|
||||
|
||||
raw = resp.json()
|
||||
attach_upstream_response(turn, raw)
|
||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||
|
||||
cc_data = messages_to_cc_response(raw)
|
||||
response_data = cc_to_responses(cc_data, ctx.client_model)
|
||||
return _finalize_responses_response(response_data, debug_label='Messages 转回 Responses 后')
|
||||
return _finalize_responses_response(response_data, turn=turn, debug_label='Messages 转回 Responses 后')
|
||||
|
||||
|
||||
def _handle_anthropic_stream(
|
||||
|
|
@ -280,6 +476,7 @@ def _handle_anthropic_stream(
|
|||
anthropic_payload: dict[str, Any],
|
||||
url: str,
|
||||
headers: dict[str, str],
|
||||
turn: dict[str, Any] | None,
|
||||
):
|
||||
"""处理 Anthropic 后端的流式 Responses 返回。
|
||||
|
||||
|
|
@ -293,29 +490,54 @@ def _handle_anthropic_stream(
|
|||
"""消费 Anthropic SSE,并直接映射为 Responses 事件序列。"""
|
||||
yield from converter.start_events()
|
||||
|
||||
attach_upstream_request(turn, anthropic_payload, headers)
|
||||
resp, err = forward_request(url, headers, anthropic_payload, stream=True)
|
||||
if err:
|
||||
attach_error(turn, {'stage': 'forward_request', 'message': str(err)})
|
||||
set_stream_summary(turn, {'status': 'error'})
|
||||
finalize_turn(turn)
|
||||
yield responses_error_event(str(err))
|
||||
return
|
||||
|
||||
event_count = 0
|
||||
client_events: list[str] = []
|
||||
for event_type, event_data in iter_anthropic_sse(resp):
|
||||
append_upstream_event(turn, {'type': event_type, 'data': event_data})
|
||||
if event_count < 10:
|
||||
_dbg(
|
||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||
+ json.dumps(event_data, ensure_ascii=False, default=str)[:500]
|
||||
)
|
||||
|
||||
yield from converter.process_anthropic_event(event_type, event_data)
|
||||
produced = converter.process_anthropic_event(event_type, event_data)
|
||||
for evt in produced:
|
||||
client_events.append(evt)
|
||||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||||
yield evt
|
||||
event_count += 1
|
||||
|
||||
_dbg(f'流式响应结束,共 {event_count} 个事件')
|
||||
yield from converter.finalize()
|
||||
finalized_events = converter.finalize()
|
||||
for evt in finalized_events:
|
||||
client_events.append(evt)
|
||||
append_client_event(turn, {'type': 'responses_event', 'data': evt})
|
||||
yield evt
|
||||
usage_tracker.record(ctx.client_model)
|
||||
set_stream_summary(turn, {
|
||||
'event_count': event_count,
|
||||
'client_event_count': len(client_events),
|
||||
})
|
||||
attach_client_response(turn, {
|
||||
'type': 'responses.stream.summary',
|
||||
'model': ctx.client_model,
|
||||
'events': client_events,
|
||||
})
|
||||
finalize_turn(turn)
|
||||
|
||||
return sse_response(generate())
|
||||
|
||||
|
||||
def _finalize_responses_response(response_data: dict[str, Any], *, debug_label: str):
|
||||
def _finalize_responses_response(response_data: dict[str, Any], *, turn: dict[str, Any], debug_label: str):
|
||||
"""统一收尾非流式 Responses 响应。
|
||||
|
||||
两条转换链路和一条原生 Responses 链路最终都会回到 Responses 对象,因此这里集中
|
||||
|
|
@ -324,4 +546,15 @@ def _finalize_responses_response(response_data: dict[str, Any], *, debug_label:
|
|||
response_data['model'] = response_data.get('model') or ''
|
||||
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
|
||||
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
|
||||
|
||||
usage_tracker.record(
|
||||
response_data.get('model', ''),
|
||||
response_data.get('usage'),
|
||||
input_key='input_tokens',
|
||||
output_key='output_tokens',
|
||||
)
|
||||
|
||||
attach_client_response(turn, response_data)
|
||||
finalize_turn(turn, usage=response_data.get('usage'))
|
||||
|
||||
return jsonify(response_data)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue