Compare commits
No commits in common. "4c6bede153457d2ceaf97be9c8c3243d56b4df58" and "72223ef4125aa452eecb9719cb407dc8b94d7205" have entirely different histories.
4c6bede153
...
72223ef412
13 changed files with 349 additions and 605 deletions
12
README.md
12
README.md
|
|
@ -159,18 +159,6 @@ api2cursor/
|
||||||
- `file_path` → `path` 字段映射
|
- `file_path` → `path` 字段映射
|
||||||
- `finish_reason` 修正
|
- `finish_reason` 修正
|
||||||
|
|
||||||
|
|
||||||
============================
|
|
||||||
增加缓存,在api2cursor里面的body修改中加个你喜欢的随意字段:
|
|
||||||
{
|
|
||||||
"prompt_cache_key": "GPT5-4-xxx-xxx"
|
|
||||||
}
|
|
||||||
openai 开 fast 模式
|
|
||||||
{
|
|
||||||
"service_tier": "priority"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
## 许可证
|
## 许可证
|
||||||
|
|
||||||
[MIT](LICENSE)
|
[MIT](LICENSE)
|
||||||
|
|
|
||||||
|
|
@ -261,12 +261,6 @@ def _convert_request_message(message: Any) -> tuple[JsonDict | None, str | None]
|
||||||
anthropic_role = 'assistant' if role == 'assistant' else 'user'
|
anthropic_role = 'assistant' if role == 'assistant' else 'user'
|
||||||
anthropic_content = _convert_content(message)
|
anthropic_content = _convert_content(message)
|
||||||
|
|
||||||
if role == 'assistant' and message.get('reasoning_content'):
|
|
||||||
thinking_block = {'type': 'thinking', 'thinking': message['reasoning_content']}
|
|
||||||
blocks = _to_blocks(anthropic_content)
|
|
||||||
blocks.insert(0, thinking_block)
|
|
||||||
anthropic_content = blocks
|
|
||||||
|
|
||||||
if role == 'assistant' and 'tool_calls' in message:
|
if role == 'assistant' and 'tool_calls' in message:
|
||||||
anthropic_content = _append_tool_use_blocks(anthropic_content, message.get('tool_calls', []))
|
anthropic_content = _append_tool_use_blocks(anthropic_content, message.get('tool_calls', []))
|
||||||
|
|
||||||
|
|
@ -469,8 +463,6 @@ def _convert_content_part(part: Any) -> JsonDict | None:
|
||||||
return {'type': 'text', 'text': part.get('text', '')}
|
return {'type': 'text', 'text': part.get('text', '')}
|
||||||
if part_type == 'image_url':
|
if part_type == 'image_url':
|
||||||
return _convert_image(part)
|
return _convert_image(part)
|
||||||
if part_type == 'image':
|
|
||||||
return part
|
|
||||||
if part_type in ('tool_use', 'tool_result'):
|
if part_type in ('tool_use', 'tool_result'):
|
||||||
return part
|
return part
|
||||||
return None
|
return None
|
||||||
|
|
@ -582,16 +574,38 @@ _EPHEMERAL = {'type': 'ephemeral'}
|
||||||
|
|
||||||
|
|
||||||
def optimize_cache_control(request: JsonDict) -> None:
|
def optimize_cache_control(request: JsonDict) -> None:
|
||||||
"""为 Anthropic Messages 请求启用顶层自动 prompt caching。
|
"""自动设置最优的 Anthropic cache_control 断点。
|
||||||
|
|
||||||
2026 版 Claude API 已支持在请求顶层使用 `cache_control` 开启自动缓存,
|
算法移植自 CursorProxy 的 ensure_cache_control.go:
|
||||||
由上游自动把断点放到最后一个可缓存块并随多轮对话前移。相比手动在嵌套
|
1. 归一化所有消息 content 为数组格式
|
||||||
content blocks 上打断点,这种方式对 Anthropic 兼容中转站更稳定,也更接近
|
2. 清空所有已有 cache_control
|
||||||
`/v1/responses` 通过顶层字段启用缓存的思路。
|
3. 注入结构锚点(tools 末尾 + system 末尾)
|
||||||
|
4. 注入消息锚点(最后一个可缓存块 + 窗口边界)
|
||||||
|
5. 总断点数不超过 4 个
|
||||||
"""
|
"""
|
||||||
_normalize_message_contents(request)
|
_normalize_message_contents(request)
|
||||||
_clear_all_cache_controls(request)
|
_clear_all_cache_controls(request)
|
||||||
request['cache_control'] = dict(_EPHEMERAL)
|
|
||||||
|
structural = _inject_structural_anchors(request)
|
||||||
|
remaining = _MAX_BREAKPOINTS - structural
|
||||||
|
if remaining <= 0:
|
||||||
|
return
|
||||||
|
|
||||||
|
refs = _collect_cacheable_block_refs(request)
|
||||||
|
if not refs:
|
||||||
|
return
|
||||||
|
|
||||||
|
desired = 1 if len(refs) < _BLOCK_WINDOW else 2
|
||||||
|
anchors = min(desired, remaining)
|
||||||
|
|
||||||
|
if anchors >= 1 and refs:
|
||||||
|
refs[-1]['cache_control'] = _EPHEMERAL
|
||||||
|
|
||||||
|
if anchors >= 2 and len(refs) > 1:
|
||||||
|
target = len(refs) - _BLOCK_WINDOW
|
||||||
|
idx = _pick_window_anchor(refs, target)
|
||||||
|
if idx is not None and idx != len(refs) - 1:
|
||||||
|
refs[idx]['cache_control'] = _EPHEMERAL
|
||||||
|
|
||||||
|
|
||||||
def _normalize_message_contents(request: JsonDict) -> None:
|
def _normalize_message_contents(request: JsonDict) -> None:
|
||||||
|
|
@ -606,7 +620,6 @@ def _normalize_message_contents(request: JsonDict) -> None:
|
||||||
|
|
||||||
def _clear_all_cache_controls(request: JsonDict) -> None:
|
def _clear_all_cache_controls(request: JsonDict) -> None:
|
||||||
"""清空所有已有的 cache_control 字段。"""
|
"""清空所有已有的 cache_control 字段。"""
|
||||||
request.pop('cache_control', None)
|
|
||||||
for tool in request.get('tools', []):
|
for tool in request.get('tools', []):
|
||||||
tool.pop('cache_control', None)
|
tool.pop('cache_control', None)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -654,6 +654,10 @@ class ResponsesToCCStreamConverter:
|
||||||
'completion_tokens': self._usage.get('output_tokens', 0),
|
'completion_tokens': self._usage.get('output_tokens', 0),
|
||||||
'total_tokens': self._usage.get('total_tokens', 0),
|
'total_tokens': self._usage.get('total_tokens', 0),
|
||||||
}
|
}
|
||||||
|
if isinstance(self._usage.get('input_tokens_details'), dict):
|
||||||
|
chunk['usage']['prompt_tokens_details'] = dict(self._usage['input_tokens_details'])
|
||||||
|
if isinstance(self._usage.get('output_tokens_details'), dict):
|
||||||
|
chunk['usage']['completion_tokens_details'] = dict(self._usage['output_tokens_details'])
|
||||||
return [chunk]
|
return [chunk]
|
||||||
|
|
||||||
def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict:
|
def _make_chunk(self, delta: JsonDict, finish_reason: str | None = None) -> JsonDict:
|
||||||
|
|
@ -678,20 +682,44 @@ def _copy_request_options(payload: JsonDict, result: JsonDict) -> None:
|
||||||
"""将 Responses 请求中的通用选项复制到 CC 请求体。"""
|
"""将 Responses 请求中的通用选项复制到 CC 请求体。"""
|
||||||
if 'tools' in payload:
|
if 'tools' in payload:
|
||||||
result['tools'] = _convert_tools(payload['tools'])
|
result['tools'] = _convert_tools(payload['tools'])
|
||||||
for key in ('temperature', 'top_p'):
|
for key in (
|
||||||
|
'temperature',
|
||||||
|
'top_p',
|
||||||
|
'tool_choice',
|
||||||
|
'parallel_tool_calls',
|
||||||
|
'truncation',
|
||||||
|
'store',
|
||||||
|
'metadata',
|
||||||
|
'conversation',
|
||||||
|
'previous_response_id',
|
||||||
|
'prompt_cache_key',
|
||||||
|
'service_tier',
|
||||||
|
'user',
|
||||||
|
):
|
||||||
if key in payload:
|
if key in payload:
|
||||||
result[key] = payload[key]
|
result[key] = payload[key]
|
||||||
if 'max_output_tokens' in payload:
|
if 'max_output_tokens' in payload:
|
||||||
result['max_tokens'] = payload['max_output_tokens']
|
result['max_tokens'] = payload['max_output_tokens']
|
||||||
if 'tool_choice' in payload:
|
|
||||||
result['tool_choice'] = payload['tool_choice']
|
|
||||||
|
|
||||||
|
|
||||||
def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None:
|
def _copy_responses_request_options(payload: JsonDict, result: JsonDict) -> None:
|
||||||
"""将聊天补全请求中的通用选项复制到原生 Responses 请求体。"""
|
"""将聊天补全请求中的通用选项复制到原生 Responses 请求体。"""
|
||||||
if 'tools' in payload:
|
if 'tools' in payload:
|
||||||
result['tools'] = _convert_cc_tools_to_responses(payload['tools'])
|
result['tools'] = _convert_cc_tools_to_responses(payload['tools'])
|
||||||
for key in ('temperature', 'top_p', 'tool_choice'):
|
for key in (
|
||||||
|
'temperature',
|
||||||
|
'top_p',
|
||||||
|
'tool_choice',
|
||||||
|
'parallel_tool_calls',
|
||||||
|
'truncation',
|
||||||
|
'store',
|
||||||
|
'metadata',
|
||||||
|
'conversation',
|
||||||
|
'previous_response_id',
|
||||||
|
'prompt_cache_key',
|
||||||
|
'service_tier',
|
||||||
|
'user',
|
||||||
|
):
|
||||||
if key in payload:
|
if key in payload:
|
||||||
result[key] = payload[key]
|
result[key] = payload[key]
|
||||||
if 'max_tokens' in payload:
|
if 'max_tokens' in payload:
|
||||||
|
|
@ -703,11 +731,7 @@ def _append_responses_input_item(
|
||||||
instructions: list[str],
|
instructions: list[str],
|
||||||
input_items: list[JsonDict],
|
input_items: list[JsonDict],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""将单条 Chat Completions 消息追加为 Responses `input` 项。
|
"""将单条 Chat Completions 消息追加为 Responses `input` 项。"""
|
||||||
|
|
||||||
尽量使用 EasyInputMessage 格式({role, content})以减少 token 开销,
|
|
||||||
提高上游 prompt caching 的前缀匹配命中率。
|
|
||||||
"""
|
|
||||||
if not isinstance(message, dict):
|
if not isinstance(message, dict):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -728,26 +752,21 @@ def _append_responses_input_item(
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
text = _content_to_text(content)
|
item: JsonDict = {
|
||||||
has_tool_calls = bool(message.get('tool_calls'))
|
|
||||||
|
|
||||||
if role == 'assistant' and has_tool_calls:
|
|
||||||
if text:
|
|
||||||
input_items.append({
|
|
||||||
'type': 'message',
|
'type': 'message',
|
||||||
'role': 'assistant',
|
'role': role or 'user',
|
||||||
'content': [{'type': 'output_text', 'text': text}],
|
'content': _content_to_responses_parts(content, role),
|
||||||
})
|
}
|
||||||
|
input_items.append(item)
|
||||||
|
|
||||||
|
if role == 'assistant':
|
||||||
for tool_call in message.get('tool_calls') or []:
|
for tool_call in message.get('tool_calls') or []:
|
||||||
input_items.append(_build_responses_function_call_item(tool_call))
|
input_items.append(_build_responses_function_call_item(tool_call))
|
||||||
else:
|
|
||||||
input_items.append({'role': role or 'user', 'content': text or ''})
|
|
||||||
|
|
||||||
|
|
||||||
def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None:
|
def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None:
|
||||||
"""将 Responses `input` 数组重建为 Chat Completions `messages` 列表。"""
|
"""将 Responses `input` 数组重建为 Chat Completions `messages` 列表。"""
|
||||||
index = 0
|
index = 0
|
||||||
pending_reasoning: str | None = None
|
|
||||||
while index < len(items):
|
while index < len(items):
|
||||||
item = items[index]
|
item = items[index]
|
||||||
|
|
||||||
|
|
@ -763,35 +782,20 @@ def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None:
|
||||||
item_type = item.get('type', '')
|
item_type = item.get('type', '')
|
||||||
role = item.get('role', '')
|
role = item.get('role', '')
|
||||||
|
|
||||||
if item_type == 'reasoning':
|
|
||||||
pending_reasoning = _extract_reasoning_text(item)
|
|
||||||
index += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
if role and not item_type:
|
if role and not item_type:
|
||||||
msg: JsonDict = {
|
messages.append({
|
||||||
'role': role,
|
'role': role,
|
||||||
'content': _normalize_simple_content(item.get('content', '')),
|
'content': _normalize_simple_content(item.get('content', '')),
|
||||||
}
|
})
|
||||||
if role == 'assistant' and pending_reasoning:
|
|
||||||
msg['reasoning_content'] = pending_reasoning
|
|
||||||
pending_reasoning = None
|
|
||||||
messages.append(msg)
|
|
||||||
index += 1
|
index += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if item_type == 'message':
|
if item_type == 'message':
|
||||||
consumed = _append_message_item(items, start=index, messages=messages)
|
consumed = _append_message_item(items, start=index, messages=messages)
|
||||||
if item.get('role') == 'assistant' and pending_reasoning and messages:
|
|
||||||
messages[-1]['reasoning_content'] = pending_reasoning
|
|
||||||
pending_reasoning = None
|
|
||||||
index += consumed
|
index += consumed
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if item_type == 'function_call':
|
if item_type == 'function_call':
|
||||||
if pending_reasoning and messages and messages[-1].get('role') == 'assistant':
|
|
||||||
messages[-1]['reasoning_content'] = pending_reasoning
|
|
||||||
pending_reasoning = None
|
|
||||||
_append_function_call_item(item, messages)
|
_append_function_call_item(item, messages)
|
||||||
index += 1
|
index += 1
|
||||||
continue
|
continue
|
||||||
|
|
@ -938,11 +942,18 @@ def _make_function_call_output_item(tool_call: JsonDict) -> JsonDict:
|
||||||
|
|
||||||
def _build_responses_usage(usage: JsonDict) -> JsonDict:
|
def _build_responses_usage(usage: JsonDict) -> JsonDict:
|
||||||
"""将 Chat Completions 的 usage 字段映射为 Responses usage 结构。"""
|
"""将 Chat Completions 的 usage 字段映射为 Responses usage 结构。"""
|
||||||
return {
|
result = {
|
||||||
'input_tokens': usage.get('prompt_tokens', 0),
|
'input_tokens': usage.get('prompt_tokens', 0),
|
||||||
'output_tokens': usage.get('completion_tokens', 0),
|
'output_tokens': usage.get('completion_tokens', 0),
|
||||||
'total_tokens': usage.get('total_tokens', 0),
|
'total_tokens': usage.get('total_tokens', 0),
|
||||||
}
|
}
|
||||||
|
prompt_details = usage.get('prompt_tokens_details')
|
||||||
|
if isinstance(prompt_details, dict):
|
||||||
|
result['input_tokens_details'] = dict(prompt_details)
|
||||||
|
completion_details = usage.get('completion_tokens_details')
|
||||||
|
if isinstance(completion_details, dict):
|
||||||
|
result['output_tokens_details'] = dict(completion_details)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]:
|
def _collect_cc_parts_from_responses_output(output_items: Any) -> tuple[str, str, list[JsonDict]]:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
services:
|
services:
|
||||||
api2cursor:
|
api2cursor:
|
||||||
build: .
|
build: .
|
||||||
container_name: api2cursor
|
|
||||||
ports:
|
ports:
|
||||||
- "${PROXY_PORT:-3029}:${PROXY_PORT:-3029}"
|
- "${PROXY_PORT:-3029}:${PROXY_PORT:-3029}"
|
||||||
env_file:
|
env_file:
|
||||||
|
|
@ -13,7 +13,6 @@ from flask import Blueprint, request, jsonify, send_from_directory
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from config import Config
|
from config import Config
|
||||||
from utils.request_history import request_history
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -203,15 +202,6 @@ def get_stats():
|
||||||
return jsonify(usage_tracker.get_stats())
|
return jsonify(usage_tracker.get_stats())
|
||||||
|
|
||||||
|
|
||||||
@bp.route('/api/admin/request-logs', methods=['GET'])
|
|
||||||
def get_request_logs():
|
|
||||||
"""返回最近 500 条请求日志。"""
|
|
||||||
err = _check_auth()
|
|
||||||
if err:
|
|
||||||
return err
|
|
||||||
return jsonify({'items': request_history.get_recent(500)})
|
|
||||||
|
|
||||||
|
|
||||||
# ─── 内部辅助 ─────────────────────────────────────
|
# ─── 内部辅助 ─────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
195
routes/chat.py
195
routes/chat.py
|
|
@ -9,7 +9,6 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from time import perf_counter
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
|
|
@ -43,7 +42,9 @@ from routes.common import (
|
||||||
build_responses_target,
|
build_responses_target,
|
||||||
build_route_context,
|
build_route_context,
|
||||||
chat_error_chunk,
|
chat_error_chunk,
|
||||||
ensure_prompt_cache_key,
|
ensure_responses_cache_control,
|
||||||
|
attach_previous_response_id,
|
||||||
|
remember_response_id,
|
||||||
inject_instructions_anthropic,
|
inject_instructions_anthropic,
|
||||||
inject_instructions_cc,
|
inject_instructions_cc,
|
||||||
inject_instructions_responses,
|
inject_instructions_responses,
|
||||||
|
|
@ -60,7 +61,6 @@ from utils.http import (
|
||||||
iter_responses_sse,
|
iter_responses_sse,
|
||||||
sse_response,
|
sse_response,
|
||||||
)
|
)
|
||||||
from utils.request_history import request_history
|
|
||||||
from utils.request_logger import (
|
from utils.request_logger import (
|
||||||
append_client_event,
|
append_client_event,
|
||||||
append_upstream_event,
|
append_upstream_event,
|
||||||
|
|
@ -115,7 +115,6 @@ def chat_completions():
|
||||||
client_model = payload.get('model', 'unknown')
|
client_model = payload.get('model', 'unknown')
|
||||||
is_stream = payload.get('stream', False)
|
is_stream = payload.get('stream', False)
|
||||||
ctx = build_route_context(client_model, is_stream)
|
ctx = build_route_context(client_model, is_stream)
|
||||||
request_started_at = perf_counter()
|
|
||||||
turn = start_turn(
|
turn = start_turn(
|
||||||
route='chat',
|
route='chat',
|
||||||
client_model=client_model,
|
client_model=client_model,
|
||||||
|
|
@ -131,16 +130,15 @@ def chat_completions():
|
||||||
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
|
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
|
||||||
_log_messages(payload)
|
_log_messages(payload)
|
||||||
|
|
||||||
if ctx.backend != 'responses':
|
|
||||||
payload['messages'] = thinking_cache.inject(payload.get('messages', []))
|
payload['messages'] = thinking_cache.inject(payload.get('messages', []))
|
||||||
|
|
||||||
if ctx.backend == 'openai':
|
if ctx.backend == 'openai':
|
||||||
return _handle_openai_backend(ctx, payload, turn, request_started_at)
|
return _handle_openai_backend(ctx, payload, turn)
|
||||||
if ctx.backend == 'responses':
|
if ctx.backend == 'responses':
|
||||||
return _handle_responses_backend(ctx, payload, turn, request_started_at)
|
return _handle_responses_backend(ctx, payload, turn)
|
||||||
if ctx.backend == 'gemini':
|
if ctx.backend == 'gemini':
|
||||||
return _handle_gemini_backend(ctx, payload, turn, request_started_at)
|
return _handle_gemini_backend(ctx, payload, turn)
|
||||||
return _handle_anthropic_backend(ctx, payload, turn, request_started_at)
|
return _handle_anthropic_backend(ctx, payload, turn)
|
||||||
|
|
||||||
|
|
||||||
def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
|
def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], int]:
|
||||||
|
|
@ -161,12 +159,7 @@ def _normalize_chat_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], in
|
||||||
return payload, message_count
|
return payload, message_count
|
||||||
|
|
||||||
|
|
||||||
def _handle_openai_backend(
|
def _handle_openai_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any]):
|
||||||
ctx: RouteContext,
|
|
||||||
payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any],
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走 OpenAI 兼容后端的聊天补全请求。"""
|
"""处理走 OpenAI 兼容后端的聊天补全请求。"""
|
||||||
_dbg(
|
_dbg(
|
||||||
'原始请求字段=' + str(list(payload.keys())) + ' '
|
'原始请求字段=' + str(list(payload.keys())) + ' '
|
||||||
|
|
@ -190,8 +183,8 @@ def _handle_openai_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_openai_stream(ctx, payload, url, headers, turn, request_started_at)
|
return _handle_openai_stream(ctx, payload, url, headers, turn)
|
||||||
return _handle_openai_non_stream(ctx, payload, url, headers, turn, request_started_at)
|
return _handle_openai_non_stream(ctx, payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_openai_non_stream(
|
def _handle_openai_non_stream(
|
||||||
|
|
@ -200,7 +193,6 @@ def _handle_openai_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any],
|
turn: dict[str, Any],
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 OpenAI 兼容后端的非流式返回。"""
|
"""处理 OpenAI 兼容后端的非流式返回。"""
|
||||||
payload['stream'] = False
|
payload['stream'] = False
|
||||||
|
|
@ -216,14 +208,7 @@ def _handle_openai_non_stream(
|
||||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||||
|
|
||||||
data = fix_response(raw)
|
data = fix_response(raw)
|
||||||
return _finalize_chat_response(
|
return _finalize_chat_response(ctx, data, turn=turn, debug_label='修复后响应')
|
||||||
ctx,
|
|
||||||
data,
|
|
||||||
turn=turn,
|
|
||||||
debug_label='修复后响应',
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _handle_openai_stream(
|
def _handle_openai_stream(
|
||||||
|
|
@ -232,7 +217,6 @@ def _handle_openai_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any],
|
turn: dict[str, Any],
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 OpenAI 兼容后端的流式返回。"""
|
"""处理 OpenAI 兼容后端的流式返回。"""
|
||||||
payload['stream'] = True
|
payload['stream'] = True
|
||||||
|
|
@ -275,18 +259,7 @@ def _handle_openai_stream(
|
||||||
'chunk_count': len(client_chunks),
|
'chunk_count': len(client_chunks),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='chat',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
||||||
|
|
@ -327,28 +300,12 @@ def _handle_openai_stream(
|
||||||
'chunk_count': len(client_chunks),
|
'chunk_count': len(client_chunks),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='chat',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
||||||
def _handle_responses_backend(
|
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||||
ctx: RouteContext,
|
|
||||||
payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any] | None,
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走原生 Responses 后端的聊天补全请求。
|
"""处理走原生 Responses 后端的聊天补全请求。
|
||||||
|
|
||||||
当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求,
|
当上游只支持 `/v1/responses` 时,需要先把聊天补全请求转换为 Responses 请求,
|
||||||
|
|
@ -357,7 +314,8 @@ def _handle_responses_backend(
|
||||||
responses_payload = cc_to_responses_request(payload)
|
responses_payload = cc_to_responses_request(payload)
|
||||||
responses_payload['model'] = ctx.upstream_model
|
responses_payload['model'] = ctx.upstream_model
|
||||||
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
|
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
responses_payload = ensure_prompt_cache_key(responses_payload)
|
responses_payload = ensure_responses_cache_control(responses_payload)
|
||||||
|
responses_payload = attach_previous_response_id(responses_payload)
|
||||||
_dbg(
|
_dbg(
|
||||||
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
||||||
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
||||||
|
|
@ -368,8 +326,8 @@ def _handle_responses_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_responses_stream(ctx, responses_payload, url, headers, turn, request_started_at)
|
return _handle_responses_stream(ctx, responses_payload, url, headers, turn)
|
||||||
return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn, request_started_at)
|
return _handle_responses_non_stream(ctx, responses_payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_responses_non_stream(
|
def _handle_responses_non_stream(
|
||||||
|
|
@ -378,7 +336,6 @@ def _handle_responses_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理原生 Responses 后端的非流式返回。"""
|
"""处理原生 Responses 后端的非流式返回。"""
|
||||||
payload['stream'] = False
|
payload['stream'] = False
|
||||||
|
|
@ -393,15 +350,9 @@ def _handle_responses_non_stream(
|
||||||
attach_upstream_response(turn, raw)
|
attach_upstream_response(turn, raw)
|
||||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||||
|
|
||||||
|
remember_response_id(payload, raw)
|
||||||
data = responses_to_cc_response(raw, ctx.client_model)
|
data = responses_to_cc_response(raw, ctx.client_model)
|
||||||
return _finalize_chat_response(
|
return _finalize_chat_response(ctx, data, turn=turn, debug_label='Responses 转回聊天补全后')
|
||||||
ctx,
|
|
||||||
data,
|
|
||||||
turn=turn,
|
|
||||||
debug_label='Responses 转回聊天补全后',
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _handle_responses_stream(
|
def _handle_responses_stream(
|
||||||
|
|
@ -410,7 +361,6 @@ def _handle_responses_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理原生 Responses 后端的流式返回。"""
|
"""处理原生 Responses 后端的流式返回。"""
|
||||||
payload['stream'] = True
|
payload['stream'] = True
|
||||||
|
|
@ -439,6 +389,10 @@ def _handle_responses_stream(
|
||||||
'completion_tokens': extracted_usage.get('output_tokens', 0),
|
'completion_tokens': extracted_usage.get('output_tokens', 0),
|
||||||
'total_tokens': extracted_usage.get('total_tokens', 0),
|
'total_tokens': extracted_usage.get('total_tokens', 0),
|
||||||
}
|
}
|
||||||
|
if event_type == 'response.completed':
|
||||||
|
response_obj = event_data.get('response') if isinstance(event_data, dict) else None
|
||||||
|
if isinstance(response_obj, dict):
|
||||||
|
remember_response_id(payload, response_obj)
|
||||||
if event_count < 10:
|
if event_count < 10:
|
||||||
_dbg(
|
_dbg(
|
||||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||||
|
|
@ -474,28 +428,12 @@ def _handle_responses_stream(
|
||||||
'chunk_count': len(client_chunks),
|
'chunk_count': len(client_chunks),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='chat',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
||||||
def _handle_gemini_backend(
|
def _handle_gemini_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||||
ctx: RouteContext,
|
|
||||||
payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any] | None,
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走 Gemini Contents 后端的聊天补全请求。"""
|
"""处理走 Gemini Contents 后端的聊天补全请求。"""
|
||||||
payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position)
|
payload = inject_instructions_cc(payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
gemini_payload = cc_to_gemini_request(payload)
|
gemini_payload = cc_to_gemini_request(payload)
|
||||||
|
|
@ -509,8 +447,8 @@ def _handle_gemini_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn, request_started_at)
|
return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn)
|
||||||
return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn, request_started_at)
|
return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_gemini_non_stream(
|
def _handle_gemini_non_stream(
|
||||||
|
|
@ -519,7 +457,6 @@ def _handle_gemini_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Gemini 后端的非流式返回。"""
|
"""处理 Gemini 后端的非流式返回。"""
|
||||||
attach_upstream_request(turn, payload, headers)
|
attach_upstream_request(turn, payload, headers)
|
||||||
|
|
@ -534,14 +471,7 @@ def _handle_gemini_non_stream(
|
||||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||||
|
|
||||||
data = gemini_to_cc_response(raw)
|
data = gemini_to_cc_response(raw)
|
||||||
return _finalize_chat_response(
|
return _finalize_chat_response(ctx, data, turn=turn, debug_label='Gemini 转回聊天补全后')
|
||||||
ctx,
|
|
||||||
data,
|
|
||||||
turn=turn,
|
|
||||||
debug_label='Gemini 转回聊天补全后',
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _handle_gemini_stream(
|
def _handle_gemini_stream(
|
||||||
|
|
@ -550,7 +480,6 @@ def _handle_gemini_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Gemini 后端的流式返回。"""
|
"""处理 Gemini 后端的流式返回。"""
|
||||||
converter = GeminiStreamConverter()
|
converter = GeminiStreamConverter()
|
||||||
|
|
@ -613,28 +542,12 @@ def _handle_gemini_stream(
|
||||||
'chunk_count': len(client_chunks),
|
'chunk_count': len(client_chunks),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='chat',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
||||||
def _handle_anthropic_backend(
|
def _handle_anthropic_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||||
ctx: RouteContext,
|
|
||||||
payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any] | None,
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走 Anthropic Messages 后端的聊天补全请求。"""
|
"""处理走 Anthropic Messages 后端的聊天补全请求。"""
|
||||||
payload['model'] = ctx.upstream_model
|
payload['model'] = ctx.upstream_model
|
||||||
anthropic_payload = cc_to_messages_request(payload)
|
anthropic_payload = cc_to_messages_request(payload)
|
||||||
|
|
@ -649,8 +562,8 @@ def _handle_anthropic_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn, request_started_at)
|
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn)
|
||||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn, request_started_at)
|
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_anthropic_non_stream(
|
def _handle_anthropic_non_stream(
|
||||||
|
|
@ -659,7 +572,6 @@ def _handle_anthropic_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Anthropic 后端的非流式返回。"""
|
"""处理 Anthropic 后端的非流式返回。"""
|
||||||
payload['stream'] = False
|
payload['stream'] = False
|
||||||
|
|
@ -675,14 +587,7 @@ def _handle_anthropic_non_stream(
|
||||||
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
_dbg('上游原始响应=' + json.dumps(raw, ensure_ascii=False, default=str)[:1000])
|
||||||
|
|
||||||
data = messages_to_cc_response(raw)
|
data = messages_to_cc_response(raw)
|
||||||
return _finalize_chat_response(
|
return _finalize_chat_response(ctx, data, turn=turn, debug_label='Messages 转回聊天补全后')
|
||||||
ctx,
|
|
||||||
data,
|
|
||||||
turn=turn,
|
|
||||||
debug_label='Messages 转回聊天补全后',
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _handle_anthropic_stream(
|
def _handle_anthropic_stream(
|
||||||
|
|
@ -691,7 +596,6 @@ def _handle_anthropic_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Anthropic 后端的流式返回。
|
"""处理 Anthropic 后端的流式返回。
|
||||||
|
|
||||||
|
|
@ -776,18 +680,7 @@ def _handle_anthropic_stream(
|
||||||
'chunk_count': len(client_chunks),
|
'chunk_count': len(client_chunks),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='chat',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
@ -798,8 +691,6 @@ def _finalize_chat_response(
|
||||||
*,
|
*,
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
debug_label: str,
|
debug_label: str,
|
||||||
request_started_at: float,
|
|
||||||
upstream_url: str,
|
|
||||||
):
|
):
|
||||||
"""统一收尾非流式聊天补全响应。
|
"""统一收尾非流式聊天补全响应。
|
||||||
|
|
||||||
|
|
@ -812,21 +703,9 @@ def _finalize_chat_response(
|
||||||
_dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000])
|
_dbg(debug_label + '=' + json.dumps(data, ensure_ascii=False, default=str)[:1000])
|
||||||
log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens')
|
log_usage('聊天补全', data.get('usage', {}), input_key='prompt_tokens', output_key='completion_tokens')
|
||||||
|
|
||||||
usage = data.get('usage')
|
usage_tracker.record(ctx.client_model, data.get('usage'))
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
|
||||||
usage_tracker.record(ctx.client_model, usage)
|
|
||||||
request_history.record(
|
|
||||||
route='chat',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=upstream_url,
|
|
||||||
usage=usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
attach_client_response(turn, data)
|
attach_client_response(turn, data)
|
||||||
finalize_turn(turn, usage=usage, duration_ms=duration_ms)
|
finalize_turn(turn, usage=data.get('usage'))
|
||||||
|
|
||||||
for choice in data.get('choices', []):
|
for choice in data.get('choices', []):
|
||||||
msg = choice.get('message', {})
|
msg = choice.get('message', {})
|
||||||
|
|
|
||||||
195
routes/common.py
195
routes/common.py
|
|
@ -10,6 +10,8 @@ from dataclasses import dataclass
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
|
|
@ -17,6 +19,10 @@ from utils.http import build_anthropic_headers, build_gemini_headers, build_open
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_RESPONSES_PREV_ID_LOCK = threading.Lock()
|
||||||
|
_RESPONSES_PREV_ID_TTL = 86400
|
||||||
|
_RESPONSES_PREV_IDS: dict[str, tuple[str, float]] = {}
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class RouteContext:
|
class RouteContext:
|
||||||
|
|
@ -196,6 +202,178 @@ def inject_instructions_responses(payload: dict[str, Any], instructions: str, po
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_responses_cache_control(payload: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""为 Responses 请求补齐自动 prompt caching 开关。
|
||||||
|
|
||||||
|
一些支持 `/v1/responses` 的上游会参考顶层 `cache_control` 来自动放置缓存断点。
|
||||||
|
Cursor 侧通常不会主动携带这个字段,因此这里在缺失时补一个保守的默认值,
|
||||||
|
同时允许调用方通过 body_modifications 或显式字段自行覆盖/关闭。
|
||||||
|
"""
|
||||||
|
if not isinstance(payload, dict):
|
||||||
|
return payload
|
||||||
|
cache_control = payload.get('cache_control')
|
||||||
|
if isinstance(cache_control, dict) and cache_control.get('type'):
|
||||||
|
return payload
|
||||||
|
payload['cache_control'] = {'type': 'ephemeral'}
|
||||||
|
logger.info('已为 Responses 请求自动启用 cache_control=ephemeral')
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
def attach_previous_response_id(payload: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""为多轮 Responses 请求补齐上一轮 response_id。
|
||||||
|
|
||||||
|
某些上游在 `/v1/responses` 多轮场景下,只有沿用 `previous_response_id` 才能稳定复用
|
||||||
|
上一轮的服务端响应链与缓存。Cursor 通常会回传完整历史,但不会主动带这个字段,
|
||||||
|
因此代理需要基于稳定对话键做一次轻量补齐。
|
||||||
|
"""
|
||||||
|
if not isinstance(payload, dict) or payload.get('previous_response_id'):
|
||||||
|
return payload
|
||||||
|
key = _responses_prev_id_key(payload)
|
||||||
|
if not key:
|
||||||
|
return payload
|
||||||
|
previous_response_id = _get_previous_response_id(key)
|
||||||
|
if not previous_response_id:
|
||||||
|
return payload
|
||||||
|
payload['previous_response_id'] = previous_response_id
|
||||||
|
logger.info('已为 Responses 请求补齐 previous_response_id')
|
||||||
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
def remember_response_id(payload: dict[str, Any], response_data: dict[str, Any]) -> None:
|
||||||
|
"""记住当前对话最近一次上游 Responses response_id。"""
|
||||||
|
if not isinstance(payload, dict) or not isinstance(response_data, dict):
|
||||||
|
return
|
||||||
|
response_id = response_data.get('id')
|
||||||
|
if not isinstance(response_id, str) or not response_id.strip():
|
||||||
|
return
|
||||||
|
key = _responses_prev_id_key(payload)
|
||||||
|
if not key:
|
||||||
|
return
|
||||||
|
with _RESPONSES_PREV_ID_LOCK:
|
||||||
|
_RESPONSES_PREV_IDS[key] = (response_id.strip(), time.time())
|
||||||
|
_cleanup_previous_response_ids_locked()
|
||||||
|
|
||||||
|
|
||||||
|
def _responses_prev_id_key(payload: dict[str, Any]) -> str:
|
||||||
|
"""基于 Responses 请求的“对话根信息”生成稳定键。
|
||||||
|
|
||||||
|
这里故意不直接使用完整 `input` 作为键,因为多轮对话每轮都会追加历史;
|
||||||
|
如果把整段历史都纳入哈希,键会在每一轮变化,导致无法稳定取回上一轮的
|
||||||
|
`previous_response_id`。当前策略只取 instructions 与首轮 user/assistant 根消息。
|
||||||
|
"""
|
||||||
|
instructions = payload.get('instructions') or ''
|
||||||
|
input_data = payload.get('input', [])
|
||||||
|
if isinstance(input_data, str):
|
||||||
|
seed_input = input_data
|
||||||
|
elif isinstance(input_data, list):
|
||||||
|
seed_input = _responses_root_seed_from_items(input_data)
|
||||||
|
else:
|
||||||
|
seed_input = json.dumps(input_data, ensure_ascii=False, default=str)
|
||||||
|
raw = instructions + '|' + seed_input
|
||||||
|
if not raw.strip('|'):
|
||||||
|
return ''
|
||||||
|
return hashlib.sha256(raw.encode('utf-8')).hexdigest()[:24]
|
||||||
|
|
||||||
|
|
||||||
|
def _responses_root_seed_from_items(items: list[Any]) -> str:
|
||||||
|
"""从 Responses `input` 中提取足够稳定的对话根片段。
|
||||||
|
|
||||||
|
目标不是完整还原会话,而是构造一个在同一段对话内尽量恒定、跨轮次可复用的
|
||||||
|
seed。这里沿用项目里 conversation seed 的思路:优先取第一条 user 与第一条
|
||||||
|
assistant;如果 assistant 还不存在,则只用第一条 user。
|
||||||
|
"""
|
||||||
|
first_user = None
|
||||||
|
first_assistant = None
|
||||||
|
for item in items:
|
||||||
|
if isinstance(item, str):
|
||||||
|
if first_user is None:
|
||||||
|
first_user = {'role': 'user', 'content': item}
|
||||||
|
continue
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
item_type = item.get('type', '')
|
||||||
|
role = item.get('role', '')
|
||||||
|
if item_type == 'message' and role in ('user', 'assistant'):
|
||||||
|
normalized = {
|
||||||
|
'role': role,
|
||||||
|
'content': _responses_normalize_content(item.get('content', [])),
|
||||||
|
}
|
||||||
|
if role == 'user' and first_user is None:
|
||||||
|
first_user = normalized
|
||||||
|
elif role == 'assistant' and first_assistant is None:
|
||||||
|
first_assistant = normalized
|
||||||
|
elif role in ('user', 'assistant') and not item_type:
|
||||||
|
normalized = {
|
||||||
|
'role': role,
|
||||||
|
'content': _responses_normalize_content(item.get('content', '')),
|
||||||
|
}
|
||||||
|
if role == 'user' and first_user is None:
|
||||||
|
first_user = normalized
|
||||||
|
elif role == 'assistant' and first_assistant is None:
|
||||||
|
first_assistant = normalized
|
||||||
|
if first_user is not None and first_assistant is not None:
|
||||||
|
break
|
||||||
|
parts = []
|
||||||
|
if first_user is not None:
|
||||||
|
parts.append(first_user)
|
||||||
|
if first_assistant is not None:
|
||||||
|
parts.append(first_assistant)
|
||||||
|
return json.dumps(parts, ensure_ascii=False, separators=(',', ':'))
|
||||||
|
|
||||||
|
|
||||||
|
def _responses_normalize_content(content: Any) -> str:
|
||||||
|
"""把 Responses 各种 content 形态折叠成稳定文本。
|
||||||
|
|
||||||
|
这里的目标不是保真展示,而是降低结构差异对 key 计算的影响;只抽取会影响
|
||||||
|
会话根语义的文本型内容,忽略无关字段,避免同一轮请求因格式细节不同而得到
|
||||||
|
不同的 previous_response_id 键。
|
||||||
|
"""
|
||||||
|
if isinstance(content, str):
|
||||||
|
return content.strip()
|
||||||
|
if not isinstance(content, list):
|
||||||
|
return str(content).strip() if content is not None else ''
|
||||||
|
texts: list[str] = []
|
||||||
|
for part in content:
|
||||||
|
if isinstance(part, str):
|
||||||
|
texts.append(part)
|
||||||
|
continue
|
||||||
|
if not isinstance(part, dict):
|
||||||
|
continue
|
||||||
|
if part.get('type') in ('input_text', 'output_text', 'text'):
|
||||||
|
texts.append(part.get('text', ''))
|
||||||
|
elif part.get('type') == 'summary_text':
|
||||||
|
texts.append(part.get('text', ''))
|
||||||
|
return '\n'.join(texts).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_previous_response_id(key: str) -> str:
|
||||||
|
"""按稳定键读取上一轮 response_id,并在过期时顺手清理。"""
|
||||||
|
with _RESPONSES_PREV_ID_LOCK:
|
||||||
|
entry = _RESPONSES_PREV_IDS.get(key)
|
||||||
|
if not entry:
|
||||||
|
return ''
|
||||||
|
response_id, ts = entry
|
||||||
|
if (time.time() - ts) >= _RESPONSES_PREV_ID_TTL:
|
||||||
|
_RESPONSES_PREV_IDS.pop(key, None)
|
||||||
|
return ''
|
||||||
|
return response_id
|
||||||
|
|
||||||
|
|
||||||
|
def _cleanup_previous_response_ids_locked() -> None:
|
||||||
|
"""清理过期的 previous_response_id 缓存项。
|
||||||
|
|
||||||
|
这张表只用于短期多轮续接;一旦对话长时间不活跃,就不再需要继续保留,
|
||||||
|
以免常驻进程运行过久后累计过多失效状态。
|
||||||
|
"""
|
||||||
|
now = time.time()
|
||||||
|
expired = [
|
||||||
|
key for key, (_, ts) in _RESPONSES_PREV_IDS.items()
|
||||||
|
if (now - ts) >= _RESPONSES_PREV_ID_TTL
|
||||||
|
]
|
||||||
|
for key in expired:
|
||||||
|
_RESPONSES_PREV_IDS.pop(key, None)
|
||||||
|
|
||||||
|
|
||||||
def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]:
|
def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, position: str = 'prepend') -> dict[str, Any]:
|
||||||
"""向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。
|
"""向 Anthropic Messages 请求注入自定义指令(写入 system 字段)。
|
||||||
|
|
||||||
|
|
@ -219,23 +397,6 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po
|
||||||
# ─── Body / Header 修改 ──────────────────────────
|
# ─── Body / Header 修改 ──────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def ensure_prompt_cache_key(payload: dict[str, Any]) -> dict[str, Any]:
|
|
||||||
"""确保 Responses 请求携带 prompt_cache_key 以启用上游提示缓存。
|
|
||||||
|
|
||||||
上游(如 sub2api)对原生 /v1/responses 请求不会自动生成 prompt_cache_key,
|
|
||||||
导致提示缓存无法命中。这里根据模型名 + instructions 生成稳定的 cache key,
|
|
||||||
使得相同模型和系统提示的对话可以共享缓存前缀。
|
|
||||||
"""
|
|
||||||
if payload.get('prompt_cache_key'):
|
|
||||||
return payload
|
|
||||||
|
|
||||||
model = payload.get('model', '')
|
|
||||||
instructions = payload.get('instructions', '')
|
|
||||||
seed = f'{model}|{instructions}'
|
|
||||||
payload['prompt_cache_key'] = hashlib.sha256(seed.encode()).hexdigest()[:32]
|
|
||||||
return payload
|
|
||||||
|
|
||||||
|
|
||||||
def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]:
|
def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]:
|
||||||
"""对转发请求体应用字段级修改。
|
"""对转发请求体应用字段级修改。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@ Anthropic Messages API 透传。当 Cursor 直接发送 Anthropic 格式请求
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from time import perf_counter
|
|
||||||
|
|
||||||
import requests as req_lib
|
import requests as req_lib
|
||||||
from flask import Blueprint, request, jsonify
|
from flask import Blueprint, request, jsonify
|
||||||
|
|
@ -16,7 +15,6 @@ import settings
|
||||||
from config import Config
|
from config import Config
|
||||||
from routes.common import apply_body_modifications, apply_header_modifications, inject_instructions_anthropic
|
from routes.common import apply_body_modifications, apply_header_modifications, inject_instructions_anthropic
|
||||||
from utils.http import build_anthropic_headers, forward_request, sse_response
|
from utils.http import build_anthropic_headers, forward_request, sse_response
|
||||||
from utils.request_history import request_history
|
|
||||||
from utils.request_logger import (
|
from utils.request_logger import (
|
||||||
append_client_event,
|
append_client_event,
|
||||||
append_upstream_event,
|
append_upstream_event,
|
||||||
|
|
@ -42,7 +40,6 @@ def messages_passthrough():
|
||||||
model = payload.get('model', 'unknown')
|
model = payload.get('model', 'unknown')
|
||||||
is_stream = payload.get('stream', False)
|
is_stream = payload.get('stream', False)
|
||||||
|
|
||||||
request_started_at = perf_counter()
|
|
||||||
logger.info(f'[透传] model={model} 流式={is_stream}')
|
logger.info(f'[透传] model={model} 流式={is_stream}')
|
||||||
|
|
||||||
mapping = settings.resolve_model(model)
|
mapping = settings.resolve_model(model)
|
||||||
|
|
@ -81,18 +78,7 @@ def messages_passthrough():
|
||||||
attach_upstream_response(turn, data)
|
attach_upstream_response(turn, data)
|
||||||
_inject_thinking(data)
|
_inject_thinking(data)
|
||||||
attach_client_response(turn, data)
|
attach_client_response(turn, data)
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn)
|
||||||
request_history.record(
|
|
||||||
route='messages',
|
|
||||||
client_model=model,
|
|
||||||
actual_model=model,
|
|
||||||
backend='anthropic',
|
|
||||||
upstream_url=url,
|
|
||||||
usage=data.get('usage'),
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=data.get('usage'), duration_ms=duration_ms)
|
|
||||||
return jsonify(data)
|
return jsonify(data)
|
||||||
|
|
||||||
def generate():
|
def generate():
|
||||||
|
|
@ -122,18 +108,7 @@ def messages_passthrough():
|
||||||
'type': 'messages.stream.summary',
|
'type': 'messages.stream.summary',
|
||||||
'event_count': len(client_events),
|
'event_count': len(client_events),
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn)
|
||||||
request_history.record(
|
|
||||||
route='messages',
|
|
||||||
client_model=model,
|
|
||||||
actual_model=model,
|
|
||||||
backend='anthropic',
|
|
||||||
upstream_url=url,
|
|
||||||
usage=None,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, duration_ms=duration_ms)
|
|
||||||
except req_lib.RequestException as e:
|
except req_lib.RequestException as e:
|
||||||
logger.error(f'请求上游失败: {e}')
|
logger.error(f'请求上游失败: {e}')
|
||||||
attach_error(turn, {'stage': 'request_exception', 'message': str(e)})
|
attach_error(turn, {'stage': 'request_exception', 'message': str(e)})
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,6 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from time import perf_counter
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
|
|
@ -28,7 +27,9 @@ from routes.common import (
|
||||||
build_openai_target,
|
build_openai_target,
|
||||||
build_responses_target,
|
build_responses_target,
|
||||||
build_route_context,
|
build_route_context,
|
||||||
ensure_prompt_cache_key,
|
ensure_responses_cache_control,
|
||||||
|
attach_previous_response_id,
|
||||||
|
remember_response_id,
|
||||||
inject_instructions_anthropic,
|
inject_instructions_anthropic,
|
||||||
inject_instructions_cc,
|
inject_instructions_cc,
|
||||||
inject_instructions_responses,
|
inject_instructions_responses,
|
||||||
|
|
@ -45,7 +46,6 @@ from utils.http import (
|
||||||
iter_responses_sse,
|
iter_responses_sse,
|
||||||
sse_response,
|
sse_response,
|
||||||
)
|
)
|
||||||
from utils.request_history import request_history
|
|
||||||
from utils.request_logger import (
|
from utils.request_logger import (
|
||||||
append_client_event,
|
append_client_event,
|
||||||
append_upstream_event,
|
append_upstream_event,
|
||||||
|
|
@ -80,7 +80,6 @@ def responses_endpoint():
|
||||||
client_model = payload.get('model', 'unknown')
|
client_model = payload.get('model', 'unknown')
|
||||||
is_stream = payload.get('stream', False)
|
is_stream = payload.get('stream', False)
|
||||||
|
|
||||||
request_started_at = perf_counter()
|
|
||||||
ctx = build_route_context(client_model, is_stream)
|
ctx = build_route_context(client_model, is_stream)
|
||||||
turn = start_turn(
|
turn = start_turn(
|
||||||
route='responses',
|
route='responses',
|
||||||
|
|
@ -97,12 +96,12 @@ def responses_endpoint():
|
||||||
cc_payload = _build_cc_payload(payload, ctx)
|
cc_payload = _build_cc_payload(payload, ctx)
|
||||||
|
|
||||||
if ctx.backend == 'openai':
|
if ctx.backend == 'openai':
|
||||||
return _handle_openai_backend(ctx, cc_payload, turn, request_started_at)
|
return _handle_openai_backend(ctx, cc_payload, turn)
|
||||||
if ctx.backend == 'responses':
|
if ctx.backend == 'responses':
|
||||||
return _handle_responses_backend(ctx, payload, turn, request_started_at)
|
return _handle_responses_backend(ctx, payload, turn)
|
||||||
if ctx.backend == 'gemini':
|
if ctx.backend == 'gemini':
|
||||||
return _handle_gemini_backend(ctx, cc_payload, turn, request_started_at)
|
return _handle_gemini_backend(ctx, cc_payload, turn)
|
||||||
return _handle_anthropic_backend(ctx, cc_payload, turn, request_started_at)
|
return _handle_anthropic_backend(ctx, cc_payload, turn)
|
||||||
|
|
||||||
|
|
||||||
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
|
def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, Any]:
|
||||||
|
|
@ -122,12 +121,7 @@ def _build_cc_payload(payload: dict[str, Any], ctx: RouteContext) -> dict[str, A
|
||||||
return cc_payload
|
return cc_payload
|
||||||
|
|
||||||
|
|
||||||
def _handle_openai_backend(
|
def _handle_openai_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any]):
|
||||||
ctx: RouteContext,
|
|
||||||
cc_payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any],
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
|
"""处理走 OpenAI 兼容后端的 Responses 请求。"""
|
||||||
cc_payload = normalize_request(cc_payload)
|
cc_payload = normalize_request(cc_payload)
|
||||||
_dbg(
|
_dbg(
|
||||||
|
|
@ -140,8 +134,8 @@ def _handle_openai_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_openai_stream(ctx, cc_payload, url, headers, turn, request_started_at)
|
return _handle_openai_stream(ctx, cc_payload, url, headers, turn)
|
||||||
return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn, request_started_at)
|
return _handle_openai_non_stream(ctx, cc_payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_openai_non_stream(
|
def _handle_openai_non_stream(
|
||||||
|
|
@ -150,7 +144,6 @@ def _handle_openai_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any],
|
turn: dict[str, Any],
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
|
"""处理 OpenAI 兼容后端的非流式 Responses 返回。"""
|
||||||
cc_payload['stream'] = False
|
cc_payload['stream'] = False
|
||||||
|
|
@ -172,9 +165,6 @@ def _handle_openai_non_stream(
|
||||||
client_model=ctx.client_model,
|
client_model=ctx.client_model,
|
||||||
turn=turn,
|
turn=turn,
|
||||||
debug_label='转换为 Responses 后',
|
debug_label='转换为 Responses 后',
|
||||||
ctx=ctx,
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -184,7 +174,6 @@ def _handle_openai_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
|
"""处理 OpenAI 兼容后端的流式 Responses 返回。"""
|
||||||
cc_payload['stream'] = True
|
cc_payload['stream'] = True
|
||||||
|
|
@ -225,18 +214,7 @@ def _handle_openai_stream(
|
||||||
'model': ctx.client_model,
|
'model': ctx.client_model,
|
||||||
'event_count': len(client_events),
|
'event_count': len(client_events),
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn)
|
||||||
request_history.record(
|
|
||||||
route='responses',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=None,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, duration_ms=duration_ms)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
append_upstream_event(turn, {'type': 'openai_chunk', 'data': chunk})
|
||||||
|
|
@ -263,12 +241,7 @@ def _handle_openai_stream(
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
||||||
def _handle_responses_backend(
|
def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||||
ctx: RouteContext,
|
|
||||||
payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any] | None,
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走原生 Responses 后端的请求。
|
"""处理走原生 Responses 后端的请求。
|
||||||
|
|
||||||
当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议,
|
当中转站本身就只支持 `/v1/responses` 时,不需要再绕到聊天补全中间协议,
|
||||||
|
|
@ -277,14 +250,15 @@ def _handle_responses_backend(
|
||||||
payload = dict(payload)
|
payload = dict(payload)
|
||||||
payload['model'] = ctx.upstream_model
|
payload['model'] = ctx.upstream_model
|
||||||
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
payload = ensure_prompt_cache_key(payload)
|
payload = ensure_responses_cache_control(payload)
|
||||||
|
payload = attach_previous_response_id(payload)
|
||||||
url, headers = build_responses_target(ctx)
|
url, headers = build_responses_target(ctx)
|
||||||
payload = apply_body_modifications(payload, ctx.body_modifications)
|
payload = apply_body_modifications(payload, ctx.body_modifications)
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_responses_stream(ctx, payload, url, headers, turn, request_started_at)
|
return _handle_responses_stream(ctx, payload, url, headers, turn)
|
||||||
return _handle_responses_non_stream(ctx, payload, url, headers, turn, request_started_at)
|
return _handle_responses_non_stream(ctx, payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_responses_non_stream(
|
def _handle_responses_non_stream(
|
||||||
|
|
@ -293,7 +267,6 @@ def _handle_responses_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理原生 Responses 后端的非流式返回。"""
|
"""处理原生 Responses 后端的非流式返回。"""
|
||||||
payload['stream'] = False
|
payload['stream'] = False
|
||||||
|
|
@ -306,15 +279,13 @@ def _handle_responses_non_stream(
|
||||||
|
|
||||||
response_data = resp.json()
|
response_data = resp.json()
|
||||||
attach_upstream_response(turn, response_data)
|
attach_upstream_response(turn, response_data)
|
||||||
|
remember_response_id(payload, response_data)
|
||||||
response_data['model'] = ctx.client_model
|
response_data['model'] = ctx.client_model
|
||||||
return _finalize_responses_response(
|
return _finalize_responses_response(
|
||||||
response_data,
|
response_data,
|
||||||
client_model=ctx.client_model,
|
client_model=ctx.client_model,
|
||||||
turn=turn,
|
turn=turn,
|
||||||
debug_label='原生 Responses 返回后',
|
debug_label='原生 Responses 返回后',
|
||||||
ctx=ctx,
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -324,7 +295,6 @@ def _handle_responses_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理原生 Responses 后端的流式返回。"""
|
"""处理原生 Responses 后端的流式返回。"""
|
||||||
payload['stream'] = True
|
payload['stream'] = True
|
||||||
|
|
@ -349,6 +319,10 @@ def _handle_responses_stream(
|
||||||
extracted_usage = _extract_responses_usage(event_data)
|
extracted_usage = _extract_responses_usage(event_data)
|
||||||
if extracted_usage:
|
if extracted_usage:
|
||||||
last_usage = extracted_usage
|
last_usage = extracted_usage
|
||||||
|
if event_type == 'response.completed':
|
||||||
|
response_obj = event_data.get('response') if isinstance(event_data, dict) else None
|
||||||
|
if isinstance(response_obj, dict):
|
||||||
|
remember_response_id(payload, response_obj)
|
||||||
if event_count < 10:
|
if event_count < 10:
|
||||||
_dbg(
|
_dbg(
|
||||||
f'上游事件#{event_count} 类型={event_type} 数据='
|
f'上游事件#{event_count} 类型={event_type} 数据='
|
||||||
|
|
@ -379,18 +353,7 @@ def _handle_responses_stream(
|
||||||
'event_count': len(client_events),
|
'event_count': len(client_events),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='responses',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
@ -414,12 +377,7 @@ def _extract_responses_usage(event_data: dict[str, Any]) -> dict[str, Any] | Non
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _handle_gemini_backend(
|
def _handle_gemini_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||||
ctx: RouteContext,
|
|
||||||
cc_payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any] | None,
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走 Gemini Contents 后端的 Responses 请求。"""
|
"""处理走 Gemini Contents 后端的 Responses 请求。"""
|
||||||
gemini_payload = cc_to_gemini_request(cc_payload)
|
gemini_payload = cc_to_gemini_request(cc_payload)
|
||||||
_dbg(
|
_dbg(
|
||||||
|
|
@ -432,8 +390,8 @@ def _handle_gemini_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn, request_started_at)
|
return _handle_gemini_stream(ctx, gemini_payload, url, headers, turn)
|
||||||
return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn, request_started_at)
|
return _handle_gemini_non_stream(ctx, gemini_payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_gemini_non_stream(
|
def _handle_gemini_non_stream(
|
||||||
|
|
@ -442,7 +400,6 @@ def _handle_gemini_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Gemini 后端的非流式 Responses 返回。"""
|
"""处理 Gemini 后端的非流式 Responses 返回。"""
|
||||||
attach_upstream_request(turn, payload, headers)
|
attach_upstream_request(turn, payload, headers)
|
||||||
|
|
@ -463,9 +420,6 @@ def _handle_gemini_non_stream(
|
||||||
client_model=ctx.client_model,
|
client_model=ctx.client_model,
|
||||||
turn=turn,
|
turn=turn,
|
||||||
debug_label='Gemini 转回 Responses 后',
|
debug_label='Gemini 转回 Responses 后',
|
||||||
ctx=ctx,
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -475,7 +429,6 @@ def _handle_gemini_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Gemini 后端的流式 Responses 返回。"""
|
"""处理 Gemini 后端的流式 Responses 返回。"""
|
||||||
converter = ResponsesStreamConverter(model=ctx.client_model)
|
converter = ResponsesStreamConverter(model=ctx.client_model)
|
||||||
|
|
@ -542,28 +495,12 @@ def _handle_gemini_stream(
|
||||||
'event_count': len(client_events),
|
'event_count': len(client_events),
|
||||||
'usage': last_usage,
|
'usage': last_usage,
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn, usage=last_usage)
|
||||||
request_history.record(
|
|
||||||
route='responses',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=last_usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, usage=last_usage, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
||||||
def _handle_anthropic_backend(
|
def _handle_anthropic_backend(ctx: RouteContext, cc_payload: dict[str, Any], turn: dict[str, Any] | None):
|
||||||
ctx: RouteContext,
|
|
||||||
cc_payload: dict[str, Any],
|
|
||||||
turn: dict[str, Any] | None,
|
|
||||||
request_started_at: float,
|
|
||||||
):
|
|
||||||
"""处理走 Anthropic 后端的 Responses 请求。"""
|
"""处理走 Anthropic 后端的 Responses 请求。"""
|
||||||
anthropic_payload = cc_to_messages_request(cc_payload)
|
anthropic_payload = cc_to_messages_request(cc_payload)
|
||||||
_dbg(
|
_dbg(
|
||||||
|
|
@ -576,8 +513,8 @@ def _handle_anthropic_backend(
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
||||||
if ctx.is_stream:
|
if ctx.is_stream:
|
||||||
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn, request_started_at)
|
return _handle_anthropic_stream(ctx, anthropic_payload, url, headers, turn)
|
||||||
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn, request_started_at)
|
return _handle_anthropic_non_stream(ctx, anthropic_payload, url, headers, turn)
|
||||||
|
|
||||||
|
|
||||||
def _handle_anthropic_non_stream(
|
def _handle_anthropic_non_stream(
|
||||||
|
|
@ -586,7 +523,6 @@ def _handle_anthropic_non_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Anthropic 后端的非流式 Responses 返回。"""
|
"""处理 Anthropic 后端的非流式 Responses 返回。"""
|
||||||
anthropic_payload['stream'] = False
|
anthropic_payload['stream'] = False
|
||||||
|
|
@ -608,9 +544,6 @@ def _handle_anthropic_non_stream(
|
||||||
client_model=ctx.client_model,
|
client_model=ctx.client_model,
|
||||||
turn=turn,
|
turn=turn,
|
||||||
debug_label='Messages 转回 Responses 后',
|
debug_label='Messages 转回 Responses 后',
|
||||||
ctx=ctx,
|
|
||||||
request_started_at=request_started_at,
|
|
||||||
upstream_url=url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -620,7 +553,6 @@ def _handle_anthropic_stream(
|
||||||
url: str,
|
url: str,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
turn: dict[str, Any] | None,
|
turn: dict[str, Any] | None,
|
||||||
request_started_at: float,
|
|
||||||
):
|
):
|
||||||
"""处理 Anthropic 后端的流式 Responses 返回。
|
"""处理 Anthropic 后端的流式 Responses 返回。
|
||||||
|
|
||||||
|
|
@ -676,18 +608,7 @@ def _handle_anthropic_stream(
|
||||||
'model': ctx.client_model,
|
'model': ctx.client_model,
|
||||||
'event_count': len(client_events),
|
'event_count': len(client_events),
|
||||||
})
|
})
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
finalize_turn(turn)
|
||||||
request_history.record(
|
|
||||||
route='responses',
|
|
||||||
client_model=ctx.client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=url,
|
|
||||||
usage=None,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
finalize_turn(turn, duration_ms=duration_ms)
|
|
||||||
|
|
||||||
return sse_response(generate())
|
return sse_response(generate())
|
||||||
|
|
||||||
|
|
@ -698,9 +619,6 @@ def _finalize_responses_response(
|
||||||
client_model: str,
|
client_model: str,
|
||||||
turn: dict[str, Any],
|
turn: dict[str, Any],
|
||||||
debug_label: str,
|
debug_label: str,
|
||||||
ctx: RouteContext,
|
|
||||||
request_started_at: float,
|
|
||||||
upstream_url: str,
|
|
||||||
):
|
):
|
||||||
"""统一收尾非流式 Responses 响应。
|
"""统一收尾非流式 Responses 响应。
|
||||||
|
|
||||||
|
|
@ -711,26 +629,32 @@ def _finalize_responses_response(
|
||||||
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
|
_dbg(debug_label + '=' + json.dumps(response_data, ensure_ascii=False, default=str)[:1000])
|
||||||
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
|
log_usage('响应生成', response_data.get('usage', {}), input_key='input_tokens', output_key='output_tokens')
|
||||||
|
|
||||||
usage = response_data.get('usage')
|
|
||||||
duration_ms = int((perf_counter() - request_started_at) * 1000)
|
|
||||||
usage_tracker.record(
|
usage_tracker.record(
|
||||||
client_model,
|
client_model,
|
||||||
usage,
|
response_data.get('usage'),
|
||||||
input_key='input_tokens',
|
input_key='input_tokens',
|
||||||
output_key='output_tokens',
|
output_key='output_tokens',
|
||||||
)
|
)
|
||||||
request_history.record(
|
|
||||||
route='responses',
|
|
||||||
client_model=client_model,
|
|
||||||
actual_model=ctx.upstream_model,
|
|
||||||
backend=ctx.backend,
|
|
||||||
upstream_url=upstream_url,
|
|
||||||
usage=usage,
|
|
||||||
duration_ms=duration_ms,
|
|
||||||
started_at=(turn or {}).get('started_at'),
|
|
||||||
)
|
|
||||||
|
|
||||||
attach_client_response(turn, response_data)
|
attach_client_response(turn, response_data)
|
||||||
finalize_turn(turn, usage=usage, duration_ms=duration_ms)
|
finalize_turn(turn, usage=response_data.get('usage'))
|
||||||
|
|
||||||
|
output_items = response_data.get('output', [])
|
||||||
|
if isinstance(output_items, list):
|
||||||
|
for item in output_items:
|
||||||
|
if not isinstance(item, dict) or item.get('type') != 'reasoning':
|
||||||
|
continue
|
||||||
|
summary = item.get('summary', [])
|
||||||
|
if not isinstance(summary, list):
|
||||||
|
continue
|
||||||
|
reasoning_text = ''.join(
|
||||||
|
part.get('text', '')
|
||||||
|
for part in summary
|
||||||
|
if isinstance(part, dict) and part.get('type') == 'summary_text'
|
||||||
|
)
|
||||||
|
if reasoning_text:
|
||||||
|
cc_messages = responses_to_cc(request.get_json(silent=True, force=True) or {}).get('messages', [])
|
||||||
|
thinking_cache.store_from_response(cc_messages, reasoning_text)
|
||||||
|
break
|
||||||
|
|
||||||
return jsonify(response_data)
|
return jsonify(response_data)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI','PingFang SC','Micr
|
||||||
input,select,button,textarea{font-family:inherit;font-size:inherit}
|
input,select,button,textarea{font-family:inherit;font-size:inherit}
|
||||||
a{color:var(--primary);text-decoration:none}
|
a{color:var(--primary);text-decoration:none}
|
||||||
code{background:var(--input);padding:1px 5px;border-radius:4px;font-size:12px;font-family:Consolas,Monaco,monospace}
|
code{background:var(--input);padding:1px 5px;border-radius:4px;font-size:12px;font-family:Consolas,Monaco,monospace}
|
||||||
.container{width:min(100%,1680px);margin:0 auto;padding:0 20px}
|
.container{max-width:960px;margin:0 auto;padding:0 20px}
|
||||||
|
|
||||||
#login{display:flex;align-items:center;justify-content:center;min-height:100vh;background:linear-gradient(145deg,#0b1120 0%,#121a2e 50%,#0b1120 100%)}
|
#login{display:flex;align-items:center;justify-content:center;min-height:100vh;background:linear-gradient(145deg,#0b1120 0%,#121a2e 50%,#0b1120 100%)}
|
||||||
.login-card{background:var(--card);border:1px solid var(--border);border-radius:16px;padding:40px;width:380px;box-shadow:0 20px 60px rgba(0,0,0,.4)}
|
.login-card{background:var(--card);border:1px solid var(--border);border-radius:16px;padding:40px;width:380px;box-shadow:0 20px 60px rgba(0,0,0,.4)}
|
||||||
|
|
@ -83,11 +83,3 @@ main{padding:28px 0 60px}
|
||||||
.toast-ok{background:#065f46;color:#a7f3d0}
|
.toast-ok{background:#065f46;color:#a7f3d0}
|
||||||
.toast-err{background:#7f1d1d;color:#fca5a5}
|
.toast-err{background:#7f1d1d;color:#fca5a5}
|
||||||
@keyframes slideIn{from{transform:translateX(100px);opacity:0}to{transform:none;opacity:1}}
|
@keyframes slideIn{from{transform:translateX(100px);opacity:0}to{transform:none;opacity:1}}
|
||||||
|
|
||||||
.request-logs-wrap{overflow:auto}
|
|
||||||
.request-logs-table{min-width:1100px}
|
|
||||||
.request-logs-table td{vertical-align:top}
|
|
||||||
.log-url{max-width:320px;word-break:break-all;color:var(--muted)}
|
|
||||||
.log-status{display:inline-flex;align-items:center;padding:2px 8px;border-radius:999px;font-size:12px;font-weight:600}
|
|
||||||
.status-ok{background:rgba(34,197,94,.15);color:var(--green)}
|
|
||||||
.status-error{background:rgba(239,68,68,.15);color:var(--red)}
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>API 2 Cursor - 管理面板</title>
|
<title>API 2 Cursor - 管理面板</title>
|
||||||
<link rel="stylesheet" href="/static/admin.css?v=20260505-2">
|
<link rel="stylesheet" href="/static/admin.css">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|
||||||
|
|
@ -90,16 +90,6 @@
|
||||||
</div>
|
</div>
|
||||||
<div id="statsContent"><div class="empty">加载中…</div></div>
|
<div id="statsContent"><div class="empty">加载中…</div></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 请求日志 -->
|
|
||||||
<div class="card">
|
|
||||||
<div class="card-header">
|
|
||||||
<h2>最近 500 条请求日志</h2>
|
|
||||||
<button class="btn btn-ghost btn-sm" onclick="loadRequestLogs()">刷新</button>
|
|
||||||
</div>
|
|
||||||
<div class="hint" style="margin-top:-12px;margin-bottom:16px">显示请求时间、请求模型、实际上游模型、上游 URL、Token 统计、耗时和状态。</div>
|
|
||||||
<div id="requestLogsContent"><div class="empty">加载中…</div></div>
|
|
||||||
</div>
|
|
||||||
</main>
|
</main>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -190,6 +180,6 @@
|
||||||
|
|
||||||
<div class="toast-area" id="toasts"></div>
|
<div class="toast-area" id="toasts"></div>
|
||||||
|
|
||||||
<script src="/static/admin.js?v=20260505-2"></script>
|
<script src="/static/admin.js"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,6 @@ async function loadDashboard() {
|
||||||
await loadMappings();
|
await loadMappings();
|
||||||
checkHealth();
|
checkHealth();
|
||||||
loadStats();
|
loadStats();
|
||||||
loadRequestLogs();
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
toast('加载设置失败: ' + e.message, false);
|
toast('加载设置失败: ' + e.message, false);
|
||||||
}
|
}
|
||||||
|
|
@ -105,58 +104,6 @@ async function loadStats() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function loadRequestLogs() {
|
|
||||||
const el = document.getElementById('requestLogsContent');
|
|
||||||
try {
|
|
||||||
const data = await api('/api/admin/request-logs');
|
|
||||||
const items = data.items || [];
|
|
||||||
if (!items.length) {
|
|
||||||
el.innerHTML = '<div class="empty">暂无请求日志</div>';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let html = '<div class="request-logs-wrap"><table class="stats-table request-logs-table"><thead><tr><th>请求时间</th><th>请求模型</th><th>实际模型</th><th>上游 URL</th><th>Tokens</th><th>耗时</th><th>状态</th></tr></thead><tbody>';
|
|
||||||
for (const item of items) {
|
|
||||||
const usage = item.usage || {};
|
|
||||||
let tokens = '输 ' + fmtNum(usage.input_tokens) + ' / 出 ' + fmtNum(usage.output_tokens) + ' / 总 ' + fmtNum(usage.total_tokens);
|
|
||||||
if (Number(usage.cache_read_tokens || 0) > 0 || Number(usage.cache_write_tokens || 0) > 0) {
|
|
||||||
tokens += ' / 缓存读 ' + fmtNum(usage.cache_read_tokens) + ' / 缓存写 ' + fmtNum(usage.cache_write_tokens);
|
|
||||||
}
|
|
||||||
const statusClass = item.status === 'ok' ? 'status-ok' : 'status-error';
|
|
||||||
const statusText = item.status === 'ok' ? '成功' : '异常';
|
|
||||||
html += '<tr>'
|
|
||||||
+ '<td>' + esc(fmtTime(item.requested_at)) + '</td>'
|
|
||||||
+ '<td>' + esc(item.requested_model || '-') + '</td>'
|
|
||||||
+ '<td>' + esc(item.actual_model || '-') + '</td>'
|
|
||||||
+ '<td class="log-url" title="' + esc(item.upstream_url || '') + '">' + esc(item.upstream_url || '-') + '</td>'
|
|
||||||
+ '<td>' + esc(tokens) + '</td>'
|
|
||||||
+ '<td>' + fmtNum(item.duration_ms) + ' ms</td>'
|
|
||||||
+ '<td><span class="log-status ' + statusClass + '">' + statusText + '</span></td>'
|
|
||||||
+ '</tr>';
|
|
||||||
}
|
|
||||||
html += '</tbody></table></div>';
|
|
||||||
el.innerHTML = html;
|
|
||||||
} catch (e) {
|
|
||||||
el.innerHTML = '<div class="empty">加载请求日志失败</div>';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function fmtNum(value) {
|
|
||||||
return Number(value || 0).toLocaleString();
|
|
||||||
}
|
|
||||||
|
|
||||||
function fmtTime(value) {
|
|
||||||
if (!value) return '-';
|
|
||||||
const d = new Date(value);
|
|
||||||
if (Number.isNaN(d.getTime())) return String(value);
|
|
||||||
const pad = n => String(n).padStart(2, '0');
|
|
||||||
return d.getFullYear() + '-'
|
|
||||||
+ pad(d.getMonth() + 1) + '-'
|
|
||||||
+ pad(d.getDate()) + ' '
|
|
||||||
+ pad(d.getHours()) + ':'
|
|
||||||
+ pad(d.getMinutes()) + ':'
|
|
||||||
+ pad(d.getSeconds());
|
|
||||||
}
|
|
||||||
|
|
||||||
async function checkHealth() {
|
async function checkHealth() {
|
||||||
try {
|
try {
|
||||||
const r = await fetch(API + '/health');
|
const r = await fetch(API + '/health');
|
||||||
|
|
|
||||||
|
|
@ -1,125 +0,0 @@
|
||||||
"""请求历史记录。
|
|
||||||
|
|
||||||
为管理后台提供最近请求查询能力,默认仅保留最近 500 条,
|
|
||||||
重启后会从磁盘恢复最近一次快照。
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import threading
|
|
||||||
from collections import deque
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from settings import DATA_DIR
|
|
||||||
|
|
||||||
_MAX_RECORDS = 500
|
|
||||||
_FILE_PATH = os.path.join(DATA_DIR, 'request_logs.json')
|
|
||||||
|
|
||||||
|
|
||||||
def _now_iso() -> str:
|
|
||||||
return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
|
|
||||||
|
|
||||||
|
|
||||||
def _safe_int(value: Any) -> int:
|
|
||||||
try:
|
|
||||||
return int(value or 0)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_usage(usage: dict[str, Any] | None) -> dict[str, int]:
|
|
||||||
usage = usage or {}
|
|
||||||
input_tokens = _safe_int(
|
|
||||||
usage.get('prompt_tokens', usage.get('input_tokens', 0))
|
|
||||||
)
|
|
||||||
output_tokens = _safe_int(
|
|
||||||
usage.get('completion_tokens', usage.get('output_tokens', 0))
|
|
||||||
)
|
|
||||||
total_tokens = _safe_int(usage.get('total_tokens', input_tokens + output_tokens))
|
|
||||||
|
|
||||||
prompt_details = usage.get('prompt_tokens_details')
|
|
||||||
input_details = usage.get('input_tokens_details')
|
|
||||||
|
|
||||||
cache_read_tokens = _safe_int(usage.get('cache_read_input_tokens', 0))
|
|
||||||
cache_write_tokens = _safe_int(usage.get('cache_creation_input_tokens', 0))
|
|
||||||
|
|
||||||
if isinstance(prompt_details, dict):
|
|
||||||
cache_read_tokens = max(cache_read_tokens, _safe_int(prompt_details.get('cached_tokens', 0)))
|
|
||||||
if isinstance(input_details, dict):
|
|
||||||
cache_read_tokens = max(cache_read_tokens, _safe_int(input_details.get('cached_tokens', 0)))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'input_tokens': input_tokens,
|
|
||||||
'output_tokens': output_tokens,
|
|
||||||
'total_tokens': total_tokens,
|
|
||||||
'cache_read_tokens': cache_read_tokens,
|
|
||||||
'cache_write_tokens': cache_write_tokens,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RequestHistory:
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self._lock = threading.Lock()
|
|
||||||
self._records: deque[dict[str, Any]] = deque(maxlen=_MAX_RECORDS)
|
|
||||||
self._load()
|
|
||||||
|
|
||||||
def record(
|
|
||||||
self,
|
|
||||||
*,
|
|
||||||
route: str,
|
|
||||||
client_model: str,
|
|
||||||
actual_model: str,
|
|
||||||
backend: str,
|
|
||||||
upstream_url: str,
|
|
||||||
usage: dict[str, Any] | None,
|
|
||||||
duration_ms: int,
|
|
||||||
started_at: str | None = None,
|
|
||||||
status: str = 'ok',
|
|
||||||
error_message: str = '',
|
|
||||||
) -> None:
|
|
||||||
record = {
|
|
||||||
'requested_at': started_at or _now_iso(),
|
|
||||||
'route': route,
|
|
||||||
'requested_model': client_model or '',
|
|
||||||
'actual_model': actual_model or '',
|
|
||||||
'backend': backend or '',
|
|
||||||
'upstream_url': upstream_url or '',
|
|
||||||
'duration_ms': max(_safe_int(duration_ms), 0),
|
|
||||||
'status': status or 'ok',
|
|
||||||
'error_message': error_message or '',
|
|
||||||
'usage': _normalize_usage(usage),
|
|
||||||
'recorded_at': _now_iso(),
|
|
||||||
}
|
|
||||||
with self._lock:
|
|
||||||
self._records.appendleft(record)
|
|
||||||
self._persist_locked()
|
|
||||||
|
|
||||||
def get_recent(self, limit: int = _MAX_RECORDS) -> list[dict[str, Any]]:
|
|
||||||
size = max(1, min(_safe_int(limit), _MAX_RECORDS))
|
|
||||||
with self._lock:
|
|
||||||
return list(self._records)[:size]
|
|
||||||
|
|
||||||
def _load(self) -> None:
|
|
||||||
if not os.path.exists(_FILE_PATH):
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
with open(_FILE_PATH, 'r', encoding='utf-8') as f:
|
|
||||||
data = json.load(f)
|
|
||||||
if not isinstance(data, list):
|
|
||||||
return
|
|
||||||
for item in data[:_MAX_RECORDS]:
|
|
||||||
if isinstance(item, dict):
|
|
||||||
self._records.append(item)
|
|
||||||
except (OSError, json.JSONDecodeError):
|
|
||||||
self._records.clear()
|
|
||||||
|
|
||||||
def _persist_locked(self) -> None:
|
|
||||||
os.makedirs(DATA_DIR, exist_ok=True)
|
|
||||||
with open(_FILE_PATH, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(list(self._records), f, ensure_ascii=False, indent=2)
|
|
||||||
|
|
||||||
|
|
||||||
request_history = RequestHistory()
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue