支持 Gemini 格式,优化 debug 日志
This commit is contained in:
parent
e726f11bad
commit
4de6db13f9
16 changed files with 1783 additions and 55 deletions
|
|
@ -44,6 +44,16 @@ def build_anthropic_headers(api_key: str) -> dict[str, str]:
|
|||
return headers
|
||||
|
||||
|
||||
def build_gemini_headers(api_key: str) -> dict[str, str]:
    """Build the HTTP headers for a Gemini request.

    The authentication header is picked from the key's prefix: a key that
    starts with ``AIza`` is sent as ``x-goog-api-key``; any other value is
    sent as an ``Authorization: Bearer`` token.
    """
    if api_key.startswith('AIza'):
        auth_header, auth_value = 'x-goog-api-key', api_key
    else:
        auth_header, auth_value = 'Authorization', f'Bearer {api_key}'
    return {'Content-Type': 'application/json', auth_header: auth_value}
|
||||
|
||||
|
||||
# ─── 响应构建 ──────────────────────────────────────
|
||||
|
||||
|
||||
|
|
@ -125,6 +135,26 @@ def iter_responses_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
|
|||
yield from _iter_event_sse(response)
|
||||
|
||||
|
||||
def iter_gemini_sse(response) -> Iterator[dict[str, Any]]:
    """Parse a Gemini SSE stream and yield each decoded ``data:`` payload.

    Gemini streaming uses ``?alt=sse``; every ``data:`` line carries one
    complete JSON response. Blank lines, lines that are not ``data:``
    fields, empty payloads and payloads that are not valid JSON are skipped.

    Args:
        response: Object exposing ``iter_lines()``. Lines may be ``bytes``
            (decoded as UTF-8 with replacement) or already-decoded ``str``.

    Yields:
        The value parsed from each ``data:`` line.
    """
    for line in response.iter_lines():
        if not line:
            continue
        # Some HTTP clients hand back text instead of bytes; accept both.
        if isinstance(line, (bytes, bytearray)):
            text = line.decode('utf-8', errors='replace')
        else:
            text = line
        if not text.startswith('data:'):
            continue
        payload = text[5:].strip()
        if not payload:
            continue
        try:
            parsed = json.loads(payload)
        except json.JSONDecodeError:
            continue
        # Yield outside the try so an exception raised by the consumer is
        # never mistaken for a malformed line and silently dropped.
        yield parsed
|
||||
|
||||
|
||||
def _iter_event_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
|
||||
"""解析带 event/data 的通用 SSE 流。
|
||||
|
||||
|
|
|
|||
331
utils/request_logger.py
Normal file
331
utils/request_logger.py
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
"""对话级文件日志
|
||||
|
||||
将同一段多轮对话聚合到一个 JSON 文件中,而不是按单次请求散落成多个文件。
|
||||
仅在 DEBUG 开启时记录。
|
||||
日志目录: data/conversations/YYYY-MM-DD/{conversation_id}.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from config import Config
|
||||
from settings import DATA_DIR
|
||||
from utils.http import gen_id
|
||||
|
||||
# Logger for this module.
logger = logging.getLogger(__name__)

# Directory under DATA_DIR that holds the conversation log files.
_LOG_DIR = os.path.join(DATA_DIR, 'conversations')
# Guard for the lock table below, followed by the table itself
# (one lock per conversation id).
_LOCKS_GUARD = threading.Lock()
_LOCKS: dict[str, threading.Lock] = {}
|
||||
|
||||
|
||||
def start_turn(
    *,
    route: str,
    client_model: str,
    backend: str,
    stream: bool,
    client_request: dict[str, Any],
    request_headers: dict[str, Any] | None = None,
    target_url: str = '',
    upstream_model: str = '',
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any] | None:
    """Create the context dict for a new conversation turn.

    Args:
        route: Route name; also used when deriving the conversation id.
        client_model: Model name as requested by the client.
        backend: Backend identifier stored on the turn.
        stream: Whether the request is streamed.
        client_request: Client payload; a copy is stored on the turn.
        request_headers: Client headers; stored after sanitizing.
        target_url: Upstream URL stored on the turn.
        upstream_model: Upstream model name stored on the turn.
        metadata: Extra data; a copy is stored on the turn.

    Returns:
        The new turn dict, or ``None`` when ``Config.DEBUG`` is falsy.
    """
    if not Config.DEBUG:
        return None

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so isoformat() keeps the naive "...Z" format.
    from datetime import timezone

    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
    conversation_id = get_conversation_id(route=route, payload=client_request)
    turn_id = gen_id('turn_')
    return {
        'conversation_id': conversation_id,
        'turn_id': turn_id,
        'route': route,
        'client_model': client_model,
        'backend': backend,
        'stream': stream,
        'target_url': target_url,
        'upstream_model': upstream_model,
        'started_at': now,
        'updated_at': now,
        'request_headers': sanitize_headers(request_headers or {}),
        'client_request': deep_copy_jsonable(client_request),
        'metadata': deep_copy_jsonable(metadata or {}),
        'upstream_request': None,
        'upstream_response': None,
        'client_response': None,
        'stream_trace': {
            'upstream_events': [],
            'client_events': [],
            'summary': {},
        },
        'error': None,
    }
|
||||
|
||||
|
||||
def get_conversation_id(*, route: str, payload: dict[str, Any]) -> str:
    """Derive a conversation id for *payload*.

    An explicit id found in the payload wins (after being made safe for use
    in an id). Otherwise the id is ``conv_`` plus the first 24 hex digits of
    the SHA-256 of the route-specific seed.
    """
    explicit_id = _pick_explicit_conversation_id(payload)
    if not explicit_id:
        seed_bytes = _conversation_seed(route, payload).encode('utf-8')
        return 'conv_' + hashlib.sha256(seed_bytes).hexdigest()[:24]
    return _safe_id(explicit_id)
|
||||
|
||||
|
||||
def attach_upstream_request(turn: dict[str, Any] | None, payload: dict[str, Any], headers: dict[str, Any] | None = None) -> None:
    """Record the request that is finally sent upstream.

    Stores the sanitized headers and a copy of *payload* under
    ``turn['upstream_request']`` and refreshes the turn's timestamp.
    Does nothing when *turn* is ``None``.
    """
    if turn is not None:
        masked_headers = sanitize_headers(headers or {})
        body_copy = deep_copy_jsonable(payload)
        turn['upstream_request'] = {'headers': masked_headers, 'body': body_copy}
        _touch(turn)
|
||||
|
||||
|
||||
def attach_upstream_response(turn: dict[str, Any] | None, response_data: Any) -> None:
    """Record the complete non-streaming upstream response.

    A copy of *response_data* is stored under ``turn['upstream_response']``.
    Does nothing when *turn* is ``None``.
    """
    if turn is not None:
        turn['upstream_response'] = deep_copy_jsonable(response_data)
        _touch(turn)
|
||||
|
||||
|
||||
def attach_client_response(turn: dict[str, Any] | None, response_data: Any) -> None:
    """Record the complete response finally returned to the client.

    A copy of *response_data* is stored under ``turn['client_response']``.
    Does nothing when *turn* is ``None``.
    """
    if turn is not None:
        turn['client_response'] = deep_copy_jsonable(response_data)
        _touch(turn)
|
||||
|
||||
|
||||
def append_upstream_event(turn: dict[str, Any] | None, event: Any) -> None:
    """Record one streaming event received from upstream.

    A copy of *event* is appended to
    ``turn['stream_trace']['upstream_events']``. Does nothing when *turn*
    is ``None``.
    """
    if turn is not None:
        upstream_events = turn['stream_trace']['upstream_events']
        upstream_events.append(deep_copy_jsonable(event))
        _touch(turn)
|
||||
|
||||
|
||||
def append_client_event(turn: dict[str, Any] | None, event: Any) -> None:
    """Record one streaming event returned to the client.

    A copy of *event* is appended to
    ``turn['stream_trace']['client_events']``. Does nothing when *turn*
    is ``None``.
    """
    if turn is not None:
        client_events = turn['stream_trace']['client_events']
        client_events.append(deep_copy_jsonable(event))
        _touch(turn)
|
||||
|
||||
|
||||
def set_stream_summary(turn: dict[str, Any] | None, summary: dict[str, Any]) -> None:
    """Record the stream summary (e.g. accumulated text, event count, usage).

    A copy of *summary* replaces ``turn['stream_trace']['summary']``.
    Does nothing when *turn* is ``None``.
    """
    if turn is not None:
        trace = turn['stream_trace']
        trace['summary'] = deep_copy_jsonable(summary)
        _touch(turn)
|
||||
|
||||
|
||||
def attach_error(turn: dict[str, Any] | None, error: Any) -> None:
    """Record error information on the turn.

    A copy of *error* is stored under ``turn['error']``. Does nothing when
    *turn* is ``None``.
    """
    if turn is not None:
        turn['error'] = deep_copy_jsonable(error)
        _touch(turn)
|
||||
|
||||
|
||||
def finalize_turn(
    turn: dict[str, Any] | None,
    *,
    usage: dict[str, Any] | None = None,
    duration_ms: int = 0,
) -> None:
    """Append or update the turn in its conversation log file.

    Stamps ``updated_at`` and ``duration_ms`` (and ``usage`` when given) on
    the turn, then hands a copy of it to a daemon thread that performs the
    file write. Does nothing when *turn* is ``None`` or ``Config.DEBUG`` is
    falsy.

    Args:
        turn: Turn dict created by ``start_turn``, or ``None``.
        usage: Optional usage data; a copy is stored under ``turn['usage']``.
        duration_ms: Duration stored under ``turn['duration_ms']``.
    """
    if turn is None or not Config.DEBUG:
        return

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so isoformat() keeps the naive "...Z" format.
    from datetime import timezone

    turn['updated_at'] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
    turn['duration_ms'] = duration_ms
    if usage is not None:
        turn['usage'] = deep_copy_jsonable(usage)

    # The writer gets its own copy so later changes to `turn` by the caller
    # cannot alter what is being serialized.
    snapshot = deep_copy_jsonable(turn)
    threading.Thread(target=_write_turn, args=(snapshot,), daemon=True).start()
|
||||
|
||||
|
||||
def sanitize_headers(headers: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *headers* with credential-bearing values masked.

    Header names are compared case-insensitively. Besides the API-key style
    headers, cookies and proxy credentials are masked too, because the
    result is written to log files on disk. All other headers are copied
    unchanged, with their original keys.
    """
    sensitive_names = {
        'authorization',
        'proxy-authorization',
        'x-api-key',
        'api-key',
        'x-goog-api-key',
        'cookie',
        'set-cookie',
    }
    return {
        key: _mask_secret(value) if str(key).lower() in sensitive_names else value
        for key, value in headers.items()
    }
|
||||
|
||||
|
||||
def deep_copy_jsonable(value: Any) -> Any:
    """Best-effort deep copy of JSON-compatible data.

    Tries, in order: ``copy.deepcopy``; a JSON round trip in which values
    that cannot be serialized are converted with ``str``; and finally
    ``str(value)`` when both of those fail.
    """
    try:
        return copy.deepcopy(value)
    except Exception:
        pass  # fall through to the JSON round trip

    try:
        encoded = json.dumps(value, ensure_ascii=False, default=str)
        return json.loads(encoded)
    except Exception:
        return str(value)
|
||||
|
||||
|
||||
def _write_turn(turn: dict[str, Any]) -> None:
    """Merge *turn* into its conversation's JSON log file.

    The file is ``<_LOG_DIR>/<date>/<conversation_id>.json`` where the date
    is the first ten characters of ``turn['started_at']``. A stored turn with
    the same ``turn_id`` is replaced; otherwise the turn is appended.
    Failures are logged as warnings and not raised.

    The document is serialized to a sibling ``.tmp`` file and then moved into
    place with ``os.replace``, so a failure part-way through writing leaves
    the previous log intact. An existing file that cannot be parsed, or that
    does not hold a JSON object, is moved aside with a ``.corrupt`` suffix
    and a fresh document is started, so one bad file does not block every
    later turn of that conversation.
    """
    conversation_id = turn['conversation_id']
    with _get_lock(conversation_id):
        try:
            day_dir = os.path.join(_LOG_DIR, turn['started_at'][:10])
            os.makedirs(day_dir, exist_ok=True)
            filepath = os.path.join(day_dir, f'{conversation_id}.json')

            doc = None
            if os.path.exists(filepath):
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        doc = json.load(f)
                except ValueError as e:
                    # JSONDecodeError and UnicodeDecodeError both derive
                    # from ValueError.
                    logger.warning('解析对话日志失败: %s', e)
                if not isinstance(doc, dict):
                    # Keep the unreadable file for inspection and start over.
                    os.replace(filepath, filepath + '.corrupt')
                    doc = None

            if doc is None:
                doc = {
                    'conversation_id': conversation_id,
                    'route': turn.get('route', ''),
                    'created_at': turn['started_at'],
                    'updated_at': turn['updated_at'],
                    'turns': [],
                }

            turns = doc.get('turns')
            if not isinstance(turns, list):
                turns = doc['turns'] = []

            turn_id = turn.get('turn_id')
            for index, existing in enumerate(turns):
                if isinstance(existing, dict) and existing.get('turn_id') == turn_id:
                    turns[index] = turn
                    break
            else:
                turns.append(turn)

            doc['updated_at'] = turn['updated_at']
            doc['last_client_model'] = turn.get('client_model', '')
            doc['last_backend'] = turn.get('backend', '')
            doc['turn_count'] = len(turns)

            # A leftover .tmp from an earlier failure is simply overwritten.
            tmp_path = filepath + '.tmp'
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(doc, f, ensure_ascii=False, indent=2, default=str)
            os.replace(tmp_path, filepath)
        except (OSError, TypeError, ValueError) as e:
            # TypeError/ValueError cover data json.dump cannot serialize.
            logger.warning('写入对话日志失败: %s', e)
|
||||
|
||||
|
||||
def _get_lock(conversation_id: str) -> threading.Lock:
    """Return the lock for *conversation_id*, creating it on first use.

    Lookup and creation both happen while holding ``_LOCKS_GUARD``.
    """
    with _LOCKS_GUARD:
        lock = _LOCKS.get(conversation_id)
        if lock is None:
            lock = _LOCKS[conversation_id] = threading.Lock()
        return lock
|
||||
|
||||
|
||||
def _touch(turn: dict[str, Any] | None) -> None:
|
||||
if turn is None:
|
||||
return
|
||||
turn['updated_at'] = datetime.utcnow().isoformat() + 'Z'
|
||||
|
||||
|
||||
def _pick_explicit_conversation_id(payload: dict[str, Any]) -> str:
|
||||
candidates = (
|
||||
payload.get('conversation_id'),
|
||||
payload.get('conversationId'),
|
||||
payload.get('session_id'),
|
||||
payload.get('sessionId'),
|
||||
payload.get('chat_id'),
|
||||
payload.get('chatId'),
|
||||
payload.get('metadata', {}).get('conversation_id') if isinstance(payload.get('metadata'), dict) else None,
|
||||
payload.get('metadata', {}).get('session_id') if isinstance(payload.get('metadata'), dict) else None,
|
||||
)
|
||||
for item in candidates:
|
||||
if isinstance(item, str) and item.strip():
|
||||
return item.strip()
|
||||
return ''
|
||||
|
||||
|
||||
def _conversation_seed(route: str, payload: dict[str, Any]) -> str:
|
||||
if route == 'chat':
|
||||
messages = payload.get('messages', [])
|
||||
return 'chat|' + _normalize_messages_seed(messages)
|
||||
|
||||
if route == 'responses':
|
||||
instructions = payload.get('instructions') or ''
|
||||
input_data = payload.get('input', [])
|
||||
if isinstance(input_data, str):
|
||||
seed_input = input_data
|
||||
else:
|
||||
seed_input = json.dumps(input_data, ensure_ascii=False, default=str)
|
||||
return 'responses|' + instructions + '|' + seed_input
|
||||
|
||||
if route == 'messages':
|
||||
messages = payload.get('messages', [])
|
||||
system = payload.get('system', '')
|
||||
return 'messages|' + str(system) + '|' + json.dumps(messages, ensure_ascii=False, default=str)
|
||||
|
||||
return route + '|' + json.dumps(payload, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def _normalize_messages_seed(messages: Any) -> str:
|
||||
if not isinstance(messages, list):
|
||||
return ''
|
||||
normalized: list[dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
normalized.append({
|
||||
'role': msg.get('role', ''),
|
||||
'content': _normalize_content(msg.get('content')),
|
||||
'tool_call_id': msg.get('tool_call_id', ''),
|
||||
'tool_calls': [
|
||||
{
|
||||
'id': tc.get('id', ''),
|
||||
'name': (tc.get('function') or {}).get('name', ''),
|
||||
}
|
||||
for tc in msg.get('tool_calls', [])
|
||||
if isinstance(tc, dict)
|
||||
],
|
||||
})
|
||||
return json.dumps(normalized, ensure_ascii=False, separators=(',', ':'))
|
||||
|
||||
|
||||
def _normalize_content(content: Any) -> Any:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
result = []
|
||||
for item in content:
|
||||
if isinstance(item, dict):
|
||||
result.append(item)
|
||||
else:
|
||||
result.append(str(item))
|
||||
return result
|
||||
if content is None:
|
||||
return ''
|
||||
return str(content)
|
||||
|
||||
|
||||
def _safe_id(raw: str) -> str:
|
||||
cleaned = ''.join(ch if ch.isalnum() or ch in ('-', '_', '.') else '_' for ch in raw.strip())
|
||||
return cleaned[:120] or gen_id('conv_')
|
||||
|
||||
|
||||
def _mask_secret(value: Any) -> str:
|
||||
text = str(value or '')
|
||||
if len(text) <= 8:
|
||||
return '***'
|
||||
return text[:4] + '***' + text[-4:]
|
||||
150
utils/thinking_cache.py
Normal file
150
utils/thinking_cache.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
"""轻量 Thinking 缓存
|
||||
|
||||
纯内存缓存,在多轮对话中保存和恢复 thinking/reasoning 内容。
|
||||
解决 Cursor 不会把 thinking 内容回传给 API 的问题,
|
||||
某些模型(如推理模型)在缺少历史 thinking 时表现会下降。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)

# A complete <think>...</think> section (non-greedy, may span lines).
_THINK_RE = re.compile(r'<think>.*?</think>', re.DOTALL)
# An opening <think> that is never closed, through the end of the text.
_UNCLOSED_THINK_RE = re.compile(r'<think>.*$', re.DOTALL)
# Characters removed from tool-call ids before hashing.
_TOOL_ID_RE = re.compile(r'[^a-zA-Z0-9_-]')
_TTL = 86400  # entry lifetime in seconds (24 hours)


class ThinkingCache:
    """In-memory cache of thinking/reasoning text; entries live ``_TTL`` seconds (24 hours).

    Keys are ``<session id>:<message hash>``. The session id is derived from
    the first user and first assistant message of the conversation; the
    message hash from an assistant message's text and tool-call ids.
    """

    def __init__(self):
        # key -> (reasoning text, time.time() at which it was stored)
        self._store: dict[str, tuple[str, float]] = {}

    def inject(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Fill in missing ``reasoning_content`` on assistant messages from the cache.

        Messages are modified in place and the same list is returned.
        Nothing is injected when no session id can be derived, when a message
        already has ``reasoning_content``, or when the cached entry is older
        than ``_TTL``.
        """
        sid = self._session_id(messages)
        if not sid:
            return messages

        now = time.time()
        for msg in messages:
            if msg.get('role') != 'assistant':
                continue
            if msg.get('reasoning_content'):
                continue
            key = sid + ':' + self._message_hash(msg)
            entry = self._store.get(key)
            if entry and (now - entry[1]) < _TTL:
                msg['reasoning_content'] = entry[0]
                logger.debug('已从缓存注入 thinking (%d 字符)', len(entry[0]))

        return messages

    def store_from_response(
        self,
        messages: list[dict[str, Any]],
        reasoning_content: str,
    ) -> None:
        """Cache the thinking text taken from a response.

        Does nothing when *reasoning_content* is empty or no session id can
        be derived from *messages*.
        """
        if not reasoning_content:
            return
        sid = self._session_id(messages)
        if not sid:
            return

        # NOTE(review): the key is the hash of an empty assistant message, so
        # every call within a session writes the same slot and only an
        # assistant message with empty text and no tool calls will match it
        # in inject() - confirm this is intended.
        fake_msg: dict[str, Any] = {'role': 'assistant', 'content': '', 'tool_calls': []}
        key = sid + ':' + self._message_hash(fake_msg)
        self._store[key] = (reasoning_content, time.time())
        self._cleanup()

    def store_assistant_thinking(
        self,
        messages: list[dict[str, Any]],
        assistant_msg: dict[str, Any],
    ) -> None:
        """Cache the ``reasoning_content`` of a complete assistant message.

        Does nothing when the message has no ``reasoning_content`` or no
        session id can be derived from *messages*.
        """
        rc = assistant_msg.get('reasoning_content', '')
        if not rc:
            return
        sid = self._session_id(messages)
        if not sid:
            return
        key = sid + ':' + self._message_hash(assistant_msg)
        self._store[key] = (rc, time.time())
        self._cleanup()

    def _session_id(self, messages: list[dict[str, Any]]) -> str:
        """Hash the first user and first assistant text into a 16-hex-digit id.

        ``system`` and ``developer`` messages are ignored. Returns ``''``
        unless both a non-empty user text and a non-empty assistant text
        are found.
        """
        first_user = ''
        first_assistant = ''
        for msg in messages:
            role = msg.get('role', '')
            if role in ('system', 'developer'):
                continue
            if role == 'user' and not first_user:
                first_user = self._normalize_content(msg.get('content', ''))
            elif role == 'assistant' and not first_assistant:
                first_assistant = self._normalize_content(msg.get('content', ''))
            if first_user and first_assistant:
                break

        if not first_user or not first_assistant:
            return ''

        raw = first_user + '|' + first_assistant
        return hashlib.sha256(raw.encode()).hexdigest()[:16]

    def _message_hash(self, msg: dict[str, Any]) -> str:
        """Hash a message's normalized text and its sorted, normalized tool-call ids."""
        content = self._normalize_content(msg.get('content', ''))
        # "tool_calls" and a call's "id" may be present but null; treat both
        # as absent instead of raising.
        tool_ids = sorted(
            self._normalize_tool_id(str(tc.get('id') or ''))
            for tc in (msg.get('tool_calls') or [])
            if isinstance(tc, dict)
        )
        raw = json.dumps({'c': content, 't': tool_ids}, ensure_ascii=False)
        return hashlib.sha256(raw.encode()).hexdigest()[:16]

    @staticmethod
    def _normalize_content(content: Any) -> str:
        """Flatten *content* to text with ``<think>`` sections removed.

        For a list, ``type == 'text'`` parts and plain strings are joined
        with newlines and other parts are ignored. Closed and unclosed
        ``<think>`` sections are stripped, then surrounding whitespace.
        """
        if isinstance(content, list):
            parts = []
            for p in content:
                if isinstance(p, dict) and p.get('type') == 'text':
                    # A null "text" must not break the join below.
                    parts.append(str(p.get('text') or ''))
                elif isinstance(p, str):
                    parts.append(p)
            text = '\n'.join(parts)
        elif isinstance(content, str):
            text = content
        else:
            text = str(content) if content else ''
        text = _THINK_RE.sub('', text)
        text = _UNCLOSED_THINK_RE.sub('', text)
        return text.strip()

    @staticmethod
    def _normalize_tool_id(tid: str) -> str:
        """Strip every character other than ASCII letters, digits, ``_`` and ``-``."""
        return _TOOL_ID_RE.sub('', tid)

    def _cleanup(self) -> None:
        """Drop expired entries; skipped while the cache holds fewer than 100.

        Once that size is reached, every call scans the whole cache.
        """
        if len(self._store) < 100:
            return
        now = time.time()
        # Iterate over a snapshot: the dict may be modified by another
        # caller while this scan runs.
        for key, (_, stored_at) in list(self._store.items()):
            if (now - stored_at) >= _TTL:
                self._store.pop(key, None)


# Shared module-level instance.
thinking_cache = ThinkingCache()
|
||||
72
utils/usage_tracker.py
Normal file
72
utils/usage_tracker.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""用量统计 — 内存聚合
|
||||
|
||||
按模型名聚合请求数、token 用量等统计数据。
|
||||
重启后重置,适合轻量监控场景。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
|
||||
class _ModelStats:
|
||||
__slots__ = ('request_count', 'input_tokens', 'output_tokens', 'first_seen', 'last_seen')
|
||||
|
||||
def __init__(self):
|
||||
self.request_count = 0
|
||||
self.input_tokens = 0
|
||||
self.output_tokens = 0
|
||||
self.first_seen = time.time()
|
||||
self.last_seen = time.time()
|
||||
|
||||
|
||||
class UsageTracker:
    """In-memory usage counters aggregated per model name.

    Every method takes the instance lock before touching the counters.
    """

    def __init__(self):
        self._start_time = time.time()
        self._stats: dict[str, _ModelStats] = {}
        self._lock = threading.Lock()

    def record(
        self,
        model: str,
        usage: dict[str, Any] | None = None,
        *,
        input_key: str = 'prompt_tokens',
        output_key: str = 'completion_tokens',
    ) -> None:
        """Record the usage of one request.

        Args:
            model: Model name the request is counted under.
            usage: Optional usage dict; when empty or ``None`` only the
                request count and last-seen time are updated.
            input_key: Key in *usage* holding the input token count.
            output_key: Key in *usage* holding the output token count.
        """
        with self._lock:
            stats = self._stats.get(model)
            if stats is None:
                stats = self._stats[model] = _ModelStats()
            stats.request_count += 1
            stats.last_seen = time.time()
            if not usage:
                return
            # Missing or null counts are treated as zero.
            stats.input_tokens += usage.get(input_key, 0) or 0
            stats.output_tokens += usage.get(output_key, 0) or 0

    def get_stats(self) -> dict[str, Any]:
        """Return the aggregated counters of all models plus the uptime in seconds."""
        with self._lock:
            per_model = {
                model: {
                    'request_count': s.request_count,
                    'input_tokens': s.input_tokens,
                    'output_tokens': s.output_tokens,
                    'total_tokens': s.input_tokens + s.output_tokens,
                }
                for model, s in self._stats.items()
            }
            uptime = int(time.time() - self._start_time)
            return {'uptime_seconds': uptime, 'models': per_model}

    def reset(self) -> None:
        """Clear all counters and restart the uptime clock."""
        with self._lock:
            self._stats.clear()
            self._start_time = time.time()


# Shared module-level instance.
usage_tracker = UsageTracker()
|
||||
Loading…
Add table
Add a link
Reference in a new issue