支持 Gemini 格式，优化 debug 日志

2026-03-14 09:27:15 +08:00 · 2026-03-14 09:27:15 +08:00 · 4de6db13f9
commit 4de6db13f9
parent e726f11bad
16 changed files with 1783 additions and 55 deletions
--- a/utils/http.py
+++ b/utils/http.py
@ -44,6 +44,16 @@ def build_anthropic_headers(api_key: str) -> dict[str, str]:
    return headers


+def build_gemini_headers(api_key: str) -> dict[str, str]:
+    """Build the HTTP headers for a Gemini request.
+
+    Keys that start with ``AIza`` are sent in the ``x-goog-api-key`` header;
+    any other key is sent as an ``Authorization: Bearer`` token.
+    """
+    uses_google_key = api_key.startswith('AIza')
+    auth_name = 'x-goog-api-key' if uses_google_key else 'Authorization'
+    auth_value = api_key if uses_google_key else f'Bearer {api_key}'
+    return {'Content-Type': 'application/json', auth_name: auth_value}
+
+
 # ─── 响应构建 ──────────────────────────────────────


@ -125,6 +135,26 @@ def iter_responses_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
    yield from _iter_event_sse(response)


+def iter_gemini_sse(response) -> Iterator[dict[str, Any]]:
+    """Parse a Gemini SSE stream and yield each decoded JSON payload.
+
+    With ``?alt=sse`` every ``data:`` line carries one complete JSON
+    ``GenerateContentResponse``. Blank lines, lines without the ``data:``
+    prefix, empty payloads and payloads that are not valid JSON are skipped.
+
+    Args:
+        response: Object exposing ``iter_lines()``. Lines may be ``bytes``
+            (decoded as UTF-8 with replacement characters) or ``str``.
+
+    Yields:
+        The parsed JSON value of each ``data:`` line.
+    """
+    for line in response.iter_lines():
+        if not line:
+            continue
+        # Some HTTP clients hand back already-decoded str lines; calling
+        # .decode() on those would raise AttributeError.
+        if isinstance(line, (bytes, bytearray)):
+            decoded = line.decode('utf-8', errors='replace')
+        else:
+            decoded = str(line)
+        if not decoded.startswith('data:'):
+            continue
+        data_str = decoded[5:].strip()
+        if not data_str:
+            continue
+        try:
+            yield json.loads(data_str)
+        except json.JSONDecodeError:
+            # Best effort: silently skip malformed or non-JSON payloads.
+            continue
+
+
 def _iter_event_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
    """解析带 event/data 的通用 SSE 流。

--- a/utils/request_logger.py
+++ b/utils/request_logger.py
@ -0,0 +1,331 @@
+"""对话级文件日志
+
+将同一段多轮对话聚合到一个 JSON 文件中，而不是按单次请求散落成多个文件。
+仅在 DEBUG 开启时记录。
+日志目录: data/conversations/YYYY-MM-DD/{conversation_id}.json
+"""
+
+from __future__ import annotations
+
+import copy
+import hashlib
+import json
+import logging
+import os
+import threading
+from datetime import datetime, timezone
+from typing import Any
+
+from config import Config
+from settings import DATA_DIR
+from utils.http import gen_id
+
+# Module-level logger; used to report failures while writing log files.
+logger = logging.getLogger(__name__)
+
+# Root directory (under DATA_DIR) that holds the per-day conversation folders.
+_LOG_DIR = os.path.join(DATA_DIR, 'conversations')
+# One lock per conversation id, created on demand by _get_lock().
+_LOCKS: dict[str, threading.Lock] = {}
+# Guards lookups and insertions on _LOCKS itself.
+_LOCKS_GUARD = threading.Lock()
+
+
+def start_turn(
+    *,
+    route: str,
+    client_model: str,
+    backend: str,
+    stream: bool,
+    client_request: dict[str, Any],
+    request_headers: dict[str, Any] | None = None,
+    target_url: str = '',
+    upstream_model: str = '',
+    metadata: dict[str, Any] | None = None,
+) -> dict[str, Any] | None:
+    """Create the context dict for a new conversation turn.
+
+    Args:
+        route: Route name; also used to derive the conversation id.
+        client_model: Stored under ``client_model``.
+        backend: Stored under ``backend``.
+        stream: Stored under ``stream``.
+        client_request: Client payload; a copy is stored and it is also used
+            to derive the conversation id.
+        request_headers: Client headers; stored after sensitive values have
+            been masked by ``sanitize_headers``.
+        target_url: Stored under ``target_url``.
+        upstream_model: Stored under ``upstream_model``.
+        metadata: Extra data; a copy is stored under ``metadata``.
+
+    Returns:
+        The new turn dict, or ``None`` when ``Config.DEBUG`` is falsy.
+    """
+    if not Config.DEBUG:
+        return None
+
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
+    # time and drop tzinfo so the text stays '<naive ISO timestamp>Z'.
+    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
+    conversation_id = get_conversation_id(route=route, payload=client_request)
+    turn_id = gen_id('turn_')
+    return {
+        'conversation_id': conversation_id,
+        'turn_id': turn_id,
+        'route': route,
+        'client_model': client_model,
+        'backend': backend,
+        'stream': stream,
+        'target_url': target_url,
+        'upstream_model': upstream_model,
+        'started_at': now,
+        'updated_at': now,
+        'request_headers': sanitize_headers(request_headers or {}),
+        'client_request': deep_copy_jsonable(client_request),
+        'metadata': deep_copy_jsonable(metadata or {}),
+        # The fields below are filled in later by the attach_*/append_* helpers.
+        'upstream_request': None,
+        'upstream_response': None,
+        'client_response': None,
+        'stream_trace': {
+            'upstream_events': [],
+            'client_events': [],
+            'summary': {},
+        },
+        'error': None,
+    }
+
+
+def get_conversation_id(*, route: str, payload: dict[str, Any]) -> str:
+    """Return a conversation id meant to stay stable across turns.
+
+    An explicit id found in the payload wins (after sanitising). Otherwise
+    the id is ``conv_`` plus the first 24 hex characters of the SHA-256 of
+    the seed derived from ``route`` and ``payload``.
+    """
+    explicit_id = _pick_explicit_conversation_id(payload)
+    if not explicit_id:
+        seed_bytes = _conversation_seed(route, payload).encode('utf-8')
+        return 'conv_' + hashlib.sha256(seed_bytes).hexdigest()[:24]
+    return _safe_id(explicit_id)
+
+
+def attach_upstream_request(turn: dict[str, Any] | None, payload: dict[str, Any], headers: dict[str, Any] | None = None) -> None:
+    """Record the request that is finally sent upstream.
+
+    Stores masked headers and a copy of the body under
+    ``turn['upstream_request']``. Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        masked_headers = sanitize_headers(headers or {})
+        body_copy = deep_copy_jsonable(payload)
+        turn['upstream_request'] = {'headers': masked_headers, 'body': body_copy}
+        _touch(turn)
+
+
+def attach_upstream_response(turn: dict[str, Any] | None, response_data: Any) -> None:
+    """Record the complete (non-streaming) upstream response.
+
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        turn.update({'upstream_response': deep_copy_jsonable(response_data)})
+        _touch(turn)
+
+
+def attach_client_response(turn: dict[str, Any] | None, response_data: Any) -> None:
+    """Record the complete response finally returned to the client.
+
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        turn.update({'client_response': deep_copy_jsonable(response_data)})
+        _touch(turn)
+
+
+def append_upstream_event(turn: dict[str, Any] | None, event: Any) -> None:
+    """Append a copy of one upstream streaming event to the turn's trace.
+
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        upstream_events = turn['stream_trace']['upstream_events']
+        upstream_events.append(deep_copy_jsonable(event))
+        _touch(turn)
+
+
+def append_client_event(turn: dict[str, Any] | None, event: Any) -> None:
+    """Append a copy of one client-bound streaming event to the turn's trace.
+
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        client_events = turn['stream_trace']['client_events']
+        client_events.append(deep_copy_jsonable(event))
+        _touch(turn)
+
+
+def set_stream_summary(turn: dict[str, Any] | None, summary: dict[str, Any]) -> None:
+    """Store a copy of the streaming summary (e.g. accumulated text, event counts, usage).
+
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        snapshot = deep_copy_jsonable(summary)
+        turn['stream_trace']['summary'] = snapshot
+        _touch(turn)
+
+
+def attach_error(turn: dict[str, Any] | None, error: Any) -> None:
+    """Store a copy of the error information on the turn.
+
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is not None:
+        turn.update({'error': deep_copy_jsonable(error)})
+        _touch(turn)
+
+
+def finalize_turn(
+    turn: dict[str, Any] | None,
+    *,
+    usage: dict[str, Any] | None = None,
+    duration_ms: int = 0,
+) -> None:
+    """Stamp the turn and hand it to a background thread for writing.
+
+    Args:
+        turn: Turn dict from ``start_turn``; ``None`` makes this a no-op.
+        usage: Optional usage data; a copy is stored under ``usage``.
+        duration_ms: Stored under ``duration_ms``.
+
+    Nothing is written when ``Config.DEBUG`` is falsy.
+    """
+    if turn is None or not Config.DEBUG:
+        return
+
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
+    # time and drop tzinfo so the text stays '<naive ISO timestamp>Z'.
+    turn['updated_at'] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
+    turn['duration_ms'] = duration_ms
+    if usage is not None:
+        turn['usage'] = deep_copy_jsonable(usage)
+
+    # The writer receives its own copy, so later changes to ``turn`` by the
+    # caller do not affect what is written.
+    threading.Thread(target=_write_turn, args=(deep_copy_jsonable(turn),), daemon=True).start()
+
+
+def sanitize_headers(headers: dict[str, Any]) -> dict[str, Any]:
+    """Return a new dict with the values of sensitive headers masked.
+
+    Header names are compared case-insensitively; all other entries are
+    copied through unchanged.
+    """
+    secret_names = ('authorization', 'x-api-key', 'api-key', 'x-goog-api-key')
+    return {
+        name: _mask_secret(val) if str(name).lower() in secret_names else val
+        for name, val in headers.items()
+    }
+
+
+def deep_copy_jsonable(value: Any) -> Any:
+    """Copy ``value`` as faithfully as possible without ever raising.
+
+    Tries ``copy.deepcopy`` first, then a JSON round trip (stringifying
+    anything JSON cannot encode), and finally falls back to ``str(value)``.
+    """
+    try:
+        return copy.deepcopy(value)
+    except Exception:
+        pass  # fall through to the JSON round trip
+
+    try:
+        encoded = json.dumps(value, ensure_ascii=False, default=str)
+        return json.loads(encoded)
+    except Exception:
+        return str(value)
+
+
+def _write_turn(turn: dict[str, Any]) -> None:
+    """Merge ``turn`` into its conversation's JSON log file.
+
+    The file lives at ``_LOG_DIR/<date of started_at>/<conversation_id>.json``.
+    An existing turn with the same ``turn_id`` is replaced; otherwise the
+    turn is appended. ``OSError`` and ``json.JSONDecodeError`` are logged as
+    warnings and swallowed.
+    """
+    conversation_id = turn['conversation_id']
+    lock = _get_lock(conversation_id)
+    with lock:
+        try:
+            # 'started_at' is an ISO timestamp; its first 10 chars are the date.
+            date_str = turn['started_at'][:10]
+            day_dir = os.path.join(_LOG_DIR, date_str)
+            os.makedirs(day_dir, exist_ok=True)
+            filepath = os.path.join(day_dir, f'{conversation_id}.json')
+
+            if os.path.exists(filepath):
+                with open(filepath, 'r', encoding='utf-8') as f:
+                    doc = json.load(f)
+            else:
+                doc = {
+                    'conversation_id': conversation_id,
+                    'route': turn.get('route', ''),
+                    'created_at': turn['started_at'],
+                    'updated_at': turn['updated_at'],
+                    'turns': [],
+                }
+
+            turns = doc.setdefault('turns', [])
+            replaced = False
+            for index, existing in enumerate(turns):
+                if existing.get('turn_id') == turn.get('turn_id'):
+                    turns[index] = turn
+                    replaced = True
+                    break
+            if not replaced:
+                turns.append(turn)
+
+            doc['updated_at'] = turn['updated_at']
+            doc['last_client_model'] = turn.get('client_model', '')
+            doc['last_backend'] = turn.get('backend', '')
+            doc['turn_count'] = len(turns)
+
+            # Write to a sibling temp file and swap it in, so an interrupted
+            # write cannot leave a truncated JSON document behind. The
+            # per-conversation lock keeps the temp name unique per writer
+            # within this process.
+            tmp_path = filepath + '.tmp'
+            with open(tmp_path, 'w', encoding='utf-8') as f:
+                json.dump(doc, f, ensure_ascii=False, indent=2, default=str)
+            os.replace(tmp_path, filepath)
+        except OSError as e:
+            logger.warning('写入对话日志失败: %s', e)
+        except json.JSONDecodeError as e:
+            logger.warning('解析对话日志失败: %s', e)
+
+
+def _get_lock(conversation_id: str) -> threading.Lock:
+    """Return the lock for ``conversation_id``, creating it on first use."""
+    with _LOCKS_GUARD:
+        try:
+            return _LOCKS[conversation_id]
+        except KeyError:
+            new_lock = _LOCKS[conversation_id] = threading.Lock()
+            return new_lock
+
+
+def _touch(turn: dict[str, Any] | None) -> None:
+    """Set ``turn['updated_at']`` to the current UTC time.
+
+    The value is an ISO-8601 timestamp without offset, followed by ``Z``.
+    Does nothing when ``turn`` is ``None``.
+    """
+    if turn is None:
+        return
+    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
+    # time and drop tzinfo so the text stays '<naive ISO timestamp>Z'.
+    turn['updated_at'] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
+
+
+def _pick_explicit_conversation_id(payload: dict[str, Any]) -> str:
+    """Return the first non-blank explicit conversation/session id, or ``''``.
+
+    Top-level keys are checked first, in a fixed order, followed by
+    ``conversation_id`` and ``session_id`` inside a dict-valued ``metadata``.
+    Only string values count; the result is stripped of surrounding
+    whitespace.
+    """
+    top_level_keys = (
+        'conversation_id',
+        'conversationId',
+        'session_id',
+        'sessionId',
+        'chat_id',
+        'chatId',
+    )
+    values = [payload.get(key) for key in top_level_keys]
+    meta = payload.get('metadata')
+    if isinstance(meta, dict):
+        values += [meta.get('conversation_id'), meta.get('session_id')]
+
+    for value in values:
+        if isinstance(value, str):
+            trimmed = value.strip()
+            if trimmed:
+                return trimmed
+    return ''
+
+
+def _conversation_seed(route: str, payload: dict[str, Any]) -> str:
+    """Build the text that is hashed into a conversation id.
+
+    - ``chat``: the normalized ``messages`` list.
+    - ``responses``: ``instructions`` plus ``input`` (used as-is when it is
+      a string, JSON-encoded otherwise).
+    - ``messages``: ``system`` plus the JSON-encoded ``messages``.
+    - any other route: the JSON-encoded payload.
+
+    Every seed is prefixed with the route name.
+    """
+    if route == 'chat':
+        messages = payload.get('messages', [])
+        return 'chat|' + _normalize_messages_seed(messages)
+
+    if route == 'responses':
+        # str() keeps a non-string ``instructions`` value from raising
+        # TypeError on concatenation, mirroring str(system) below.
+        instructions = str(payload.get('instructions') or '')
+        input_data = payload.get('input', [])
+        if isinstance(input_data, str):
+            seed_input = input_data
+        else:
+            seed_input = json.dumps(input_data, ensure_ascii=False, default=str)
+        return 'responses|' + instructions + '|' + seed_input
+
+    if route == 'messages':
+        messages = payload.get('messages', [])
+        system = payload.get('system', '')
+        return 'messages|' + str(system) + '|' + json.dumps(messages, ensure_ascii=False, default=str)
+
+    return route + '|' + json.dumps(payload, ensure_ascii=False, default=str)
+
+
+def _normalize_messages_seed(messages: Any) -> str:
+    """Serialize the identity-relevant parts of chat messages to compact JSON.
+
+    For every dict message only ``role``, normalized ``content``,
+    ``tool_call_id`` and the id and function name of each tool call are
+    kept. Non-dict messages are skipped, and a non-list ``messages`` value
+    yields ``''``.
+    """
+    if not isinstance(messages, list):
+        return ''
+    normalized: list[dict[str, Any]] = []
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        # An explicit ``"tool_calls": null`` must behave like a missing key
+        # instead of raising TypeError when iterated.
+        raw_calls = msg.get('tool_calls')
+        tool_calls = raw_calls if isinstance(raw_calls, list) else []
+        call_summaries = []
+        for tc in tool_calls:
+            if not isinstance(tc, dict):
+                continue
+            function = tc.get('function')
+            # A non-dict ``function`` value has no name to read.
+            name = function.get('name', '') if isinstance(function, dict) else ''
+            call_summaries.append({'id': tc.get('id', ''), 'name': name})
+        normalized.append({
+            'role': msg.get('role', ''),
+            'content': _normalize_content(msg.get('content')),
+            'tool_call_id': msg.get('tool_call_id', ''),
+            'tool_calls': call_summaries,
+        })
+    return json.dumps(normalized, ensure_ascii=False, separators=(',', ':'))
+
+
+def _normalize_content(content: Any) -> Any:
+    """Coerce message content into a JSON-friendly form.
+
+    Strings pass through, ``None`` becomes ``''``, lists keep their dict
+    items and stringify everything else, and any other value is converted
+    with ``str()``.
+    """
+    if content is None:
+        return ''
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        return [part if isinstance(part, dict) else str(part) for part in content]
+    return str(content)
+
+
+def _safe_id(raw: str) -> str:
+    """Turn ``raw`` into an id that is safe to use in a file name.
+
+    Surrounding whitespace is stripped, every character other than an
+    alphanumeric, ``-``, ``_`` or ``.`` becomes ``_``, and the result is cut
+    to 120 characters. An empty result is replaced by a generated
+    ``conv_`` id.
+    """
+    allowed_punct = '-_.'
+    chars = [c if (c.isalnum() or c in allowed_punct) else '_' for c in raw.strip()]
+    truncated = ''.join(chars)[:120]
+    return truncated if truncated else gen_id('conv_')
+
+
+def _mask_secret(value: Any) -> str:
+    """Mask a secret, keeping only its first and last four characters.
+
+    Values of eight characters or fewer (including falsy values) are
+    replaced entirely by ``***``.
+    """
+    secret = str(value or '')
+    return f'{secret[:4]}***{secret[-4:]}' if len(secret) > 8 else '***'
--- a/utils/thinking_cache.py
+++ b/utils/thinking_cache.py
@ -0,0 +1,150 @@
+"""轻量 Thinking 缓存
+
+纯内存缓存，在多轮对话中保存和恢复 thinking/reasoning 内容。
+解决 Cursor 不会把 thinking 内容回传给 API 的问题，
+某些模型（如推理模型）在缺少历史 thinking 时表现会下降。
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import re
+import time
+from typing import Any
+
+# Module-level logger; used for debug output when cached thinking is injected.
+logger = logging.getLogger(__name__)
+
+# Matches a complete <think>...</think> block (non-greedy, spans newlines).
+_THINK_RE = re.compile(r'<think>.*?</think>', re.DOTALL)
+# Matches an unterminated <think> block running to the end of the text.
+_UNCLOSED_THINK_RE = re.compile(r'<think>.*$', re.DOTALL)
+# Matches every character that is not an ASCII letter, digit, '_' or '-'.
+_TOOL_ID_RE = re.compile(r'[^a-zA-Z0-9_-]')
+# Cache entry lifetime in seconds.
+_TTL = 86400  # 24 hours
+
+
+class ThinkingCache:
+    """In-memory cache of assistant thinking/reasoning text.
+
+    Entries are keyed by a session id (derived from the first user and first
+    assistant message) plus a hash of the assistant message. An entry counts
+    as expired once it is ``_TTL`` seconds old (24 hours).
+    """
+
+    def __init__(self):
+        # key -> (reasoning text, time.time() at which it was stored)
+        self._store: dict[str, tuple[str, float]] = {}
+
+    def inject(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Fill in missing ``reasoning_content`` on assistant messages.
+
+        Messages are modified in place and the same list is returned.
+        Nothing is injected when no session id can be derived.
+        """
+        sid = self._session_id(messages)
+        if not sid:
+            return messages
+
+        now = time.time()
+        for msg in messages:
+            if msg.get('role') != 'assistant':
+                continue
+            if msg.get('reasoning_content'):
+                continue
+            key = sid + ':' + self._message_hash(msg)
+            entry = self._store.get(key)
+            # Expired entries are ignored here; _cleanup() removes them.
+            if entry and (now - entry[1]) < _TTL:
+                msg['reasoning_content'] = entry[0]
+                logger.debug('已从缓存注入 thinking (%d 字符)', len(entry[0]))
+
+        return messages
+
+    def store_from_response(
+        self,
+        messages: list[dict[str, Any]],
+        reasoning_content: str,
+    ) -> None:
+        """Cache ``reasoning_content`` for the session of ``messages``.
+
+        The key is computed from a placeholder assistant message with empty
+        content and no tool calls.
+
+        NOTE(review): ``inject`` will therefore only find this entry for an
+        assistant message whose normalized content is empty and which has
+        no tool-call ids - confirm that this is intended.
+        """
+        if not reasoning_content:
+            return
+        sid = self._session_id(messages)
+        if not sid:
+            return
+
+        fake_msg: dict[str, Any] = {'role': 'assistant', 'content': '', 'tool_calls': []}
+        key = sid + ':' + self._message_hash(fake_msg)
+        self._store[key] = (reasoning_content, time.time())
+        self._cleanup()
+
+    def store_assistant_thinking(
+        self,
+        messages: list[dict[str, Any]],
+        assistant_msg: dict[str, Any],
+    ) -> None:
+        """Cache the ``reasoning_content`` of a complete assistant message.
+
+        Does nothing when the message has no reasoning content or when no
+        session id can be derived from ``messages``.
+        """
+        rc = assistant_msg.get('reasoning_content', '')
+        if not rc:
+            return
+        sid = self._session_id(messages)
+        if not sid:
+            return
+        key = sid + ':' + self._message_hash(assistant_msg)
+        self._store[key] = (rc, time.time())
+        self._cleanup()
+
+    def _session_id(self, messages: list[dict[str, Any]]) -> str:
+        """Derive a session id from the first user and assistant messages.
+
+        ``system`` and ``developer`` messages are skipped, and a message
+        whose normalized content is empty does not count as "first".
+        Returns ``''`` unless both a user and an assistant text are found.
+        """
+        first_user = ''
+        first_assistant = ''
+        for msg in messages:
+            role = msg.get('role', '')
+            if role in ('system', 'developer'):
+                continue
+            if role == 'user' and not first_user:
+                first_user = self._normalize_content(
+                    msg.get('content', '')
+                )
+            elif role == 'assistant' and not first_assistant:
+                first_assistant = self._normalize_content(
+                    msg.get('content', '')
+                )
+            if first_user and first_assistant:
+                break
+
+        if not first_user or not first_assistant:
+            return ''
+
+        raw = first_user + '|' + first_assistant
+        return hashlib.sha256(raw.encode()).hexdigest()[:16]
+
+    def _message_hash(self, msg: dict[str, Any]) -> str:
+        """Hash a message's normalized content and sorted tool-call ids."""
+        content = self._normalize_content(msg.get('content', ''))
+        # ``"tool_calls": null`` and missing or non-string ids are tolerated
+        # instead of raising TypeError.
+        tool_ids = sorted(
+            self._normalize_tool_id(str(tc.get('id') or ''))
+            for tc in (msg.get('tool_calls') or [])
+            if isinstance(tc, dict)
+        )
+        raw = json.dumps({'c': content, 't': tool_ids}, ensure_ascii=False)
+        return hashlib.sha256(raw.encode()).hexdigest()[:16]
+
+    @staticmethod
+    def _normalize_content(content: Any) -> str:
+        """Flatten content to text with ``<think>`` blocks removed.
+
+        For list content only ``type == 'text'`` dict parts and plain string
+        parts are kept, joined by newlines.
+        """
+        if isinstance(content, list):
+            parts = []
+            for p in content:
+                if isinstance(p, dict) and p.get('type') == 'text':
+                    parts.append(p.get('text', ''))
+                elif isinstance(p, str):
+                    parts.append(p)
+            text = '\n'.join(parts)
+        elif isinstance(content, str):
+            text = content
+        else:
+            text = str(content) if content else ''
+        text = _THINK_RE.sub('', text)
+        text = _UNCLOSED_THINK_RE.sub('', text)
+        return text.strip()
+
+    @staticmethod
+    def _normalize_tool_id(tid: str) -> str:
+        """Drop every character other than ASCII letters, digits, ``_`` and ``-``."""
+        return _TOOL_ID_RE.sub('', tid)
+
+    def _cleanup(self) -> None:
+        """Delete expired entries once the store holds at least 100 entries.
+
+        Below that size this is a no-op. At or above it, every call scans
+        all entries and removes those that are ``_TTL`` seconds old or more.
+        """
+        if len(self._store) < 100:
+            return
+        now = time.time()
+        expired = [k for k, (_, ts) in self._store.items() if (now - ts) >= _TTL]
+        for k in expired:
+            del self._store[k]
+
+
+# Module-level ThinkingCache instance.
+thinking_cache = ThinkingCache()
--- a/utils/usage_tracker.py
+++ b/utils/usage_tracker.py
@ -0,0 +1,72 @@
+"""用量统计 — 内存聚合
+
+按模型名聚合请求数、token 用量等统计数据。
+重启后重置，适合轻量监控场景。
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+from typing import Any
+
+
+class _ModelStats:
+    """Mutable usage counters for a single model."""
+
+    __slots__ = ('request_count', 'input_tokens', 'output_tokens', 'first_seen', 'last_seen')
+
+    def __init__(self):
+        self.request_count = 0
+        self.input_tokens = 0
+        self.output_tokens = 0
+        # Read the clock once so a fresh record always starts with
+        # first_seen == last_seen.
+        now = time.time()
+        self.first_seen = now
+        self.last_seen = now
+
+
+class UsageTracker:
+    """Lock-protected, in-memory usage counters grouped by model name."""
+
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._stats: dict[str, _ModelStats] = {}
+        self._start_time = time.time()
+
+    def record(
+        self,
+        model: str,
+        usage: dict[str, Any] | None = None,
+        *,
+        input_key: str = 'prompt_tokens',
+        output_key: str = 'completion_tokens',
+    ) -> None:
+        """Count one request for ``model`` and add its token usage, if any.
+
+        ``input_key`` and ``output_key`` name the entries of ``usage`` that
+        hold the input and output token counts; missing or falsy values
+        count as 0.
+        """
+        with self._lock:
+            entry = self._stats.get(model)
+            if entry is None:
+                entry = self._stats[model] = _ModelStats()
+            entry.request_count += 1
+            entry.last_seen = time.time()
+            if not usage:
+                return
+            entry.input_tokens += usage.get(input_key, 0) or 0
+            entry.output_tokens += usage.get(output_key, 0) or 0
+
+    def get_stats(self) -> dict[str, Any]:
+        """Return the uptime in whole seconds plus per-model totals."""
+        with self._lock:
+            per_model = {
+                name: {
+                    'request_count': entry.request_count,
+                    'input_tokens': entry.input_tokens,
+                    'output_tokens': entry.output_tokens,
+                    'total_tokens': entry.input_tokens + entry.output_tokens,
+                }
+                for name, entry in self._stats.items()
+            }
+            uptime = int(time.time() - self._start_time)
+        return {'uptime_seconds': uptime, 'models': per_model}
+
+    def reset(self) -> None:
+        """Discard all counters and restart the uptime clock."""
+        with self._lock:
+            self._stats.clear()
+            self._start_time = time.time()
+
+
+# Module-level UsageTracker instance.
+usage_tracker = UsageTracker()