支持gimini格式,优化debug日志
This commit is contained in:
parent
e726f11bad
commit
4de6db13f9
16 changed files with 1783 additions and 55 deletions
150
utils/thinking_cache.py
Normal file
150
utils/thinking_cache.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
"""轻量 Thinking 缓存
|
||||
|
||||
纯内存缓存,在多轮对话中保存和恢复 thinking/reasoning 内容。
|
||||
解决 Cursor 不会把 thinking 内容回传给 API 的问题,
|
||||
某些模型(如推理模型)在缺少历史 thinking 时表现会下降。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_THINK_RE = re.compile(r'<think>.*?</think>', re.DOTALL)
|
||||
_UNCLOSED_THINK_RE = re.compile(r'<think>.*$', re.DOTALL)
|
||||
_TOOL_ID_RE = re.compile(r'[^a-zA-Z0-9_-]')
|
||||
_TTL = 86400 # 24 hours
|
||||
|
||||
|
||||
class ThinkingCache:
|
||||
"""纯内存 thinking 缓存,TTL 2 小时。"""
|
||||
|
||||
def __init__(self):
|
||||
self._store: dict[str, tuple[str, float]] = {}
|
||||
|
||||
def inject(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""遍历 assistant 消息,缺少 reasoning_content 时从缓存注入。"""
|
||||
sid = self._session_id(messages)
|
||||
if not sid:
|
||||
return messages
|
||||
|
||||
now = time.time()
|
||||
for msg in messages:
|
||||
if msg.get('role') != 'assistant':
|
||||
continue
|
||||
if msg.get('reasoning_content'):
|
||||
continue
|
||||
key = sid + ':' + self._message_hash(msg)
|
||||
entry = self._store.get(key)
|
||||
if entry and (now - entry[1]) < _TTL:
|
||||
msg['reasoning_content'] = entry[0]
|
||||
logger.debug('已从缓存注入 thinking (%d 字符)', len(entry[0]))
|
||||
|
||||
return messages
|
||||
|
||||
def store_from_response(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
reasoning_content: str,
|
||||
) -> None:
|
||||
"""将响应中的 thinking 内容存入缓存。"""
|
||||
if not reasoning_content:
|
||||
return
|
||||
sid = self._session_id(messages)
|
||||
if not sid:
|
||||
return
|
||||
|
||||
fake_msg: dict[str, Any] = {'role': 'assistant', 'content': '', 'tool_calls': []}
|
||||
key = sid + ':' + self._message_hash(fake_msg)
|
||||
self._store[key] = (reasoning_content, time.time())
|
||||
self._cleanup()
|
||||
|
||||
def store_assistant_thinking(
|
||||
self,
|
||||
messages: list[dict[str, Any]],
|
||||
assistant_msg: dict[str, Any],
|
||||
) -> None:
|
||||
"""从完整的 assistant 消息中提取并缓存 thinking。"""
|
||||
rc = assistant_msg.get('reasoning_content', '')
|
||||
if not rc:
|
||||
return
|
||||
sid = self._session_id(messages)
|
||||
if not sid:
|
||||
return
|
||||
key = sid + ':' + self._message_hash(assistant_msg)
|
||||
self._store[key] = (rc, time.time())
|
||||
self._cleanup()
|
||||
|
||||
def _session_id(self, messages: list[dict[str, Any]]) -> str:
|
||||
first_user = ''
|
||||
first_assistant = ''
|
||||
for msg in messages:
|
||||
role = msg.get('role', '')
|
||||
if role in ('system', 'developer'):
|
||||
continue
|
||||
if role == 'user' and not first_user:
|
||||
first_user = self._normalize_content(
|
||||
msg.get('content', '')
|
||||
)
|
||||
elif role == 'assistant' and not first_assistant:
|
||||
first_assistant = self._normalize_content(
|
||||
msg.get('content', '')
|
||||
)
|
||||
if first_user and first_assistant:
|
||||
break
|
||||
|
||||
if not first_user or not first_assistant:
|
||||
return ''
|
||||
|
||||
raw = first_user + '|' + first_assistant
|
||||
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||
|
||||
def _message_hash(self, msg: dict[str, Any]) -> str:
|
||||
content = self._normalize_content(msg.get('content', ''))
|
||||
tool_ids = sorted(
|
||||
self._normalize_tool_id(tc.get('id', ''))
|
||||
for tc in msg.get('tool_calls', [])
|
||||
if isinstance(tc, dict)
|
||||
)
|
||||
raw = json.dumps({'c': content, 't': tool_ids}, ensure_ascii=False)
|
||||
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||
|
||||
@staticmethod
|
||||
def _normalize_content(content: Any) -> str:
|
||||
if isinstance(content, list):
|
||||
parts = []
|
||||
for p in content:
|
||||
if isinstance(p, dict) and p.get('type') == 'text':
|
||||
parts.append(p.get('text', ''))
|
||||
elif isinstance(p, str):
|
||||
parts.append(p)
|
||||
text = '\n'.join(parts)
|
||||
elif isinstance(content, str):
|
||||
text = content
|
||||
else:
|
||||
text = str(content) if content else ''
|
||||
text = _THINK_RE.sub('', text)
|
||||
text = _UNCLOSED_THINK_RE.sub('', text)
|
||||
return text.strip()
|
||||
|
||||
@staticmethod
|
||||
def _normalize_tool_id(tid: str) -> str:
|
||||
return _TOOL_ID_RE.sub('', tid)
|
||||
|
||||
def _cleanup(self) -> None:
|
||||
"""惰性清理过期条目(每 100 次写入触发一次全量扫描)。"""
|
||||
if len(self._store) < 100:
|
||||
return
|
||||
now = time.time()
|
||||
expired = [k for k, (_, ts) in self._store.items() if (now - ts) >= _TTL]
|
||||
for k in expired:
|
||||
del self._store[k]
|
||||
|
||||
|
||||
thinking_cache = ThinkingCache()
|
||||
Loading…
Add table
Add a link
Reference in a new issue