初始化提交

This commit is contained in:
h88782481 2026-03-09 14:18:42 +08:00
commit 202731df74
28 changed files with 3140 additions and 0 deletions

0
utils/__init__.py Normal file
View file

131
utils/http.py Normal file
View file

@ -0,0 +1,131 @@
"""HTTP 工具 - 请求头构建、上游转发、SSE 流解析、响应构建"""
import json
import uuid
import logging
import requests
from flask import Response, jsonify
from config import Config
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def gen_id(prefix=''):
    """Return a unique identifier.

    The identifier is *prefix* followed by the first 24 hexadecimal
    characters of a freshly generated random UUID (uuid4).
    """
    token = uuid.uuid4().hex
    return '{}{}'.format(prefix, token[:24])
# ─── 请求头构建 ────────────────────────────────────
def build_openai_headers(api_key):
    """Build the request headers for an OpenAI-compatible endpoint.

    The key is sent as a Bearer token and the body is declared as JSON.
    """
    bearer = f'Bearer {api_key}'
    headers = {}
    headers['Authorization'] = bearer
    headers['Content-Type'] = 'application/json'
    return headers
def build_anthropic_headers(api_key):
    """Build the request headers for an Anthropic endpoint.

    The authentication header is chosen from the key prefix: keys that
    start with ``sk-`` are sent in ``x-api-key``; any other key is sent as
    an ``Authorization: Bearer`` token.
    """
    if api_key.startswith('sk-'):
        auth = {'x-api-key': api_key}
    else:
        auth = {'Authorization': f'Bearer {api_key}'}
    return {
        'anthropic-version': '2023-06-01',
        'Content-Type': 'application/json',
        **auth,
    }
# ─── 响应构建 ──────────────────────────────────────
def sse_response(generator):
    """Wrap *generator* in a streaming Server-Sent Events response.

    The response is typed ``text/event-stream`` and carries
    ``Cache-Control: no-cache`` and ``X-Accel-Buffering: no`` headers.
    """
    stream_headers = {'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
    return Response(generator, content_type='text/event-stream', headers=stream_headers)
def error_json(message, error_type='proxy_error', status=502):
    """Build a JSON error response.

    Returns:
        A ``(response, status)`` pair whose body has the shape
        ``{'error': {'message': ..., 'type': ...}}``; *message* is
        converted with ``str()``.
    """
    payload = {'error': {'message': str(message), 'type': error_type}}
    return jsonify(payload), status
# ─── 上游请求转发 ──────────────────────────────────
def forward_request(url, headers, payload, stream=False):
    """Forward a JSON POST request to the upstream API.

    Returns:
        A two-element tuple:

        * success (HTTP 200): ``(response, None)``
        * failure while streaming: ``(None, error_message_str)``
        * failure while not streaming: ``(None, flask_response)`` - either
          the upstream body replayed with its own status code and content
          type, or the result of ``error_json`` when the request itself
          failed.
    """
    # Everything, including reading the error body, stays inside the try so
    # that any requests-level failure is reported the same way.
    try:
        upstream = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=Config.API_TIMEOUT,
            stream=stream,
        )
        if upstream.status_code == 200:
            return upstream, None

        raw_body = upstream.content
        text = raw_body.decode('utf-8', errors='replace')
        # Only the first 300 characters of the body are logged.
        logger.warning(f'上游返回 {upstream.status_code}: {text[:300]}')
        if stream:
            return None, f'上游错误 {upstream.status_code}: {text}'

        mime = upstream.headers.get('Content-Type', 'application/json')
        return None, Response(raw_body, status=upstream.status_code, content_type=mime)
    except requests.RequestException as exc:
        logger.error(f'请求上游失败: {exc}')
        failure = str(exc)
        return None, (failure if stream else error_json(failure))
# ─── SSE 流解析 ───────────────────────────────────
def iter_openai_sse(response):
    """Parse an OpenAI-style SSE stream.

    Yields the decoded JSON value of every ``data:`` line. When the
    ``[DONE]`` sentinel is seen, yields ``None`` once and stops. Empty
    lines, lines without the ``data:`` prefix and payloads that are not
    valid JSON are skipped.
    """
    marker = 'data:'
    for raw in response.iter_lines():
        if not raw:
            continue
        text = raw.decode('utf-8', errors='replace')
        if not text.startswith(marker):
            continue
        payload = text[len(marker):].strip()
        if payload == '[DONE]':
            yield None
            return
        try:
            yield json.loads(payload)
        except json.JSONDecodeError:
            # Malformed payload: drop it and keep reading.
            continue
def iter_anthropic_sse(response):
    """Parse an Anthropic-style SSE stream.

    Yields ``(event_type, data)`` tuples, where *event_type* is the value
    of the most recent ``event:`` line (``''`` if none has been seen yet)
    and *data* is the decoded JSON of a ``data:`` line. Empty lines, empty
    payloads and payloads that are not valid JSON are skipped.
    """
    current_event = ''
    for raw in response.iter_lines():
        if not raw:
            continue
        text = raw.decode('utf-8', errors='replace')
        if text.startswith('event:'):
            current_event = text[len('event:'):].strip()
            continue
        if not text.startswith('data:'):
            continue
        payload = text[len('data:'):].strip()
        if payload:
            try:
                yield current_event, json.loads(payload)
            except json.JSONDecodeError:
                # Malformed payload: drop it and keep reading.
                continue

101
utils/think_tag.py Normal file
View file

@ -0,0 +1,101 @@
"""<think> 标签提取器
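Some upstream APIs (e.g. DeepSeek) wrap the model's reasoning in
<think>...</think> tags, whereas Cursor expects a reasoning_content field.
This module extracts the <think> content from both streaming and
non-streaming responses and converts it to reasoning_content.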
"""
import re
# Matches the first <think>...</think> pair; DOTALL lets the body span lines.
_THINK_RE = re.compile(r'<think>(.*?)</think>', re.DOTALL)


def extract_from_text(content):
    """Extract a <think> block from a complete (non-streaming) text.

    Returns:
        ``(cleaned_content, reasoning_content)``. When *content* is not a
        string or holds no complete ``<think>...</think>`` pair it is
        returned unchanged with ``None`` as the reasoning. Otherwise the
        first pair is removed; both parts are stripped, and the cleaned
        content becomes ``None`` if nothing remains.
    """
    if not isinstance(content, str):
        return content, None
    if '<think>' not in content:
        return content, None

    found = _THINK_RE.search(content)
    if found is None:
        # Opening tag without a matching closing tag.
        return content, None

    head = content[:found.start()]
    tail = content[found.end():]
    remainder = (head + tail).strip()
    thought = found.group(1).strip()
    return (remainder if remainder else None), thought
class ThinkTagExtractor:
    """Streaming <think> tag extractor.

    Tracks whether the stream is currently inside a ``<think>...</think>``
    section across chunks. Text inside the section is re-emitted as
    ``reasoning_content`` deltas; text outside stays a ``content`` delta.

    Limitations visible in this implementation:
      * only the first choice of a chunk is inspected;
      * only the first tag pair inside a single chunk is handled;
      * a tag whose characters are split across two chunks is not
        recognised.
    """

    _OPEN = '<think>'
    _CLOSE = '</think>'

    def __init__(self):
        # True while the stream is between an opening and a closing tag.
        self._in_thinking = False

    def process_chunk(self, chunk):
        """Process one streaming chunk and return a list of chunks.

        The chunk is passed through unchanged (as a one-element list) when
        it already carries ``reasoning_content`` or when its content is
        missing, empty or not a string. Otherwise the content is split on
        the think tags.
        """
        delta = self._first_choice(chunk).get('delta') or {}
        if delta.get('reasoning_content'):
            return [chunk]
        content = delta.get('content')
        # Non-string content (None, structured parts, ...) cannot hold tags.
        if not isinstance(content, str) or not content:
            return [chunk]
        return self._split(chunk, content)

    def _split(self, chunk, text):
        """Split *text* on the think tags into one or more chunks.

        Always returns a non-empty list. Tag text itself is never emitted.
        """
        pieces = []  # (keyword for _make, text) in emission order
        if self._in_thinking:
            thought, closing, tail = text.partition(self._CLOSE)
            pieces.append(('reasoning', thought))
            if closing:
                self._in_thinking = False
                # Drop the newlines that follow the closing tag.
                pieces.append(('content', tail.lstrip('\n')))
        else:
            before, opening, after = text.partition(self._OPEN)
            if not opening:
                # No tag and not thinking: nothing to rewrite.
                return [chunk]
            pieces.append(('content', before))
            thought, closing, tail = after.partition(self._CLOSE)
            pieces.append(('reasoning', thought))
            if closing:
                pieces.append(('content', tail.lstrip('\n')))
            else:
                self._in_thinking = True

        results = [
            self._make(chunk, **{kind: piece})
            for kind, piece in pieces
            if piece
        ]
        if not results:
            # The chunk held nothing but a tag. Emit an empty content delta
            # instead of the original chunk, which would leak the tag.
            results.append(self._make(chunk, content=''))

        # Keep the upstream finish_reason on the last piece so that the end
        # of the stream is not lost when a final chunk gets split.
        finish = self._first_choice(chunk).get('finish_reason')
        if finish is not None:
            results[-1]['choices'][0]['finish_reason'] = finish
        return results

    @staticmethod
    def _first_choice(chunk):
        """Return the first choice dict of *chunk*, or ``{}`` if absent."""
        choices = chunk.get('choices') or []
        first = choices[0] if choices else None
        return first if isinstance(first, dict) else {}

    @staticmethod
    def _make(template, content=None, reasoning=None):
        """Build a new delta chunk modelled on *template*.

        ``id``, ``model``, the first choice's ``index`` and, when present,
        ``created`` are copied from the template. ``finish_reason`` is
        ``None``; the caller sets it where needed.
        """
        delta = {}
        if content is not None:
            delta['content'] = content
        if reasoning is not None:
            delta['reasoning_content'] = reasoning

        index = ThinkTagExtractor._first_choice(template).get('index', 0)
        made = {
            'id': template.get('id', ''),
            'object': 'chat.completion.chunk',
            'model': template.get('model', ''),
            'choices': [{'index': index, 'delta': delta, 'finish_reason': None}],
        }
        if 'created' in template:
            made['created'] = template['created']
        return made

134
utils/tool_fixer.py Normal file
View file

@ -0,0 +1,134 @@
"""工具参数修复
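Fixes common problems in LLM-generated tool-call arguments:
- smart quotes → plain ASCII quotes
- file_path → path field mapping
- exact-match repair of old_string for the StrReplace tool
- ID and stop_reason repair for Anthropic tool_use blocks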
"""
import os
import re
import uuid
# Typographic ("smart") quote characters handled by the helpers below.
# Double-quote look-alikes, including the « » guillemets.
_SMART_DOUBLE = frozenset('«»\u201c\u201d\u275e\u201f\u201e\u275d')
# Single-quote look-alikes.
_SMART_SINGLE = frozenset('\u2018\u2019\u201a\u201b')
def normalize_args(args):
    """Normalize tool arguments by renaming ``file_path`` to ``path``.

    The rename happens in place, and only when *args* is a dict that has
    ``file_path`` but no ``path``. The same object is returned.
    """
    if not isinstance(args, dict):
        return args
    needs_rename = 'file_path' in args and 'path' not in args
    if needs_rename:
        args['path'] = args.pop('file_path')
    return args
def repair_str_replace_args(tool_name, args):
    """Repair exact-match failures of StrReplace / search_replace calls.

    When the ``old_string`` (or ``old_str``) argument does not occur
    verbatim in the target file - typically because the model produced
    smart quotes - a tolerant regex is searched in the file. If it matches
    exactly once, the argument is replaced by the text actually found and
    smart quotes in ``new_string`` (or ``new_str``) are converted to ASCII.

    Args:
        tool_name: Name of the tool being called; may be ``None``.
        args: Tool arguments. Only dicts are processed; they are modified
            in place.

    Returns:
        *args* itself, repaired when a unique fuzzy match was found and
        otherwise unchanged.
    """
    if not isinstance(args, dict):
        return args

    # Compare on letters and digits only, so that "StrReplace",
    # "str_replace" and "search-replace" are all recognised. A plain
    # lower() turns "StrReplace" into "strreplace", which never contains
    # "str_replace".
    name_key = re.sub(r'[^a-z0-9]', '', str(tool_name or '').lower())
    if 'strreplace' not in name_key and 'searchreplace' not in name_key:
        return args

    # Remember which key supplied the value so the repair is written back
    # to that same key.
    old_key = next((k for k in ('old_string', 'old_str') if args.get(k)), None)
    if old_key is None:
        return args
    old_str = args[old_key]
    if not isinstance(old_str, str):
        return args

    file_path = args.get('path') or args.get('file_path')
    # Model-generated arguments may have any JSON type; only a string can
    # be a usable path.
    if not isinstance(file_path, str) or not os.path.isfile(file_path):
        return args

    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
    except (OSError, ValueError):
        # Best effort: an unreadable file simply means no repair.
        return args

    # Already an exact match, nothing to repair.
    if old_str in content:
        return args

    pattern = _build_fuzzy_pattern(old_str)
    try:
        matches = list(re.finditer(pattern, content))
    except re.error:
        return args

    # Repair only on a unique match, to avoid ambiguity.
    if len(matches) != 1:
        return args
    args[old_key] = matches[0].group()

    # Convert smart quotes in the replacement text as well.
    new_key = next((k for k in ('new_string', 'new_str') if args.get(k)), None)
    if new_key is not None and isinstance(args[new_key], str):
        args[new_key] = _replace_smart_quotes(args[new_key])
    return args
def fix_anthropic_tool_use(response_data):
    """Repair ``tool_use`` blocks in an Anthropic response.

    Every ``tool_use`` block without an id receives a generated
    ``toolu_`` id, and ``stop_reason`` is set to ``'tool_use'`` whenever at
    least one such block is present. The response is modified in place and
    returned; values that are not dicts, or whose ``content`` is not a
    list, are returned untouched.
    """
    if not isinstance(response_data, dict):
        return response_data
    blocks = response_data.get('content', [])
    if not isinstance(blocks, list):
        return response_data

    tool_blocks = [
        item for item in blocks
        if isinstance(item, dict) and item.get('type') == 'tool_use'
    ]
    for item in tool_blocks:
        if not item.get('id'):
            item['id'] = f'toolu_{uuid.uuid4().hex[:24]}'

    if tool_blocks and response_data.get('stop_reason') != 'tool_use':
        response_data['stop_reason'] = 'tool_use'
    return response_data
# ─── 内部辅助 ──────────────────────────────────────
def _build_fuzzy_pattern(text):
"""构建容错正则:智能引号可互换、空白可伸缩、反斜杠可重复"""
parts = []
for ch in text:
if ch in _SMART_DOUBLE or ch == '"':
parts.append('["\u00ab\u201c\u201d\u275e\u201f\u201e\u275d\u00bb]')
elif ch in _SMART_SINGLE or ch == "'":
parts.append("['\u2018\u2019\u201a\u201b]")
elif ch in (' ', '\t'):
parts.append(r'\s+')
elif ch == '\\':
parts.append(r'\\{1,2}')
else:
parts.append(re.escape(ch))
return ''.join(parts)
def _replace_smart_quotes(text):
    """Replace smart quotes in *text* with plain ASCII quotes.

    Characters in ``_SMART_DOUBLE`` become ``"`` and characters in
    ``_SMART_SINGLE`` become ``'``; everything else is kept as is.
    """
    plain = []
    for ch in text:
        if ch in _SMART_DOUBLE:
            plain.append('"')
        elif ch in _SMART_SINGLE:
            plain.append("'")
        else:
            plain.append(ch)
    return ''.join(plain)