初始化提交
This commit is contained in:
commit
202731df74
28 changed files with 3140 additions and 0 deletions
0
utils/__init__.py
Normal file
0
utils/__init__.py
Normal file
131
utils/http.py
Normal file
131
utils/http.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""HTTP 工具 - 请求头构建、上游转发、SSE 流解析、响应构建"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from flask import Response, jsonify
|
||||
|
||||
from config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def gen_id(prefix=''):
    """Return a unique identifier.

    The result is ``prefix`` followed by the first 24 hexadecimal characters
    of a freshly generated random UUID (version 4).
    """
    random_part = uuid.uuid4().hex[:24]
    return f'{prefix}{random_part}'
|
||||
|
||||
|
||||
# ─── 请求头构建 ────────────────────────────────────
|
||||
|
||||
|
||||
def build_openai_headers(api_key):
    """Build the request headers for an OpenAI-compatible endpoint.

    The key is sent as a bearer token and the body is declared as JSON.
    """
    headers = {}
    headers['Authorization'] = f'Bearer {api_key}'
    headers['Content-Type'] = 'application/json'
    return headers
|
||||
|
||||
|
||||
def build_anthropic_headers(api_key):
    """Build the request headers for an Anthropic endpoint.

    The authentication header is chosen from the key prefix: a key starting
    with ``sk-`` is sent as ``x-api-key``; any other key is sent as an
    ``Authorization`` bearer token.
    """
    is_sk_key = api_key.startswith('sk-')
    auth_header = 'x-api-key' if is_sk_key else 'Authorization'
    auth_value = api_key if is_sk_key else f'Bearer {api_key}'
    return {
        'anthropic-version': '2023-06-01',
        'Content-Type': 'application/json',
        auth_header: auth_value,
    }
|
||||
|
||||
|
||||
# ─── 响应构建 ──────────────────────────────────────
|
||||
|
||||
|
||||
def sse_response(generator):
    """Wrap a generator in a Server-Sent Events streaming response.

    The response is typed ``text/event-stream`` and carries
    ``Cache-Control: no-cache`` and ``X-Accel-Buffering: no`` headers.
    """
    stream_headers = {'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
    return Response(
        generator,
        content_type='text/event-stream',
        headers=stream_headers,
    )
|
||||
|
||||
|
||||
def error_json(message, error_type='proxy_error', status=502):
    """Build a JSON error response.

    Returns a ``(response, status)`` pair whose body has the shape
    ``{"error": {"message": ..., "type": ...}}``; ``message`` is converted
    with ``str()``.
    """
    error_body = {'error': {'message': str(message), 'type': error_type}}
    return jsonify(error_body), status
|
||||
|
||||
|
||||
# ─── 上游请求转发 ──────────────────────────────────
|
||||
|
||||
|
||||
def forward_request(url, headers, payload, stream=False):
    """Forward a JSON POST request to the upstream API.

    Returns a ``(response, error)`` pair:
        success:                (upstream response, None)
        failure, stream=True:   (None, error message string)
        failure, stream=False:  (None, Flask response to return to the client)

    Any upstream status other than 200 counts as a failure, as does a
    ``requests.RequestException`` raised while sending the request or reading
    the error body.
    """
    try:
        upstream = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=Config.API_TIMEOUT,
            stream=stream,
        )
        status = upstream.status_code
        if status == 200:
            return upstream, None

        # Non-200: read the whole error body so it can be logged and relayed.
        raw_body = upstream.content
        body = raw_body.decode('utf-8', errors='replace')
        logger.warning(f'上游返回 {status}: {body[:300]}')

        if stream:
            return None, f'上游错误 {status}: {body}'

        # Non-streaming callers get the upstream error relayed as-is.
        relayed = Response(
            raw_body,
            status=status,
            content_type=upstream.headers.get('Content-Type', 'application/json'),
        )
        return None, relayed
    except requests.RequestException as exc:
        logger.error(f'请求上游失败: {exc}')
        reason = str(exc)
        return None, (reason if stream else error_json(reason))
|
||||
|
||||
|
||||
# ─── SSE 流解析 ───────────────────────────────────
|
||||
|
||||
|
||||
def iter_openai_sse(response):
    """Parse an OpenAI-style SSE stream.

    Yields one parsed dict per ``data:`` line. When the ``[DONE]`` marker is
    seen, yields ``None`` and stops. Empty lines, lines that do not start with
    ``data:`` and payloads that are not valid JSON are skipped.
    """
    marker = 'data:'
    for raw_line in response.iter_lines():
        # Falsy lines are treated as empty text, which fails the prefix check.
        text = raw_line.decode('utf-8', errors='replace') if raw_line else ''
        if not text.startswith(marker):
            continue

        body = text[len(marker):].strip()
        if body == '[DONE]':
            yield None
            return

        try:
            yield json.loads(body)
        except json.JSONDecodeError:
            continue
|
||||
|
||||
|
||||
def iter_anthropic_sse(response):
    """Parse an Anthropic-style SSE stream.

    Yields ``(event_type, data_dict)`` tuples. ``event_type`` is the value of
    the most recent ``event:`` line (an empty string until one is seen).
    Empty lines, empty ``data:`` payloads and payloads that are not valid JSON
    are skipped.
    """
    current_event = ''
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        text = raw_line.decode('utf-8', errors='replace')

        if text.startswith('event:'):
            current_event = text[6:].strip()
            continue
        if not text.startswith('data:'):
            continue

        body = text[5:].strip()
        if not body:
            continue
        try:
            yield current_event, json.loads(body)
        except json.JSONDecodeError:
            continue
|
||||
101
utils/think_tag.py
Normal file
101
utils/think_tag.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
"""<think> 标签提取器
|
||||
|
||||
部分上游 API(如 DeepSeek)使用 <think>...</think> 标签包裹思考内容,
|
||||
而 Cursor 期望 reasoning_content 字段。本模块在流式和非流式响应中
|
||||
提取 <think> 标签内容并转为 reasoning_content。
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# Matches the first <think>...</think> section; DOTALL lets it span newlines.
_THINK_RE = re.compile(r'<think>(.*?)</think>', re.DOTALL)


def extract_from_text(content):
    """Extract a ``<think>`` section from a complete (non-streaming) text.

    Returns ``(cleaned_content, reasoning_content)``. When ``content`` is not
    a string or holds no complete ``<think>...</think>`` pair, it is returned
    unchanged with ``None`` as the reasoning. Otherwise the first section is
    removed, both parts are stripped, and ``cleaned_content`` becomes ``None``
    if nothing is left.
    """
    if not isinstance(content, str):
        return content, None

    # Cheap substring test first; the regex only runs when a tag is present.
    found = _THINK_RE.search(content) if '<think>' in content else None
    if found is None:
        return content, None

    cut_from, cut_to = found.span()
    remainder = f'{content[:cut_from]}{content[cut_to:]}'.strip()
    return (remainder or None), found.group(1).strip()
|
||||
|
||||
|
||||
class ThinkTagExtractor:
    """Streaming ``<think>`` tag extractor.

    Remembers across successive chunks whether the stream is currently inside
    a ``<think>...</think>`` section. Text inside the section is re-emitted as
    ``reasoning_content`` deltas; text outside it stays a ``content`` delta.

    Limitation: each chunk is scanned on its own, so a tag that is itself
    split across two chunks (e.g. ``<thi`` followed by ``nk>``) is not
    recognised.
    """

    _OPEN_TAG = '<think>'
    _CLOSE_TAG = '</think>'

    def __init__(self):
        # True while the stream is between an opening tag and its closing tag.
        self._in_thinking = False

    def process_chunk(self, chunk):
        """Process one streaming chunk and return a list of resulting chunks.

        Only the first choice of the chunk is inspected. The chunk is passed
        through unchanged when it has no choices, already carries
        ``reasoning_content``, or has no non-empty string ``content``.
        """
        choices = chunk.get('choices') or []
        if not choices:
            return [chunk]

        delta = choices[0].get('delta') or {}
        if delta.get('reasoning_content'):
            return [chunk]

        content = delta.get('content')
        # Non-string content cannot contain a tag, so it is forwarded
        # untouched instead of failing on the string search below.
        if not isinstance(content, str) or content == '':
            return [chunk]
        return self._split(chunk, content)

    def _split(self, chunk, text):
        """Split ``text`` around the think tags into one chunk per segment.

        Returns the chunks in stream order. The ``index`` of the source choice
        is kept on every produced chunk and its ``finish_reason`` is attached
        to the last one, so a finish signal that arrives together with text is
        not dropped.
        """
        segments = []  # (is_reasoning, text) pairs in stream order

        if self._in_thinking:
            tail = text
        else:
            before, opener, tail = text.partition(self._OPEN_TAG)
            if not opener:
                # Plain content outside any think section: nothing to rewrite.
                return [chunk]
            if before:
                segments.append((False, before))
            self._in_thinking = True

        reasoning, closer, rest = tail.partition(self._CLOSE_TAG)
        if reasoning:
            segments.append((True, reasoning))
        if closer:
            self._in_thinking = False
            # Drop the newlines that directly follow the closing tag.
            rest = rest.lstrip('\n')
            if rest:
                segments.append((False, rest))

        if not segments:
            # The chunk held nothing but a tag. Forward it with the tag text
            # removed; returning the original chunk would leak the raw tag
            # into the visible content.
            return [self._without_tag_text(chunk)]

        source = (chunk.get('choices') or [{}])[0]
        if not isinstance(source, dict):
            source = {}
        source_index = source.get('index')

        produced = []
        for is_reasoning, piece in segments:
            if is_reasoning:
                made = self._make(chunk, reasoning=piece)
            else:
                made = self._make(chunk, content=piece)
            if source_index is not None:
                made['choices'][0]['index'] = source_index
            produced.append(made)

        produced[-1]['choices'][0]['finish_reason'] = source.get('finish_reason')
        return produced

    @staticmethod
    def _without_tag_text(chunk):
        """Return a copy of ``chunk`` whose first choice has empty ``content``.

        The dicts on the path to the delta are copied, so the caller's chunk
        is not modified; all other fields are carried over as they are.
        """
        choices = list(chunk.get('choices') or [])
        if not choices or not isinstance(choices[0], dict):
            return ThinkTagExtractor._make(chunk, content='')

        first = dict(choices[0])
        delta = dict(first.get('delta') or {})
        delta['content'] = ''
        first['delta'] = delta
        choices[0] = first

        clone = dict(chunk)
        clone['choices'] = choices
        return clone

    @staticmethod
    def _make(template, content=None, reasoning=None):
        """Build a new delta chunk that reuses ``id`` and ``model`` of ``template``.

        ``content`` and ``reasoning`` are placed in the delta as ``content``
        and ``reasoning_content`` respectively when they are not ``None``.
        """
        delta = {}
        if content is not None:
            delta['content'] = content
        if reasoning is not None:
            delta['reasoning_content'] = reasoning
        return {
            'id': template.get('id', ''),
            'object': 'chat.completion.chunk',
            'model': template.get('model', ''),
            'choices': [{'index': 0, 'delta': delta, 'finish_reason': None}],
        }
|
||||
134
utils/tool_fixer.py
Normal file
134
utils/tool_fixer.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
"""工具参数修复
|
||||
|
||||
修复 LLM 生成的工具调用参数中的常见问题:
|
||||
- 智能引号 → 普通引号
|
||||
- file_path → path 字段映射
|
||||
- StrReplace 工具的 old_string 精确匹配修复
|
||||
- Anthropic tool_use 块的 ID 和 stop_reason 修复
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
|
||||
# 智能引号字符集
|
||||
_SMART_DOUBLE = frozenset('«»\u201c\u201d\u275e\u201f\u201e\u275d')
|
||||
_SMART_SINGLE = frozenset('\u2018\u2019\u201a\u201b')
|
||||
|
||||
|
||||
def normalize_args(args):
    """Normalise tool arguments by renaming ``file_path`` to ``path``.

    The rename happens in place and only when ``args`` is a dict that has
    ``file_path`` but no ``path``. ``args`` itself is always returned.
    """
    if not isinstance(args, dict):
        return args
    if 'path' in args or 'file_path' not in args:
        return args
    args['path'] = args.pop('file_path')
    return args
|
||||
|
||||
|
||||
def repair_str_replace_args(tool_name, args):
    """Repair exact-match problems in StrReplace / search_replace tool calls.

    When ``old_string`` (or ``old_str``) does not occur verbatim in the target
    file, a tolerant regular expression is built from it and searched in the
    file. If that finds exactly one match, the argument is replaced in place
    by the text actually present in the file, and smart quotes in
    ``new_string`` (or ``new_str``) are converted to ASCII quotes.

    ``args`` is returned unchanged when it is not a dict, the tool is not a
    string-replace tool, the old text is missing or not a string, the file
    cannot be read, the text already matches exactly, or the tolerant search
    finds zero or several matches.
    """
    if not isinstance(args, dict):
        return args

    # Compare with separators removed so that "StrReplace", "str_replace" and
    # "str-replace" are all recognised; the plain lower-cased name
    # "strreplace" does not contain "str_replace".
    name_key = str(tool_name or '').lower().replace('_', '').replace('-', '')
    if 'strreplace' not in name_key and 'searchreplace' not in name_key:
        return args

    # Remember which key supplied the value so the repair is written back to
    # that same key.
    old_key = next((k for k in ('old_string', 'old_str') if args.get(k)), None)
    if old_key is None:
        return args
    old_str = args[old_key]
    if not isinstance(old_str, str):
        return args

    file_path = args.get('path') or args.get('file_path')
    if not isinstance(file_path, str) or not os.path.isfile(file_path):
        return args

    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
    except (OSError, ValueError):
        # Best effort: an unreadable file just means no repair is attempted.
        return args

    # Already an exact match; nothing to repair.
    if old_str in content:
        return args

    pattern = _build_fuzzy_pattern(old_str)
    try:
        matches = list(re.finditer(pattern, content))
    except re.error:
        return args

    # Repair only on a unique match, to avoid guessing between candidates.
    if len(matches) != 1:
        return args
    args[old_key] = matches[0].group()

    # Keep the replacement text consistent: convert its smart quotes too.
    new_key = next((k for k in ('new_string', 'new_str') if args.get(k)), None)
    if new_key is not None and isinstance(args[new_key], str):
        args[new_key] = _replace_smart_quotes(args[new_key])

    return args
|
||||
|
||||
|
||||
def fix_anthropic_tool_use(response_data):
    """Repair ``tool_use`` blocks in an Anthropic response, in place.

    Every ``tool_use`` block lacking an ``id`` gets a generated
    ``toolu_`` + 24 hex character id. If at least one ``tool_use`` block is
    present, ``stop_reason`` is forced to ``tool_use``. Input that is not a
    dict, or whose ``content`` is not a list, is returned untouched.
    """
    if not isinstance(response_data, dict):
        return response_data

    blocks = response_data.get('content', [])
    if not isinstance(blocks, list):
        return response_data

    tool_blocks = [
        item for item in blocks
        if isinstance(item, dict) and item.get('type') == 'tool_use'
    ]
    for tool_block in tool_blocks:
        if not tool_block.get('id'):
            tool_block['id'] = f'toolu_{uuid.uuid4().hex[:24]}'

    if tool_blocks and response_data.get('stop_reason') != 'tool_use':
        response_data['stop_reason'] = 'tool_use'

    return response_data
|
||||
|
||||
|
||||
# ─── 内部辅助 ──────────────────────────────────────
|
||||
|
||||
|
||||
def _whitespace_token(count):
    """Return the regex token for a run of ``count`` spaces/tabs."""
    return r'\s+' if count == 1 else r'\s{%d,}' % count


def _build_fuzzy_pattern(text):
    """Build a tolerant regular expression from literal ``text``.

    - Any double-quote variant (ASCII or smart) matches any other; likewise
      for single quotes.
    - A run of ``k`` spaces/tabs matches ``k`` or more whitespace characters.
    - A backslash matches one or two backslashes.
    - Every other character is matched literally.

    A whitespace run is emitted as a single ``\\s{k,}`` token. It matches the
    same strings as ``k`` consecutive ``\\s+`` tokens, but avoids the
    combinatorial backtracking those adjacent tokens cause on long whitespace
    runs (for example source-code indentation).
    """
    double_quotes = '"\u00ab\u201c\u201d\u275e\u201f\u201e\u275d\u00bb'
    single_quotes = "'\u2018\u2019\u201a\u201b"
    double_class = f'[{double_quotes}]'
    single_class = f'[{single_quotes}]'

    parts = []
    blank_run = 0  # length of the space/tab run not yet emitted
    for ch in text:
        if ch == ' ' or ch == '\t':
            blank_run += 1
            continue
        if blank_run:
            parts.append(_whitespace_token(blank_run))
            blank_run = 0

        if ch in double_quotes:
            parts.append(double_class)
        elif ch in single_quotes:
            parts.append(single_class)
        elif ch == '\\':
            parts.append(r'\\{1,2}')
        else:
            parts.append(re.escape(ch))

    if blank_run:
        parts.append(_whitespace_token(blank_run))
    return ''.join(parts)
|
||||
|
||||
|
||||
def _replace_smart_quotes(text):
    """Replace smart quotes in ``text`` with plain ASCII quotes.

    Characters in ``_SMART_DOUBLE`` become ``"`` and characters in
    ``_SMART_SINGLE`` become ``'``; everything else is kept as it is.
    """
    # Singles first, then doubles, so a double mapping wins for any character
    # present in both sets.
    replacements = dict.fromkeys(_SMART_SINGLE, "'")
    replacements.update(dict.fromkeys(_SMART_DOUBLE, '"'))
    return ''.join(replacements.get(ch, ch) for ch in text)
|
||||
Loading…
Add table
Add a link
Reference in a new issue