初始化提交

2026-03-09 14:18:42 +08:00 · 2026-03-09 14:18:42 +08:00 · 202731df74
commit 202731df74
28 changed files with 3140 additions and 0 deletions
--- a/utils/__init__.py
+++ b/utils/__init__.py
--- a/utils/http.py
+++ b/utils/http.py
@@ -0,0 +1,131 @@
+"""HTTP 工具 - 请求头构建、上游转发、SSE 流解析、响应构建"""
+
+import json
+import uuid
+import logging
+
+import requests
+from flask import Response, jsonify
+
+from config import Config
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+
+def gen_id(prefix=''):
+    """Return a random identifier: ``prefix`` plus 24 hex digits of a UUID4.
+
+    Args:
+        prefix: Text placed in front of the random part (default: empty).
+
+    Returns:
+        The formatted identifier string.
+    """
+    random_part = uuid.uuid4().hex[:24]
+    return '{}{}'.format(prefix, random_part)
+
+
+# ─── 请求头构建 ────────────────────────────────────
+
+
+def build_openai_headers(api_key):
+    """Return request headers for an OpenAI-compatible endpoint.
+
+    Args:
+        api_key: Key sent as a ``Bearer`` token in ``Authorization``.
+
+    Returns:
+        A new dict with ``Authorization`` and a JSON ``Content-Type``.
+    """
+    bearer_token = f'Bearer {api_key}'
+    headers = {}
+    headers['Authorization'] = bearer_token
+    headers['Content-Type'] = 'application/json'
+    return headers
+
+
+def build_anthropic_headers(api_key):
+    """Return Anthropic request headers, choosing the auth header by key prefix.
+
+    Keys starting with ``sk-`` are sent in ``x-api-key``; any other key is
+    sent as a ``Bearer`` token in ``Authorization``.
+
+    Args:
+        api_key: The credential string.
+
+    Returns:
+        A new dict with ``anthropic-version``, ``Content-Type`` and exactly
+        one of the two authentication headers.
+    """
+    is_sk_key = api_key.startswith('sk-')
+    auth_header = 'x-api-key' if is_sk_key else 'Authorization'
+    auth_value = api_key if is_sk_key else f'Bearer {api_key}'
+    return {
+        'anthropic-version': '2023-06-01',
+        'Content-Type': 'application/json',
+        auth_header: auth_value,
+    }
+
+
+# ─── 响应构建 ──────────────────────────────────────
+
+
+def sse_response(generator):
+    """Wrap ``generator`` in a Flask ``Response`` served as an SSE stream.
+
+    The response uses the ``text/event-stream`` content type and carries
+    ``Cache-Control: no-cache`` and ``X-Accel-Buffering: no`` headers.
+
+    Args:
+        generator: Iterable producing the body of the stream.
+
+    Returns:
+        The Flask ``Response`` object.
+    """
+    stream_headers = {'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'}
+    return Response(generator, content_type='text/event-stream', headers=stream_headers)
+
+
+def error_json(message, error_type='proxy_error', status=502):
+    """Build a JSON error response.
+
+    Args:
+        message: Error description; converted with ``str()``.
+        error_type: Value stored under ``error.type``.
+        status: HTTP status code returned alongside the body.
+
+    Returns:
+        A ``(response, status)`` tuple whose body has the shape
+        ``{"error": {"message": ..., "type": ...}}``.
+    """
+    error_detail = {'message': str(message), 'type': error_type}
+    body = jsonify({'error': error_detail})
+    return body, status
+
+
+# ─── 上游请求转发 ──────────────────────────────────
+
+
+def forward_request(url, headers, payload, stream=False):
+    """POST ``payload`` as JSON to an upstream API and classify the outcome.
+
+    Args:
+        url: Upstream endpoint.
+        headers: Request headers to send.
+        payload: JSON-serialisable request body.
+        stream: Passed to ``requests.post``; also selects the error shape.
+
+    Returns:
+        A 2-tuple:
+          * success (HTTP 200): ``(response, None)``
+          * failure when ``stream`` is true: ``(None, error_text)``
+          * failure otherwise: ``(None, flask_response)`` - either the
+            upstream body replayed with its status and content type, or the
+            result of ``error_json`` when the request itself failed.
+    """
+    try:
+        upstream = requests.post(
+            url,
+            headers=headers,
+            json=payload,
+            timeout=Config.API_TIMEOUT,
+            stream=stream,
+        )
+        if upstream.status_code == 200:
+            return upstream, None
+
+        # Non-200: read the whole body so it can be logged and reported.
+        raw_body = upstream.content
+        text_body = raw_body.decode('utf-8', errors='replace')
+        logger.warning(f'上游返回 {upstream.status_code}: {text_body[:300]}')
+        if stream:
+            return None, f'上游错误 {upstream.status_code}: {text_body}'
+        upstream_type = upstream.headers.get('Content-Type', 'application/json')
+        mirrored = Response(
+            raw_body,
+            status=upstream.status_code,
+            content_type=upstream_type,
+        )
+        return None, mirrored
+    except requests.RequestException as exc:
+        logger.error(f'请求上游失败: {exc}')
+        failure = str(exc) if stream else error_json(str(exc))
+        return None, failure
+
+
+# ─── SSE 流解析 ───────────────────────────────────
+
+
+def iter_openai_sse(response):
+    """Parse an OpenAI-style SSE stream.
+
+    Only lines starting with ``data:`` are considered. Each payload is parsed
+    as JSON and yielded; payloads that are not valid JSON are skipped. The
+    ``[DONE]`` sentinel yields ``None`` and ends the iteration.
+
+    Args:
+        response: Object whose ``iter_lines()`` yields the stream's lines as
+            bytes.
+
+    Yields:
+        The decoded JSON value of each data line, then ``None`` for
+        ``[DONE]``.
+    """
+    texts = (
+        raw.decode('utf-8', errors='replace')
+        for raw in response.iter_lines()
+        if raw
+    )
+    for text in texts:
+        if text[:5] != 'data:':
+            continue
+        payload = text[5:].strip()
+        if payload == '[DONE]':
+            yield None
+            return
+        try:
+            parsed = json.loads(payload)
+        except json.JSONDecodeError:
+            # Malformed payload: drop it and keep reading.
+            continue
+        yield parsed
+
+
+def iter_anthropic_sse(response):
+    """Parse an Anthropic-style SSE stream into ``(event_type, data)`` pairs.
+
+    An ``event:`` line updates the current event name (initially ``''``); a
+    ``data:`` line with a non-empty JSON payload is yielded together with
+    that name. Empty payloads and payloads that are not valid JSON are
+    skipped.
+
+    Args:
+        response: Object whose ``iter_lines()`` yields the stream's lines as
+            bytes.
+
+    Yields:
+        ``(event_type, parsed_json)`` tuples.
+    """
+    current_event = ''
+    for raw in response.iter_lines():
+        if not raw:
+            continue
+        text = raw.decode('utf-8', errors='replace')
+        if text.startswith('event:'):
+            current_event = text[6:].strip()
+            continue
+        if not text.startswith('data:'):
+            continue
+        payload = text[5:].strip()
+        if payload:
+            try:
+                parsed = json.loads(payload)
+            except json.JSONDecodeError:
+                # Malformed payload: drop it and keep reading.
+                continue
+            yield current_event, parsed
--- a/utils/think_tag.py
+++ b/utils/think_tag.py
@@ -0,0 +1,101 @@
+"""<think> 标签提取器
+
+部分上游 API（如 DeepSeek）使用 <think>...</think> 标签包裹思考内容，
+而 Cursor 期望 reasoning_content 字段。本模块在流式和非流式响应中
+提取 <think> 标签内容并转为 reasoning_content。
+"""
+
+import re
+
+# First <think>...</think> span; DOTALL lets the body cross line breaks.
+_THINK_RE = re.compile(r'<think>(.*?)</think>', re.DOTALL)
+
+
+def extract_from_text(content):
+    """Pull the first ``<think>...</think>`` span out of a complete text.
+
+    Args:
+        content: The message content. Non-string values and strings without
+            a complete tag pair are returned untouched.
+
+    Returns:
+        ``(cleaned_content, reasoning_content)``. ``reasoning_content`` is the
+        stripped text inside the first tag pair, or ``None`` if there is
+        none. ``cleaned_content`` is the text with that span removed and
+        stripped, or ``None`` when nothing remains.
+    """
+    may_have_tag = isinstance(content, str) and '<think>' in content
+    found = _THINK_RE.search(content) if may_have_tag else None
+    if found is None:
+        return content, None
+
+    head = content[:found.start()]
+    tail = content[found.end():]
+    remainder = (head + tail).strip()
+    thinking = found.group(1).strip()
+    return (remainder if remainder else None), thinking
+
+
+class ThinkTagExtractor:
+    """Streaming ``<think>`` tag extractor.
+
+    Tracks whether the stream is currently inside a ``<think>...</think>``
+    span across successive chunks. Text inside the span is re-emitted as
+    ``reasoning_content`` deltas; text outside stays a ``content`` delta.
+
+    Limitations:
+      * Each chunk's text is searched on its own, so a tag that is itself
+        split across two chunks (``'<thi'`` + ``'nk>'``) is not recognised.
+      * Only the first choice of a chunk is inspected.
+      * At most one opening and one closing tag are handled per chunk.
+    """
+
+    _OPEN_TAG = '<think>'
+    _CLOSE_TAG = '</think>'
+
+    def __init__(self):
+        # True while the stream is between an opening and a closing tag.
+        self._in_thinking = False
+
+    def process_chunk(self, chunk):
+        """Convert one streaming chunk into a list of output chunks.
+
+        Args:
+            chunk: A chat-completion chunk dict; only
+                ``choices[0]['delta']`` is inspected.
+
+        Returns:
+            A non-empty list of chunk dicts. The original ``chunk`` object is
+            passed through as the only element when it has no choices,
+            already carries ``reasoning_content``, or has no non-empty string
+            ``content``.
+        """
+        choices = chunk.get('choices') or []
+        if not choices:
+            return [chunk]
+        delta = choices[0].get('delta') or {}
+        if delta.get('reasoning_content'):
+            return [chunk]
+        content = delta.get('content')
+        # Non-string content cannot contain tags; forward it untouched
+        # instead of failing on ``.find``.
+        if not isinstance(content, str) or content == '':
+            return [chunk]
+        return self._split(chunk, content)
+
+    def _split(self, chunk, text):
+        """Split ``text`` at the think tags into content/reasoning chunks.
+
+        Returns ``[chunk]`` unchanged when the stream is outside a think span
+        and ``text`` holds no opening tag; otherwise returns rebuilt chunks,
+        one per piece, in stream order.
+        """
+        open_tag, close_tag = self._OPEN_TAG, self._CLOSE_TAG
+        pieces = []  # (``_make`` keyword, text) in stream order
+
+        if self._in_thinking:
+            body = text
+        else:
+            start = text.find(open_tag)
+            if start < 0:
+                return [chunk]
+            if text[:start]:
+                pieces.append(('content', text[:start]))
+            body = text[start + len(open_tag):]
+            self._in_thinking = True
+
+        end = body.find(close_tag)
+        if end < 0:
+            if body:
+                pieces.append(('reasoning', body))
+        else:
+            self._in_thinking = False
+            if body[:end]:
+                pieces.append(('reasoning', body[:end]))
+            rest = body[end + len(close_tag):].lstrip('\n')
+            if rest:
+                pieces.append(('content', rest))
+
+        if not pieces:
+            # The chunk held nothing but tag markup (e.g. exactly '<think>'
+            # or '</think>'). Emit an empty content delta rather than the
+            # original chunk, which would leak the raw tag to the client.
+            pieces.append(('content', ''))
+
+        # Keep the upstream finish_reason, attached to the last piece only.
+        first_choice = (chunk.get('choices') or [{}])[0]
+        finish_reason = first_choice.get('finish_reason')
+        last = len(pieces) - 1
+        return [
+            self._make(
+                chunk,
+                finish_reason=finish_reason if pos == last else None,
+                **{field: piece},
+            )
+            for pos, (field, piece) in enumerate(pieces)
+        ]
+
+    @staticmethod
+    def _make(template, content=None, reasoning=None, finish_reason=None):
+        """Build a new delta chunk modelled on ``template``.
+
+        Args:
+            template: Chunk supplying ``id``, ``model`` and the choice index.
+            content: Text for ``delta['content']``; omitted when ``None``.
+            reasoning: Text for ``delta['reasoning_content']``; omitted when
+                ``None``.
+            finish_reason: Value for the choice's ``finish_reason``.
+
+        Returns:
+            A new ``chat.completion.chunk`` dict with a single choice.
+        """
+        delta = {}
+        if content is not None:
+            delta['content'] = content
+        if reasoning is not None:
+            delta['reasoning_content'] = reasoning
+        first_choice = (template.get('choices') or [{}])[0]
+        return {
+            'id': template.get('id', ''),
+            'object': 'chat.completion.chunk',
+            'model': template.get('model', ''),
+            'choices': [{
+                'index': first_choice.get('index', 0),
+                'delta': delta,
+                'finish_reason': finish_reason,
+            }],
+        }
--- a/utils/tool_fixer.py
+++ b/utils/tool_fixer.py
@@ -0,0 +1,134 @@
+"""工具参数修复
+
+修复 LLM 生成的工具调用参数中的常见问题：
+  - 智能引号 → 普通引号
+  - file_path → path 字段映射
+  - StrReplace 工具的 old_string 精确匹配修复
+  - Anthropic tool_use 块的 ID 和 stop_reason 修复
+"""
+
+import os
+import re
+import uuid
+
+# Smart (typographic) quote character sets, used to recognise and replace
+# non-ASCII double and single quotes in tool arguments.
+_SMART_DOUBLE = frozenset('«»\u201c\u201d\u275e\u201f\u201e\u275d')
+_SMART_SINGLE = frozenset('\u2018\u2019\u201a\u201b')
+
+
+def normalize_args(args):
+    """Rename the ``file_path`` argument to ``path``.
+
+    The rename happens in place, and only when ``args`` is a dict that has
+    ``file_path`` but no ``path`` key.
+
+    Args:
+        args: Parsed tool arguments (any type).
+
+    Returns:
+        The same ``args`` object.
+    """
+    if not isinstance(args, dict):
+        return args
+    if 'file_path' not in args or 'path' in args:
+        return args
+    args['path'] = args.pop('file_path')
+    return args
+
+
+def repair_str_replace_args(tool_name, args):
+    """Repair exact-match failures in StrReplace / search_replace arguments.
+
+    When ``old_string`` (or ``old_str``) does not occur verbatim in the
+    target file, a tolerant regex from ``_build_fuzzy_pattern`` is searched
+    in the file. If it matches exactly once, the argument is replaced by the
+    text actually found, and smart quotes in ``new_string`` / ``new_str``
+    are converted to ASCII quotes.
+
+    Args:
+        tool_name: Name of the tool being called. The repair applies when the
+            name, lower-cased and with ``_`` / ``-`` removed, contains
+            ``strreplace`` or ``searchreplace``.
+        args: Parsed tool arguments; modified in place when a repair is made.
+
+    Returns:
+        The same ``args`` object, repaired or untouched. Any condition that
+        prevents a safe repair (wrong tool, missing or non-string fields,
+        unreadable file, zero or multiple fuzzy matches) leaves it unchanged.
+    """
+    if not isinstance(args, dict):
+        return args
+
+    # Compare with separators removed so that "StrReplace", "str_replace"
+    # and "search-replace" spellings are all recognised.
+    name_key = (tool_name or '').lower().replace('_', '').replace('-', '')
+    if 'strreplace' not in name_key and 'searchreplace' not in name_key:
+        return args
+
+    # Remember which key supplied the value so the fix is written back to
+    # that same key.
+    old_key = next((k for k in ('old_string', 'old_str') if args.get(k)), None)
+    if old_key is None or not isinstance(args[old_key], str):
+        return args
+    old_str = args[old_key]
+
+    file_path = args.get('path') or args.get('file_path')
+    if not isinstance(file_path, str) or not os.path.isfile(file_path):
+        return args
+
+    try:
+        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+            content = f.read()
+    except (OSError, ValueError):
+        # Best effort: an unreadable file means nothing to repair against.
+        return args
+
+    # Already an exact match: nothing to repair.
+    if old_str in content:
+        return args
+
+    pattern = _build_fuzzy_pattern(old_str)
+    try:
+        matches = list(re.finditer(pattern, content))
+    except re.error:
+        return args
+
+    # Repair only on a unique match, to avoid ambiguity.
+    if len(matches) != 1:
+        return args
+
+    args[old_key] = matches[0].group()
+
+    # Keep the replacement text consistent: convert its smart quotes too.
+    new_key = next((k for k in ('new_string', 'new_str') if args.get(k)), None)
+    if new_key is not None and isinstance(args[new_key], str):
+        args[new_key] = _replace_smart_quotes(args[new_key])
+
+    return args
+
+
+def fix_anthropic_tool_use(response_data):
+    """Fix ``tool_use`` blocks in an Anthropic response, in place.
+
+    Every ``tool_use`` block without a truthy ``id`` receives a generated
+    ``toolu_`` id (24 hex digits). If at least one ``tool_use`` block exists,
+    ``stop_reason`` is forced to ``'tool_use'``.
+
+    Args:
+        response_data: The response body. Non-dict values, and dicts whose
+            ``content`` is not a list, are returned untouched.
+
+    Returns:
+        The same ``response_data`` object.
+    """
+    if not isinstance(response_data, dict):
+        return response_data
+
+    blocks = response_data.get('content', [])
+    if not isinstance(blocks, list):
+        return response_data
+
+    tool_blocks = [
+        block for block in blocks
+        if isinstance(block, dict) and block.get('type') == 'tool_use'
+    ]
+    for tool_block in tool_blocks:
+        if not tool_block.get('id'):
+            tool_block['id'] = f'toolu_{uuid.uuid4().hex[:24]}'
+
+    if tool_blocks and response_data.get('stop_reason') != 'tool_use':
+        response_data['stop_reason'] = 'tool_use'
+
+    return response_data
+
+
+# ─── 内部辅助 ──────────────────────────────────────
+
+
+def _build_fuzzy_pattern(text):
+    """Build the source of a tolerant regex that matches ``text`` loosely.
+
+    Tolerances:
+      * any double quote (ASCII or smart) matches any other double quote;
+      * any single quote (ASCII or smart) matches any other single quote;
+      * a run of N spaces/tabs matches N or more whitespace characters;
+      * a backslash matches one or two backslashes.
+    Every other character is matched literally.
+
+    A run of blanks is emitted as a single ``\\s{N,}`` quantifier rather than
+    N adjacent ``\\s+`` groups. Both accept the same strings, but adjacent
+    ``\\s+`` groups backtrack polynomially on long whitespace runs (for
+    example when ``text`` contains deep indentation).
+
+    Args:
+        text: The literal text to build a pattern for.
+
+    Returns:
+        The regex source string (not compiled).
+    """
+    def blank_pattern(count):
+        # Keep the familiar ``\s+`` spelling for a single blank.
+        return r'\s+' if count == 1 else r'\s{%d,}' % count
+
+    parts = []
+    blank_run = 0  # blanks seen since the last non-blank, not yet emitted
+    for ch in text:
+        if ch in (' ', '\t'):
+            blank_run += 1
+            continue
+        if blank_run:
+            parts.append(blank_pattern(blank_run))
+            blank_run = 0
+        if ch in _SMART_DOUBLE or ch == '"':
+            parts.append('["\u00ab\u201c\u201d\u275e\u201f\u201e\u275d\u00bb]')
+        elif ch in _SMART_SINGLE or ch == "'":
+            parts.append("['\u2018\u2019\u201a\u201b]")
+        elif ch == '\\':
+            parts.append(r'\\{1,2}')
+        else:
+            parts.append(re.escape(ch))
+    if blank_run:
+        parts.append(blank_pattern(blank_run))
+    return ''.join(parts)
+
+
+def _replace_smart_quotes(text):
+    """Return ``text`` with smart quotes replaced by plain ASCII quotes.
+
+    Characters in ``_SMART_DOUBLE`` become ``"`` and characters in
+    ``_SMART_SINGLE`` become ``'``; everything else is kept as is.
+    """
+    converted = []
+    for ch in text:
+        if ch in _SMART_DOUBLE:
+            converted.append('"')
+        elif ch in _SMART_SINGLE:
+            converted.append("'")
+        else:
+            converted.append(ch)
+    return ''.join(converted)