修复小bug
This commit is contained in:
parent
ae059ffcb8
commit
406a36af89
5 changed files with 92 additions and 13 deletions
|
|
@ -165,6 +165,9 @@ def _handle_openai_stream(
|
||||||
for chunk in iter_openai_sse(resp):
|
for chunk in iter_openai_sse(resp):
|
||||||
if chunk is None:
|
if chunk is None:
|
||||||
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
_dbg(f'流式响应结束,共 {chunk_count} 个数据片段')
|
||||||
|
close_chunk = think_extractor.finalize()
|
||||||
|
if close_chunk:
|
||||||
|
yield sse_data_message(close_chunk)
|
||||||
yield sse_data_message('[DONE]')
|
yield sse_data_message('[DONE]')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -93,9 +93,14 @@ def _inject_thinking(data):
|
||||||
|
|
||||||
|
|
||||||
def _process_stream(resp):
|
def _process_stream(resp):
|
||||||
"""处理 /v1/messages 流式响应,检测并注入 thinking 事件"""
|
"""处理 /v1/messages 流式响应,检测并注入 thinking 事件
|
||||||
|
|
||||||
|
追踪上游 content block 的 index,在注入 thinking blocks 时使用独立的 index,
|
||||||
|
并将后续上游 block 的 index 偏移,避免冲突。
|
||||||
|
"""
|
||||||
reasoning_buf = ''
|
reasoning_buf = ''
|
||||||
injected = False
|
injected = False
|
||||||
|
index_offset = 0
|
||||||
|
|
||||||
for line in resp.iter_lines():
|
for line in resp.iter_lines():
|
||||||
if not line:
|
if not line:
|
||||||
|
|
@ -119,7 +124,6 @@ def _process_stream(resp):
|
||||||
|
|
||||||
modified = False
|
modified = False
|
||||||
|
|
||||||
# 提取 reasoning_content
|
|
||||||
for container_key in ('message', 'delta'):
|
for container_key in ('message', 'delta'):
|
||||||
container = event_data.get(container_key)
|
container = event_data.get(container_key)
|
||||||
if not container:
|
if not container:
|
||||||
|
|
@ -129,13 +133,17 @@ def _process_stream(resp):
|
||||||
reasoning_buf += rc
|
reasoning_buf += rc
|
||||||
modified = True
|
modified = True
|
||||||
|
|
||||||
# 在首个 text_delta 前注入 thinking blocks
|
|
||||||
if reasoning_buf and not injected:
|
if reasoning_buf and not injected:
|
||||||
if event_data.get('delta', {}).get('type') == 'text_delta':
|
if event_data.get('delta', {}).get('type') == 'text_delta':
|
||||||
injected = True
|
injected = True
|
||||||
yield from _emit_thinking_blocks(reasoning_buf)
|
yield from _emit_thinking_blocks(reasoning_buf)
|
||||||
|
index_offset = 1
|
||||||
reasoning_buf = ''
|
reasoning_buf = ''
|
||||||
|
|
||||||
|
if index_offset and 'index' in event_data:
|
||||||
|
event_data['index'] = event_data['index'] + index_offset
|
||||||
|
modified = True
|
||||||
|
|
||||||
yield f'data: {json.dumps(event_data)}\n\n' if modified else decoded + '\n\n'
|
yield f'data: {json.dumps(event_data)}\n\n' if modified else decoded + '\n\n'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
19
settings.py
19
settings.py
|
|
@ -5,13 +5,13 @@
|
||||||
- model_mappings: Cursor 模型名 → {upstream_model, backend, target_url, api_key, custom_instructions}
|
- model_mappings: Cursor 模型名 → {upstream_model, backend, target_url, api_key, custom_instructions}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import copy
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
from config import Config
|
from config import Config
|
||||||
|
|
||||||
# 数据目录放在项目根目录下,便于 Docker 卷挂载
|
|
||||||
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
DATA_DIR = os.path.join(_ROOT_DIR, 'data')
|
DATA_DIR = os.path.join(_ROOT_DIR, 'data')
|
||||||
SETTINGS_FILE = os.path.join(DATA_DIR, 'settings.json')
|
SETTINGS_FILE = os.path.join(DATA_DIR, 'settings.json')
|
||||||
|
|
@ -38,10 +38,10 @@ def load():
|
||||||
with open(SETTINGS_FILE, 'r', encoding='utf-8') as f:
|
with open(SETTINGS_FILE, 'r', encoding='utf-8') as f:
|
||||||
_cache = {**_DEFAULTS, **json.load(f)}
|
_cache = {**_DEFAULTS, **json.load(f)}
|
||||||
except (json.JSONDecodeError, OSError):
|
except (json.JSONDecodeError, OSError):
|
||||||
_cache = dict(_DEFAULTS)
|
_cache = copy.deepcopy(_DEFAULTS)
|
||||||
else:
|
else:
|
||||||
_cache = dict(_DEFAULTS)
|
_cache = copy.deepcopy(_DEFAULTS)
|
||||||
return dict(_cache)
|
return copy.deepcopy(_cache)
|
||||||
|
|
||||||
|
|
||||||
def save(data):
|
def save(data):
|
||||||
|
|
@ -58,10 +58,13 @@ def save(data):
|
||||||
|
|
||||||
|
|
||||||
def get():
|
def get():
|
||||||
"""获取当前配置快照,优先返回内存缓存中的结果。"""
|
"""获取当前配置的深拷贝快照,保证调用方修改不影响缓存。"""
|
||||||
if _cache is None:
|
with _lock:
|
||||||
return load()
|
if _cache is None:
|
||||||
return dict(_cache)
|
pass
|
||||||
|
else:
|
||||||
|
return copy.deepcopy(_cache)
|
||||||
|
return load()
|
||||||
|
|
||||||
|
|
||||||
def get_url():
|
def get_url():
|
||||||
|
|
|
||||||
|
|
@ -126,10 +126,15 @@ def iter_responses_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
|
||||||
|
|
||||||
|
|
||||||
def _iter_event_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
|
def _iter_event_sse(response) -> Iterator[tuple[str, dict[str, Any]]]:
|
||||||
"""解析带 event/data 的通用 SSE 流。"""
|
"""解析带 event/data 的通用 SSE 流。
|
||||||
|
|
||||||
|
SSE 规范中空行是事件分隔符,遇到空行时重置 event_type,
|
||||||
|
避免前一个事件的类型泄漏到下一个事件。
|
||||||
|
"""
|
||||||
event_type = ''
|
event_type = ''
|
||||||
for line in response.iter_lines():
|
for line in response.iter_lines():
|
||||||
if not line:
|
if not line:
|
||||||
|
event_type = ''
|
||||||
continue
|
continue
|
||||||
decoded = line.decode('utf-8', errors='replace')
|
decoded = line.decode('utf-8', errors='replace')
|
||||||
if decoded.startswith('event:'):
|
if decoded.startswith('event:'):
|
||||||
|
|
|
||||||
|
|
@ -30,24 +30,84 @@ class ThinkTagExtractor:
|
||||||
|
|
||||||
处理跨 chunk 的 <think>...</think> 标签,将标签内的文本
|
处理跨 chunk 的 <think>...</think> 标签,将标签内的文本
|
||||||
转为 reasoning_content delta,标签外的文本保持为 content delta。
|
转为 reasoning_content delta,标签外的文本保持为 content delta。
|
||||||
|
|
||||||
|
额外处理:
|
||||||
|
- content 和 tool_calls 同时出现时拆分为两个独立 chunk(Cursor 会丢弃同时包含两者的 content)
|
||||||
|
- tool_calls 首次出现时在前面插入换行,确保文本以换行结束
|
||||||
|
- 流结束时如果 think 标签仍未关闭,自动合成关闭 chunk
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
"""初始化跨 chunk 的 thinking 状态跟踪。"""
|
"""初始化跨 chunk 的 thinking 状态跟踪。"""
|
||||||
self._in_thinking = False
|
self._in_thinking = False
|
||||||
|
self._tool_calls_seen = False
|
||||||
|
|
||||||
def process_chunk(self, chunk):
|
def process_chunk(self, chunk):
|
||||||
"""处理一个流式 chunk,返回转换后的 chunk 列表"""
|
"""处理一个流式 chunk,返回转换后的 chunk 列表"""
|
||||||
for choice in (chunk.get('choices') or []):
|
for choice in (chunk.get('choices') or []):
|
||||||
delta = choice.get('delta') or {}
|
delta = choice.get('delta') or {}
|
||||||
|
|
||||||
|
has_tool_calls = bool(delta.get('tool_calls'))
|
||||||
|
has_content = delta.get('content') is not None and delta.get('content') != ''
|
||||||
|
|
||||||
|
# content 和 tool_calls 同时出现:拆分为两个独立事件
|
||||||
|
if has_content and has_tool_calls:
|
||||||
|
results = []
|
||||||
|
content_chunk = self._make(chunk, content=delta['content'])
|
||||||
|
results.extend(self._process_content(content_chunk, delta['content']))
|
||||||
|
tc_chunk = {
|
||||||
|
'id': chunk.get('id', ''),
|
||||||
|
'object': 'chat.completion.chunk',
|
||||||
|
'model': chunk.get('model', ''),
|
||||||
|
'choices': [{'index': 0, 'delta': {'tool_calls': delta['tool_calls']},
|
||||||
|
'finish_reason': choice.get('finish_reason')}],
|
||||||
|
}
|
||||||
|
results.extend(self._handle_tool_calls_chunk(tc_chunk))
|
||||||
|
return results
|
||||||
|
|
||||||
|
if has_tool_calls:
|
||||||
|
return self._handle_tool_calls_chunk(chunk)
|
||||||
|
|
||||||
if delta.get('reasoning_content'):
|
if delta.get('reasoning_content'):
|
||||||
return [chunk]
|
return [chunk]
|
||||||
content = delta.get('content')
|
content = delta.get('content')
|
||||||
if content is None or content == '':
|
if content is None or content == '':
|
||||||
return [chunk]
|
return [chunk]
|
||||||
return self._split(chunk, content)
|
return self._process_content(chunk, content)
|
||||||
return [chunk]
|
return [chunk]
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
"""流结束时调用,如果 think 标签仍未关闭则返回关闭 chunk"""
|
||||||
|
if not self._in_thinking:
|
||||||
|
return None
|
||||||
|
self._in_thinking = False
|
||||||
|
return {
|
||||||
|
'id': '',
|
||||||
|
'object': 'chat.completion.chunk',
|
||||||
|
'model': '',
|
||||||
|
'choices': [{'index': 0, 'delta': {'content': '\n</think>\n\n'}, 'finish_reason': None}],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _process_content(self, chunk, content):
|
||||||
|
"""处理包含 content 的 chunk"""
|
||||||
|
return self._split(chunk, content)
|
||||||
|
|
||||||
|
def _handle_tool_calls_chunk(self, chunk):
|
||||||
|
"""处理包含 tool_calls 的 chunk,首次出现时在前面插入换行"""
|
||||||
|
results = []
|
||||||
|
if not self._tool_calls_seen:
|
||||||
|
self._tool_calls_seen = True
|
||||||
|
if self._in_thinking:
|
||||||
|
self._in_thinking = False
|
||||||
|
results.append(self._make(chunk, content='\n</think>\n\n'))
|
||||||
|
else:
|
||||||
|
results.append(self._make(chunk, content='\n'))
|
||||||
|
elif self._in_thinking:
|
||||||
|
self._in_thinking = False
|
||||||
|
results.append(self._make(chunk, content='\n</think>\n\n'))
|
||||||
|
results.append(chunk)
|
||||||
|
return results
|
||||||
|
|
||||||
def _split(self, chunk, text):
|
def _split(self, chunk, text):
|
||||||
"""根据 <think> 标签拆分文本为多个 chunk"""
|
"""根据 <think> 标签拆分文本为多个 chunk"""
|
||||||
results = []
|
results = []
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue