优化缓存命中问题
This commit is contained in:
parent
2f2a3cce41
commit
56faf4fcf1
3 changed files with 44 additions and 11 deletions
|
|
@@ -261,6 +261,12 @@ def _convert_request_message(message: Any) -> tuple[JsonDict | None, str | None]
|
||||||
anthropic_role = 'assistant' if role == 'assistant' else 'user'
|
anthropic_role = 'assistant' if role == 'assistant' else 'user'
|
||||||
anthropic_content = _convert_content(message)
|
anthropic_content = _convert_content(message)
|
||||||
|
|
||||||
|
if role == 'assistant' and message.get('reasoning_content'):
|
||||||
|
thinking_block = {'type': 'thinking', 'thinking': message['reasoning_content']}
|
||||||
|
blocks = _to_blocks(anthropic_content)
|
||||||
|
blocks.insert(0, thinking_block)
|
||||||
|
anthropic_content = blocks
|
||||||
|
|
||||||
if role == 'assistant' and 'tool_calls' in message:
|
if role == 'assistant' and 'tool_calls' in message:
|
||||||
anthropic_content = _append_tool_use_blocks(anthropic_content, message.get('tool_calls', []))
|
anthropic_content = _append_tool_use_blocks(anthropic_content, message.get('tool_calls', []))
|
||||||
|
|
||||||
|
|
@@ -463,6 +469,8 @@ def _convert_content_part(part: Any) -> JsonDict | None:
|
||||||
return {'type': 'text', 'text': part.get('text', '')}
|
return {'type': 'text', 'text': part.get('text', '')}
|
||||||
if part_type == 'image_url':
|
if part_type == 'image_url':
|
||||||
return _convert_image(part)
|
return _convert_image(part)
|
||||||
|
if part_type == 'image':
|
||||||
|
return part
|
||||||
if part_type in ('tool_use', 'tool_result'):
|
if part_type in ('tool_use', 'tool_result'):
|
||||||
return part
|
return part
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@@ -703,7 +703,11 @@ def _append_responses_input_item(
|
||||||
instructions: list[str],
|
instructions: list[str],
|
||||||
input_items: list[JsonDict],
|
input_items: list[JsonDict],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""将单条 Chat Completions 消息追加为 Responses `input` 项。"""
|
"""将单条 Chat Completions 消息追加为 Responses `input` 项。
|
||||||
|
|
||||||
|
尽量使用 EasyInputMessage 格式({role, content})以减少 token 开销,
|
||||||
|
提高上游 prompt caching 的前缀匹配命中率。
|
||||||
|
"""
|
||||||
if not isinstance(message, dict):
|
if not isinstance(message, dict):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@@ -724,21 +728,26 @@ def _append_responses_input_item(
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
item: JsonDict = {
|
text = _content_to_text(content)
|
||||||
'type': 'message',
|
has_tool_calls = bool(message.get('tool_calls'))
|
||||||
'role': role or 'user',
|
|
||||||
'content': _content_to_responses_parts(content, role),
|
|
||||||
}
|
|
||||||
input_items.append(item)
|
|
||||||
|
|
||||||
if role == 'assistant':
|
if role == 'assistant' and has_tool_calls:
|
||||||
|
if text:
|
||||||
|
input_items.append({
|
||||||
|
'type': 'message',
|
||||||
|
'role': 'assistant',
|
||||||
|
'content': [{'type': 'output_text', 'text': text}],
|
||||||
|
})
|
||||||
for tool_call in message.get('tool_calls') or []:
|
for tool_call in message.get('tool_calls') or []:
|
||||||
input_items.append(_build_responses_function_call_item(tool_call))
|
input_items.append(_build_responses_function_call_item(tool_call))
|
||||||
|
else:
|
||||||
|
input_items.append({'role': role or 'user', 'content': text or ''})
|
||||||
|
|
||||||
|
|
||||||
def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None:
|
def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None:
|
||||||
"""将 Responses `input` 数组重建为 Chat Completions `messages` 列表。"""
|
"""将 Responses `input` 数组重建为 Chat Completions `messages` 列表。"""
|
||||||
index = 0
|
index = 0
|
||||||
|
pending_reasoning: str | None = None
|
||||||
while index < len(items):
|
while index < len(items):
|
||||||
item = items[index]
|
item = items[index]
|
||||||
|
|
||||||
|
|
@@ -754,20 +763,35 @@ def _convert_input_items(items: list[Any], messages: list[JsonDict]) -> None:
|
||||||
item_type = item.get('type', '')
|
item_type = item.get('type', '')
|
||||||
role = item.get('role', '')
|
role = item.get('role', '')
|
||||||
|
|
||||||
|
if item_type == 'reasoning':
|
||||||
|
pending_reasoning = _extract_reasoning_text(item)
|
||||||
|
index += 1
|
||||||
|
continue
|
||||||
|
|
||||||
if role and not item_type:
|
if role and not item_type:
|
||||||
messages.append({
|
msg: JsonDict = {
|
||||||
'role': role,
|
'role': role,
|
||||||
'content': _normalize_simple_content(item.get('content', '')),
|
'content': _normalize_simple_content(item.get('content', '')),
|
||||||
})
|
}
|
||||||
|
if role == 'assistant' and pending_reasoning:
|
||||||
|
msg['reasoning_content'] = pending_reasoning
|
||||||
|
pending_reasoning = None
|
||||||
|
messages.append(msg)
|
||||||
index += 1
|
index += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if item_type == 'message':
|
if item_type == 'message':
|
||||||
consumed = _append_message_item(items, start=index, messages=messages)
|
consumed = _append_message_item(items, start=index, messages=messages)
|
||||||
|
if item.get('role') == 'assistant' and pending_reasoning and messages:
|
||||||
|
messages[-1]['reasoning_content'] = pending_reasoning
|
||||||
|
pending_reasoning = None
|
||||||
index += consumed
|
index += consumed
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if item_type == 'function_call':
|
if item_type == 'function_call':
|
||||||
|
if pending_reasoning and messages and messages[-1].get('role') == 'assistant':
|
||||||
|
messages[-1]['reasoning_content'] = pending_reasoning
|
||||||
|
pending_reasoning = None
|
||||||
_append_function_call_item(item, messages)
|
_append_function_call_item(item, messages)
|
||||||
index += 1
|
index += 1
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@@ -127,7 +127,8 @@ def chat_completions():
|
||||||
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
|
log_route_context('聊天补全', ctx, extra=f'消息数={message_count}')
|
||||||
_log_messages(payload)
|
_log_messages(payload)
|
||||||
|
|
||||||
payload['messages'] = thinking_cache.inject(payload.get('messages', []))
|
if ctx.backend != 'responses':
|
||||||
|
payload['messages'] = thinking_cache.inject(payload.get('messages', []))
|
||||||
|
|
||||||
if ctx.backend == 'openai':
|
if ctx.backend == 'openai':
|
||||||
return _handle_openai_backend(ctx, payload, turn)
|
return _handle_openai_backend(ctx, payload, turn)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue