尝试修复/v1/responses后端没有命中缓存的情况
This commit is contained in:
parent
cb7350b100
commit
251437a760
3 changed files with 22 additions and 0 deletions
|
|
@ -42,6 +42,7 @@ from routes.common import (
|
||||||
build_responses_target,
|
build_responses_target,
|
||||||
build_route_context,
|
build_route_context,
|
||||||
chat_error_chunk,
|
chat_error_chunk,
|
||||||
|
ensure_prompt_cache_key,
|
||||||
inject_instructions_anthropic,
|
inject_instructions_anthropic,
|
||||||
inject_instructions_cc,
|
inject_instructions_cc,
|
||||||
inject_instructions_responses,
|
inject_instructions_responses,
|
||||||
|
|
@ -312,6 +313,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
|
||||||
responses_payload = cc_to_responses_request(payload)
|
responses_payload = cc_to_responses_request(payload)
|
||||||
responses_payload['model'] = ctx.upstream_model
|
responses_payload['model'] = ctx.upstream_model
|
||||||
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
|
responses_payload = inject_instructions_responses(responses_payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
|
responses_payload = ensure_prompt_cache_key(responses_payload)
|
||||||
_dbg(
|
_dbg(
|
||||||
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
'已转换为 Responses 请求:字段=' + str(list(responses_payload.keys()))
|
||||||
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
+ f' 输入项数={len(responses_payload.get("input", []))}'
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ SSE 消息拼装逻辑,避免 `chat.py` 和 `responses.py` 各自维护重复
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
@ -218,6 +219,23 @@ def inject_instructions_anthropic(payload: dict[str, Any], instructions: str, po
|
||||||
# ─── Body / Header 修改 ──────────────────────────
|
# ─── Body / Header 修改 ──────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_prompt_cache_key(payload: dict[str, Any]) -> dict[str, Any]:
    """Ensure a Responses request carries a ``prompt_cache_key``.

    The upstream (e.g. sub2api) does not generate a ``prompt_cache_key`` on
    its own for native ``/v1/responses`` requests, so prompt caching never
    hits. A stable key is derived here from the model name and the
    ``instructions`` text, so conversations that share the same model and
    system prompt can share a cached prefix.

    Args:
        payload: The Responses request body. It is modified in place when a
            key has to be generated.

    Returns:
        The same ``payload`` object, guaranteed to contain a truthy
        ``prompt_cache_key``. A key supplied by the caller is kept as-is.
    """
    # A caller-supplied key wins; missing, None and '' all count as "absent".
    if payload.get('prompt_cache_key'):
        return payload

    model = payload.get('model')
    instructions = payload.get('instructions')
    # An explicit JSON null must hash the same as an omitted field; otherwise
    # the literal text "None" leaks into the seed and splits the cache.
    model_part = '' if model is None else model
    instructions_part = '' if instructions is None else instructions
    seed = f'{model_part}|{instructions_part}'

    # json.loads can yield lone surrogates (e.g. from "\ud800"); a strict
    # UTF-8 encode would raise and fail the request. 'surrogatepass' keeps
    # the digest unchanged for every well-formed string.
    digest = hashlib.sha256(seed.encode('utf-8', errors='surrogatepass')).hexdigest()
    payload['prompt_cache_key'] = digest[:32]
    return payload
|
||||||
|
|
||||||
|
|
||||||
def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]:
|
def apply_body_modifications(payload: dict[str, Any], modifications: dict[str, Any]) -> dict[str, Any]:
|
||||||
"""对转发请求体应用字段级修改。
|
"""对转发请求体应用字段级修改。
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ from routes.common import (
|
||||||
build_openai_target,
|
build_openai_target,
|
||||||
build_responses_target,
|
build_responses_target,
|
||||||
build_route_context,
|
build_route_context,
|
||||||
|
ensure_prompt_cache_key,
|
||||||
inject_instructions_anthropic,
|
inject_instructions_anthropic,
|
||||||
inject_instructions_cc,
|
inject_instructions_cc,
|
||||||
inject_instructions_responses,
|
inject_instructions_responses,
|
||||||
|
|
@ -247,6 +248,7 @@ def _handle_responses_backend(ctx: RouteContext, payload: dict[str, Any], turn:
|
||||||
payload = dict(payload)
|
payload = dict(payload)
|
||||||
payload['model'] = ctx.upstream_model
|
payload['model'] = ctx.upstream_model
|
||||||
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
payload = inject_instructions_responses(payload, ctx.custom_instructions, ctx.instructions_position)
|
||||||
|
payload = ensure_prompt_cache_key(payload)
|
||||||
url, headers = build_responses_target(ctx)
|
url, headers = build_responses_target(ctx)
|
||||||
payload = apply_body_modifications(payload, ctx.body_modifications)
|
payload = apply_body_modifications(payload, ctx.body_modifications)
|
||||||
headers = apply_header_modifications(headers, ctx.header_modifications)
|
headers = apply_header_modifications(headers, ctx.header_modifications)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue