fix: coalesce small stream deltas to prevent character swallowing; add read-tool cache guard

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-01 13:53:27 +08:00
parent 92e321fe2c
commit 2671298439
11 changed files with 349 additions and 54 deletions

View File

@@ -160,6 +160,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认
工具 schema 的权威来源始终是**当前请求实际携带的 schema**,而不是同名工具在其他 runtimeClaude Code / OpenCode / Codex 等)里的默认印象。兼容层现在会同时兼容 OpenAI 风格 `function.parameters`、直接工具对象上的 `parameters` / `input_schema`、以及 camelCase 的 `inputSchema` / `schema`,并在最终输出阶段按这份请求内 schema 决定是保留 array/object还是仅对明确声明为 `string` 的路径做字符串化。该规则同样适用于 Claude 的流式收尾和 Vercel Node 流式 tool-call formatter避免不同 runtime 因 schema shape 差异而出现同名工具参数类型漂移。
正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。
对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command``exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。
如果当前请求声明了 `Read` / `read_file` 这类读取工具,兼容层会额外注入一条 read-tool cache guard当读取结果只表示“文件未变更 / 已在历史中 / 请引用先前上下文 / 没有正文内容”时模型必须把它视为内容不可用不能反复调用同一个无正文读取应改为请求完整正文读取能力或向用户说明需要重新提供文件内容。这个约束只缓解客户端缓存返回空内容导致的死循环DS2API 不会也无法凭空恢复客户端本地文件正文。
OpenAI 路径实现:
[internal/promptcompat/tool_prompt.go](../internal/promptcompat/tool_prompt.go)

View File

@@ -53,6 +53,32 @@ type chatStreamRuntime struct {
finalErrorCode string
}
type chatDeltaBatch struct {
runtime *chatStreamRuntime
field string
text strings.Builder
}
func (b *chatDeltaBatch) append(field, text string) {
if text == "" {
return
}
if b.field != "" && b.field != field {
b.flush()
}
b.field = field
b.text.WriteString(text)
}
func (b *chatDeltaBatch) flush() {
if b.field == "" || b.text.Len() == 0 {
return
}
b.runtime.sendDelta(map[string]any{b.field: b.text.String()})
b.field = ""
b.text.Reset()
}
func newChatStreamRuntime(
w http.ResponseWriter,
rc *http.ResponseController,
@@ -164,42 +190,22 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
detected := detectAssistantToolCalls(s.rawText.String(), finalText, s.rawThinking.String(), finalToolDetectionThinking, s.toolNames)
if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
finishReason = "tool_calls"
delta := map[string]any{
s.sendDelta(map[string]any{
"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs, s.toolsRaw),
}
if !s.firstChunkSent {
delta["role"] = "assistant"
s.firstChunkSent = true
}
s.sendChunk(openaifmt.BuildChatStreamChunk(
s.completionID,
s.created,
s.model,
[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
nil,
))
})
s.toolCallsEmitted = true
s.toolCallsDoneEmitted = true
} else if s.bufferToolContent {
batch := chatDeltaBatch{runtime: s}
for _, evt := range toolstream.Flush(&s.toolSieve, s.toolNames) {
if len(evt.ToolCalls) > 0 {
batch.flush()
finishReason = "tool_calls"
s.toolCallsEmitted = true
s.toolCallsDoneEmitted = true
tcDelta := map[string]any{
s.sendDelta(map[string]any{
"tool_calls": formatFinalStreamToolCallsWithStableIDs(evt.ToolCalls, s.streamToolCallIDs, s.toolsRaw),
}
if !s.firstChunkSent {
tcDelta["role"] = "assistant"
s.firstChunkSent = true
}
s.sendChunk(openaifmt.BuildChatStreamChunk(
s.completionID,
s.created,
s.model,
[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, tcDelta)},
nil,
))
})
s.resetStreamToolCallState()
}
if evt.Content == "" {
@@ -209,21 +215,9 @@ func (s *chatStreamRuntime) finalize(finishReason string, deferEmptyOutput bool)
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
continue
}
delta := map[string]any{
"content": cleaned,
}
if !s.firstChunkSent {
delta["role"] = "assistant"
s.firstChunkSent = true
}
s.sendChunk(openaifmt.BuildChatStreamChunk(
s.completionID,
s.created,
s.model,
[]map[string]any{openaifmt.BuildChatStreamDeltaChoice(0, delta)},
nil,
))
batch.append("content", cleaned)
}
batch.flush()
}
if len(detected.Calls) > 0 || s.toolCallsEmitted {
@@ -275,6 +269,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
}
contentSeen := false
batch := chatDeltaBatch{runtime: s}
for _, p := range parsed.ToolDetectionThinkingParts {
trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text)
if trimmed != "" {
@@ -298,7 +293,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
continue
}
s.thinking.WriteString(trimmed)
s.sendDelta(map[string]any{"reasoning_content": trimmed})
batch.append("reasoning_content", trimmed)
}
} else {
rawTrimmed := sse.TrimContinuationOverlap(s.rawText.String(), p.Text)
@@ -319,7 +314,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
if trimmed == "" {
continue
}
s.sendDelta(map[string]any{"content": trimmed})
batch.append("content", trimmed)
} else {
events := toolstream.ProcessChunk(&s.toolSieve, rawTrimmed, s.toolNames)
for _, evt := range events {
@@ -335,6 +330,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
if len(formatted) == 0 {
continue
}
batch.flush()
tcDelta := map[string]any{
"tool_calls": formatted,
}
@@ -343,6 +339,7 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
continue
}
if len(evt.ToolCalls) > 0 {
batch.flush()
s.toolCallsEmitted = true
s.toolCallsDoneEmitted = true
tcDelta := map[string]any{
@@ -357,14 +354,12 @@ func (s *chatStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedD
if cleaned == "" || (s.searchEnabled && sse.IsCitation(cleaned)) {
continue
}
contentDelta := map[string]any{
"content": cleaned,
}
s.sendDelta(contentDelta)
batch.append("content", cleaned)
}
}
}
}
}
batch.flush()
return streamengine.ParsedDecision{ContentSeen: contentSeen}
}

View File

@@ -309,6 +309,49 @@ func TestHandleStreamEmitsSingleChoiceFramesForMultipleParsedParts(t *testing.T)
}
}
func TestHandleStreamCoalescesSmallContentDeltas(t *testing.T) {
h := &Handler{}
lines := make([]string, 0, 101)
for i := 0; i < 100; i++ {
b, _ := json.Marshal(map[string]any{
"p": "response/content",
"v": "字",
})
lines = append(lines, "data: "+string(b))
}
lines = append(lines, "data: [DONE]")
resp := makeSSEHTTPResponse(lines...)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
h.handleStream(rec, req, resp, "cid-coalesce", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, nil)
frames, done := parseSSEDataFrames(t, rec.Body.String())
if !done {
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
}
var content strings.Builder
contentDeltaFrames := 0
for _, frame := range frames {
choices, _ := frame["choices"].([]any)
if len(choices) != 1 {
t.Fatalf("expected exactly one choice per stream frame, got %d frame=%#v body=%s", len(choices), frame, rec.Body.String())
}
choice, _ := choices[0].(map[string]any)
delta, _ := choice["delta"].(map[string]any)
if c, ok := delta["content"].(string); ok {
contentDeltaFrames++
content.WriteString(c)
}
}
if got, want := content.String(), strings.Repeat("字", 100); got != want {
t.Fatalf("coalesced stream content mismatch: got %q want %q body=%s", got, want, rec.Body.String())
}
if contentDeltaFrames >= 100 {
t.Fatalf("expected coalescing to reduce 100 tiny content frames, got %d body=%s", contentDeltaFrames, rec.Body.String())
}
}
func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(

View File

@@ -0,0 +1,39 @@
package responses
import (
"strings"
openaifmt "ds2api/internal/format/openai"
)
type responsesDeltaBatch struct {
runtime *responsesStreamRuntime
kind string
text strings.Builder
}
func (b *responsesDeltaBatch) append(kind, text string) {
if text == "" {
return
}
if b.kind != "" && b.kind != kind {
b.flush()
}
b.kind = kind
b.text.WriteString(text)
}
func (b *responsesDeltaBatch) flush() {
if b.kind == "" || b.text.Len() == 0 {
return
}
text := b.text.String()
switch b.kind {
case "reasoning":
b.runtime.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(b.runtime.responseID, text))
case "text":
b.runtime.emitTextDelta(text)
}
b.kind = ""
b.text.Reset()
}

View File

@@ -222,6 +222,7 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
}
contentSeen := false
batch := responsesDeltaBatch{runtime: s}
for _, p := range parsed.ToolDetectionThinkingParts {
trimmed := sse.TrimContinuationOverlap(s.toolDetectionThinking.String(), p.Text)
if trimmed != "" {
@@ -247,7 +248,7 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
continue
}
s.thinking.WriteString(trimmed)
s.sendEvent("response.reasoning.delta", openaifmt.BuildResponsesReasoningDeltaPayload(s.responseID, trimmed))
batch.append("reasoning", trimmed)
continue
}
@@ -269,11 +270,13 @@ func (s *responsesStreamRuntime) onParsed(parsed sse.LineResult) streamengine.Pa
if trimmed == "" {
continue
}
s.emitTextDelta(trimmed)
batch.append("text", trimmed)
continue
}
batch.flush()
s.processToolStreamEvents(toolstream.ProcessChunk(&s.sieve, rawTrimmed, s.toolNames), true, true)
}
batch.flush()
return streamengine.ParsedDecision{ContentSeen: contentSeen}
}

View File

@@ -109,6 +109,48 @@ func TestHandleResponsesStreamOutputTextDeltaCarriesItemIndexes(t *testing.T) {
}
}
func TestHandleResponsesStreamCoalescesSmallOutputTextDeltas(t *testing.T) {
h := &Handler{}
req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)
rec := httptest.NewRecorder()
var streamBody strings.Builder
for i := 0; i < 100; i++ {
b, _ := json.Marshal(map[string]any{
"p": "response/content",
"v": "字",
})
streamBody.WriteString("data: ")
streamBody.WriteString(string(b))
streamBody.WriteString("\n")
}
streamBody.WriteString("data: [DONE]\n")
resp := &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(strings.NewReader(streamBody.String())),
}
h.handleResponsesStream(rec, req, resp, "owner-a", "resp_coalesce", "deepseek-v4-flash", "prompt", 0, false, false, nil, nil, promptcompat.DefaultToolChoicePolicy(), "")
payloads := extractSSEEventPayloads(rec.Body.String(), "response.output_text.delta")
if len(payloads) == 0 {
t.Fatalf("expected response.output_text.delta payloads, body=%s", rec.Body.String())
}
var content strings.Builder
for _, payload := range payloads {
content.WriteString(asString(payload["delta"]))
}
if got, want := content.String(), strings.Repeat("字", 100); got != want {
t.Fatalf("coalesced response content mismatch: got %q want %q body=%s", got, want, rec.Body.String())
}
if len(payloads) >= 100 {
t.Fatalf("expected coalescing to reduce 100 tiny text deltas, got %d body=%s", len(payloads), rec.Body.String())
}
if !strings.Contains(rec.Body.String(), "event: response.completed") {
t.Fatalf("expected completed event, body=%s", rec.Body.String())
}
}
func TestHandleResponsesStreamEmitsDistinctToolCallIDsAcrossSeparateToolBlocks(t *testing.T) {
h := &Handler{}
req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil)

View File

@@ -1,5 +1,8 @@
'use strict';
const MIN_DELTA_FLUSH_CHARS = 160;
const MAX_DELTA_FLUSH_WAIT_MS = 80;
function createChatCompletionEmitter({ res, sessionID, created, model, isClosed }) {
let firstChunkSent = false;
@@ -34,6 +37,62 @@ function createChatCompletionEmitter({ res, sessionID, created, model, isClosed
};
}
function createDeltaCoalescer({ sendDeltaFrame, minFlushChars = MIN_DELTA_FLUSH_CHARS, maxFlushWaitMS = MAX_DELTA_FLUSH_WAIT_MS }) {
let pendingField = '';
let pendingText = '';
let flushTimer = null;
const clearFlushTimer = () => {
if (flushTimer) {
clearTimeout(flushTimer);
flushTimer = null;
}
};
const flush = () => {
clearFlushTimer();
if (!pendingField || !pendingText) {
return;
}
const delta = { [pendingField]: pendingText };
pendingField = '';
pendingText = '';
sendDeltaFrame(delta);
};
const scheduleFlush = () => {
if (flushTimer || maxFlushWaitMS <= 0) {
return;
}
flushTimer = setTimeout(flush, maxFlushWaitMS);
if (typeof flushTimer.unref === 'function') {
flushTimer.unref();
}
};
const append = (field, text) => {
if (!field || !text) {
return;
}
if (pendingField && pendingField !== field) {
flush();
}
pendingField = field;
pendingText += text;
if ([...pendingText].length >= minFlushChars) {
flush();
return;
}
scheduleFlush();
};
return {
append,
flush,
};
}
module.exports = {
createChatCompletionEmitter,
createDeltaCoalescer,
};

View File

@@ -20,7 +20,7 @@ const {
boolDefaultTrue,
resetStreamToolCallState,
} = require('./toolcall_policy');
const { createChatCompletionEmitter } = require('./stream_emitter');
const { createChatCompletionEmitter, createDeltaCoalescer } = require('./stream_emitter');
const {
asString,
isAbortError,
@@ -191,6 +191,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
model,
isClosed: () => clientClosed,
});
const deltaCoalescer = createDeltaCoalescer({ sendDeltaFrame });
const finish = async (reason, options = {}) => {
if (ended) {
@@ -201,6 +202,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
await releaseLease();
return true;
}
deltaCoalescer.flush();
const detected = parseStandaloneToolCalls(outputText, toolNames);
if (detected.length > 0 && !toolCallsDoneEmitted) {
toolCallsEmitted = true;
@@ -210,6 +212,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
const tailEvents = flushToolSieve(toolSieveState, toolNames);
for (const evt of tailEvents) {
if (evt.type === 'tool_calls' && Array.isArray(evt.calls) && evt.calls.length > 0) {
deltaCoalescer.flush();
toolCallsEmitted = true;
toolCallsDoneEmitted = true;
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs, payload.tools) });
@@ -217,9 +220,10 @@ async function handleVercelStream(req, res, rawBody, payload) {
continue;
}
if (evt.text) {
sendDeltaFrame({ content: evt.text });
deltaCoalescer.append('content', evt.text);
}
}
deltaCoalescer.flush();
}
if (detected.length > 0 || toolCallsEmitted) {
reason = 'tool_calls';
@@ -327,7 +331,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
continue;
}
thinkingText += trimmed;
sendDeltaFrame({ reasoning_content: trimmed });
deltaCoalescer.append('reasoning_content', trimmed);
}
} else {
const trimmed = trimContinuationOverlap(outputText, p.text);
@@ -339,7 +343,7 @@ async function handleVercelStream(req, res, rawBody, payload) {
}
outputText += trimmed;
if (!toolSieveEnabled) {
sendDeltaFrame({ content: trimmed });
deltaCoalescer.append('content', trimmed);
continue;
}
const events = processToolSieveChunk(toolSieveState, trimmed, toolNames);
@@ -352,19 +356,21 @@ async function handleVercelStream(req, res, rawBody, payload) {
const formatted = formatIncrementalToolCallDeltas(filtered, streamToolCallIDs);
if (formatted.length > 0) {
toolCallsEmitted = true;
sendDeltaFrame({ tool_calls: formatted });
deltaCoalescer.flush();
sendDeltaFrame({ tool_calls: formatted });
}
continue;
}
if (evt.type === 'tool_calls') {
toolCallsEmitted = true;
toolCallsDoneEmitted = true;
deltaCoalescer.flush();
sendDeltaFrame({ tool_calls: formatOpenAIStreamToolCalls(evt.calls, streamToolCallIDs, payload.tools) });
resetStreamToolCallState(streamToolCallIDs, streamToolNames);
continue;
}
if (evt.text) {
sendDeltaFrame({ content: evt.text });
deltaCoalescer.append('content', evt.text);
}
}
}

View File

@@ -88,6 +88,58 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
}
}
func TestBuildOpenAIFinalPromptReadLikeToolIncludesCacheGuard(t *testing.T) {
messages := []any{
map[string]any{"role": "user", "content": "请读取文件"},
}
tools := []any{
map[string]any{
"type": "function",
"function": map[string]any{
"name": "read_file",
"description": "Read a file",
"parameters": map[string]any{
"type": "object",
},
},
},
}
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
if !strings.Contains(finalPrompt, "Read-tool cache guard") {
t.Fatalf("read-like tool prompt missing cache guard: %q", finalPrompt)
}
if !strings.Contains(finalPrompt, "provides no file body") {
t.Fatalf("read-like tool prompt missing no-body handling: %q", finalPrompt)
}
if !strings.Contains(finalPrompt, "Do not repeatedly call the same read request") {
t.Fatalf("read-like tool prompt missing loop guard: %q", finalPrompt)
}
}
func TestBuildOpenAIFinalPromptNonReadToolOmitsCacheGuard(t *testing.T) {
messages := []any{
map[string]any{"role": "user", "content": "搜索一下"},
}
tools := []any{
map[string]any{
"type": "function",
"function": map[string]any{
"name": "search",
"description": "Search docs",
"parameters": map[string]any{
"type": "object",
},
},
},
}
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
if strings.Contains(finalPrompt, "Read-tool cache guard") {
t.Fatalf("non-read tool prompt should not include read cache guard: %q", finalPrompt)
}
}
func TestBuildOpenAIFinalPromptWithThinkingKeepsPromptUnchanged(t *testing.T) {
messages := []any{
map[string]any{"role": "user", "content": "继续回答上一个问题"},

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"strings"
"unicode"
"ds2api/internal/toolcall"
)
@@ -46,6 +47,9 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoiceP
return messages, names
}
toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\n" + toolcall.BuildToolCallInstructions(names)
if hasReadLikeTool(names) {
toolPrompt += "\n\nRead-tool cache guard: If a Read/read_file-style tool result says the file is unchanged, already available in history, should be referenced from previous context, or otherwise provides no file body, treat that result as missing content. Do not repeatedly call the same read request for that missing body. Request a full-content read if the tool supports it, or tell the user that the file contents need to be provided again."
}
if policy.Mode == ToolChoiceRequired {
toolPrompt += "\n7) For this response, you MUST call at least one tool from the allowed list."
}
@@ -64,3 +68,23 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy ToolChoiceP
messages = append([]map[string]any{{"role": "system", "content": toolPrompt}}, messages...)
return messages, names
}
func hasReadLikeTool(names []string) bool {
for _, name := range names {
switch normalizeToolNameForGuard(name) {
case "read", "readfile":
return true
}
}
return false
}
func normalizeToolNameForGuard(name string) string {
var b strings.Builder
for _, r := range strings.ToLower(strings.TrimSpace(name)) {
if unicode.IsLetter(r) || unicode.IsDigit(r) {
b.WriteRune(r)
}
}
return b.String()
}

View File

@@ -210,6 +210,37 @@ test('vercel stream retries empty output once and keeps one terminal frame', asy
assert.match(completionBodies[1].prompt, /Previous reply had no visible output\. Please regenerate the visible final answer or tool call now\.$/);
});
test('vercel stream coalesces many small content deltas while keeping one choice', async () => {
const lines = Array.from({ length: 100 }, () => `data: ${JSON.stringify({ p: 'response/content', v: '字' })}\n\n`);
lines.push('data: [DONE]\n\n');
const { frames } = await runMockVercelStream(lines);
const parsed = frames.filter((frame) => frame !== '[DONE]').map((frame) => JSON.parse(frame));
const contentFrames = parsed.filter((item) => item.choices?.[0]?.delta?.content);
const content = contentFrames.map((item) => item.choices[0].delta.content).join('');
assert.equal(content, '字'.repeat(100));
assert.ok(contentFrames.length < 100, `expected fewer than 100 content frames, got ${contentFrames.length}`);
for (const item of parsed) {
assert.equal(item.choices.length, 1);
}
});
test('vercel stream flushes reasoning before content and before stop', async () => {
const { frames } = await runMockVercelStream([
`data: ${JSON.stringify({ p: 'response/fragments', o: 'APPEND', v: [
{ type: 'THINK', content: '思考' },
{ type: 'THINK', content: '过程' },
{ type: 'RESPONSE', content: '回答' },
] })}\n\n`,
'data: [DONE]\n\n',
], { thinking_enabled: true });
const parsed = frames.filter((frame) => frame !== '[DONE]').map((frame) => JSON.parse(frame));
const reasoning = parsed.map((item) => item.choices?.[0]?.delta?.reasoning_content || '').join('');
const content = parsed.map((item) => item.choices?.[0]?.delta?.content || '').join('');
assert.equal(reasoning, '思考过程');
assert.equal(content, '回答');
assert.equal(parsed.at(-1).choices[0].finish_reason, 'stop');
});
test('vercel stream exhausts DeepSeek continue before synthetic retry', async () => {
const { frames, fetchURLs, fetchBodies } = await runMockVercelStreamSequence([
[