feat: Improve tool sieve to correctly preserve trailing text within the same chunk as a tool call.

This commit is contained in:
CJACK
2026-02-18 17:24:43 +08:00
parent ce74b124d2
commit 7fc10573ab
4 changed files with 55 additions and 2 deletions

View File

@@ -220,7 +220,7 @@ function consumeToolCapture(state, toolNames) {
}
const prefixPart = captured.slice(0, start);
const suffixPart = captured.slice(obj.end);
if (!state.toolNameSent && (hasMeaningfulText(prefixPart) || hasMeaningfulText(suffixPart) || looksLikeToolExampleContext(state.recentTextTail))) {
if (!state.toolNameSent && (hasMeaningfulText(prefixPart) || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart))) {
return {
ready: true,
prefix: captured,

View File

@@ -183,3 +183,15 @@ test('sieve still intercepts tool call after leading plain text without suffix',
assert.equal(leakedText.includes('我将调用工具。'), true);
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
});
// A single chunk carries a complete tool_calls JSON object immediately
// followed by plain text. The sieve must surface the tool call, pass the
// trailing text through as ordinary output, and never emit the raw JSON.
test('sieve intercepts tool call and preserves trailing same-chunk text', () => {
  const chunk = '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}然后继续解释。';
  const events = runSieve([chunk], ['read_file']);

  // An event counts as a tool call when it is either a full `tool_calls`
  // event or an incremental `tool_call_deltas` event with a non-empty payload.
  const carriesToolCall = (evt) => {
    if (evt.type === 'tool_calls') {
      return evt.calls?.length > 0;
    }
    if (evt.type === 'tool_call_deltas') {
      return evt.deltas?.length > 0;
    }
    return false;
  };

  const hasTool = events.some(carriesToolCall);
  const leakedText = collectText(events);

  assert.equal(hasTool, true);
  // The text after the JSON object must survive as plain output...
  assert.equal(leakedText.includes('然后继续解释。'), true);
  // ...while the tool-call JSON itself must not appear in that output.
  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
});

View File

@@ -539,6 +539,47 @@ func TestHandleStreamToolCallAfterLeadingTextStillIntercepted(t *testing.T) {
}
}
// TestHandleStreamToolCallWithSameChunkTrailingTextStillIntercepted feeds
// handleStream one upstream SSE frame whose content is a tool_calls JSON object
// immediately followed by plain text. It expects the stream to end with [DONE],
// to contain a tool_calls delta, to keep the trailing text in the content
// deltas without leaking the raw tool JSON, and to finish with
// finish_reason=tool_calls.
func TestHandleStreamToolCallWithSameChunkTrailingTextStillIntercepted(t *testing.T) {
	const trailingText = "接下来我会继续说明。"

	handler := &Handler{}
	upstream := makeSSEHTTPResponse(
		`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}接下来我会继续说明。"}`,
		`data: [DONE]`,
	)
	recorder := httptest.NewRecorder()
	request := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
	handler.handleStream(recorder, request, upstream, "cid7c", "deepseek-chat", "prompt", false, false, []string{"search"})

	// The recorder is not written to after handleStream returns, so one
	// snapshot of the body serves both parsing and failure messages.
	body := recorder.Body.String()

	frames, done := parseSSEDataFrames(t, body)
	if !done {
		t.Fatalf("expected [DONE], body=%s", body)
	}
	if !streamHasToolCallsDelta(frames) {
		t.Fatalf("expected tool_calls delta, body=%s", body)
	}

	// Concatenate every string-valued delta.content across all choices of all
	// frames; frames or choices lacking the expected shape contribute nothing.
	var text strings.Builder
	for _, frame := range frames {
		choices, _ := frame["choices"].([]any)
		for _, raw := range choices {
			choice, _ := raw.(map[string]any)
			delta, _ := choice["delta"].(map[string]any)
			piece, isString := delta["content"].(string)
			if !isString {
				continue
			}
			text.WriteString(piece)
		}
	}

	got := text.String()
	if !strings.Contains(got, trailingText) {
		t.Fatalf("expected trailing plain text to be preserved, got=%q", got)
	}
	if strings.Contains(strings.ToLower(got), "tool_calls") {
		t.Fatalf("unexpected raw tool json leak, got=%q", got)
	}
	if reason := streamFinishReason(frames); reason != "tool_calls" {
		t.Fatalf("expected finish_reason=tool_calls, body=%s", body)
	}
}
func TestHandleStreamToolCallKeyAppearsLateStillNoPrefixLeak(t *testing.T) {
h := &Handler{}
spaces := strings.Repeat(" ", 200)

View File

@@ -227,7 +227,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
}
prefixPart := captured[:start]
suffixPart := captured[end:]
if !state.toolNameSent && (strings.TrimSpace(prefixPart) != "" || strings.TrimSpace(suffixPart) != "" || looksLikeToolExampleContext(state.recentTextTail)) {
if !state.toolNameSent && (strings.TrimSpace(prefixPart) != "" || looksLikeToolExampleContext(state.recentTextTail) || looksLikeToolExampleContext(suffixPart)) {
return captured, nil, "", true
}
parsed := util.ParseStandaloneToolCalls(obj, toolNames)