mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-06 17:35:30 +08:00
fix: prevent XML tool call leakage by strictly matching opening and closing tag pairs during streaming
This commit is contained in:
@@ -114,8 +114,14 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea
|
||||
} else {
|
||||
content := state.capture.String()
|
||||
if content != "" {
|
||||
state.noteText(content)
|
||||
events = append(events, toolStreamEvent{Content: content})
|
||||
// If the captured text looks like an incomplete XML tool call block,
|
||||
// swallow it to prevent leaking raw XML tags to the client.
|
||||
if hasOpenXMLToolTag(content) {
|
||||
// Drop it silently — incomplete tool call.
|
||||
} else {
|
||||
state.noteText(content)
|
||||
events = append(events, toolStreamEvent{Content: content})
|
||||
}
|
||||
}
|
||||
}
|
||||
state.capture.Reset()
|
||||
@@ -200,6 +206,11 @@ func findToolSegmentStart(s string) int {
|
||||
if start < 0 {
|
||||
start = bestKeyIdx
|
||||
}
|
||||
// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
|
||||
// back up past the '<' to capture the full tag.
|
||||
if start > 0 && s[start-1] == '<' {
|
||||
start--
|
||||
}
|
||||
if fenceStart, ok := openFenceStartBefore(s, start); ok {
|
||||
return fenceStart
|
||||
}
|
||||
|
||||
@@ -12,6 +12,17 @@ import (
|
||||
var xmlToolCallClosingTags = []string{"</tool_calls>", "</tool_call>", "</invoke>", "</function_call>", "</function_calls>", "</tool_use>"}
|
||||
var xmlToolCallOpeningTags = []string{"<tool_calls", "<tool_call", "<invoke", "<function_call", "<function_calls", "<tool_use"}
|
||||
|
||||
// xmlToolCallTagPairs maps each opening tag to its expected closing tag.
|
||||
// Order matters: longer/wrapper tags must be checked first.
|
||||
var xmlToolCallTagPairs = []struct{ open, close string }{
|
||||
{"<tool_calls", "</tool_calls>"},
|
||||
{"<tool_call", "</tool_call>"},
|
||||
{"<function_calls", "</function_calls>"},
|
||||
{"<function_call", "</function_call>"},
|
||||
{"<invoke", "</invoke>"},
|
||||
{"<tool_use", "</tool_use>"},
|
||||
}
|
||||
|
||||
// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone).
|
||||
var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(<tool_calls>\s*(?:.*?)\s*</tool_calls>|<tool_call>\s*(?:.*?)\s*</tool_call>|<invoke\b[^>]*>(?:.*?)</invoke>|<function_calls?\b[^>]*>(?:.*?)</function_calls?>|<tool_use>(?:.*?)</tool_use>)`)
|
||||
|
||||
@@ -22,59 +33,45 @@ var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_call>
|
||||
// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
|
||||
func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
|
||||
lower := strings.ToLower(captured)
|
||||
// Find the earliest XML tool opening tag.
|
||||
openIdx := -1
|
||||
for _, tag := range xmlToolCallOpeningTags {
|
||||
idx := strings.Index(lower, tag)
|
||||
if idx >= 0 && (openIdx < 0 || idx < openIdx) {
|
||||
openIdx = idx
|
||||
// Find the FIRST matching open/close pair, preferring wrapper tags.
|
||||
// Tag pairs are ordered longest-first (e.g. <tool_calls before <tool_call)
|
||||
// so wrapper tags are checked before inner tags.
|
||||
for _, pair := range xmlToolCallTagPairs {
|
||||
openIdx := strings.Index(lower, pair.open)
|
||||
if openIdx < 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if openIdx < 0 {
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
// Look for a matching closing tag.
|
||||
closeIdx := -1
|
||||
for _, tag := range xmlToolCallClosingTags {
|
||||
idx := strings.Index(lower[openIdx:], tag)
|
||||
if idx >= 0 {
|
||||
absEnd := openIdx + idx + len(tag)
|
||||
if closeIdx < 0 || absEnd > closeIdx {
|
||||
closeIdx = absEnd
|
||||
}
|
||||
// Find the LAST occurrence of the specific closing tag to get the outermost block.
|
||||
closeIdx := strings.LastIndex(lower, pair.close)
|
||||
if closeIdx < openIdx {
|
||||
// Opening tag is present but its specific closing tag hasn't arrived.
|
||||
// Return not-ready so we keep buffering — do NOT fall through to
|
||||
// try inner pairs (e.g. <tool_call inside <tool_calls).
|
||||
return "", nil, "", false
|
||||
}
|
||||
}
|
||||
if closeIdx <= 0 {
|
||||
return "", nil, "", false
|
||||
}
|
||||
closeEnd := closeIdx + len(pair.close)
|
||||
|
||||
xmlBlock := captured[openIdx:closeIdx]
|
||||
prefixPart := captured[:openIdx]
|
||||
suffixPart := captured[closeIdx:]
|
||||
parsed := util.ParseToolCalls(xmlBlock, toolNames)
|
||||
if len(parsed) > 0 {
|
||||
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
|
||||
return prefixPart, parsed, suffixPart, true
|
||||
xmlBlock := captured[openIdx:closeEnd]
|
||||
prefixPart := captured[:openIdx]
|
||||
suffixPart := captured[closeEnd:]
|
||||
parsed := util.ParseToolCalls(xmlBlock, toolNames)
|
||||
if len(parsed) > 0 {
|
||||
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
|
||||
return prefixPart, parsed, suffixPart, true
|
||||
}
|
||||
// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
|
||||
return prefixPart, nil, suffixPart, true
|
||||
}
|
||||
// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
|
||||
return prefixPart, nil, suffixPart, true
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
|
||||
// but no corresponding closing tag yet.
|
||||
// whose SPECIFIC closing tag has not appeared yet.
|
||||
func hasOpenXMLToolTag(captured string) bool {
|
||||
lower := strings.ToLower(captured)
|
||||
for _, tag := range xmlToolCallOpeningTags {
|
||||
if strings.Contains(lower, tag) {
|
||||
hasClosed := false
|
||||
for _, ct := range xmlToolCallClosingTags {
|
||||
if strings.Contains(lower, ct) {
|
||||
hasClosed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasClosed {
|
||||
for _, pair := range xmlToolCallTagPairs {
|
||||
if strings.Contains(lower, pair.open) {
|
||||
if !strings.Contains(lower, pair.close) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,3 +153,135 @@ func TestHasOpenXMLToolTag(t *testing.T) {
|
||||
t.Fatal("should return false for plain text")
|
||||
}
|
||||
}
|
||||
|
||||
// Test the EXACT scenario the user reports: token-by-token streaming where
|
||||
// <tool_calls> tag arrives in small pieces.
|
||||
func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// Simulate DeepSeek model generating tokens one at a time.
|
||||
chunks := []string{
|
||||
"<",
|
||||
"tool",
|
||||
"_calls",
|
||||
">\n",
|
||||
" <",
|
||||
"tool",
|
||||
"_call",
|
||||
">\n",
|
||||
" <",
|
||||
"tool",
|
||||
"_name",
|
||||
">",
|
||||
"read",
|
||||
"_file",
|
||||
"</",
|
||||
"tool",
|
||||
"_name",
|
||||
">\n",
|
||||
" <",
|
||||
"parameters",
|
||||
">",
|
||||
`{"path"`,
|
||||
`: "README.MD"`,
|
||||
`}`,
|
||||
"</",
|
||||
"parameters",
|
||||
">\n",
|
||||
" </",
|
||||
"tool",
|
||||
"_call",
|
||||
">\n",
|
||||
"</",
|
||||
"tool",
|
||||
"_calls",
|
||||
">",
|
||||
}
|
||||
var events []toolStreamEvent
|
||||
for _, c := range chunks {
|
||||
events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent string
|
||||
var toolCalls int
|
||||
for _, evt := range events {
|
||||
if evt.Content != "" {
|
||||
textContent += evt.Content
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "<tool_call") {
|
||||
t.Fatalf("XML tool call content leaked to text in token-by-token mode: %q", textContent)
|
||||
}
|
||||
if strings.Contains(textContent, "tool_calls>") {
|
||||
t.Fatalf("closing tag fragment leaked to text: %q", textContent)
|
||||
}
|
||||
if strings.Contains(textContent, "read_file") {
|
||||
t.Fatalf("tool name leaked to text: %q", textContent)
|
||||
}
|
||||
if toolCalls == 0 {
|
||||
t.Fatal("expected tool calls to be extracted, got none")
|
||||
}
|
||||
}
|
||||
|
||||
// Test that flushToolSieve on incomplete XML does NOT leak the raw XML content.
|
||||
func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// XML block starts but stream ends before completion.
|
||||
chunks := []string{
|
||||
"<tool_calls>\n",
|
||||
" <tool_call>\n",
|
||||
" <tool_name>read_file</tool_name>\n",
|
||||
}
|
||||
var events []toolStreamEvent
|
||||
for _, c := range chunks {
|
||||
events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
// Stream ends abruptly - flush should NOT dump raw XML.
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent string
|
||||
for _, evt := range events {
|
||||
if evt.Content != "" {
|
||||
textContent += evt.Content
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "<tool_call") {
|
||||
t.Fatalf("incomplete XML leaked on flush: %q", textContent)
|
||||
}
|
||||
}
|
||||
|
||||
// Test that the opening tag "<tool_calls>\n " is NOT emitted as text content.
|
||||
func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// First chunk is the opening tag - should be held, not emitted.
|
||||
evts1 := processToolSieveChunk(&state, "<tool_calls>\n ", []string{"read_file"})
|
||||
for _, evt := range evts1 {
|
||||
if strings.Contains(evt.Content, "<tool_calls>") {
|
||||
t.Fatalf("opening tag leaked on first chunk: %q", evt.Content)
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining content arrives.
|
||||
evts2 := processToolSieveChunk(&state, "<tool_call>\n <tool_name>read_file</tool_name>\n <parameters>{\"path\":\"README.MD\"}</parameters>\n </tool_call>\n</tool_calls>", []string{"read_file"})
|
||||
evts2 = append(evts2, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent string
|
||||
var toolCalls int
|
||||
allEvents := append(evts1, evts2...)
|
||||
for _, evt := range allEvents {
|
||||
if evt.Content != "" {
|
||||
textContent += evt.Content
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "<tool_call") {
|
||||
t.Fatalf("XML content leaked: %q", textContent)
|
||||
}
|
||||
if toolCalls == 0 {
|
||||
t.Fatal("expected tool calls to be extracted")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,63 +1,58 @@
|
||||
'use strict';
|
||||
const { parseToolCalls } = require('./parse');
|
||||
const {
|
||||
XML_TOOL_OPENING_TAGS,
|
||||
XML_TOOL_CLOSING_TAGS,
|
||||
} = require('./tool-keywords');
|
||||
|
||||
// Tag pairs ordered longest-first: wrapper tags checked before inner tags.
|
||||
const XML_TOOL_TAG_PAIRS = [
|
||||
{ open: '<tool_calls', close: '</tool_calls>' },
|
||||
{ open: '<tool_call', close: '</tool_call>' },
|
||||
{ open: '<function_calls', close: '</function_calls>' },
|
||||
{ open: '<function_call', close: '</function_call>' },
|
||||
{ open: '<invoke', close: '</invoke>' },
|
||||
{ open: '<tool_use', close: '</tool_use>' },
|
||||
];
|
||||
|
||||
const XML_TOOL_OPENING_TAGS = XML_TOOL_TAG_PAIRS.map(p => p.open);
|
||||
|
||||
function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
const lower = captured.toLowerCase();
|
||||
let openIdx = -1;
|
||||
for (const tag of XML_TOOL_OPENING_TAGS) {
|
||||
const idx = lower.indexOf(tag);
|
||||
if (idx >= 0 && (openIdx < 0 || idx < openIdx)) {
|
||||
openIdx = idx;
|
||||
// Find the FIRST matching open/close pair, preferring wrapper tags.
|
||||
for (const pair of XML_TOOL_TAG_PAIRS) {
|
||||
const openIdx = lower.indexOf(pair.open);
|
||||
if (openIdx < 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (openIdx < 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
let closeIdx = -1;
|
||||
for (const tag of XML_TOOL_CLOSING_TAGS) {
|
||||
const idx = lower.indexOf(tag, openIdx);
|
||||
if (idx >= 0) {
|
||||
const absEnd = idx + tag.length;
|
||||
if (closeIdx < 0 || absEnd > closeIdx) {
|
||||
closeIdx = absEnd;
|
||||
}
|
||||
// Find the LAST occurrence of the specific closing tag.
|
||||
const closeIdx = lower.lastIndexOf(pair.close);
|
||||
if (closeIdx < openIdx) {
|
||||
// Opening tag present but specific closing tag hasn't arrived.
|
||||
// Return not-ready — do NOT fall through to inner pairs.
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const closeEnd = closeIdx + pair.close.length;
|
||||
const xmlBlock = captured.slice(openIdx, closeEnd);
|
||||
let prefixPart = captured.slice(0, openIdx);
|
||||
let suffixPart = captured.slice(closeEnd);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
// XML tool syntax but failed to parse — consume to avoid leak.
|
||||
return { ready: true, prefix: prefixPart, calls: [], suffix: suffixPart };
|
||||
}
|
||||
if (closeIdx <= 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const xmlBlock = captured.slice(openIdx, closeIdx);
|
||||
let prefixPart = captured.slice(0, openIdx);
|
||||
let suffixPart = captured.slice(closeIdx);
|
||||
const parsed = parseToolCalls(xmlBlock, toolNames);
|
||||
if (Array.isArray(parsed) && parsed.length > 0) {
|
||||
const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
|
||||
return {
|
||||
ready: true,
|
||||
prefix: trimmedFence.prefix,
|
||||
calls: parsed,
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
return { ready: true, prefix: prefixPart, calls: [], suffix: suffixPart };
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
function hasOpenXMLToolTag(captured) {
|
||||
const lower = captured.toLowerCase();
|
||||
for (const tag of XML_TOOL_OPENING_TAGS) {
|
||||
if (lower.includes(tag)) {
|
||||
let hasClosed = false;
|
||||
for (const ct of XML_TOOL_CLOSING_TAGS) {
|
||||
if (lower.includes(ct)) {
|
||||
hasClosed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasClosed) {
|
||||
for (const pair of XML_TOOL_TAG_PAIRS) {
|
||||
if (lower.includes(pair.open)) {
|
||||
if (!lower.includes(pair.close)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,7 +191,12 @@ function findToolSegmentStart(state, s) {
|
||||
}
|
||||
const keyIdx = bestKeyIdx;
|
||||
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
||||
const candidateStart = start >= 0 ? start : keyIdx;
|
||||
let candidateStart = start >= 0 ? start : keyIdx;
|
||||
// If the keyword matched inside an XML tag (e.g. "tool_calls" in "<tool_calls>"),
|
||||
// back up past the '<' to capture the full tag.
|
||||
if (candidateStart > 0 && s[candidateStart - 1] === '<') {
|
||||
candidateStart--;
|
||||
}
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
|
||||
return candidateStart;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user