mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 16:35:27 +08:00
feat: implement code fence awareness in tool sieve to prevent false-positive XML tool detection inside code blocks and refine prompt instructions.
This commit is contained in:
@@ -74,7 +74,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
|
||||
}
|
||||
|
||||
finalPrompt, _ := buildOpenAIFinalPrompt(messages, tools, "", false)
|
||||
if !strings.Contains(finalPrompt, "Remember: Output ONLY the <tool_calls>...</tool_calls> XML block when calling tools.") {
|
||||
if !strings.Contains(finalPrompt, "Remember: The ONLY valid way to use tools is the <tool_calls> XML block at the end of your response.") {
|
||||
t.Fatalf("vercel prepare finalPrompt missing final tool-call anchor instruction: %q", finalPrompt)
|
||||
}
|
||||
if !strings.Contains(finalPrompt, "TOOL CALL FORMAT") {
|
||||
|
||||
@@ -60,7 +60,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames
|
||||
if pending == "" {
|
||||
break
|
||||
}
|
||||
start := findToolSegmentStart(pending)
|
||||
start := findToolSegmentStart(state, pending)
|
||||
if start >= 0 {
|
||||
prefix := pending[:start]
|
||||
if prefix != "" {
|
||||
@@ -74,7 +74,7 @@ func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames
|
||||
continue
|
||||
}
|
||||
|
||||
safe, hold := splitSafeContentForToolDetection(pending)
|
||||
safe, hold := splitSafeContentForToolDetection(state, pending)
|
||||
if safe == "" {
|
||||
break
|
||||
}
|
||||
@@ -114,14 +114,10 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea
|
||||
} else {
|
||||
content := state.capture.String()
|
||||
if content != "" {
|
||||
// If the captured text looks like an incomplete XML tool call block,
|
||||
// swallow it to prevent leaking raw XML tags to the client.
|
||||
if hasOpenXMLToolTag(content) {
|
||||
// Drop it silently — incomplete tool call.
|
||||
} else {
|
||||
state.noteText(content)
|
||||
events = append(events, toolStreamEvent{Content: content})
|
||||
}
|
||||
// If capture never resolved into a real tool call, release the
|
||||
// buffered text instead of swallowing it.
|
||||
state.noteText(content)
|
||||
events = append(events, toolStreamEvent{Content: content})
|
||||
}
|
||||
}
|
||||
state.capture.Reset()
|
||||
@@ -130,24 +126,22 @@ func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStrea
|
||||
}
|
||||
if state.pending.Len() > 0 {
|
||||
content := state.pending.String()
|
||||
// Safety: if pending contains XML tool tag fragments (e.g. "tool_calls>"
|
||||
// from a split closing tag), swallow them instead of leaking.
|
||||
if hasOpenXMLToolTag(content) || looksLikeXMLToolTagFragment(content) {
|
||||
// Drop it — likely an incomplete tool call fragment.
|
||||
} else {
|
||||
state.noteText(content)
|
||||
events = append(events, toolStreamEvent{Content: content})
|
||||
}
|
||||
// If pending never resolved into a real tool call, release it as text.
|
||||
state.noteText(content)
|
||||
events = append(events, toolStreamEvent{Content: content})
|
||||
state.pending.Reset()
|
||||
}
|
||||
return events
|
||||
}
|
||||
|
||||
func splitSafeContentForToolDetection(s string) (safe, hold string) {
|
||||
func splitSafeContentForToolDetection(state *toolStreamSieveState, s string) (safe, hold string) {
|
||||
if s == "" {
|
||||
return "", ""
|
||||
}
|
||||
if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 {
|
||||
if insideCodeFenceWithState(state, s[:xmlIdx]) {
|
||||
return s, ""
|
||||
}
|
||||
if xmlIdx > 0 {
|
||||
return s[:xmlIdx], s[xmlIdx:]
|
||||
}
|
||||
@@ -156,19 +150,33 @@ func splitSafeContentForToolDetection(s string) (safe, hold string) {
|
||||
return s, ""
|
||||
}
|
||||
|
||||
func findToolSegmentStart(s string) int {
|
||||
func findToolSegmentStart(state *toolStreamSieveState, s string) int {
|
||||
if s == "" {
|
||||
return -1
|
||||
}
|
||||
lower := strings.ToLower(s)
|
||||
bestKeyIdx := -1
|
||||
for _, tag := range xmlToolTagsToDetect {
|
||||
idx := strings.Index(lower, tag)
|
||||
if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) {
|
||||
bestKeyIdx = idx
|
||||
offset := 0
|
||||
for {
|
||||
bestKeyIdx := -1
|
||||
matchedTag := ""
|
||||
for _, tag := range xmlToolTagsToDetect {
|
||||
idx := strings.Index(lower[offset:], tag)
|
||||
if idx >= 0 {
|
||||
idx += offset
|
||||
if bestKeyIdx < 0 || idx < bestKeyIdx {
|
||||
bestKeyIdx = idx
|
||||
matchedTag = tag
|
||||
}
|
||||
}
|
||||
}
|
||||
if bestKeyIdx < 0 {
|
||||
return -1
|
||||
}
|
||||
if !insideCodeFenceWithState(state, s[:bestKeyIdx]) {
|
||||
return bestKeyIdx
|
||||
}
|
||||
offset = bestKeyIdx + len(matchedTag)
|
||||
}
|
||||
return bestKeyIdx
|
||||
}
|
||||
|
||||
func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix string, calls []toolcall.ParsedToolCall, suffix string, ready bool) {
|
||||
|
||||
@@ -6,19 +6,22 @@ import (
|
||||
)
|
||||
|
||||
type toolStreamSieveState struct {
|
||||
pending strings.Builder
|
||||
capture strings.Builder
|
||||
capturing bool
|
||||
recentTextTail string
|
||||
pendingToolRaw string
|
||||
pendingToolCalls []toolcall.ParsedToolCall
|
||||
disableDeltas bool
|
||||
toolNameSent bool
|
||||
toolName string
|
||||
toolArgsStart int
|
||||
toolArgsSent int
|
||||
toolArgsString bool
|
||||
toolArgsDone bool
|
||||
pending strings.Builder
|
||||
capture strings.Builder
|
||||
capturing bool
|
||||
codeFenceStack []int
|
||||
codeFencePendingTicks int
|
||||
codeFenceLineStart bool
|
||||
recentTextTail string
|
||||
pendingToolRaw string
|
||||
pendingToolCalls []toolcall.ParsedToolCall
|
||||
disableDeltas bool
|
||||
toolNameSent bool
|
||||
toolName string
|
||||
toolArgsStart int
|
||||
toolArgsSent int
|
||||
toolArgsString bool
|
||||
toolArgsDone bool
|
||||
}
|
||||
|
||||
type toolStreamEvent struct {
|
||||
@@ -47,9 +50,10 @@ func (s *toolStreamSieveState) resetIncrementalToolState() {
|
||||
}
|
||||
|
||||
func (s *toolStreamSieveState) noteText(content string) {
|
||||
if content == "" {
|
||||
if !hasMeaningfulText(content) {
|
||||
return
|
||||
}
|
||||
updateCodeFenceState(s, content)
|
||||
s.recentTextTail = appendTail(s.recentTextTail, content, toolSieveContextTailLimit)
|
||||
}
|
||||
|
||||
@@ -63,3 +67,107 @@ func appendTail(prev, next string, max int) string {
|
||||
}
|
||||
return combined[len(combined)-max:]
|
||||
}
|
||||
|
||||
// hasMeaningfulText reports whether text contains at least one character
// that is not whitespace. Empty and whitespace-only strings yield false.
func hasMeaningfulText(text string) bool {
	return strings.TrimSpace(text) != ""
}
|
||||
|
||||
func insideCodeFenceWithState(state *toolStreamSieveState, text string) bool {
|
||||
if state == nil {
|
||||
return insideCodeFence(text)
|
||||
}
|
||||
simulated := simulateCodeFenceState(
|
||||
state.codeFenceStack,
|
||||
state.codeFencePendingTicks,
|
||||
state.codeFenceLineStart,
|
||||
text,
|
||||
)
|
||||
return len(simulated.stack) > 0
|
||||
}
|
||||
|
||||
func insideCodeFence(text string) bool {
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
return len(simulateCodeFenceState(nil, 0, true, text).stack) > 0
|
||||
}
|
||||
|
||||
func updateCodeFenceState(state *toolStreamSieveState, text string) {
|
||||
if state == nil || !hasMeaningfulText(text) {
|
||||
return
|
||||
}
|
||||
next := simulateCodeFenceState(
|
||||
state.codeFenceStack,
|
||||
state.codeFencePendingTicks,
|
||||
state.codeFenceLineStart,
|
||||
text,
|
||||
)
|
||||
state.codeFenceStack = next.stack
|
||||
state.codeFencePendingTicks = next.pendingTicks
|
||||
state.codeFenceLineStart = next.lineStart
|
||||
}
|
||||
|
||||
// codeFenceSimulation is the outcome of scanning a piece of text for
// backtick code fences.
type codeFenceSimulation struct {
	// stack holds the backtick counts of the fences that are open at the
	// end of the scan, outermost first. It is empty outside any fence.
	stack []int
	// pendingTicks is the length of a backtick run that reached the end of
	// the scanned text without being terminated by another byte.
	pendingTicks int
	// lineStart is true when nothing but spaces or tabs has been seen since
	// the last line break (or since the start, if the scan began that way).
	lineStart bool
}

// simulateCodeFenceState scans text byte by byte, starting from the given
// fence state, and returns the state reached at the end of text.
//
// stack, pendingTicks and lineStart describe the situation before text: the
// currently open fences, the length of an unfinished backtick run carried
// over from earlier text, and whether the scan position is at the start of a
// line. The input stack is copied and never modified.
//
// A run of three or more backticks counts as a fence marker only when it
// begins at the start of a line (leading spaces and tabs are ignored). A run
// is evaluated when the first non-backtick byte after it is seen; a run that
// ends exactly at the end of text is returned in pendingTicks so that a later
// call can continue it.
func simulateCodeFenceState(stack []int, pendingTicks int, lineStart bool, text string) codeFenceSimulation {
	sim := codeFenceSimulation{
		stack:        append([]int(nil), stack...),
		pendingTicks: pendingTicks,
		lineStart:    lineStart,
	}

	for i := 0; i < len(text); i++ {
		b := text[i]
		if b == '`' {
			sim.pendingTicks++
			continue
		}

		// Any other byte ends the current backtick run; decide whether the
		// run was a fence marker before interpreting the byte itself.
		if sim.pendingTicks > 0 {
			if sim.lineStart && sim.pendingTicks >= 3 {
				applyFenceMarker(&sim.stack, sim.pendingTicks)
			}
			sim.pendingTicks = 0
			sim.lineStart = false
		}

		switch {
		case b == '\n' || b == '\r':
			sim.lineStart = true
		case (b == ' ' || b == '\t') && sim.lineStart:
			// Leading indentation does not end the start-of-line state.
		default:
			sim.lineStart = false
		}
	}

	return sim
}

// applyFenceMarker updates the stack of open fences for a fence marker made
// of ticks backticks.
//
// With no fence open the marker opens one. Otherwise a marker at least as
// long as the innermost open fence closes that fence, while a shorter marker
// opens a nested fence. A nil stack pointer or a non-positive tick count is
// ignored.
func applyFenceMarker(stack *[]int, ticks int) {
	if stack == nil || ticks <= 0 {
		return
	}
	open := *stack
	depth := len(open)
	if depth > 0 && ticks >= open[depth-1] {
		*stack = open[:depth-1]
		return
	}
	*stack = append(open, ticks)
}
|
||||
|
||||
@@ -26,8 +26,8 @@ var xmlToolCallTagPairs = []struct{ open, close string }{
|
||||
{"<invoke", "</invoke>"},
|
||||
{"<tool_use", "</tool_use>"},
|
||||
// Agent-style: these are XML "tool call" patterns from coding agents.
|
||||
// They get captured → parsed. If parsing fails, the block is consumed
|
||||
// (swallowed) to prevent raw XML from leaking to the client.
|
||||
// They get captured → parsed. If parsing fails, the raw XML is preserved
|
||||
// so the caller can still see the original text.
|
||||
{"<attempt_completion", "</attempt_completion>"},
|
||||
{"<ask_followup_question", "</ask_followup_question>"},
|
||||
{"<new_task", "</new_task>"},
|
||||
@@ -73,31 +73,12 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string,
|
||||
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
|
||||
return prefixPart, parsed, suffixPart, true
|
||||
}
|
||||
// If this block does not look like an executable tool-call payload,
|
||||
// pass it through as normal content (e.g. user-requested XML snippets).
|
||||
if !looksLikeExecutableXMLToolCallBlock(xmlBlock, pair.open) {
|
||||
return prefixPart + xmlBlock, nil, suffixPart, true
|
||||
}
|
||||
// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
|
||||
return prefixPart, nil, suffixPart, true
|
||||
// If this block failed to become a tool call, pass it through as text.
|
||||
return prefixPart + xmlBlock, nil, suffixPart, true
|
||||
}
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
func looksLikeExecutableXMLToolCallBlock(xmlBlock, openTag string) bool {
|
||||
lower := strings.ToLower(xmlBlock)
|
||||
// Agent wrapper tags are always treated as internal tool-call wrappers.
|
||||
switch openTag {
|
||||
case "<attempt_completion", "<ask_followup_question", "<new_task":
|
||||
return true
|
||||
}
|
||||
return strings.Contains(lower, "<tool_name") ||
|
||||
strings.Contains(lower, "<parameters") ||
|
||||
strings.Contains(lower, `"tool"`) ||
|
||||
strings.Contains(lower, `"tool_name"`) ||
|
||||
strings.Contains(lower, `"name"`)
|
||||
}
|
||||
|
||||
// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
|
||||
// whose SPECIFIC closing tag has not appeared yet.
|
||||
func hasOpenXMLToolTag(captured string) bool {
|
||||
@@ -137,32 +118,3 @@ func findPartialXMLToolTagStart(s string) int {
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// looksLikeXMLToolTagFragment returns true if s looks like a fragment from a
|
||||
// split XML tool call tag — for example "tool_calls>" or "/tool_call>\n".
|
||||
// These fragments arise when '<' was consumed separately and the tail remains.
|
||||
func looksLikeXMLToolTagFragment(s string) bool {
|
||||
trimmed := strings.TrimSpace(s)
|
||||
if trimmed == "" {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(trimmed)
|
||||
// Check for closing tag tails like "tool_calls>" or "/tool_calls>"
|
||||
fragments := []string{
|
||||
"tool_calls>", "tool_call>", "/tool_calls>", "/tool_call>",
|
||||
"function_calls>", "function_call>", "/function_calls>", "/function_call>",
|
||||
"invoke>", "/invoke>", "tool_use>", "/tool_use>",
|
||||
"tool_name>", "/tool_name>", "parameters>", "/parameters>",
|
||||
// Agent-style tag fragments
|
||||
"attempt_completion>", "/attempt_completion>",
|
||||
"ask_followup_question>", "/ask_followup_question>",
|
||||
"new_task>", "/new_task>",
|
||||
"result>", "/result>",
|
||||
}
|
||||
for _, f := range fragments {
|
||||
if strings.Contains(lower, f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -121,6 +121,105 @@ func TestProcessToolSieveNonToolXMLKeepsSuffixForToolParsing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSievePassesThroughMalformedExecutableXMLBlock(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
chunk := `<tool_call><parameters>{"path":"README.md"}</parameters></tool_call>`
|
||||
events := processToolSieveChunk(&state, chunk, []string{"read_file"})
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected malformed executable-looking XML to stay text, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if textContent.String() != chunk {
|
||||
t.Fatalf("expected malformed executable-looking XML to pass through unchanged, got %q", textContent.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSievePassesThroughFencedXMLToolCallExamples(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
input := strings.Join([]string{
|
||||
"Before first example.\n```",
|
||||
"xml\n<tool_call><tool_name>read_file</tool_name><parameters>{\"path\":\"README.md\"}</parameters></tool_call>\n```\n",
|
||||
"Between examples.\n```xml\n",
|
||||
"<tool_call><tool_name>search</tool_name><parameters>{\"q\":\"golang\"}</parameters></tool_call>\n",
|
||||
"```\nAfter examples.",
|
||||
}, "")
|
||||
|
||||
chunks := []string{
|
||||
"Before first example.\n```",
|
||||
"xml\n<tool_call><tool_name>read_file</tool_name><parameters>{\"path\":\"README.md\"}</parameters></tool_call>\n```\n",
|
||||
"Between examples.\n```xml\n",
|
||||
"<tool_call><tool_name>search</tool_name><parameters>{\"q\":\"golang\"}</parameters></tool_call>\n",
|
||||
"```\nAfter examples.",
|
||||
}
|
||||
|
||||
var events []toolStreamEvent
|
||||
for _, c := range chunks {
|
||||
events = append(events, processToolSieveChunk(&state, c, []string{"read_file", "search"})...)
|
||||
}
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file", "search"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
if evt.Content != "" {
|
||||
textContent.WriteString(evt.Content)
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected fenced XML examples to stay text, got %d tool calls events=%#v", toolCalls, events)
|
||||
}
|
||||
if textContent.String() != input {
|
||||
t.Fatalf("expected fenced XML examples to pass through unchanged, got %q", textContent.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveKeepsPartialXMLTagInsideFencedExample(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
input := strings.Join([]string{
|
||||
"Example:\n```xml\n<tool_ca",
|
||||
"ll><tool_name>read_file</tool_name><parameters>{\"path\":\"README.md\"}</parameters></tool_call>\n```\n",
|
||||
"Done.",
|
||||
}, "")
|
||||
|
||||
chunks := []string{
|
||||
"Example:\n```xml\n<tool_ca",
|
||||
"ll><tool_name>read_file</tool_name><parameters>{\"path\":\"README.md\"}</parameters></tool_call>\n```\n",
|
||||
"Done.",
|
||||
}
|
||||
|
||||
var events []toolStreamEvent
|
||||
for _, c := range chunks {
|
||||
events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
if evt.Content != "" {
|
||||
textContent.WriteString(evt.Content)
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected partial fenced XML to stay text, got %d tool calls events=%#v", toolCalls, events)
|
||||
}
|
||||
if textContent.String() != input {
|
||||
t.Fatalf("expected partial fenced XML to pass through unchanged, got %q", textContent.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// Chunk ends with a partial XML tool tag.
|
||||
@@ -149,13 +248,14 @@ func TestFindToolSegmentStartDetectsXMLToolCalls(t *testing.T) {
|
||||
{"tool_calls_tag", "some text <tool_calls>\n", 10},
|
||||
{"tool_call_tag", "prefix <tool_call>\n", 7},
|
||||
{"invoke_tag", "text <invoke name=\"foo\">body</invoke>", 5},
|
||||
{"xml_inside_code_fence", "```xml\n<tool_call><tool_name>read_file</tool_name></tool_call>\n```", -1},
|
||||
{"function_call_tag", "<function_call name=\"foo\">body</function_call>", 0},
|
||||
{"no_xml", "just plain text", -1},
|
||||
{"gemini_json_no_detect", `some text {"functionCall":{"name":"search"}}`, -1},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := findToolSegmentStart(tc.input)
|
||||
got := findToolSegmentStart(nil, tc.input)
|
||||
if got != tc.want {
|
||||
t.Fatalf("findToolSegmentStart(%q) = %d, want %d", tc.input, got, tc.want)
|
||||
}
|
||||
@@ -269,8 +369,8 @@ func TestProcessToolSieveTokenByTokenXMLNoLeak(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Test that flushToolSieve on incomplete XML does NOT leak the raw XML content.
|
||||
func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) {
|
||||
// Test that flushToolSieve on incomplete XML falls back to raw text.
|
||||
func TestFlushToolSieveIncompleteXMLFallsBackToText(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// XML block starts but stream ends before completion.
|
||||
chunks := []string{
|
||||
@@ -292,8 +392,8 @@ func TestFlushToolSieveIncompleteXMLDoesNotLeak(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "<tool_call") {
|
||||
t.Fatalf("incomplete XML leaked on flush: %q", textContent)
|
||||
if textContent != strings.Join(chunks, "") {
|
||||
t.Fatalf("expected incomplete XML to fall back to raw text, got %q", textContent)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -330,10 +430,10 @@ func TestOpeningXMLTagNotLeakedAsContent(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) {
|
||||
func TestProcessToolSieveFallsBackToRawAttemptCompletion(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// Simulate an agent outputting attempt_completion XML tag
|
||||
// which shouldn't leak to text output, even if it fails to parse as a valid tool.
|
||||
// Simulate an agent outputting attempt_completion XML tag.
|
||||
// If it does not parse as a tool call, it should fall back to raw text.
|
||||
chunks := []string{
|
||||
"Done with task.\n",
|
||||
"<attempt_completion>\n",
|
||||
@@ -357,7 +457,7 @@ func TestProcessToolSieveInterceptsAttemptCompletionLeak(t *testing.T) {
|
||||
t.Fatalf("expected leading text to be emitted, got %q", textContent)
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "<attempt_completion>") || strings.Contains(textContent, "result>") {
|
||||
t.Fatalf("agent XML tag content leaked to text: %q", textContent)
|
||||
if textContent != strings.Join(chunks, "") {
|
||||
t.Fatalf("expected agent XML to fall back to raw text, got %q", textContent)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,8 +42,8 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) {
|
||||
suffix: trimmedFence.suffix,
|
||||
};
|
||||
}
|
||||
// XML tool syntax but failed to parse — consume to avoid leak.
|
||||
return { ready: true, prefix: prefixPart, calls: [], suffix: suffixPart };
|
||||
// If this block failed to become a tool call, pass it through as text.
|
||||
return { ready: true, prefix: prefixPart + xmlBlock, calls: [], suffix: suffixPart };
|
||||
}
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
@@ -79,22 +79,8 @@ function findPartialXMLToolTagStart(s) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
function looksLikeXMLToolTagFragment(s) {
|
||||
const trimmed = (s || '').trim();
|
||||
if (!trimmed) return false;
|
||||
const lower = trimmed.toLowerCase();
|
||||
const fragments = [
|
||||
'tool_calls>', 'tool_call>', '/tool_calls>', '/tool_call>',
|
||||
'function_calls>', 'function_call>', '/function_calls>', '/function_call>',
|
||||
'invoke>', '/invoke>', 'tool_use>', '/tool_use>',
|
||||
'tool_name>', '/tool_name>', 'parameters>', '/parameters>',
|
||||
];
|
||||
return fragments.some(f => lower.includes(f));
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
consumeXMLToolCapture,
|
||||
hasOpenXMLToolTag,
|
||||
findPartialXMLToolTagStart,
|
||||
looksLikeXMLToolTagFragment,
|
||||
};
|
||||
|
||||
@@ -12,7 +12,6 @@ const {
|
||||
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
||||
hasOpenXMLToolTag,
|
||||
findPartialXMLToolTagStart,
|
||||
looksLikeXMLToolTagFragment,
|
||||
} = require('./sieve-xml');
|
||||
function processToolSieveChunk(state, chunk, toolNames) {
|
||||
if (!state) {
|
||||
@@ -77,7 +76,7 @@ function processToolSieveChunk(state, chunk, toolNames) {
|
||||
resetIncrementalToolState(state);
|
||||
continue;
|
||||
}
|
||||
const [safe, hold] = splitSafeContentForToolDetection(pending);
|
||||
const [safe, hold] = splitSafeContentForToolDetection(state, pending);
|
||||
if (!safe) {
|
||||
break;
|
||||
}
|
||||
@@ -114,26 +113,22 @@ function flushToolSieve(state, toolNames) {
|
||||
}
|
||||
} else if (state.capture) {
|
||||
const content = state.capture;
|
||||
if (!hasOpenXMLToolTag(content) && !looksLikeXMLToolTagFragment(content)) {
|
||||
noteText(state, content);
|
||||
events.push({ type: 'text', text: content });
|
||||
}
|
||||
noteText(state, content);
|
||||
events.push({ type: 'text', text: content });
|
||||
}
|
||||
state.capture = '';
|
||||
state.capturing = false;
|
||||
resetIncrementalToolState(state);
|
||||
}
|
||||
if (state.pending) {
|
||||
if (!hasOpenXMLToolTag(state.pending) && !looksLikeXMLToolTagFragment(state.pending)) {
|
||||
noteText(state, state.pending);
|
||||
events.push({ type: 'text', text: state.pending });
|
||||
}
|
||||
noteText(state, state.pending);
|
||||
events.push({ type: 'text', text: state.pending });
|
||||
state.pending = '';
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
function splitSafeContentForToolDetection(s) {
|
||||
function splitSafeContentForToolDetection(state, s) {
|
||||
const text = s || '';
|
||||
if (!text) {
|
||||
return ['', ''];
|
||||
@@ -141,6 +136,9 @@ function splitSafeContentForToolDetection(s) {
|
||||
// Only hold back partial XML tool tags.
|
||||
const xmlIdx = findPartialXMLToolTagStart(text);
|
||||
if (xmlIdx >= 0) {
|
||||
if (insideCodeFenceWithState(state, text.slice(0, xmlIdx))) {
|
||||
return [text, ''];
|
||||
}
|
||||
if (xmlIdx > 0) {
|
||||
return [text.slice(0, xmlIdx), text.slice(xmlIdx)];
|
||||
}
|
||||
|
||||
@@ -36,8 +36,6 @@ func BuildToolCallInstructions(toolNames []string) string {
|
||||
|
||||
return `TOOL CALL FORMAT — FOLLOW EXACTLY:
|
||||
|
||||
If you need to call tools, your entire response must be exactly one XML block and nothing else.
|
||||
|
||||
<tool_calls>
|
||||
<tool_call>
|
||||
<tool_name>TOOL_NAME_HERE</tool_name>
|
||||
@@ -63,7 +61,8 @@ PARAMETER SHAPES:
|
||||
- array => repeated tags or <item> children
|
||||
- number/bool/null => plain text
|
||||
|
||||
❌ WRONG — Do NOT do these:
|
||||
【WRONG — Do NOT do these】:
|
||||
|
||||
Wrong 1 — mixed text after XML:
|
||||
<tool_calls>...</tool_calls> I hope this helps.
|
||||
Wrong 2 — function-call syntax:
|
||||
@@ -74,14 +73,10 @@ Wrong 4 — Markdown code fences:
|
||||
` + "```xml" + `
|
||||
<tool_calls>...</tool_calls>
|
||||
` + "```" + `
|
||||
Wrong 5 — native tool tokens:
|
||||
<|Tool|>call_some_tool{"param":1}<|Tool|>
|
||||
Wrong 6 — role markers in response:
|
||||
<|Assistant|> Here is the result...
|
||||
|
||||
Remember: The ONLY valid way to use tools is the <tool_calls> XML block at the end of your response.
|
||||
|
||||
✅ CORRECT EXAMPLES:
|
||||
【CORRECT EXAMPLES】:
|
||||
|
||||
Example A — Single tool:
|
||||
<tool_calls>
|
||||
@@ -127,7 +122,7 @@ fi
|
||||
</tool_call>
|
||||
</tool_calls>
|
||||
|
||||
Remember: Output ONLY the <tool_calls>...</tool_calls> XML block when calling tools.`
|
||||
`
|
||||
}
|
||||
|
||||
func matchAny(name string, candidates ...string) bool {
|
||||
|
||||
@@ -126,7 +126,16 @@ test('sieve keeps embedded invalid tool-like json as normal text to avoid stream
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('sieve flushes incomplete captured XML tool blocks without leaking raw tags', () => {
|
||||
test('sieve passes malformed executable-looking XML through as text', () => {
|
||||
const chunk = '<tool_call><parameters>{"path":"README.MD"}</parameters></tool_call>';
|
||||
const events = runSieve([chunk], ['read_file']);
|
||||
const leakedText = collectText(events);
|
||||
const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
assert.equal(hasToolCalls, false);
|
||||
assert.equal(leakedText, chunk);
|
||||
});
|
||||
|
||||
test('sieve flushes incomplete captured XML tool blocks by falling back to raw text', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'前置正文G。',
|
||||
@@ -137,9 +146,10 @@ test('sieve flushes incomplete captured XML tool blocks without leaking raw tags
|
||||
['read_file'],
|
||||
);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(leakedText.includes('前置正文G。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
assert.equal(leakedText.includes('<tool_call'), false);
|
||||
const expected = ['前置正文G。', '<tool_calls>\n', ' <tool_call>\n', ' <tool_name>read_file</tool_name>\n'].join('');
|
||||
const hasToolCalls = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
assert.equal(hasToolCalls, false);
|
||||
assert.equal(leakedText, expected);
|
||||
});
|
||||
|
||||
test('sieve captures XML wrapper tags with attributes without leaking wrapper text', () => {
|
||||
|
||||
Reference in New Issue
Block a user