From 77a47ada4e509d2f60730296b39446c2861f0fef Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 10 May 2026 18:41:51 +0800 Subject: [PATCH] Fix tool detection when unclosed backtick precedes tool call Handles cases where a stray backtick opens an inline code span but is never closed. Previously, any subsequent XML tool tag was treated as inside markdown code and ignored. Now, tool tags are detected after an unclosed backtick, and the markdown state is reset when the backtick is confirmed to be literal text at stream boundaries. Co-Authored-By: Claude Opus 4.6 --- .../js/helpers/stream-tool-sieve/sieve.js | 114 +++++++++++++++++- internal/toolstream/fence_edge_sieve_test.go | 55 +++++++++ internal/toolstream/tool_sieve_core.go | 104 +++++++++++++++- internal/toolstream/tool_sieve_state.go | 7 -- tests/node/stream-tool-sieve.test.js | 21 ++++ 5 files changed, 287 insertions(+), 14 deletions(-) diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js index 18898d8..961211c 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve.js +++ b/internal/js/helpers/stream-tool-sieve/sieve.js @@ -3,7 +3,6 @@ const { resetIncrementalToolState, noteText, insideCodeFenceWithState, - insideMarkdownCodeSpanWithState, } = require('./state'); const { trimWrappingJSONFence } = require('./jsonscan'); const { @@ -71,10 +70,17 @@ function processToolSieveChunk(state, chunk, toolNames) { break; } const start = findToolSegmentStart(state, pending); + if (start === HOLD_TOOL_SEGMENT_START) { + break; + } if (start >= 0) { const prefix = pending.slice(0, start); if (prefix) { + const resetMarkdownSpan = shouldResetUnclosedMarkdownPrefix(state, prefix, pending.slice(start)); noteText(state, prefix); + if (resetMarkdownSpan) { + state.markdownCodeSpanTicks = 0; + } events.push({ type: 'text', text: prefix }); } state.pending = ''; @@ -99,6 +105,10 @@ function flushToolSieve(state, toolNames) { return []; } const events = processToolSieveChunk(state, '', toolNames); + if (state.pending && Number.isInteger(state.markdownCodeSpanTicks) && state.markdownCodeSpanTicks > 0) { + state.markdownCodeSpanTicks = 0; + events.push(...processToolSieveChunk(state, '', toolNames)); + } if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) { events.push({ type: 'tool_calls', calls: state.pendingToolCalls }); state.pendingToolRaw = ''; @@ -162,9 +172,18 @@ function splitSafeContentForToolDetection(state, s) { // Only hold back partial XML tool tags. const xmlIdx = findPartialXMLToolTagStart(text); if (xmlIdx >= 0) { - if (insideCodeFenceWithState(state, text.slice(0, xmlIdx)) || insideMarkdownCodeSpanWithState(state, text.slice(0, xmlIdx))) { + if (insideCodeFenceWithState(state, text.slice(0, xmlIdx))) { return [text, '']; } + const markdown = markdownCodeSpanStateAt(state, text.slice(0, xmlIdx)); + if (markdown.ticks > 0) { + if (markdownCodeSpanCloses(text.slice(xmlIdx), markdown.ticks)) { + return [text, '']; + } + if (markdown.fromPrior) { + return ['', text]; + } + } if (xmlIdx > 0) { return [text.slice(0, xmlIdx), text.slice(xmlIdx)]; } @@ -173,6 +192,8 @@ function splitSafeContentForToolDetection(state, s) { return [text, '']; } +const HOLD_TOOL_SEGMENT_START = -2; + function findToolSegmentStart(state, s) { if (!s) { return -1; @@ -183,13 +204,98 @@ function findToolSegmentStart(state, s) { if (!tag) { return -1; } - if (!insideCodeFenceWithState(state, s.slice(0, tag.start)) && !insideMarkdownCodeSpanWithState(state, s.slice(0, tag.start))) { + if (insideCodeFenceWithState(state, s.slice(0, tag.start))) { + offset = tag.end + 1; + continue; + } + const markdown = markdownCodeSpanStateAt(state, s.slice(0, tag.start)); + if (markdown.ticks === 0) { return tag.start; } - offset = tag.end + 1; + if (markdownCodeSpanCloses(s.slice(tag.start), markdown.ticks)) { + offset = tag.end + 1; + continue; + } + if (markdown.fromPrior) { + return HOLD_TOOL_SEGMENT_START; + } + return tag.start; } } +function markdownCodeSpanStateAt(state, text) { + const raw = typeof text === 'string' ? text : ''; + let ticks = state && Number.isInteger(state.markdownCodeSpanTicks) ? state.markdownCodeSpanTicks : 0; + let fromPrior = ticks > 0; + for (let i = 0; i < raw.length;) { + if (raw[i] !== '`') { + i += 1; + continue; + } + const run = countBacktickRun(raw, i); + if (ticks === 0) { + if (run >= 3 && atMarkdownFenceLineStart(raw, i)) { + i += run; + continue; + } + if (state && insideCodeFenceWithState(state, raw.slice(0, i))) { + i += run; + continue; + } + ticks = run; + fromPrior = false; + } else if (run === ticks) { + ticks = 0; + fromPrior = false; + } + i += run; + } + return { ticks, fromPrior }; +} + +function markdownCodeSpanCloses(text, ticks) { + const raw = typeof text === 'string' ? text : ''; + if (!Number.isInteger(ticks) || ticks <= 0) { + return false; + } + for (let i = 0; i < raw.length;) { + if (raw[i] !== '`') { + i += 1; + continue; + } + const run = countBacktickRun(raw, i); + if (run === ticks) { + return true; + } + i += run; + } + return false; +} + +function shouldResetUnclosedMarkdownPrefix(state, prefix, suffix) { + const markdown = markdownCodeSpanStateAt(state, prefix); + return markdown.ticks > 0 && !markdown.fromPrior && !markdownCodeSpanCloses(suffix, markdown.ticks); +} + +function countBacktickRun(text, start) { + let count = 0; + while (start + count < text.length && text[start + count] === '`') { + count += 1; + } + return count; +} + +function atMarkdownFenceLineStart(text, idx) { + for (let i = idx - 1; i >= 0; i -= 1) { + const ch = text[i]; + if (ch === ' ' || ch === '\t') { + continue; + } + return ch === '\n' || ch === '\r'; + } + return true; +} + function consumeToolCapture(state, toolNames) { const captured = state.capture || ''; if (!captured) { diff --git a/internal/toolstream/fence_edge_sieve_test.go b/internal/toolstream/fence_edge_sieve_test.go index 81e6f94..035108d 100644 --- a/internal/toolstream/fence_edge_sieve_test.go +++ b/internal/toolstream/fence_edge_sieve_test.go @@ -122,3 +122,58 @@ func TestProcessToolSieveInlineMarkdownToolCallSplitAcrossChunksDoesNotTrigger(t t.Fatalf("expected inline example text preserved, got %q", textContent.String()) } } + +func TestProcessToolSieveUnclosedInlineMarkdownBeforeToolDoesTrigger(t *testing.T) { + var state State + input := "note with stray ` before real call " + + "real.md" + + var events []Event + events = append(events, ProcessChunk(&state, input, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) + + var textContent strings.Builder + var calls []string + for _, evt := range events { + textContent.WriteString(evt.Content) + for _, call := range evt.ToolCalls { + if path, _ := call.Input["path"].(string); path != "" { + calls = append(calls, path) + } + } + } + + if len(calls) != 1 || calls[0] != "real.md" { + t.Fatalf("expected real tool call after stray backtick, got %#v from events %#v", calls, events) + } + if !strings.Contains(textContent.String(), "stray ` before real call") { + t.Fatalf("expected stray-backtick prefix preserved, got %q", textContent.String()) + } +} + +func TestProcessToolSieveUnclosedInlineMarkdownBeforeSplitToolDoesTriggerOnFlush(t *testing.T) { + var state State + chunks := []string{ + "note with stray ` before real call ", + "real.md", + } + + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) + } + events = append(events, Flush(&state, []string{"read_file"})...) + + var calls []string + for _, evt := range events { + for _, call := range evt.ToolCalls { + if path, _ := call.Input["path"].(string); path != "" { + calls = append(calls, path) + } + } + } + + if len(calls) != 1 || calls[0] != "real.md" { + t.Fatalf("expected split real tool call after stray backtick, got %#v from events %#v", calls, events) + } +} diff --git a/internal/toolstream/tool_sieve_core.go b/internal/toolstream/tool_sieve_core.go index 3be88ef..a0791f4 100644 --- a/internal/toolstream/tool_sieve_core.go +++ b/internal/toolstream/tool_sieve_core.go @@ -57,10 +57,17 @@ func ProcessChunk(state *State, chunk string, toolNames []string) []Event { break } start := findToolSegmentStart(state, pending) + if start == holdToolSegmentStart { + break + } if start >= 0 { prefix := pending[:start] if prefix != "" { + resetMarkdownSpan := shouldResetUnclosedMarkdownPrefix(state, prefix, pending[start:]) state.noteText(prefix) + if resetMarkdownSpan { + state.markdownCodeSpanTicks = 0 + } events = append(events, Event{Content: prefix}) } state.pending.Reset() @@ -88,6 +95,13 @@ func Flush(state *State, toolNames []string) []Event { return nil } events := ProcessChunk(state, "", toolNames) + if state.pending.Len() > 0 && state.markdownCodeSpanTicks > 0 { + // At end of stream, an unmatched backtick is literal Markdown text. + // Re-scan pending content so a real tool call after that stray + // backtick is not permanently hidden by inline-code state. + state.markdownCodeSpanTicks = 0 + events = append(events, ProcessChunk(state, "", toolNames)...) + } if len(state.pendingToolCalls) > 0 { events = append(events, Event{ToolCalls: state.pendingToolCalls}) state.pendingToolRaw = "" @@ -155,9 +169,18 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string return "", "" } if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 { - if insideCodeFenceWithState(state, s[:xmlIdx]) || insideMarkdownCodeSpanWithState(state, s[:xmlIdx]) { + if insideCodeFenceWithState(state, s[:xmlIdx]) { return s, "" } + markdown := markdownCodeSpanStateAt(state, s[:xmlIdx]) + if markdown.ticks > 0 { + if markdownCodeSpanCloses(s[xmlIdx:], markdown.ticks) { + return s, "" + } + if markdown.fromPrior { + return "", s + } + } if xmlIdx > 0 { return s[:xmlIdx], s[xmlIdx:] } @@ -166,6 +189,8 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string return s, "" } +const holdToolSegmentStart = -2 + func findToolSegmentStart(state *State, s string) int { if s == "" { return -1 @@ -177,13 +202,86 @@ func findToolSegmentStart(state *State, s string) int { return -1 } start := includeDuplicateLeadingLessThan(s, tag.Start) - if !insideCodeFenceWithState(state, s[:start]) && !insideMarkdownCodeSpanWithState(state, s[:start]) { + if insideCodeFenceWithState(state, s[:start]) { + offset = tag.End + 1 + continue + } + markdown := markdownCodeSpanStateAt(state, s[:start]) + if markdown.ticks == 0 { return start } - offset = tag.End + 1 + if markdownCodeSpanCloses(s[start:], markdown.ticks) { + offset = tag.End + 1 + continue + } + if markdown.fromPrior { + return holdToolSegmentStart + } + return start } } +type markdownCodeSpanScan struct { + ticks int + fromPrior bool +} + +func markdownCodeSpanStateAt(state *State, text string) markdownCodeSpanScan { + ticks := 0 + fromPrior := false + if state != nil && state.markdownCodeSpanTicks > 0 { + ticks = state.markdownCodeSpanTicks + fromPrior = true + } + for i := 0; i < len(text); { + if text[i] != '`' { + i++ + continue + } + run := countBacktickRun(text, i) + if ticks == 0 { + if run >= 3 && atMarkdownFenceLineStart(text, i) { + i += run + continue + } + if state != nil && insideCodeFenceWithState(state, text[:i]) { + i += run + continue + } + ticks = run + fromPrior = false + } else if run == ticks { + ticks = 0 + fromPrior = false + } + i += run + } + return markdownCodeSpanScan{ticks: ticks, fromPrior: fromPrior} +} + +func markdownCodeSpanCloses(text string, ticks int) bool { + if ticks <= 0 { + return false + } + for i := 0; i < len(text); { + if text[i] != '`' { + i++ + continue + } + run := countBacktickRun(text, i) + if run == ticks { + return true + } + i += run + } + return false +} + +func shouldResetUnclosedMarkdownPrefix(state *State, prefix, suffix string) bool { + markdown := markdownCodeSpanStateAt(state, prefix) + return markdown.ticks > 0 && !markdown.fromPrior && !markdownCodeSpanCloses(suffix, markdown.ticks) +} + func includeDuplicateLeadingLessThan(s string, idx int) int { for idx > 0 && s[idx-1] == '<' { idx-- diff --git a/internal/toolstream/tool_sieve_state.go b/internal/toolstream/tool_sieve_state.go index e3d407b..2c1711c 100644 --- a/internal/toolstream/tool_sieve_state.go +++ b/internal/toolstream/tool_sieve_state.go @@ -80,13 +80,6 @@ func insideCodeFence(text string) bool { return len(simulateCodeFenceState(nil, 0, 0, true, text).stack) > 0 } -func insideMarkdownCodeSpanWithState(state *State, text string) bool { - if state == nil { - return simulateMarkdownCodeSpanTicks(nil, 0, text) > 0 - } - return simulateMarkdownCodeSpanTicks(state, state.markdownCodeSpanTicks, text) > 0 -} - func updateMarkdownCodeSpanState(state *State, text string) { if state == nil || !hasMeaningfulText(text) { return diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index f23777d..c5f09d8 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -632,6 +632,27 @@ test('sieve ignores inline markdown tool example split across chunks', () => { assert.equal(text.includes('完毕'), true); }); +test('sieve emits real tool after unclosed inline markdown in same chunk', () => { + const events = runSieve([ + 'note with stray ` before real call real.md', + ], ['read_file']); + const text = collectText(events); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].input.path, 'real.md'); + assert.equal(text.includes('stray ` before real call'), true); +}); + +test('sieve emits real tool after unclosed inline markdown across chunks', () => { + const events = runSieve([ + 'note with stray ` before real call ', + 'real.md', + ], ['read_file']); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].input.path, 'real.md'); +}); + test('sieve emits real tool after split inline markdown tool example closes', () => { const events = runSieve([ '示例:`',