diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js index 18898d8..961211c 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve.js +++ b/internal/js/helpers/stream-tool-sieve/sieve.js @@ -3,7 +3,6 @@ const { resetIncrementalToolState, noteText, insideCodeFenceWithState, - insideMarkdownCodeSpanWithState, } = require('./state'); const { trimWrappingJSONFence } = require('./jsonscan'); const { @@ -71,10 +70,17 @@ function processToolSieveChunk(state, chunk, toolNames) { break; } const start = findToolSegmentStart(state, pending); + if (start === HOLD_TOOL_SEGMENT_START) { + break; + } if (start >= 0) { const prefix = pending.slice(0, start); if (prefix) { + const resetMarkdownSpan = shouldResetUnclosedMarkdownPrefix(state, prefix, pending.slice(start)); noteText(state, prefix); + if (resetMarkdownSpan) { + state.markdownCodeSpanTicks = 0; + } events.push({ type: 'text', text: prefix }); } state.pending = ''; @@ -99,6 +105,10 @@ function flushToolSieve(state, toolNames) { return []; } const events = processToolSieveChunk(state, '', toolNames); + if (state.pending && Number.isInteger(state.markdownCodeSpanTicks) && state.markdownCodeSpanTicks > 0) { + state.markdownCodeSpanTicks = 0; + events.push(...processToolSieveChunk(state, '', toolNames)); + } if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) { events.push({ type: 'tool_calls', calls: state.pendingToolCalls }); state.pendingToolRaw = ''; @@ -162,9 +172,18 @@ function splitSafeContentForToolDetection(state, s) { // Only hold back partial XML tool tags. const xmlIdx = findPartialXMLToolTagStart(text); if (xmlIdx >= 0) { - if (insideCodeFenceWithState(state, text.slice(0, xmlIdx)) || insideMarkdownCodeSpanWithState(state, text.slice(0, xmlIdx))) { + if (insideCodeFenceWithState(state, text.slice(0, xmlIdx))) { return [text, '']; } + const markdown = markdownCodeSpanStateAt(state, text.slice(0, xmlIdx)); + if (markdown.ticks > 0) { + if (markdownCodeSpanCloses(text.slice(xmlIdx), markdown.ticks)) { + return [text, '']; + } + if (markdown.fromPrior) { + return ['', text]; + } + } if (xmlIdx > 0) { return [text.slice(0, xmlIdx), text.slice(xmlIdx)]; } @@ -173,6 +192,8 @@ function splitSafeContentForToolDetection(state, s) { return [text, '']; } +const HOLD_TOOL_SEGMENT_START = -2; + function findToolSegmentStart(state, s) { if (!s) { return -1; @@ -183,13 +204,98 @@ function findToolSegmentStart(state, s) { if (!tag) { return -1; } - if (!insideCodeFenceWithState(state, s.slice(0, tag.start)) && !insideMarkdownCodeSpanWithState(state, s.slice(0, tag.start))) { + if (insideCodeFenceWithState(state, s.slice(0, tag.start))) { + offset = tag.end + 1; + continue; + } + const markdown = markdownCodeSpanStateAt(state, s.slice(0, tag.start)); + if (markdown.ticks === 0) { return tag.start; } - offset = tag.end + 1; + if (markdownCodeSpanCloses(s.slice(tag.start), markdown.ticks)) { + offset = tag.end + 1; + continue; + } + if (markdown.fromPrior) { + return HOLD_TOOL_SEGMENT_START; + } + return tag.start; } } +function markdownCodeSpanStateAt(state, text) { + const raw = typeof text === 'string' ? text : ''; + let ticks = state && Number.isInteger(state.markdownCodeSpanTicks) ? state.markdownCodeSpanTicks : 0; + let fromPrior = ticks > 0; + for (let i = 0; i < raw.length;) { + if (raw[i] !== '`') { + i += 1; + continue; + } + const run = countBacktickRun(raw, i); + if (ticks === 0) { + if (run >= 3 && atMarkdownFenceLineStart(raw, i)) { + i += run; + continue; + } + if (state && insideCodeFenceWithState(state, raw.slice(0, i))) { + i += run; + continue; + } + ticks = run; + fromPrior = false; + } else if (run === ticks) { + ticks = 0; + fromPrior = false; + } + i += run; + } + return { ticks, fromPrior }; +} + +function markdownCodeSpanCloses(text, ticks) { + const raw = typeof text === 'string' ? text : ''; + if (!Number.isInteger(ticks) || ticks <= 0) { + return false; + } + for (let i = 0; i < raw.length;) { + if (raw[i] !== '`') { + i += 1; + continue; + } + const run = countBacktickRun(raw, i); + if (run === ticks) { + return true; + } + i += run; + } + return false; +} + +function shouldResetUnclosedMarkdownPrefix(state, prefix, suffix) { + const markdown = markdownCodeSpanStateAt(state, prefix); + return markdown.ticks > 0 && !markdown.fromPrior && !markdownCodeSpanCloses(suffix, markdown.ticks); +} + +function countBacktickRun(text, start) { + let count = 0; + while (start + count < text.length && text[start + count] === '`') { + count += 1; + } + return count; +} + +function atMarkdownFenceLineStart(text, idx) { + for (let i = idx - 1; i >= 0; i -= 1) { + const ch = text[i]; + if (ch === ' ' || ch === '\t') { + continue; + } + return ch === '\n' || ch === '\r'; + } + return true; +} + function consumeToolCapture(state, toolNames) { const captured = state.capture || ''; if (!captured) { diff --git a/internal/toolstream/fence_edge_sieve_test.go b/internal/toolstream/fence_edge_sieve_test.go index 81e6f94..035108d 100644 --- a/internal/toolstream/fence_edge_sieve_test.go +++ b/internal/toolstream/fence_edge_sieve_test.go @@ -122,3 +122,58 @@ func TestProcessToolSieveInlineMarkdownToolCallSplitAcrossChunksDoesNotTrigger(t t.Fatalf("expected inline example text preserved, got %q", textContent.String()) } } + +func TestProcessToolSieveUnclosedInlineMarkdownBeforeToolDoesTrigger(t *testing.T) { + var state State + input := "note with stray ` before real call " + + "real.md" + + var events []Event + events = append(events, ProcessChunk(&state, input, []string{"read_file"})...) + events = append(events, Flush(&state, []string{"read_file"})...) + + var textContent strings.Builder + var calls []string + for _, evt := range events { + textContent.WriteString(evt.Content) + for _, call := range evt.ToolCalls { + if path, _ := call.Input["path"].(string); path != "" { + calls = append(calls, path) + } + } + } + + if len(calls) != 1 || calls[0] != "real.md" { + t.Fatalf("expected real tool call after stray backtick, got %#v from events %#v", calls, events) + } + if !strings.Contains(textContent.String(), "stray ` before real call") { + t.Fatalf("expected stray-backtick prefix preserved, got %q", textContent.String()) + } +} + +func TestProcessToolSieveUnclosedInlineMarkdownBeforeSplitToolDoesTriggerOnFlush(t *testing.T) { + var state State + chunks := []string{ + "note with stray ` before real call ", + "real.md", + } + + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"read_file"})...) + } + events = append(events, Flush(&state, []string{"read_file"})...) + + var calls []string + for _, evt := range events { + for _, call := range evt.ToolCalls { + if path, _ := call.Input["path"].(string); path != "" { + calls = append(calls, path) + } + } + } + + if len(calls) != 1 || calls[0] != "real.md" { + t.Fatalf("expected split real tool call after stray backtick, got %#v from events %#v", calls, events) + } +} diff --git a/internal/toolstream/tool_sieve_core.go b/internal/toolstream/tool_sieve_core.go index 3be88ef..a0791f4 100644 --- a/internal/toolstream/tool_sieve_core.go +++ b/internal/toolstream/tool_sieve_core.go @@ -57,10 +57,17 @@ func ProcessChunk(state *State, chunk string, toolNames []string) []Event { break } start := findToolSegmentStart(state, pending) + if start == holdToolSegmentStart { + break + } if start >= 0 { prefix := pending[:start] if prefix != "" { + resetMarkdownSpan := shouldResetUnclosedMarkdownPrefix(state, prefix, pending[start:]) state.noteText(prefix) + if resetMarkdownSpan { + state.markdownCodeSpanTicks = 0 + } events = append(events, Event{Content: prefix}) } state.pending.Reset() @@ -88,6 +95,13 @@ func Flush(state *State, toolNames []string) []Event { return nil } events := ProcessChunk(state, "", toolNames) + if state.pending.Len() > 0 && state.markdownCodeSpanTicks > 0 { + // At end of stream, an unmatched backtick is literal Markdown text. + // Re-scan pending content so a real tool call after that stray + // backtick is not permanently hidden by inline-code state. + state.markdownCodeSpanTicks = 0 + events = append(events, ProcessChunk(state, "", toolNames)...) + } if len(state.pendingToolCalls) > 0 { events = append(events, Event{ToolCalls: state.pendingToolCalls}) state.pendingToolRaw = "" @@ -155,9 +169,18 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string return "", "" } if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 { - if insideCodeFenceWithState(state, s[:xmlIdx]) || insideMarkdownCodeSpanWithState(state, s[:xmlIdx]) { + if insideCodeFenceWithState(state, s[:xmlIdx]) { return s, "" } + markdown := markdownCodeSpanStateAt(state, s[:xmlIdx]) + if markdown.ticks > 0 { + if markdownCodeSpanCloses(s[xmlIdx:], markdown.ticks) { + return s, "" + } + if markdown.fromPrior { + return "", s + } + } if xmlIdx > 0 { return s[:xmlIdx], s[xmlIdx:] } @@ -166,6 +189,8 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string return s, "" } +const holdToolSegmentStart = -2 + func findToolSegmentStart(state *State, s string) int { if s == "" { return -1 @@ -177,13 +202,86 @@ func findToolSegmentStart(state *State, s string) int { return -1 } start := includeDuplicateLeadingLessThan(s, tag.Start) - if !insideCodeFenceWithState(state, s[:start]) && !insideMarkdownCodeSpanWithState(state, s[:start]) { + if insideCodeFenceWithState(state, s[:start]) { + offset = tag.End + 1 + continue + } + markdown := markdownCodeSpanStateAt(state, s[:start]) + if markdown.ticks == 0 { return start } - offset = tag.End + 1 + if markdownCodeSpanCloses(s[start:], markdown.ticks) { + offset = tag.End + 1 + continue + } + if markdown.fromPrior { + return holdToolSegmentStart + } + return start } } +type markdownCodeSpanScan struct { + ticks int + fromPrior bool +} + +func markdownCodeSpanStateAt(state *State, text string) markdownCodeSpanScan { + ticks := 0 + fromPrior := false + if state != nil && state.markdownCodeSpanTicks > 0 { + ticks = state.markdownCodeSpanTicks + fromPrior = true + } + for i := 0; i < len(text); { + if text[i] != '`' { + i++ + continue + } + run := countBacktickRun(text, i) + if ticks == 0 { + if run >= 3 && atMarkdownFenceLineStart(text, i) { + i += run + continue + } + if state != nil && insideCodeFenceWithState(state, text[:i]) { + i += run + continue + } + ticks = run + fromPrior = false + } else if run == ticks { + ticks = 0 + fromPrior = false + } + i += run + } + return markdownCodeSpanScan{ticks: ticks, fromPrior: fromPrior} +} + +func markdownCodeSpanCloses(text string, ticks int) bool { + if ticks <= 0 { + return false + } + for i := 0; i < len(text); { + if text[i] != '`' { + i++ + continue + } + run := countBacktickRun(text, i) + if run == ticks { + return true + } + i += run + } + return false +} + +func shouldResetUnclosedMarkdownPrefix(state *State, prefix, suffix string) bool { + markdown := markdownCodeSpanStateAt(state, prefix) + return markdown.ticks > 0 && !markdown.fromPrior && !markdownCodeSpanCloses(suffix, markdown.ticks) +} + func includeDuplicateLeadingLessThan(s string, idx int) int { for idx > 0 && s[idx-1] == '<' { idx-- diff --git a/internal/toolstream/tool_sieve_state.go b/internal/toolstream/tool_sieve_state.go index e3d407b..2c1711c 100644 --- a/internal/toolstream/tool_sieve_state.go +++ b/internal/toolstream/tool_sieve_state.go @@ -80,13 +80,6 @@ func insideCodeFence(text string) bool { return len(simulateCodeFenceState(nil, 0, 0, true, text).stack) > 0 } -func insideMarkdownCodeSpanWithState(state *State, text string) bool { - if state == nil { - return simulateMarkdownCodeSpanTicks(nil, 0, text) > 0 - } - return simulateMarkdownCodeSpanTicks(state, state.markdownCodeSpanTicks, text) > 0 -} - func updateMarkdownCodeSpanState(state *State, text string) { if state == nil || !hasMeaningfulText(text) { return diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index f23777d..c5f09d8 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -632,6 +632,27 @@ test('sieve ignores inline markdown tool example split across chunks', () => { assert.equal(text.includes('完毕'), true); }); +test('sieve emits real tool after unclosed inline markdown in same chunk', () => { + const events = runSieve([ + 'note with stray ` before real call real.md', + ], ['read_file']); + const text = collectText(events); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].input.path, 'real.md'); + assert.equal(text.includes('stray ` before real call'), true); +}); + +test('sieve emits real tool after unclosed inline markdown across chunks', () => { + const events = runSieve([ + 'note with stray ` before real call ', + 'real.md', + ], ['read_file']); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].input.path, 'real.md'); +}); + test('sieve emits real tool after split inline markdown tool example closes', () => { const events = runSieve([ '示例:`',