Fix tool detection when unclosed backtick precedes tool call

Handles cases where a stray backtick opens an inline code span but is never closed.
Previously, any subsequent XML tool tag was treated as inside markdown code and ignored.
Now, tool tags are detected after an unclosed backtick, and the markdown state is reset
when the backtick is confirmed to be literal text at stream boundaries.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
CJACK
2026-05-10 18:41:51 +08:00
parent 8623920c89
commit 77a47ada4e
5 changed files with 287 additions and 14 deletions

View File

@@ -3,7 +3,6 @@ const {
resetIncrementalToolState,
noteText,
insideCodeFenceWithState,
insideMarkdownCodeSpanWithState,
} = require('./state');
const { trimWrappingJSONFence } = require('./jsonscan');
const {
@@ -71,10 +70,17 @@ function processToolSieveChunk(state, chunk, toolNames) {
break;
}
const start = findToolSegmentStart(state, pending);
if (start === HOLD_TOOL_SEGMENT_START) {
break;
}
if (start >= 0) {
const prefix = pending.slice(0, start);
if (prefix) {
const resetMarkdownSpan = shouldResetUnclosedMarkdownPrefix(state, prefix, pending.slice(start));
noteText(state, prefix);
if (resetMarkdownSpan) {
state.markdownCodeSpanTicks = 0;
}
events.push({ type: 'text', text: prefix });
}
state.pending = '';
@@ -99,6 +105,10 @@ function flushToolSieve(state, toolNames) {
return [];
}
const events = processToolSieveChunk(state, '', toolNames);
if (state.pending && Number.isInteger(state.markdownCodeSpanTicks) && state.markdownCodeSpanTicks > 0) {
state.markdownCodeSpanTicks = 0;
events.push(...processToolSieveChunk(state, '', toolNames));
}
if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) {
events.push({ type: 'tool_calls', calls: state.pendingToolCalls });
state.pendingToolRaw = '';
@@ -162,9 +172,18 @@ function splitSafeContentForToolDetection(state, s) {
// Only hold back partial XML tool tags.
const xmlIdx = findPartialXMLToolTagStart(text);
if (xmlIdx >= 0) {
if (insideCodeFenceWithState(state, text.slice(0, xmlIdx)) || insideMarkdownCodeSpanWithState(state, text.slice(0, xmlIdx))) {
if (insideCodeFenceWithState(state, text.slice(0, xmlIdx))) {
return [text, ''];
}
const markdown = markdownCodeSpanStateAt(state, text.slice(0, xmlIdx));
if (markdown.ticks > 0) {
if (markdownCodeSpanCloses(text.slice(xmlIdx), markdown.ticks)) {
return [text, ''];
}
if (markdown.fromPrior) {
return ['', text];
}
}
if (xmlIdx > 0) {
return [text.slice(0, xmlIdx), text.slice(xmlIdx)];
}
@@ -173,6 +192,8 @@ function splitSafeContentForToolDetection(state, s) {
return [text, ''];
}
const HOLD_TOOL_SEGMENT_START = -2;
function findToolSegmentStart(state, s) {
if (!s) {
return -1;
@@ -183,13 +204,98 @@ function findToolSegmentStart(state, s) {
if (!tag) {
return -1;
}
if (!insideCodeFenceWithState(state, s.slice(0, tag.start)) && !insideMarkdownCodeSpanWithState(state, s.slice(0, tag.start))) {
if (insideCodeFenceWithState(state, s.slice(0, tag.start))) {
offset = tag.end + 1;
continue;
}
const markdown = markdownCodeSpanStateAt(state, s.slice(0, tag.start));
if (markdown.ticks === 0) {
return tag.start;
}
offset = tag.end + 1;
if (markdownCodeSpanCloses(s.slice(tag.start), markdown.ticks)) {
offset = tag.end + 1;
continue;
}
if (markdown.fromPrior) {
return HOLD_TOOL_SEGMENT_START;
}
return tag.start;
}
}
/**
 * Walks the backtick runs in `text` and reports the inline code-span state
 * reached at the end of it.
 *
 * The scan is seeded from `state.markdownCodeSpanTicks` when that is an
 * integer; otherwise it starts with no open span.
 *
 * @param {object|null|undefined} state - Sieve state; only
 *   `markdownCodeSpanTicks` is read here (it is also forwarded to
 *   `insideCodeFenceWithState`).
 * @param {*} text - Text to scan; non-string values are treated as ''.
 * @returns {{ticks: number, fromPrior: boolean}} `ticks` is the backtick run
 *   length of the span still open at the end of `text` (0 when none);
 *   `fromPrior` is true only when that open span is the one seeded from
 *   `state` and no backtick run in `text` opened or closed a span.
 */
function markdownCodeSpanStateAt(state, text) {
  const source = typeof text === 'string' ? text : '';
  const hasSeed = Boolean(state) && Number.isInteger(state.markdownCodeSpanTicks);
  let ticks = hasSeed ? state.markdownCodeSpanTicks : 0;
  let fromPrior = ticks > 0;

  let pos = 0;
  while (pos < source.length) {
    if (source[pos] !== '`') {
      pos += 1;
      continue;
    }

    const runStart = pos;
    const run = countBacktickRun(source, runStart);
    pos += run;

    if (ticks !== 0) {
      // A span is open: only a run of exactly the same length closes it.
      if (run === ticks) {
        ticks = 0;
        fromPrior = false;
      }
      continue;
    }

    // No span open: runs that look like fence markers, or that sit inside a
    // code fence according to insideCodeFenceWithState, do not open one.
    if (run >= 3 && atMarkdownFenceLineStart(source, runStart)) {
      continue;
    }
    if (state && insideCodeFenceWithState(state, source.slice(0, runStart))) {
      continue;
    }

    ticks = run;
    fromPrior = false;
  }

  return { ticks, fromPrior };
}
/**
 * Tells whether `text` contains a backtick run whose length is exactly
 * `ticks`, i.e. a run that would close a code span opened with that many
 * backticks.
 *
 * @param {*} text - Text to search; non-string values are treated as ''.
 * @param {number} ticks - Backtick count of the open span.
 * @returns {boolean} false when `ticks` is not a positive integer or no run
 *   of that exact length exists; true otherwise.
 */
function markdownCodeSpanCloses(text, ticks) {
  if (!Number.isInteger(ticks) || ticks <= 0) {
    return false;
  }
  const source = typeof text === 'string' ? text : '';

  // Hop from one backtick run to the next instead of stepping per character.
  let pos = source.indexOf('`');
  while (pos !== -1) {
    const run = countBacktickRun(source, pos);
    if (run === ticks) {
      return true;
    }
    pos = source.indexOf('`', pos + run);
  }
  return false;
}
/**
 * Decides whether a code span opened inside `prefix` should be discarded.
 *
 * @param {object|null|undefined} state - Sieve state passed to the span scan.
 * @param {string} prefix - Text that precedes the tool segment.
 * @param {string} suffix - Text from the tool segment onward.
 * @returns {boolean} true when `prefix` ends with an open span that was
 *   opened within `prefix` itself (not carried in from `state`) and `suffix`
 *   has no backtick run of matching length to close it.
 */
function shouldResetUnclosedMarkdownPrefix(state, prefix, suffix) {
  const { ticks, fromPrior } = markdownCodeSpanStateAt(state, prefix);
  if (!(ticks > 0)) {
    return false;
  }
  if (fromPrior) {
    return false;
  }
  return !markdownCodeSpanCloses(suffix, ticks);
}
/**
 * Counts consecutive backticks in `text` beginning at index `start`.
 *
 * @param {string} text - Text to inspect.
 * @param {number} start - Index at which the run is expected to begin.
 * @returns {number} Length of the run; 0 when `text[start]` is not a
 *   backtick or `start` is past the end of `text`.
 */
function countBacktickRun(text, start) {
  let length = 0;
  for (;;) {
    const pos = start + length;
    if (!(pos < text.length) || text[pos] !== '`') {
      return length;
    }
    length += 1;
  }
}
/**
 * Checks whether position `idx` in `text` is preceded on its line only by
 * spaces or tabs.
 *
 * @param {string} text - Text being scanned.
 * @param {number} idx - Index of the character under test.
 * @returns {boolean} true when everything between `idx` and the previous
 *   '\n' or '\r' (or the start of `text`) is a space or a tab.
 */
function atMarkdownFenceLineStart(text, idx) {
  let cursor = idx - 1;
  // Step back over indentation.
  while (cursor >= 0 && (text[cursor] === ' ' || text[cursor] === '\t')) {
    cursor -= 1;
  }
  if (!(cursor >= 0)) {
    // Ran off the front of the text: nothing but indentation before idx.
    return true;
  }
  const before = text[cursor];
  return before === '\n' || before === '\r';
}
function consumeToolCapture(state, toolNames) {
const captured = state.capture || '';
if (!captured) {

View File

@@ -122,3 +122,58 @@ func TestProcessToolSieveInlineMarkdownToolCallSplitAcrossChunksDoesNotTrigger(t
t.Fatalf("expected inline example text preserved, got %q", textContent.String())
}
}
// TestProcessToolSieveUnclosedInlineMarkdownBeforeToolDoesTrigger feeds a
// single chunk in which a stray, never-closed backtick precedes a tool call
// and checks that the call is still emitted and the prefix text is kept.
func TestProcessToolSieveUnclosedInlineMarkdownBeforeToolDoesTrigger(t *testing.T) {
	const strayPrefix = "note with stray ` before real call "
	const toolXML = "<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">real.md</parameter></invoke></tool_calls>"

	var state State
	var events []Event
	events = append(events, ProcessChunk(&state, strayPrefix+toolXML, []string{"read_file"})...)
	events = append(events, Flush(&state, []string{"read_file"})...)

	// Gather all emitted text and every non-empty "path" argument.
	var text strings.Builder
	var paths []string
	for _, evt := range events {
		text.WriteString(evt.Content)
		for _, call := range evt.ToolCalls {
			path, ok := call.Input["path"].(string)
			if !ok || path == "" {
				continue
			}
			paths = append(paths, path)
		}
	}

	if len(paths) != 1 || paths[0] != "real.md" {
		t.Fatalf("expected real tool call after stray backtick, got %#v from events %#v", paths, events)
	}
	if got := text.String(); !strings.Contains(got, "stray ` before real call") {
		t.Fatalf("expected stray-backtick prefix preserved, got %q", got)
	}
}
// TestProcessToolSieveUnclosedInlineMarkdownBeforeSplitToolDoesTriggerOnFlush
// sends the stray-backtick text and the tool call as two separate chunks,
// then flushes, and checks that exactly one read_file call for real.md
// comes out.
func TestProcessToolSieveUnclosedInlineMarkdownBeforeSplitToolDoesTriggerOnFlush(t *testing.T) {
	var state State
	var events []Event

	for _, chunk := range []string{
		"note with stray ` before real call ",
		"<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">real.md</parameter></invoke></tool_calls>",
	} {
		events = append(events, ProcessChunk(&state, chunk, []string{"read_file"})...)
	}
	events = append(events, Flush(&state, []string{"read_file"})...)

	// Collect every non-empty "path" argument from the emitted tool calls.
	var paths []string
	for _, evt := range events {
		for _, call := range evt.ToolCalls {
			path, ok := call.Input["path"].(string)
			if !ok || path == "" {
				continue
			}
			paths = append(paths, path)
		}
	}

	if len(paths) != 1 || paths[0] != "real.md" {
		t.Fatalf("expected split real tool call after stray backtick, got %#v from events %#v", paths, events)
	}
}

View File

@@ -57,10 +57,17 @@ func ProcessChunk(state *State, chunk string, toolNames []string) []Event {
break
}
start := findToolSegmentStart(state, pending)
if start == holdToolSegmentStart {
break
}
if start >= 0 {
prefix := pending[:start]
if prefix != "" {
resetMarkdownSpan := shouldResetUnclosedMarkdownPrefix(state, prefix, pending[start:])
state.noteText(prefix)
if resetMarkdownSpan {
state.markdownCodeSpanTicks = 0
}
events = append(events, Event{Content: prefix})
}
state.pending.Reset()
@@ -88,6 +95,13 @@ func Flush(state *State, toolNames []string) []Event {
return nil
}
events := ProcessChunk(state, "", toolNames)
if state.pending.Len() > 0 && state.markdownCodeSpanTicks > 0 {
// At end of stream, an unmatched backtick is literal Markdown text.
// Re-scan pending content so a real tool call after that stray
// backtick is not permanently hidden by inline-code state.
state.markdownCodeSpanTicks = 0
events = append(events, ProcessChunk(state, "", toolNames)...)
}
if len(state.pendingToolCalls) > 0 {
events = append(events, Event{ToolCalls: state.pendingToolCalls})
state.pendingToolRaw = ""
@@ -155,9 +169,18 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string
return "", ""
}
if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 {
if insideCodeFenceWithState(state, s[:xmlIdx]) || insideMarkdownCodeSpanWithState(state, s[:xmlIdx]) {
if insideCodeFenceWithState(state, s[:xmlIdx]) {
return s, ""
}
markdown := markdownCodeSpanStateAt(state, s[:xmlIdx])
if markdown.ticks > 0 {
if markdownCodeSpanCloses(s[xmlIdx:], markdown.ticks) {
return s, ""
}
if markdown.fromPrior {
return "", s
}
}
if xmlIdx > 0 {
return s[:xmlIdx], s[xmlIdx:]
}
@@ -166,6 +189,8 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string
return s, ""
}
const holdToolSegmentStart = -2
func findToolSegmentStart(state *State, s string) int {
if s == "" {
return -1
@@ -177,13 +202,86 @@ func findToolSegmentStart(state *State, s string) int {
return -1
}
start := includeDuplicateLeadingLessThan(s, tag.Start)
if !insideCodeFenceWithState(state, s[:start]) && !insideMarkdownCodeSpanWithState(state, s[:start]) {
if insideCodeFenceWithState(state, s[:start]) {
offset = tag.End + 1
continue
}
markdown := markdownCodeSpanStateAt(state, s[:start])
if markdown.ticks == 0 {
return start
}
offset = tag.End + 1
if markdownCodeSpanCloses(s[start:], markdown.ticks) {
offset = tag.End + 1
continue
}
if markdown.fromPrior {
return holdToolSegmentStart
}
return start
}
}
// markdownCodeSpanScan is the result of scanning a piece of text for inline
// Markdown code spans.
type markdownCodeSpanScan struct {
// ticks is the backtick run length of the span still open at the end of
// the scanned text; 0 means no span is open.
ticks int
// fromPrior is true when the open span was seeded from
// State.markdownCodeSpanTicks and no backtick run in the scanned text
// opened or closed a span.
fromPrior bool
}
// markdownCodeSpanStateAt walks the backtick runs in text and reports the
// inline code-span state reached at its end. The scan is seeded from
// state.markdownCodeSpanTicks when state is non-nil and that value is
// positive; otherwise it starts with no span open.
func markdownCodeSpanStateAt(state *State, text string) markdownCodeSpanScan {
	var scan markdownCodeSpanScan
	if state != nil && state.markdownCodeSpanTicks > 0 {
		scan = markdownCodeSpanScan{ticks: state.markdownCodeSpanTicks, fromPrior: true}
	}

	pos := 0
	for pos < len(text) {
		if text[pos] != '`' {
			pos++
			continue
		}

		runStart := pos
		run := countBacktickRun(text, runStart)
		pos += run

		switch {
		case scan.ticks != 0:
			// A span is open: only a run of exactly the same length closes it.
			if run == scan.ticks {
				scan = markdownCodeSpanScan{}
			}
		case run >= 3 && atMarkdownFenceLineStart(text, runStart):
			// Looks like a fence marker; it does not open an inline span.
		case state != nil && insideCodeFenceWithState(state, text[:runStart]):
			// Reported as inside a code fence; it does not open an inline span.
		default:
			scan = markdownCodeSpanScan{ticks: run}
		}
	}
	return scan
}
// markdownCodeSpanCloses reports whether text contains a backtick run whose
// length is exactly ticks. It returns false when ticks is not positive.
func markdownCodeSpanCloses(text string, ticks int) bool {
	if ticks <= 0 {
		return false
	}
	pos := 0
	for pos < len(text) {
		if text[pos] == '`' {
			run := countBacktickRun(text, pos)
			if run == ticks {
				return true
			}
			// Skip the whole run so a longer run is never matched piecewise.
			pos += run
			continue
		}
		pos++
	}
	return false
}
// shouldResetUnclosedMarkdownPrefix reports whether prefix ends with an open
// code span that was opened within prefix itself (not carried in from state)
// and that suffix has no backtick run of matching length to close.
func shouldResetUnclosedMarkdownPrefix(state *State, prefix, suffix string) bool {
	scan := markdownCodeSpanStateAt(state, prefix)
	if scan.ticks <= 0 || scan.fromPrior {
		return false
	}
	return !markdownCodeSpanCloses(suffix, scan.ticks)
}
func includeDuplicateLeadingLessThan(s string, idx int) int {
for idx > 0 && s[idx-1] == '<' {
idx--

View File

@@ -80,13 +80,6 @@ func insideCodeFence(text string) bool {
return len(simulateCodeFenceState(nil, 0, 0, true, text).stack) > 0
}
func insideMarkdownCodeSpanWithState(state *State, text string) bool {
if state == nil {
return simulateMarkdownCodeSpanTicks(nil, 0, text) > 0
}
return simulateMarkdownCodeSpanTicks(state, state.markdownCodeSpanTicks, text) > 0
}
func updateMarkdownCodeSpanState(state *State, text string) {
if state == nil || !hasMeaningfulText(text) {
return

View File

@@ -632,6 +632,27 @@ test('sieve ignores inline markdown tool example split across chunks', () => {
assert.equal(text.includes('完毕'), true);
});
// A stray, never-closed backtick in the same chunk as a tool call must not
// hide the call; the text before it must still be emitted.
test('sieve emits real tool after unclosed inline markdown in same chunk', () => {
  const chunks = [
    'note with stray ` before real call <tool_calls><invoke name="read_file"><parameter name="path">real.md</parameter></invoke></tool_calls>',
  ];
  const events = runSieve(chunks, ['read_file']);
  const text = collectText(events);

  // Flatten the calls carried by every tool_calls event.
  const finalCalls = events
    .filter((evt) => evt.type === 'tool_calls')
    .flatMap((evt) => evt.calls || []);

  assert.equal(finalCalls.length, 1);
  assert.equal(finalCalls[0].input.path, 'real.md');
  assert.equal(text.includes('stray ` before real call'), true);
});
// Same scenario as the single-chunk case, but the stray backtick and the
// tool call arrive in separate chunks.
test('sieve emits real tool after unclosed inline markdown across chunks', () => {
  const chunks = [
    'note with stray ` before real call ',
    '<tool_calls><invoke name="read_file"><parameter name="path">real.md</parameter></invoke></tool_calls>',
  ];
  const events = runSieve(chunks, ['read_file']);

  // Flatten the calls carried by every tool_calls event.
  const finalCalls = events
    .filter((evt) => evt.type === 'tool_calls')
    .flatMap((evt) => evt.calls || []);

  assert.equal(finalCalls.length, 1);
  assert.equal(finalCalls[0].input.path, 'real.md');
});
test('sieve emits real tool after split inline markdown tool example closes', () => {
const events = runSieve([
'示例:`',