mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-10 19:27:41 +08:00
Fix tool detection when unclosed backtick precedes tool call
Handles cases where a stray backtick opens an inline code span but is never closed. Previously, any subsequent XML tool tag was treated as inside markdown code and ignored. Now, tool tags are detected after an unclosed backtick, and the markdown state is reset when the backtick is confirmed to be literal text at stream boundaries. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,6 @@ const {
|
||||
resetIncrementalToolState,
|
||||
noteText,
|
||||
insideCodeFenceWithState,
|
||||
insideMarkdownCodeSpanWithState,
|
||||
} = require('./state');
|
||||
const { trimWrappingJSONFence } = require('./jsonscan');
|
||||
const {
|
||||
@@ -71,10 +70,17 @@ function processToolSieveChunk(state, chunk, toolNames) {
|
||||
break;
|
||||
}
|
||||
const start = findToolSegmentStart(state, pending);
|
||||
if (start === HOLD_TOOL_SEGMENT_START) {
|
||||
break;
|
||||
}
|
||||
if (start >= 0) {
|
||||
const prefix = pending.slice(0, start);
|
||||
if (prefix) {
|
||||
const resetMarkdownSpan = shouldResetUnclosedMarkdownPrefix(state, prefix, pending.slice(start));
|
||||
noteText(state, prefix);
|
||||
if (resetMarkdownSpan) {
|
||||
state.markdownCodeSpanTicks = 0;
|
||||
}
|
||||
events.push({ type: 'text', text: prefix });
|
||||
}
|
||||
state.pending = '';
|
||||
@@ -99,6 +105,10 @@ function flushToolSieve(state, toolNames) {
|
||||
return [];
|
||||
}
|
||||
const events = processToolSieveChunk(state, '', toolNames);
|
||||
if (state.pending && Number.isInteger(state.markdownCodeSpanTicks) && state.markdownCodeSpanTicks > 0) {
|
||||
state.markdownCodeSpanTicks = 0;
|
||||
events.push(...processToolSieveChunk(state, '', toolNames));
|
||||
}
|
||||
if (Array.isArray(state.pendingToolCalls) && state.pendingToolCalls.length > 0) {
|
||||
events.push({ type: 'tool_calls', calls: state.pendingToolCalls });
|
||||
state.pendingToolRaw = '';
|
||||
@@ -162,9 +172,18 @@ function splitSafeContentForToolDetection(state, s) {
|
||||
// Only hold back partial XML tool tags.
|
||||
const xmlIdx = findPartialXMLToolTagStart(text);
|
||||
if (xmlIdx >= 0) {
|
||||
if (insideCodeFenceWithState(state, text.slice(0, xmlIdx)) || insideMarkdownCodeSpanWithState(state, text.slice(0, xmlIdx))) {
|
||||
if (insideCodeFenceWithState(state, text.slice(0, xmlIdx))) {
|
||||
return [text, ''];
|
||||
}
|
||||
const markdown = markdownCodeSpanStateAt(state, text.slice(0, xmlIdx));
|
||||
if (markdown.ticks > 0) {
|
||||
if (markdownCodeSpanCloses(text.slice(xmlIdx), markdown.ticks)) {
|
||||
return [text, ''];
|
||||
}
|
||||
if (markdown.fromPrior) {
|
||||
return ['', text];
|
||||
}
|
||||
}
|
||||
if (xmlIdx > 0) {
|
||||
return [text.slice(0, xmlIdx), text.slice(xmlIdx)];
|
||||
}
|
||||
@@ -173,6 +192,8 @@ function splitSafeContentForToolDetection(state, s) {
|
||||
return [text, ''];
|
||||
}
|
||||
|
||||
const HOLD_TOOL_SEGMENT_START = -2;
|
||||
|
||||
function findToolSegmentStart(state, s) {
|
||||
if (!s) {
|
||||
return -1;
|
||||
@@ -183,13 +204,98 @@ function findToolSegmentStart(state, s) {
|
||||
if (!tag) {
|
||||
return -1;
|
||||
}
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, tag.start)) && !insideMarkdownCodeSpanWithState(state, s.slice(0, tag.start))) {
|
||||
if (insideCodeFenceWithState(state, s.slice(0, tag.start))) {
|
||||
offset = tag.end + 1;
|
||||
continue;
|
||||
}
|
||||
const markdown = markdownCodeSpanStateAt(state, s.slice(0, tag.start));
|
||||
if (markdown.ticks === 0) {
|
||||
return tag.start;
|
||||
}
|
||||
offset = tag.end + 1;
|
||||
if (markdownCodeSpanCloses(s.slice(tag.start), markdown.ticks)) {
|
||||
offset = tag.end + 1;
|
||||
continue;
|
||||
}
|
||||
if (markdown.fromPrior) {
|
||||
return HOLD_TOOL_SEGMENT_START;
|
||||
}
|
||||
return tag.start;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Scans `text` for backtick runs and reports the inline code span state at
 * the end of it.
 *
 * The scan starts from `state.markdownCodeSpanTicks` when that is a positive
 * integer; any other value means "no span open". While no span is open, a run
 * of three or more backticks at the start of a line is skipped rather than
 * treated as a span opener, and so is any run that `insideCodeFenceWithState`
 * reports as lying inside a code fence. While a span is open, only a run of
 * exactly the same length closes it.
 *
 * @param {object|null|undefined} state - Sieve state; only
 *   `markdownCodeSpanTicks` is read here. The object is also forwarded to
 *   `insideCodeFenceWithState`.
 * @param {string} text - Text to scan; any non-string is treated as ''.
 * @returns {{ticks: number, fromPrior: boolean}} `ticks` is the backtick count
 *   of the span still open at the end of `text` (0 when none is open);
 *   `fromPrior` is true only when that open span was carried in from `state`
 *   rather than opened inside `text`.
 */
function markdownCodeSpanStateAt(state, text) {
  const raw = typeof text === 'string' ? text : '';
  const carried = state && Number.isInteger(state.markdownCodeSpanTicks) ? state.markdownCodeSpanTicks : 0;
  // A non-positive count can never be matched by a backtick run, so treat it
  // as "no span open" instead of letting it wedge the scan.
  let ticks = carried > 0 ? carried : 0;
  let fromPrior = ticks > 0;
  for (let i = 0; i < raw.length;) {
    if (raw[i] !== '`') {
      i += 1;
      continue;
    }
    const run = countBacktickRun(raw, i);
    if (ticks === 0) {
      // A line-leading run of 3+ backticks is a fence marker, not a span opener.
      if (run >= 3 && atMarkdownFenceLineStart(raw, i)) {
        i += run;
        continue;
      }
      // Backticks inside a fenced block do not open inline spans.
      if (state && insideCodeFenceWithState(state, raw.slice(0, i))) {
        i += run;
        continue;
      }
      ticks = run;
      fromPrior = false;
    } else if (run === ticks) {
      // Only a run of identical length closes the open span.
      ticks = 0;
      fromPrior = false;
    }
    i += run;
  }
  return { ticks, fromPrior };
}
|
||||
|
||||
/**
 * Reports whether `text` contains a backtick run of exactly `ticks` backticks,
 * i.e. a run that would close an inline code span opened with that many.
 *
 * @param {string} text - Text following the point of interest; any non-string
 *   is treated as ''.
 * @param {number} ticks - Backtick count of the open span. Anything that is
 *   not a positive integer yields false.
 * @returns {boolean} True when a run of exactly `ticks` backticks occurs.
 */
function markdownCodeSpanCloses(text, ticks) {
  const raw = typeof text === 'string' ? text : '';
  if (!Number.isInteger(ticks) || ticks <= 0) {
    return false;
  }
  for (let i = 0; i < raw.length;) {
    if (raw[i] !== '`') {
      i += 1;
      continue;
    }
    // Measure the whole run so a longer or shorter run is not mistaken for a closer.
    const run = countBacktickRun(raw, i);
    if (run === ticks) {
      return true;
    }
    i += run;
  }
  return false;
}
|
||||
|
||||
/**
 * Decides whether a backtick span opened inside `prefix` should be treated as
 * literal text.
 *
 * @param {object} state - Sieve state, forwarded to `markdownCodeSpanStateAt`.
 * @param {string} prefix - Text preceding the detected tool segment.
 * @param {string} suffix - Text from the tool segment start onward.
 * @returns {boolean} True when a span is still open at the end of `prefix`,
 *   that span was opened within `prefix` (not carried in from `state`), and
 *   `suffix` contains no backtick run of matching length to close it.
 */
function shouldResetUnclosedMarkdownPrefix(state, prefix, suffix) {
  const markdown = markdownCodeSpanStateAt(state, prefix);
  if (markdown.ticks <= 0 || markdown.fromPrior) {
    return false;
  }
  return !markdownCodeSpanCloses(suffix, markdown.ticks);
}
|
||||
|
||||
/**
 * Counts the consecutive backticks in `text` beginning at index `start`.
 *
 * @param {string} text - Text to inspect.
 * @param {number} start - Index at which counting begins.
 * @returns {number} Length of the backtick run starting at `start`; 0 when the
 *   character there is not a backtick or `start` is past the end.
 */
function countBacktickRun(text, start) {
  let count = 0;
  while (start + count < text.length && text[start + count] === '`') {
    count += 1;
  }
  return count;
}
|
||||
|
||||
/**
 * Reports whether position `idx` in `text` is at the start of a line, ignoring
 * leading spaces and tabs.
 *
 * @param {string} text - Text to inspect.
 * @param {number} idx - Index of the character being tested.
 * @returns {boolean} True when only spaces/tabs lie between `idx` and the
 *   preceding '\n' or '\r', or between `idx` and the beginning of `text`.
 */
function atMarkdownFenceLineStart(text, idx) {
  // Walk backwards over indentation until a decisive character is found.
  for (let i = idx - 1; i >= 0; i -= 1) {
    const ch = text[i];
    if (ch === ' ' || ch === '\t') {
      continue;
    }
    return ch === '\n' || ch === '\r';
  }
  // Reached the beginning of the text: that counts as a line start.
  return true;
}
|
||||
|
||||
function consumeToolCapture(state, toolNames) {
|
||||
const captured = state.capture || '';
|
||||
if (!captured) {
|
||||
|
||||
@@ -122,3 +122,58 @@ func TestProcessToolSieveInlineMarkdownToolCallSplitAcrossChunksDoesNotTrigger(t
|
||||
t.Fatalf("expected inline example text preserved, got %q", textContent.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveUnclosedInlineMarkdownBeforeToolDoesTrigger(t *testing.T) {
|
||||
var state State
|
||||
input := "note with stray ` before real call " +
|
||||
"<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">real.md</parameter></invoke></tool_calls>"
|
||||
|
||||
var events []Event
|
||||
events = append(events, ProcessChunk(&state, input, []string{"read_file"})...)
|
||||
events = append(events, Flush(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
var calls []string
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
for _, call := range evt.ToolCalls {
|
||||
if path, _ := call.Input["path"].(string); path != "" {
|
||||
calls = append(calls, path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(calls) != 1 || calls[0] != "real.md" {
|
||||
t.Fatalf("expected real tool call after stray backtick, got %#v from events %#v", calls, events)
|
||||
}
|
||||
if !strings.Contains(textContent.String(), "stray ` before real call") {
|
||||
t.Fatalf("expected stray-backtick prefix preserved, got %q", textContent.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveUnclosedInlineMarkdownBeforeSplitToolDoesTriggerOnFlush(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"note with stray ` before real call ",
|
||||
"<tool_calls><invoke name=\"read_file\"><parameter name=\"path\">real.md</parameter></invoke></tool_calls>",
|
||||
}
|
||||
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, Flush(&state, []string{"read_file"})...)
|
||||
|
||||
var calls []string
|
||||
for _, evt := range events {
|
||||
for _, call := range evt.ToolCalls {
|
||||
if path, _ := call.Input["path"].(string); path != "" {
|
||||
calls = append(calls, path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(calls) != 1 || calls[0] != "real.md" {
|
||||
t.Fatalf("expected split real tool call after stray backtick, got %#v from events %#v", calls, events)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,10 +57,17 @@ func ProcessChunk(state *State, chunk string, toolNames []string) []Event {
|
||||
break
|
||||
}
|
||||
start := findToolSegmentStart(state, pending)
|
||||
if start == holdToolSegmentStart {
|
||||
break
|
||||
}
|
||||
if start >= 0 {
|
||||
prefix := pending[:start]
|
||||
if prefix != "" {
|
||||
resetMarkdownSpan := shouldResetUnclosedMarkdownPrefix(state, prefix, pending[start:])
|
||||
state.noteText(prefix)
|
||||
if resetMarkdownSpan {
|
||||
state.markdownCodeSpanTicks = 0
|
||||
}
|
||||
events = append(events, Event{Content: prefix})
|
||||
}
|
||||
state.pending.Reset()
|
||||
@@ -88,6 +95,13 @@ func Flush(state *State, toolNames []string) []Event {
|
||||
return nil
|
||||
}
|
||||
events := ProcessChunk(state, "", toolNames)
|
||||
if state.pending.Len() > 0 && state.markdownCodeSpanTicks > 0 {
|
||||
// At end of stream, an unmatched backtick is literal Markdown text.
|
||||
// Re-scan pending content so a real tool call after that stray
|
||||
// backtick is not permanently hidden by inline-code state.
|
||||
state.markdownCodeSpanTicks = 0
|
||||
events = append(events, ProcessChunk(state, "", toolNames)...)
|
||||
}
|
||||
if len(state.pendingToolCalls) > 0 {
|
||||
events = append(events, Event{ToolCalls: state.pendingToolCalls})
|
||||
state.pendingToolRaw = ""
|
||||
@@ -155,9 +169,18 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string
|
||||
return "", ""
|
||||
}
|
||||
if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 {
|
||||
if insideCodeFenceWithState(state, s[:xmlIdx]) || insideMarkdownCodeSpanWithState(state, s[:xmlIdx]) {
|
||||
if insideCodeFenceWithState(state, s[:xmlIdx]) {
|
||||
return s, ""
|
||||
}
|
||||
markdown := markdownCodeSpanStateAt(state, s[:xmlIdx])
|
||||
if markdown.ticks > 0 {
|
||||
if markdownCodeSpanCloses(s[xmlIdx:], markdown.ticks) {
|
||||
return s, ""
|
||||
}
|
||||
if markdown.fromPrior {
|
||||
return "", s
|
||||
}
|
||||
}
|
||||
if xmlIdx > 0 {
|
||||
return s[:xmlIdx], s[xmlIdx:]
|
||||
}
|
||||
@@ -166,6 +189,8 @@ func splitSafeContentForToolDetection(state *State, s string) (safe, hold string
|
||||
return s, ""
|
||||
}
|
||||
|
||||
const holdToolSegmentStart = -2
|
||||
|
||||
func findToolSegmentStart(state *State, s string) int {
|
||||
if s == "" {
|
||||
return -1
|
||||
@@ -177,13 +202,86 @@ func findToolSegmentStart(state *State, s string) int {
|
||||
return -1
|
||||
}
|
||||
start := includeDuplicateLeadingLessThan(s, tag.Start)
|
||||
if !insideCodeFenceWithState(state, s[:start]) && !insideMarkdownCodeSpanWithState(state, s[:start]) {
|
||||
if insideCodeFenceWithState(state, s[:start]) {
|
||||
offset = tag.End + 1
|
||||
continue
|
||||
}
|
||||
markdown := markdownCodeSpanStateAt(state, s[:start])
|
||||
if markdown.ticks == 0 {
|
||||
return start
|
||||
}
|
||||
offset = tag.End + 1
|
||||
if markdownCodeSpanCloses(s[start:], markdown.ticks) {
|
||||
offset = tag.End + 1
|
||||
continue
|
||||
}
|
||||
if markdown.fromPrior {
|
||||
return holdToolSegmentStart
|
||||
}
|
||||
return start
|
||||
}
|
||||
}
|
||||
|
||||
// markdownCodeSpanScan is the result of scanning text for inline code spans.
type markdownCodeSpanScan struct {
	// ticks is the backtick count of the span still open at the end of the
	// scanned text; 0 means no span is open.
	ticks int
	// fromPrior is true when the open span was carried in from State rather
	// than opened inside the scanned text.
	fromPrior bool
}
|
||||
|
||||
func markdownCodeSpanStateAt(state *State, text string) markdownCodeSpanScan {
|
||||
ticks := 0
|
||||
fromPrior := false
|
||||
if state != nil && state.markdownCodeSpanTicks > 0 {
|
||||
ticks = state.markdownCodeSpanTicks
|
||||
fromPrior = true
|
||||
}
|
||||
for i := 0; i < len(text); {
|
||||
if text[i] != '`' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
run := countBacktickRun(text, i)
|
||||
if ticks == 0 {
|
||||
if run >= 3 && atMarkdownFenceLineStart(text, i) {
|
||||
i += run
|
||||
continue
|
||||
}
|
||||
if state != nil && insideCodeFenceWithState(state, text[:i]) {
|
||||
i += run
|
||||
continue
|
||||
}
|
||||
ticks = run
|
||||
fromPrior = false
|
||||
} else if run == ticks {
|
||||
ticks = 0
|
||||
fromPrior = false
|
||||
}
|
||||
i += run
|
||||
}
|
||||
return markdownCodeSpanScan{ticks: ticks, fromPrior: fromPrior}
|
||||
}
|
||||
|
||||
func markdownCodeSpanCloses(text string, ticks int) bool {
|
||||
if ticks <= 0 {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(text); {
|
||||
if text[i] != '`' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
run := countBacktickRun(text, i)
|
||||
if run == ticks {
|
||||
return true
|
||||
}
|
||||
i += run
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func shouldResetUnclosedMarkdownPrefix(state *State, prefix, suffix string) bool {
|
||||
markdown := markdownCodeSpanStateAt(state, prefix)
|
||||
return markdown.ticks > 0 && !markdown.fromPrior && !markdownCodeSpanCloses(suffix, markdown.ticks)
|
||||
}
|
||||
|
||||
func includeDuplicateLeadingLessThan(s string, idx int) int {
|
||||
for idx > 0 && s[idx-1] == '<' {
|
||||
idx--
|
||||
|
||||
@@ -80,13 +80,6 @@ func insideCodeFence(text string) bool {
|
||||
return len(simulateCodeFenceState(nil, 0, 0, true, text).stack) > 0
|
||||
}
|
||||
|
||||
func insideMarkdownCodeSpanWithState(state *State, text string) bool {
|
||||
if state == nil {
|
||||
return simulateMarkdownCodeSpanTicks(nil, 0, text) > 0
|
||||
}
|
||||
return simulateMarkdownCodeSpanTicks(state, state.markdownCodeSpanTicks, text) > 0
|
||||
}
|
||||
|
||||
func updateMarkdownCodeSpanState(state *State, text string) {
|
||||
if state == nil || !hasMeaningfulText(text) {
|
||||
return
|
||||
|
||||
@@ -632,6 +632,27 @@ test('sieve ignores inline markdown tool example split across chunks', () => {
|
||||
assert.equal(text.includes('完毕'), true);
|
||||
});
|
||||
|
||||
// Regression: a stray, never-closed backtick in the same chunk as a tool call
// must not hide the call. Expects one tool call with path 'real.md' and the
// text before it, backtick included, in the emitted text.
test('sieve emits real tool after unclosed inline markdown in same chunk', () => {
  const events = runSieve([
    'note with stray ` before real call <tool_calls><invoke name="read_file"><parameter name="path">real.md</parameter></invoke></tool_calls>',
  ], ['read_file']);
  const text = collectText(events);
  const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
  assert.equal(finalCalls.length, 1);
  assert.equal(finalCalls[0].input.path, 'real.md');
  assert.equal(text.includes('stray ` before real call'), true);
});
|
||||
|
||||
// Regression: the stray backtick arrives in one chunk and the tool call in the
// next. Expects exactly one tool call with path 'real.md' once the sieve has
// processed both chunks.
test('sieve emits real tool after unclosed inline markdown across chunks', () => {
  const events = runSieve([
    'note with stray ` before real call ',
    '<tool_calls><invoke name="read_file"><parameter name="path">real.md</parameter></invoke></tool_calls>',
  ], ['read_file']);
  const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
  assert.equal(finalCalls.length, 1);
  assert.equal(finalCalls[0].input.path, 'real.md');
});
|
||||
|
||||
test('sieve emits real tool after split inline markdown tool example closes', () => {
|
||||
const events = runSieve([
|
||||
'示例:`',
|
||||
|
||||
Reference in New Issue
Block a user