From 0fb1bc66112d7ef1056f500f66205ce3aa972a7f Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 26 Apr 2026 09:44:59 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B7=A5=E5=85=B7=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stream-tool-sieve/parse_payload.js | 186 +++++++++++++++-- .../js/helpers/stream-tool-sieve/sieve-xml.js | 44 +++- internal/toolcall/toolcalls_markup.go | 15 +- internal/toolcall/toolcalls_parse.go | 54 +++++ internal/toolcall/toolcalls_parse_markup.go | 195 ++++++++++++++++-- internal/toolcall/toolcalls_test.go | 26 +++ internal/toolstream/tool_sieve_xml.go | 46 ++++- internal/toolstream/tool_sieve_xml_test.go | 59 ++++++ tests/node/stream-tool-sieve.test.js | 54 +++++ 9 files changed, 631 insertions(+), 48 deletions(-) diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 49579f9..05334d3 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -1,8 +1,5 @@ 'use strict'; -const TOOLS_WRAPPER_PATTERN = /]*>([\s\S]*?)<\/tool_calls>/gi; -const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?invoke>/gi; -const PARAMETER_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?parameter\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameter>/gi; const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; const CDATA_PATTERN = /^$/i; const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; @@ -25,9 +22,9 @@ function parseMarkupToolCalls(text) { return []; } const out = []; - for (const wrapper of raw.matchAll(TOOLS_WRAPPER_PATTERN)) { - const body = toStringSafe(wrapper[1]); - for (const block of body.matchAll(TOOL_CALL_MARKUP_BLOCK_PATTERN)) { + for (const wrapper of findXmlElementBlocks(raw, 'tool_calls')) { + const body = toStringSafe(wrapper.body); + for (const block of findXmlElementBlocks(body, 'invoke')) { const parsed = parseMarkupSingleToolCall(block); if (parsed) { out.push(parsed); @@ -38,12 +35,12 @@ function parseMarkupToolCalls(text) { } function parseMarkupSingleToolCall(block) { - const attrs = parseTagAttributes(block[1]); + const attrs = parseTagAttributes(block.attrs); const name = toStringSafe(attrs.name).trim(); if (!name) { return null; } - const inner = toStringSafe(block[2]).trim(); + const inner = toStringSafe(block.body).trim(); if (inner) { try { @@ -63,13 +60,13 @@ function parseMarkupSingleToolCall(block) { } } const input = {}; - for (const match of inner.matchAll(PARAMETER_BLOCK_PATTERN)) { - const parameterAttrs = parseTagAttributes(match[1]); + for (const match of findXmlElementBlocks(inner, 'parameter')) { + const parameterAttrs = parseTagAttributes(match.attrs); const paramName = toStringSafe(parameterAttrs.name).trim(); if (!paramName) { continue; } - appendMarkupValue(input, paramName, parseMarkupValue(match[2])); + appendMarkupValue(input, paramName, parseMarkupValue(match.body)); } if (Object.keys(input).length === 0 && inner.trim() !== '') { return null; @@ -77,6 +74,154 @@ function parseMarkupSingleToolCall(block) { return { name, input }; } +function findXmlElementBlocks(text, tag) { + const source = toStringSafe(text); + const name = toStringSafe(tag).toLowerCase(); + if (!source || !name) { + return []; + } + const out = []; + let pos = 0; + while (pos < source.length) { + const start = findXmlStartTagOutsideCDATA(source, name, pos); + if (!start) { + break; + } + const end = findMatchingXmlEndTagOutsideCDATA(source, name, start.bodyStart); + if (!end) { + break; + } + out.push({ + attrs: start.attrs, + body: source.slice(start.bodyStart, end.closeStart), + start: start.start, + end: end.closeEnd, + }); + pos = end.closeEnd; + } + return out; +} + +function findXmlStartTagOutsideCDATA(text, tag, from) { + const lower = text.toLowerCase(); + const target = `<${tag}`; + for (let i = Math.max(0, from || 0); i < text.length;) { + const skipped = skipXmlIgnoredSection(lower, i); + if (skipped.blocked) { + return null; + } + if (skipped.advanced) { + i = skipped.next; + continue; + } + if (lower.startsWith(target, i) && hasXmlTagBoundary(text, i + target.length)) { + const tagEnd = findXmlTagEnd(text, i + target.length); + if (tagEnd < 0) { + return null; + } + return { + start: i, + bodyStart: tagEnd + 1, + attrs: text.slice(i + target.length, tagEnd), + }; + } + i += 1; + } + return null; +} + +function findMatchingXmlEndTagOutsideCDATA(text, tag, from) { + const lower = text.toLowerCase(); + const openTarget = `<${tag}`; + const closeTarget = `', i + ''.length }; + } + if (lower.startsWith('', i + ''.length }; + } + return { advanced: false, blocked: false, next: i }; +} + +function findXmlTagEnd(text, from) { + let quote = ''; + for (let i = Math.max(0, from || 0); i < text.length; i += 1) { + const ch = text[i]; + if (quote) { + if (ch === quote) { + quote = ''; + } + continue; + } + if (ch === '"' || ch === "'") { + quote = ch; + continue; + } + if (ch === '>') { + return i; + } + } + return -1; +} + +function hasXmlTagBoundary(text, idx) { + if (idx >= text.length) { + return true; + } + return [' ', '\t', '\n', '\r', '>', '/'].includes(text[idx]); +} + +function isSelfClosingXmlTag(startTag) { + return toStringSafe(startTag).trim().endsWith('/'); +} + function parseMarkupInput(raw) { const s = toStringSafe(raw).trim(); if (!s) { @@ -120,6 +265,10 @@ function parseMarkupKVObject(text) { } function parseMarkupValue(raw) { + const cdata = extractStandaloneCDATA(raw); + if (cdata.ok) { + return cdata.value; + } const s = toStringSafe(extractRawTagValue(raw)).trim(); if (!s) { return ''; @@ -152,9 +301,9 @@ function extractRawTagValue(inner) { } // 1. Check for CDATA - const cdataMatch = s.match(CDATA_PATTERN); - if (cdataMatch && cdataMatch[1] !== undefined) { - return cdataMatch[1]; + const cdata = extractStandaloneCDATA(s); + if (cdata.ok) { + return cdata.value; } // 2. Fallback to unescaping standard HTML entities @@ -172,6 +321,15 @@ function unescapeHtml(safe) { .replace(/'/g, "'"); } +function extractStandaloneCDATA(inner) { + const s = toStringSafe(inner).trim(); + const cdataMatch = s.match(CDATA_PATTERN); + if (cdataMatch && cdataMatch[1] !== undefined) { + return { ok: true, value: cdataMatch[1] }; + } + return { ok: false, value: '' }; +} + function parseTagAttributes(raw) { const source = toStringSafe(raw); const out = {}; diff --git a/internal/js/helpers/stream-tool-sieve/sieve-xml.js b/internal/js/helpers/stream-tool-sieve/sieve-xml.js index cc8ee43..90ea280 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve-xml.js +++ b/internal/js/helpers/stream-tool-sieve/sieve-xml.js @@ -16,9 +16,10 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) { if (openIdx < 0) { continue; } - // Find the LAST occurrence of the specific closing tag. - const closeIdx = lower.lastIndexOf(pair.close); - if (closeIdx < openIdx) { + // Ignore closing tags that appear inside CDATA payloads, such as + // write-file content containing tool-call documentation examples. + const closeIdx = findXMLCloseOutsideCDATA(captured, pair.close, openIdx + pair.open.length); + if (closeIdx < 0) { // Opening tag present but specific closing tag hasn't arrived. // Return not-ready so buffering continues until the wrapper closes. return { ready: false, prefix: '', calls: [], suffix: '' }; @@ -46,8 +47,9 @@ function consumeXMLToolCapture(captured, toolNames, trimWrappingJSONFence) { function hasOpenXMLToolTag(captured) { const lower = captured.toLowerCase(); for (const pair of XML_TOOL_TAG_PAIRS) { - if (lower.includes(pair.open)) { - if (!lower.includes(pair.close)) { + const openIdx = lower.indexOf(pair.open); + if (openIdx >= 0) { + if (findXMLCloseOutsideCDATA(captured, pair.close, openIdx + pair.open.length) < 0) { return true; } } @@ -74,6 +76,38 @@ function findPartialXMLToolTagStart(s) { return -1; } +function findXMLCloseOutsideCDATA(s, closeTag, start) { + const text = typeof s === 'string' ? s : ''; + const target = String(closeTag || '').toLowerCase(); + if (!text || !target) { + return -1; + } + const lower = text.toLowerCase(); + for (let i = Math.max(0, start || 0); i < text.length;) { + if (lower.startsWith('', i + ''.length; + continue; + } + if (lower.startsWith('', i + ''.length; + continue; + } + if (lower.startsWith(target, i)) { + return i; + } + i += 1; + } + return -1; +} + module.exports = { consumeXMLToolCapture, hasOpenXMLToolTag, diff --git a/internal/toolcall/toolcalls_markup.go b/internal/toolcall/toolcalls_markup.go index 3d8e657..b01ba21 100644 --- a/internal/toolcall/toolcalls_markup.go +++ b/internal/toolcall/toolcalls_markup.go @@ -43,6 +43,9 @@ func parseMarkupKVObject(text string) map[string]any { } func parseMarkupValue(inner string) any { + if value, ok := extractStandaloneCDATA(inner); ok { + return value + } value := strings.TrimSpace(extractRawTagValue(inner)) if value == "" { return "" @@ -89,8 +92,8 @@ func extractRawTagValue(inner string) string { } // 1. Check for CDATA - if present, it's the ultimate "safe" container. - if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 { - return cdataMatches[1] // Return raw content between CDATA brackets + if value, ok := extractStandaloneCDATA(trimmed); ok { + return value // Return raw content between CDATA brackets } // 2. If no CDATA, we still want to be robust. @@ -102,3 +105,11 @@ func extractRawTagValue(inner string) string { // but for KV objects we usually want the value. return html.UnescapeString(inner) } + +func extractStandaloneCDATA(inner string) (string, bool) { + trimmed := strings.TrimSpace(inner) + if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 { + return cdataMatches[1], true + } + return "", false +} diff --git a/internal/toolcall/toolcalls_parse.go b/internal/toolcall/toolcalls_parse.go index 16743ac..a950c2c 100644 --- a/internal/toolcall/toolcalls_parse.go +++ b/internal/toolcall/toolcalls_parse.go @@ -87,7 +87,13 @@ func stripFencedCodeBlocks(text string) string { lines := strings.SplitAfter(text, "\n") inFence := false fenceMarker := "" + inCDATA := false for _, line := range lines { + if inCDATA || cdataStartsBeforeFence(line) { + b.WriteString(line) + inCDATA = updateCDATAState(inCDATA, line) + continue + } trimmed := strings.TrimLeft(line, " \t") if !inFence { if marker, ok := parseFenceOpen(trimmed); ok { @@ -111,6 +117,54 @@ func stripFencedCodeBlocks(text string) string { return b.String() } +func cdataStartsBeforeFence(line string) bool { + cdataIdx := strings.Index(strings.ToLower(line), "") + if end < 0 { + return true + } + pos += end + len("]]>") + state = false + continue + } + start := strings.Index(lower[pos:], "]*>\s*(.*?)\s*`) -var xmlInvokePattern = regexp.MustCompile(`(?is)]*)>\s*(.*?)\s*`) -var xmlParameterPattern = regexp.MustCompile(`(?is)]*)>\s*(.*?)\s*`) var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`) var xmlToolCallsClosePattern = regexp.MustCompile(`(?is)`) var xmlInvokeStartPattern = regexp.MustCompile(`(?is)]*\bname\s*=\s*("([^"]*)"|'([^']*)')`) func parseXMLToolCalls(text string) []ParsedToolCall { - wrappers := xmlToolCallsWrapperPattern.FindAllStringSubmatch(text, -1) + wrappers := findXMLElementBlocks(text, "tool_calls") if len(wrappers) == 0 { repaired := repairMissingXMLToolCallsOpeningWrapper(text) if repaired != text { - wrappers = xmlToolCallsWrapperPattern.FindAllStringSubmatch(repaired, -1) + wrappers = findXMLElementBlocks(repaired, "tool_calls") } } if len(wrappers) == 0 { @@ -27,10 +24,7 @@ func parseXMLToolCalls(text string) []ParsedToolCall { } out := make([]ParsedToolCall, 0, len(wrappers)) for _, wrapper := range wrappers { - if len(wrapper) < 2 { - continue - } - for _, block := range xmlInvokePattern.FindAllStringSubmatch(wrapper[1], -1) { + for _, block := range findXMLElementBlocks(wrapper.Body, "invoke") { call, ok := parseSingleXMLToolCall(block) if !ok { continue @@ -66,17 +60,14 @@ func repairMissingXMLToolCallsOpeningWrapper(text string) string { return text[:invokeLoc[0]] + "" + text[invokeLoc[0]:closeLoc[0]] + "" + text[closeLoc[1]:] } -func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) { - if len(block) < 3 { - return ParsedToolCall{}, false - } - attrs := parseXMLTagAttributes(block[1]) +func parseSingleXMLToolCall(block xmlElementBlock) (ParsedToolCall, bool) { + attrs := parseXMLTagAttributes(block.Attrs) name := strings.TrimSpace(html.UnescapeString(attrs["name"])) if name == "" { return ParsedToolCall{}, false } - inner := strings.TrimSpace(block[2]) + inner := strings.TrimSpace(block.Body) if strings.HasPrefix(inner, "{") { var payload map[string]any if err := json.Unmarshal([]byte(inner), &payload); err == nil { @@ -94,16 +85,13 @@ func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) { } input := map[string]any{} - for _, paramMatch := range xmlParameterPattern.FindAllStringSubmatch(inner, -1) { - if len(paramMatch) < 3 { - continue - } - paramAttrs := parseXMLTagAttributes(paramMatch[1]) + for _, paramMatch := range findXMLElementBlocks(inner, "parameter") { + paramAttrs := parseXMLTagAttributes(paramMatch.Attrs) paramName := strings.TrimSpace(html.UnescapeString(paramAttrs["name"])) if paramName == "" { continue } - value := parseInvokeParameterValue(paramMatch[2]) + value := parseInvokeParameterValue(paramMatch.Body) appendMarkupValue(input, paramName, value) } @@ -116,6 +104,168 @@ func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) { return ParsedToolCall{Name: name, Input: input}, true } +type xmlElementBlock struct { + Attrs string + Body string + Start int + End int +} + +func findXMLElementBlocks(text, tag string) []xmlElementBlock { + if text == "" || tag == "" { + return nil + } + var out []xmlElementBlock + pos := 0 + for pos < len(text) { + start, bodyStart, attrs, ok := findXMLStartTagOutsideCDATA(text, tag, pos) + if !ok { + break + } + closeStart, closeEnd, ok := findMatchingXMLEndTagOutsideCDATA(text, tag, bodyStart) + if !ok { + break + } + out = append(out, xmlElementBlock{ + Attrs: attrs, + Body: text[bodyStart:closeStart], + Start: start, + End: closeEnd, + }) + pos = closeEnd + } + return out +} + +func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart int, attrs string, ok bool) { + lower := strings.ToLower(text) + target := "<" + strings.ToLower(tag) + for i := maxInt(from, 0); i < len(text); { + next, advanced, blocked := skipXMLIgnoredSection(lower, i) + if blocked { + return -1, -1, "", false + } + if advanced { + i = next + continue + } + if strings.HasPrefix(lower[i:], target) && hasXMLTagBoundary(text, i+len(target)) { + end := findXMLTagEnd(text, i+len(target)) + if end < 0 { + return -1, -1, "", false + } + return i, end + 1, text[i+len(target) : end], true + } + i++ + } + return -1, -1, "", false +} + +func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeEnd int, ok bool) { + lower := strings.ToLower(text) + openTarget := "<" + strings.ToLower(tag) + closeTarget := "") + if end < 0 { + return 0, false, true + } + return i + len(""), true, false + case strings.HasPrefix(lower[i:], "") + if end < 0 { + return 0, false, true + } + return i + len(""), true, false + default: + return i, false, false + } +} + +func findXMLTagEnd(text string, from int) int { + quote := byte(0) + for i := maxInt(from, 0); i < len(text); i++ { + ch := text[i] + if quote != 0 { + if ch == quote { + quote = 0 + } + continue + } + if ch == '"' || ch == '\'' { + quote = ch + continue + } + if ch == '>' { + return i + } + } + return -1 +} + +func hasXMLTagBoundary(text string, idx int) bool { + if idx >= len(text) { + return true + } + switch text[idx] { + case ' ', '\t', '\n', '\r', '>', '/': + return true + default: + return false + } +} + +func isSelfClosingXMLTag(startTag string) bool { + return strings.HasSuffix(strings.TrimSpace(startTag), "/") +} + +func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + func parseXMLTagAttributes(raw string) map[string]string { if strings.TrimSpace(raw) == "" { return map[string]string{} @@ -143,6 +293,9 @@ func parseInvokeParameterValue(raw string) any { if trimmed == "" { return "" } + if value, ok := extractStandaloneCDATA(trimmed); ok { + return value + } if parsed := parseStructuredToolCallInput(trimmed); len(parsed) > 0 { if len(parsed) == 1 { if rawValue, ok := parsed["_raw"].(string); ok { diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 8d26f73..c4bfe51 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -54,6 +54,32 @@ echo "hello" } } +func TestParseToolCallsKeepsToolSyntaxInsideCDATAAsParameterText(t *testing.T) { + payload := strings.Join([]string{ + "# Release notes", + "", + "```xml", + "", + " ", + " x", + " ", + "", + "```", + }, "\n") + text := `DS2API-4.0-Release-Notes.md` + calls := ParseToolCalls(text, []string{"Write"}) + if len(calls) != 1 { + t.Fatalf("expected 1 call, got %#v", calls) + } + content, _ := calls[0].Input["content"].(string) + if content != payload { + t.Fatalf("expected CDATA payload with nested tool syntax to survive intact, got %q", content) + } + if calls[0].Input["file_path"] != "DS2API-4.0-Release-Notes.md" { + t.Fatalf("expected file_path parameter, got %#v", calls[0].Input) + } +} + func TestParseToolCallsSupportsInvokeParameters(t *testing.T) { text := `beijingc` calls := ParseToolCalls(text, []string{"get_weather"}) diff --git a/internal/toolstream/tool_sieve_xml.go b/internal/toolstream/tool_sieve_xml.go index 6d6cbc4..72cbbaa 100644 --- a/internal/toolstream/tool_sieve_xml.go +++ b/internal/toolstream/tool_sieve_xml.go @@ -35,9 +35,10 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, if openIdx < 0 { continue } - // Find the LAST occurrence of the specific closing tag to get the outermost block. - closeIdx := strings.LastIndex(lower, pair.close) - if closeIdx < openIdx { + // Find the matching closing tag outside CDATA. Long write-file tool + // calls often contain XML examples in CDATA, including . + closeIdx := findXMLCloseOutsideCDATA(captured, pair.close, openIdx+len(pair.open)) + if closeIdx < 0 { // Opening tag is present but its specific closing tag hasn't arrived. // Return not-ready so we keep buffering until the canonical wrapper closes. return "", nil, "", false @@ -57,7 +58,7 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, } if !strings.Contains(lower, "") + closeIdx := findXMLCloseOutsideCDATA(captured, "", invokeIdx) if invokeIdx >= 0 && closeIdx > invokeIdx { closeEnd := closeIdx + len("") xmlBlock := "" + captured[invokeIdx:closeIdx] + "" @@ -79,8 +80,9 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, func hasOpenXMLToolTag(captured string) bool { lower := strings.ToLower(captured) for _, pair := range xmlToolCallTagPairs { - if strings.Contains(lower, pair.open) { - if !strings.Contains(lower, pair.close) { + openIdx := strings.Index(lower, pair.open) + if openIdx >= 0 { + if findXMLCloseOutsideCDATA(captured, pair.close, openIdx+len(pair.open)) < 0 { return true } } @@ -88,6 +90,38 @@ func hasOpenXMLToolTag(captured string) bool { return false } +func findXMLCloseOutsideCDATA(s, closeTag string, start int) int { + if s == "" || closeTag == "" { + return -1 + } + if start < 0 { + start = 0 + } + lower := strings.ToLower(s) + target := strings.ToLower(closeTag) + for i := start; i < len(s); { + switch { + case strings.HasPrefix(lower[i:], "") + if end < 0 { + return -1 + } + i += len("") + case strings.HasPrefix(lower[i:], "") + if end < 0 { + return -1 + } + i += len("") + case strings.HasPrefix(lower[i:], target): + return i + default: + i++ + } + } + return -1 +} + // findPartialXMLToolTagStart checks if the string ends with a partial canonical // XML wrapper tag (e.g., "", + " ", + " x", + " ", + "", + "```", + "tail", + }, "\n") + innerClose := strings.Index(payload, "") + len("") + chunks := []string{ + "\n \n \n DS2API-4.0-Release-Notes.md\n \n", + } + + var events []Event + for i, c := range chunks { + next := ProcessChunk(&state, c, []string{"Write"}) + if i <= 1 { + for _, evt := range next { + if evt.Content != "" || len(evt.ToolCalls) > 0 { + t.Fatalf("expected no events before outer closing tag, chunk=%d events=%#v", i, next) + } + } + } + events = append(events, next...) + } + events = append(events, Flush(&state, []string{"Write"})...) + + var textContent strings.Builder + var gotPayload string + toolCalls := 0 + for _, evt := range events { + textContent.WriteString(evt.Content) + if len(evt.ToolCalls) > 0 { + toolCalls += len(evt.ToolCalls) + gotPayload, _ = evt.ToolCalls[0].Input["content"].(string) + } + } + + if toolCalls != 1 { + t.Fatalf("expected one parsed tool call, got %d events=%#v", toolCalls, events) + } + if textContent.Len() != 0 { + t.Fatalf("expected no leaked text, got %q", textContent.String()) + } + if gotPayload != payload { + t.Fatalf("expected full CDATA payload to survive intact, got len=%d want=%d", len(gotPayload), len(payload)) + } +} + func TestProcessToolSieveXMLWithLeadingText(t *testing.T) { var state State // Model outputs some prose then an XML tool call. diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index 1e5012a..cc6ae93 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -118,6 +118,60 @@ test('sieve keeps long XML tool calls buffered until the closing tag arrives', ( assert.equal(finalCalls[0].input.content, longContent); }); +test('sieve keeps CDATA tool examples buffered until the outer closing tag arrives', () => { + const content = [ + '# DS2API 4.0 更新内容', + '', + 'x'.repeat(4096), + '```xml', + '', + ' ', + ' x', + ' ', + '', + '```', + 'tail', + ].join('\n'); + const innerClose = content.indexOf('') + ''.length; + const state = createToolSieveState(); + const chunks = [ + '\n \n \n DS2API-4.0-Release-Notes.md\n \n', + ]; + const events = []; + chunks.forEach((chunk, idx) => { + const next = processToolSieveChunk(state, chunk, ['Write']); + if (idx <= 1) { + assert.deepEqual(next, []); + } + events.push(...next); + }); + events.push(...flushToolSieve(state, ['Write'])); + + const leakedText = collectText(events); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(leakedText, ''); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Write'); + assert.equal(finalCalls[0].input.content, content); +}); + +test('parseToolCalls keeps XML-looking CDATA content intact', () => { + const content = [ + '# Release notes', + '```xml', + 'x', + '```', + ].join('\n'); + const payload = `DS2API-4.0-Release-Notes.md`; + const calls = parseToolCalls(payload, ['Write']); + assert.equal(calls.length, 1); + assert.equal(calls[0].input.content, content); + assert.equal(calls[0].input.file_path, 'DS2API-4.0-Release-Notes.md'); +}); + test('sieve passes JSON tool_calls payload through as text (XML-only)', () => { const events = runSieve( ['{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'],