diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 8779f59..612e186 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -3,10 +3,21 @@ const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s; const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?(tool_call|function_call|invoke)\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi; -const TOOL_CALL_MARKUP_NAME_TAG_PATTERN = /<(?:[a-z0-9_:-]+:)?(name|function)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/i; -const TOOL_CALL_MARKUP_ARGS_TAG_PATTERN = /<(?:[a-z0-9_:-]+:)?(input|arguments|argument|parameters|parameter|args|params)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/i; const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi; const TOOL_CALL_MARKUP_ATTR_PATTERN = /(name|function|tool)\s*=\s*"([^"]+)"/i; +const TOOL_CALL_MARKUP_NAME_PATTERNS = [ + /<(?:[a-z0-9_:-]+:)?name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?name>/i, + /<(?:[a-z0-9_:-]+:)?function\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?function>/i, +]; +const TOOL_CALL_MARKUP_ARGS_PATTERNS = [ + /<(?:[a-z0-9_:-]+:)?input\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?input>/i, + /<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?arguments>/i, + /<(?:[a-z0-9_:-]+:)?argument\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?argument>/i, + /<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameters>/i, + /<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameter>/i, + /<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i, + /<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i, +]; const { toStringSafe, @@ -141,19 +152,16 @@ function parseMarkupSingleToolCall(attrs, inner) { name = 
toStringSafe(attrMatch[2]).trim(); } if (!name) { - const m = inner.match(TOOL_CALL_MARKUP_NAME_TAG_PATTERN); - if (m && m[2]) { - name = stripTagText(m[2]); - } + name = stripTagText(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS)); } if (!name) { return null; } let input = {}; - const argsMatch = inner.match(TOOL_CALL_MARKUP_ARGS_TAG_PATTERN); - if (argsMatch && argsMatch[2]) { - input = parseMarkupInput(argsMatch[2]); + const argsRaw = findMarkupTagValue(inner, TOOL_CALL_MARKUP_ARGS_PATTERNS); + if (argsRaw) { + input = parseMarkupInput(argsRaw); } else { const kv = parseMarkupKVObject(inner); if (Object.keys(kv).length > 0) { @@ -207,6 +215,17 @@ function stripTagText(text) { return toStringSafe(text).replace(/<[^>]+>/g, ' ').trim(); } +function findMarkupTagValue(text, patterns) { + const source = toStringSafe(text); + for (const p of patterns) { + const m = source.match(p); + if (m && m[1]) { + return toStringSafe(m[1]); + } + } + return ''; +} + function parseToolCallList(v) { if (!Array.isArray(v)) { return []; diff --git a/internal/util/toolcalls_markup.go b/internal/util/toolcalls_markup.go index 1fef7a7..cc0f8bb 100644 --- a/internal/util/toolcalls_markup.go +++ b/internal/util/toolcalls_markup.go @@ -13,11 +13,24 @@ var toolCallMarkupTagPatternByName = map[string]*regexp.Regexp{ "invoke": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)>(.*?)`), } var toolCallMarkupSelfClosingPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)/>`) -var toolCallMarkupNameTagPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?(?:name|function)\b[^>]*>(.*?)`) -var toolCallMarkupArgsTagPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?(?:input|arguments|argument|parameters|parameter|args|params)\b[^>]*>(.*?)`) var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)`) var toolCallMarkupAttrPattern = regexp.MustCompile(`(?is)(name|function|tool)\s*=\s*"([^"]+)"`) var anyTagPattern = 
regexp.MustCompile(`(?is)<[^>]+>`) +var toolCallMarkupNameTagNames = []string{"name", "function"} +var toolCallMarkupNamePatternByTag = map[string]*regexp.Regexp{ + "name": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?name\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?name>`), + "function": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?function>`), +} +var toolCallMarkupArgsTagNames = []string{"input", "arguments", "argument", "parameters", "parameter", "args", "params"} +var toolCallMarkupArgsPatternByTag = map[string]*regexp.Regexp{ + "input": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?input\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?input>`), + "arguments": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?arguments>`), + "argument": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?argument\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?argument>`), + "parameters": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?parameters>`), + "parameter": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?parameter>`), + "args": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?args\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?args>`), + "params": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?params\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?params>`), +} func parseMarkupToolCalls(text string) []ParsedToolCall { trimmed := strings.TrimSpace(text) @@ -63,17 +76,15 @@ func parseMarkupSingleToolCall(attrs string, inner string) ParsedToolCall { name = strings.TrimSpace(m[2]) } if name == "" { - if m := toolCallMarkupNameTagPattern.FindStringSubmatch(inner); len(m) >= 2 { - name = strings.TrimSpace(stripTagText(m[1])) - } + name = findMarkupTagValue(inner, toolCallMarkupNameTagNames, toolCallMarkupNamePatternByTag) } if name == "" { return ParsedToolCall{} } input := map[string]any{} - if m := toolCallMarkupArgsTagPattern.FindStringSubmatch(inner); len(m) >= 2 { - input = parseMarkupInput(m[1]) + if argsRaw := findMarkupTagValue(inner, toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag); argsRaw != "" { + input = parseMarkupInput(argsRaw) } else if kv := parseMarkupKVObject(inner); len(kv) > 0 { input = kv } @@ -132,3 +143,19 @@ func
parseMarkupKVObject(text string) map[string]any { func stripTagText(text string) string { return strings.TrimSpace(anyTagPattern.ReplaceAllString(text, "")) } + +func findMarkupTagValue(text string, tagNames []string, patternByTag map[string]*regexp.Regexp) string { + for _, tag := range tagNames { + pattern := patternByTag[tag] + if pattern == nil { + continue + } + if m := pattern.FindStringSubmatch(text); len(m) >= 2 { + value := strings.TrimSpace(m[1]) + if value != "" { + return value + } + } + } + return "" +} diff --git a/internal/util/toolcalls_test.go b/internal/util/toolcalls_test.go index 0e682dc..e830092 100644 --- a/internal/util/toolcalls_test.go +++ b/internal/util/toolcalls_test.go @@ -137,3 +137,11 @@ func TestParseToolCallsAllowsPunctuationVariantToolName(t *testing.T) { t.Fatalf("expected canonical tool name read_file, got %q", calls[0].Name) } } + +func TestParseToolCallsDoesNotAcceptMismatchedMarkupTags(t *testing.T) { + text := `read_file{"path":"README.md"}` + calls := ParseToolCalls(text, []string{"read_file"}) + if len(calls) != 0 { + t.Fatalf("expected mismatched tags to be rejected, got %#v", calls) + } +} diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index e68f2ff..20c00b8 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -249,3 +249,9 @@ test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => { assert.equal(second.length, 1); assert.equal(first[0].id, second[0].id); }); + +test('parseToolCalls rejects mismatched markup tags', () => { + const payload = 'read_file{"path":"README.md"}'; + const calls = parseToolCalls(payload, ['read_file']); + assert.equal(calls.length, 0); +});