mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 00:15:28 +08:00
Merge pull request #87 from CJackHwang/dev
Merge pull request #82 from CJackHwang/codex/linear-mention-cja-10-ds2api-go-runtime-js Align Go/JS tool-call parsing semantics and expand compat fixtures
This commit is contained in:
40
docs/toolcall-semantics.md
Normal file
40
docs/toolcall-semantics.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Tool call parsing semantics (Go canonical spec)
|
||||
|
||||
This document defines the cross-runtime contract for `ParseToolCallsDetailed` / `parseToolCallsDetailed`.
|
||||
|
||||
## Output contract
|
||||
|
||||
- `calls`: accepted tool calls with normalized tool names.
|
||||
- `sawToolCallSyntax`: true when tool-call-like syntax is detected (`tool_calls`, `<tool_call>`, `<function_call>`, `<invoke>`) or a valid call is parsed.
|
||||
- `rejectedByPolicy`: true when parser extracted call syntax but all calls are rejected by allow-list policy.
|
||||
- `rejectedToolNames`: de-duplicated rejected tool names in first-seen order.
|
||||
|
||||
## Parse pipeline
|
||||
|
||||
1. Strip fenced code blocks for non-standalone parsing.
|
||||
2. Build candidates from:
|
||||
- full text,
|
||||
- fenced JSON snippets,
|
||||
- extracted JSON objects around `tool_calls`,
|
||||
- first `{` to last `}` object slice.
|
||||
3. Parse each candidate in order:
|
||||
- JSON payload parser (`tool_calls`, list, single call object),
|
||||
- markup parser (`<tool_call>`, `<function_call>`, `<invoke>`; supports attributes + nested fields).
|
||||
4. Stop at the first candidate that yields at least one call.
|
||||
|
||||
## Name normalization policy
|
||||
|
||||
When matching parsed names against configured tools:
|
||||
|
||||
1. exact match,
|
||||
2. case-insensitive match,
|
||||
3. namespace tail match (`a.b.c` => `c`),
|
||||
4. loose alphanumeric match (lower-case both names, strip every character outside `[a-z0-9]`, then compare).
|
||||
|
||||
## Standalone mode
|
||||
|
||||
Standalone mode (`ParseStandaloneToolCallsDetailed`) parses the whole input directly (no candidate slicing), while still applying:
|
||||
|
||||
- example-context guard,
|
||||
- JSON then markup fallback,
|
||||
- the same allow-list normalization policy.
|
||||
@@ -73,22 +73,31 @@ func TestGoCompatToolcallFixtures(t *testing.T) {
|
||||
mustLoadJSON(t, fixturePath, &fixture)
|
||||
|
||||
var expected struct {
|
||||
Calls []util.ParsedToolCall `json:"calls"`
|
||||
Calls []util.ParsedToolCall `json:"calls"`
|
||||
SawToolCallSyntax bool `json:"sawToolCallSyntax"`
|
||||
RejectedByPolicy bool `json:"rejectedByPolicy"`
|
||||
RejectedToolNames []string `json:"rejectedToolNames"`
|
||||
}
|
||||
mustLoadJSON(t, expectedPath, &expected)
|
||||
|
||||
var got []util.ParsedToolCall
|
||||
var got util.ToolCallParseResult
|
||||
switch strings.ToLower(strings.TrimSpace(fixture.Mode)) {
|
||||
case "standalone":
|
||||
got = util.ParseStandaloneToolCalls(fixture.Text, fixture.ToolNames)
|
||||
got = util.ParseStandaloneToolCallsDetailed(fixture.Text, fixture.ToolNames)
|
||||
default:
|
||||
got = util.ParseToolCalls(fixture.Text, fixture.ToolNames)
|
||||
got = util.ParseToolCallsDetailed(fixture.Text, fixture.ToolNames)
|
||||
}
|
||||
if len(got) == 0 && len(expected.Calls) == 0 {
|
||||
continue
|
||||
if got.Calls == nil {
|
||||
got.Calls = []util.ParsedToolCall{}
|
||||
}
|
||||
if !reflect.DeepEqual(got, expected.Calls) {
|
||||
t.Fatalf("toolcall fixture %s mismatch:\n got=%#v\nwant=%#v", name, got, expected.Calls)
|
||||
if got.RejectedToolNames == nil {
|
||||
got.RejectedToolNames = []string{}
|
||||
}
|
||||
if !reflect.DeepEqual(got.Calls, expected.Calls) ||
|
||||
got.SawToolCallSyntax != expected.SawToolCallSyntax ||
|
||||
got.RejectedByPolicy != expected.RejectedByPolicy ||
|
||||
!reflect.DeepEqual(got.RejectedToolNames, expected.RejectedToolNames) {
|
||||
t.Fatalf("toolcall fixture %s mismatch:\n got=%#v\nwant=%#v", name, got, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,8 +8,11 @@ const {
|
||||
stripFencedCodeBlocks,
|
||||
buildToolCallCandidates,
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
} = require('./parse_payload');
|
||||
|
||||
const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
|
||||
|
||||
function extractToolNames(tools) {
|
||||
if (!Array.isArray(tools) || tools.length === 0) {
|
||||
return [];
|
||||
@@ -41,12 +44,15 @@ function parseToolCallsDetailed(text, toolNames) {
|
||||
if (!toStringSafe(sanitized)) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = sanitized.toLowerCase().includes('tool_calls');
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(sanitized);
|
||||
|
||||
const candidates = buildToolCallCandidates(sanitized);
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
parsed = parseToolCallsPayload(c);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseMarkupToolCalls(c);
|
||||
}
|
||||
if (parsed.length > 0) {
|
||||
result.sawToolCallSyntax = true;
|
||||
break;
|
||||
@@ -73,15 +79,17 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
|
||||
if (!trimmed) {
|
||||
return result;
|
||||
}
|
||||
if (trimmed.includes('```')) {
|
||||
return result;
|
||||
}
|
||||
if (looksLikeToolExampleContext(trimmed)) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = trimmed.toLowerCase().includes('tool_calls');
|
||||
if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) {
|
||||
return result;
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
|
||||
let parsed = parseToolCallsPayload(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
parsed = parseMarkupToolCalls(trimmed);
|
||||
}
|
||||
|
||||
const parsed = parseToolCallsPayload(trimmed);
|
||||
if (parsed.length === 0) {
|
||||
return result;
|
||||
}
|
||||
@@ -146,7 +154,7 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
if (allowed.has(tc.name)) {
|
||||
matchedName = tc.name;
|
||||
} else {
|
||||
matchedName = allowedCanonical.get(tc.name.toLowerCase()) || '';
|
||||
matchedName = resolveAllowedToolName(tc.name, allowed, allowedCanonical);
|
||||
}
|
||||
if (!matchedName) {
|
||||
if (!seenRejected.has(tc.name)) {
|
||||
@@ -163,6 +171,45 @@ function filterToolCallsDetailed(parsed, toolNames) {
|
||||
return { calls, rejectedToolNames: rejected };
|
||||
}
|
||||
|
||||
/**
 * Map a parsed tool name onto one of the configured tool names.
 *
 * Matching is attempted in this order, first hit wins:
 *   1. exact match against `allowed`,
 *   2. lower-cased match against `allowedCanonical`,
 *   3. lower-cased text after the last `.` against `allowedCanonical`,
 *   4. comparison after removing every character matched by
 *      TOOL_NAME_LOOSE_PATTERN from both sides.
 *
 * @param {*} name - Parsed tool name; coerced with toStringSafe and trimmed.
 * @param {Set<string>} allowed - Configured names, queried with `.has`.
 * @param {Map<string, string>} allowedCanonical - Lower-cased name -> configured name.
 * @returns {string} The configured name, or '' when nothing matches.
 */
function resolveAllowedToolName(name, allowed, allowedCanonical) {
  const wanted = toStringSafe(name).trim();
  if (!wanted) {
    return '';
  }
  if (allowed.has(wanted)) {
    return wanted;
  }

  // Lower-cased full name first, then the segment after the last dot
  // (only when the dot is not the final character).
  const lowered = wanted.toLowerCase();
  const directKeys = [lowered];
  const lastDot = lowered.lastIndexOf('.');
  if (lastDot !== -1 && lastDot + 1 < lowered.length) {
    directKeys.push(lowered.slice(lastDot + 1));
  }
  for (const key of directKeys) {
    if (allowedCanonical.has(key)) {
      return allowedCanonical.get(key);
    }
  }

  // Last resort: compare with separators and punctuation removed.
  const squash = (value) => value.replace(TOOL_NAME_LOOSE_PATTERN, '');
  const looseWanted = squash(lowered);
  if (!looseWanted) {
    return '';
  }
  for (const [candidateLower, canonical] of allowedCanonical.entries()) {
    if (squash(candidateLower) === looseWanted) {
      return canonical;
    }
  }
  return '';
}
|
||||
|
||||
/**
 * Report whether the text contains any tool-call marker.
 *
 * The check is a case-insensitive substring search for `tool_calls`,
 * `<tool_call`, `<function_call` or `<invoke`.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @returns {boolean} True when at least one marker is present.
 */
function looksLikeToolCallSyntax(text) {
  const haystack = toStringSafe(text).toLowerCase();
  const markers = ['tool_calls', '<tool_call', '<function_call', '<invoke'];
  return markers.some((marker) => haystack.includes(marker));
}
|
||||
|
||||
module.exports = {
|
||||
extractToolNames,
|
||||
parseToolCalls,
|
||||
|
||||
@@ -1,6 +1,23 @@
|
||||
'use strict';
|
||||
|
||||
const TOOL_CALL_PATTERN = /\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}/s;
|
||||
const TOOL_CALL_MARKUP_BLOCK_PATTERN = /<(?:[a-z0-9_:-]+:)?(tool_call|function_call|invoke)\b([^>]*)>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
const TOOL_CALL_MARKUP_SELFCLOSE_PATTERN = /<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)\/>/gi;
|
||||
const TOOL_CALL_MARKUP_KV_PATTERN = /<(?:[a-z0-9_:-]+:)?([a-z0-9_.-]+)\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?\1>/gi;
|
||||
const TOOL_CALL_MARKUP_ATTR_PATTERN = /(name|function|tool)\s*=\s*"([^"]+)"/i;
|
||||
const TOOL_CALL_MARKUP_NAME_PATTERNS = [
|
||||
/<(?:[a-z0-9_:-]+:)?name\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?name>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?function\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?function>/i,
|
||||
];
|
||||
const TOOL_CALL_MARKUP_ARGS_PATTERNS = [
|
||||
/<(?:[a-z0-9_:-]+:)?input\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?input>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?arguments>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?argument\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?argument>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameters>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?parameter>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?args\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?args>/i,
|
||||
/<(?:[a-z0-9_:-]+:)?params\b[^>]*>([\s\S]*?)<\/(?:[a-z0-9_:-]+:)?params>/i,
|
||||
];
|
||||
|
||||
const {
|
||||
toStringSafe,
|
||||
@@ -103,6 +120,112 @@ function parseToolCallsPayload(payload) {
|
||||
return one ? [one] : [];
|
||||
}
|
||||
|
||||
/**
 * Collect tool calls written as markup elements.
 *
 * Paired elements matched by TOOL_CALL_MARKUP_BLOCK_PATTERN are handled
 * first, followed by self-closing elements matched by
 * TOOL_CALL_MARKUP_SELFCLOSE_PATTERN. Elements that do not yield a call
 * are dropped.
 *
 * @param {*} text - Candidate text; coerced with toStringSafe and trimmed.
 * @returns {Array<{name: string, input: object}>} Parsed calls, possibly empty.
 */
function parseMarkupToolCalls(text) {
  const source = toStringSafe(text).trim();
  if (!source) {
    return [];
  }

  // Group 2 holds the attribute text and group 3 the element body.
  const fromBlocks = Array.from(
    source.matchAll(TOOL_CALL_MARKUP_BLOCK_PATTERN),
    (match) => parseMarkupSingleToolCall(toStringSafe(match[2]).trim(), toStringSafe(match[3]).trim())
  );

  // Self-closing elements carry attributes only, so the body is empty.
  const fromSelfClosing = Array.from(
    source.matchAll(TOOL_CALL_MARKUP_SELFCLOSE_PATTERN),
    (match) => parseMarkupSingleToolCall(toStringSafe(match[1]).trim(), '')
  );

  return [...fromBlocks, ...fromSelfClosing].filter(Boolean);
}
|
||||
|
||||
/**
 * Turn one markup element into a tool call.
 *
 * If the element body parses as a tool-call payload, its first call is
 * returned as-is. Otherwise the name comes from a name/function/tool
 * attribute, falling back to a name or function child element. Arguments
 * come from a dedicated arguments element, falling back to treating the
 * child elements as key/value pairs.
 *
 * @param {string} attrs - Attribute text of the opening tag.
 * @param {string} inner - Element body.
 * @returns {{name: string, input: object}|null} The call, or null when no name is found.
 */
function parseMarkupSingleToolCall(attrs, inner) {
  const embeddedCalls = parseToolCallsPayload(inner);
  if (embeddedCalls.length > 0) {
    return embeddedCalls[0];
  }

  const attrMatch = attrs.match(TOOL_CALL_MARKUP_ATTR_PATTERN);
  const nameFromAttr = attrMatch && attrMatch[2] ? toStringSafe(attrMatch[2]).trim() : '';
  const name = nameFromAttr || stripTagText(findMarkupTagValue(inner, TOOL_CALL_MARKUP_NAME_PATTERNS));
  if (!name) {
    return null;
  }

  const argsRaw = findMarkupTagValue(inner, TOOL_CALL_MARKUP_ARGS_PATTERNS);
  if (argsRaw) {
    return { name, input: parseMarkupInput(argsRaw) };
  }

  const kv = parseMarkupKVObject(inner);
  const input = Object.keys(kv).length > 0 ? kv : {};
  return { name, input };
}
|
||||
|
||||
/**
 * Decode the body of an arguments element into an input object.
 *
 * Tried in order: parseToolCallInput (accepted only when it returns a
 * non-empty, non-array object), then child elements as key/value pairs,
 * then the tag-stripped text under the `_raw` key.
 *
 * @param {*} raw - Arguments text; coerced with toStringSafe and trimmed.
 * @returns {object} The input object; `{}` when the text is blank.
 */
function parseMarkupInput(raw) {
  const body = toStringSafe(raw).trim();
  if (!body) {
    return {};
  }

  const hasEntries = (value) => Object.keys(value).length > 0;

  const decoded = parseToolCallInput(body);
  const isPlainObject = decoded && typeof decoded === 'object' && !Array.isArray(decoded);
  if (isPlainObject && hasEntries(decoded)) {
    return decoded;
  }

  const fields = parseMarkupKVObject(body);
  return hasEntries(fields) ? fields : { _raw: stripTagText(body) };
}
|
||||
|
||||
/**
 * Read child elements as key/value pairs.
 *
 * Each element matched by TOOL_CALL_MARKUP_KV_PATTERN contributes its tag
 * name as the key and its tag-stripped body as the value. Values that are
 * valid JSON are decoded; anything else is kept as text. Elements with an
 * empty key or empty value are ignored.
 *
 * @param {*} text - Markup text; coerced with toStringSafe and trimmed.
 * @returns {object} Collected fields; `{}` when nothing matched.
 */
function parseMarkupKVObject(text) {
  const raw = toStringSafe(text).trim();
  if (!raw) {
    return {};
  }
  const out = {};
  for (const m of raw.matchAll(TOOL_CALL_MARKUP_KV_PATTERN)) {
    const key = toStringSafe(m[1]).trim();
    if (!key) {
      continue;
    }
    const valueRaw = stripTagText(m[2]);
    if (!valueRaw) {
      continue;
    }
    let value;
    try {
      value = JSON.parse(valueRaw);
    } catch (_err) {
      // Not JSON: keep the literal text.
      value = valueRaw;
    }
    // The tag name comes from untrusted text and may be `__proto__`. A plain
    // `out[key] = value` would then replace the prototype of `out` instead of
    // storing a field, so the field is defined as an own property.
    Object.defineProperty(out, key, {
      value,
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }
  return out;
}
|
||||
|
||||
/**
 * Remove every `<...>` tag from the text and trim the result.
 *
 * Tags are deleted without inserting a separator, so text on either side
 * of a tag is joined (`<b>read</b>_file` becomes `read_file`). This matches
 * the Go implementation of the same helper, which replaces tags with the
 * empty string.
 *
 * @param {*} text - Input; coerced with toStringSafe.
 * @returns {string} Text with tags removed and outer whitespace trimmed.
 */
function stripTagText(text) {
  return toStringSafe(text).replace(/<[^>]+>/g, '').trim();
}
|
||||
|
||||
/**
 * Return the body of the first element that has non-blank content.
 *
 * Patterns are tried in order. A pattern whose first capture group is
 * missing, empty or whitespace-only is skipped so a later pattern can
 * still supply the value; the Go implementation of this helper behaves
 * the same way. The returned value is trimmed.
 *
 * @param {*} text - Markup text; coerced with toStringSafe.
 * @param {RegExp[]} patterns - Patterns whose first capture group is the element body.
 * @returns {string} The trimmed body, or '' when no pattern yields content.
 */
function findMarkupTagValue(text, patterns) {
  const source = toStringSafe(text);
  for (const pattern of patterns) {
    const match = source.match(pattern);
    const value = match ? toStringSafe(match[1]).trim() : '';
    if (value) {
      return value;
    }
  }
  return '';
}
|
||||
|
||||
function parseToolCallList(v) {
|
||||
if (!Array.isArray(v)) {
|
||||
return [];
|
||||
@@ -193,4 +316,5 @@ module.exports = {
|
||||
stripFencedCodeBlocks,
|
||||
buildToolCallCandidates,
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
};
|
||||
|
||||
161
internal/util/toolcalls_markup.go
Normal file
161
internal/util/toolcalls_markup.go
Normal file
@@ -0,0 +1,161 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var toolCallMarkupTagNames = []string{"tool_call", "function_call", "invoke"}
|
||||
var toolCallMarkupTagPatternByName = map[string]*regexp.Regexp{
|
||||
"tool_call": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?tool_call\b([^>]*)>(.*?)</(?:[a-z0-9_:-]+:)?tool_call>`),
|
||||
"function_call": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function_call\b([^>]*)>(.*?)</(?:[a-z0-9_:-]+:)?function_call>`),
|
||||
"invoke": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)>(.*?)</(?:[a-z0-9_:-]+:)?invoke>`),
|
||||
}
|
||||
var toolCallMarkupSelfClosingPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?invoke\b([^>]*)/>`)
|
||||
var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)>`)
|
||||
var toolCallMarkupAttrPattern = regexp.MustCompile(`(?is)(name|function|tool)\s*=\s*"([^"]+)"`)
|
||||
var anyTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)
|
||||
var toolCallMarkupNameTagNames = []string{"name", "function"}
|
||||
var toolCallMarkupNamePatternByTag = map[string]*regexp.Regexp{
|
||||
"name": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?name\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?name>`),
|
||||
"function": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?function\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?function>`),
|
||||
}
|
||||
var toolCallMarkupArgsTagNames = []string{"input", "arguments", "argument", "parameters", "parameter", "args", "params"}
|
||||
var toolCallMarkupArgsPatternByTag = map[string]*regexp.Regexp{
|
||||
"input": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?input\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?input>`),
|
||||
"arguments": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?arguments\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?arguments>`),
|
||||
"argument": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?argument\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?argument>`),
|
||||
"parameters": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?parameters\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?parameters>`),
|
||||
"parameter": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?parameter\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?parameter>`),
|
||||
"args": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?args\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?args>`),
|
||||
"params": regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?params\b[^>]*>(.*?)</(?:[a-z0-9_:-]+:)?params>`),
|
||||
}
|
||||
|
||||
func parseMarkupToolCalls(text string) []ParsedToolCall {
|
||||
trimmed := strings.TrimSpace(text)
|
||||
if trimmed == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
out := make([]ParsedToolCall, 0)
|
||||
for _, tagName := range toolCallMarkupTagNames {
|
||||
pattern := toolCallMarkupTagPatternByName[tagName]
|
||||
for _, m := range pattern.FindAllStringSubmatch(trimmed, -1) {
|
||||
if len(m) < 3 {
|
||||
continue
|
||||
}
|
||||
attrs := strings.TrimSpace(m[1])
|
||||
inner := strings.TrimSpace(m[2])
|
||||
if parsed := parseMarkupSingleToolCall(attrs, inner); parsed.Name != "" {
|
||||
out = append(out, parsed)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, m := range toolCallMarkupSelfClosingPattern.FindAllStringSubmatch(trimmed, -1) {
|
||||
if len(m) < 2 {
|
||||
continue
|
||||
}
|
||||
if parsed := parseMarkupSingleToolCall(strings.TrimSpace(m[1]), ""); parsed.Name != "" {
|
||||
out = append(out, parsed)
|
||||
}
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func parseMarkupSingleToolCall(attrs string, inner string) ParsedToolCall {
|
||||
if parsed := parseToolCallsPayload(inner); len(parsed) > 0 {
|
||||
return parsed[0]
|
||||
}
|
||||
|
||||
name := ""
|
||||
if m := toolCallMarkupAttrPattern.FindStringSubmatch(attrs); len(m) >= 3 {
|
||||
name = strings.TrimSpace(m[2])
|
||||
}
|
||||
if name == "" {
|
||||
name = findMarkupTagValue(inner, toolCallMarkupNameTagNames, toolCallMarkupNamePatternByTag)
|
||||
}
|
||||
if name == "" {
|
||||
return ParsedToolCall{}
|
||||
}
|
||||
|
||||
input := map[string]any{}
|
||||
if argsRaw := findMarkupTagValue(inner, toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag); argsRaw != "" {
|
||||
input = parseMarkupInput(argsRaw)
|
||||
} else if kv := parseMarkupKVObject(inner); len(kv) > 0 {
|
||||
input = kv
|
||||
}
|
||||
return ParsedToolCall{Name: name, Input: input}
|
||||
}
|
||||
|
||||
func parseMarkupInput(raw string) map[string]any {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
return map[string]any{}
|
||||
}
|
||||
if parsed := parseToolCallInput(raw); len(parsed) > 0 {
|
||||
return parsed
|
||||
}
|
||||
if kv := parseMarkupKVObject(raw); len(kv) > 0 {
|
||||
return kv
|
||||
}
|
||||
return map[string]any{"_raw": stripTagText(raw)}
|
||||
}
|
||||
|
||||
func parseMarkupKVObject(text string) map[string]any {
|
||||
matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := map[string]any{}
|
||||
for _, m := range matches {
|
||||
if len(m) < 4 {
|
||||
continue
|
||||
}
|
||||
key := strings.TrimSpace(m[1])
|
||||
endKey := strings.TrimSpace(m[3])
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.EqualFold(key, endKey) {
|
||||
continue
|
||||
}
|
||||
value := strings.TrimSpace(stripTagText(m[2]))
|
||||
if value == "" {
|
||||
continue
|
||||
}
|
||||
var jsonValue any
|
||||
if json.Unmarshal([]byte(value), &jsonValue) == nil {
|
||||
out[key] = jsonValue
|
||||
continue
|
||||
}
|
||||
out[key] = value
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func stripTagText(text string) string {
|
||||
return strings.TrimSpace(anyTagPattern.ReplaceAllString(text, ""))
|
||||
}
|
||||
|
||||
// findMarkupTagValue returns the trimmed body of the first element that has
// non-blank content.
//
// Tags are tried in the order given by tagNames. A tag with no registered
// pattern, no match, or a blank body is skipped. The result is "" when no tag
// yields content.
func findMarkupTagValue(text string, tagNames []string, patternByTag map[string]*regexp.Regexp) string {
	for _, tagName := range tagNames {
		re := patternByTag[tagName]
		if re == nil {
			continue
		}
		groups := re.FindStringSubmatch(text)
		if len(groups) < 2 {
			continue
		}
		if body := strings.TrimSpace(groups[1]); body != "" {
			return body
		}
	}
	return ""
}
|
||||
@@ -33,12 +33,19 @@ func ParseToolCallsDetailed(text string, availableToolNames []string) ToolCallPa
|
||||
if strings.TrimSpace(text) == "" {
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = strings.Contains(strings.ToLower(text), "tool_calls")
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(text)
|
||||
|
||||
candidates := buildToolCallCandidates(text)
|
||||
var parsed []ParsedToolCall
|
||||
for _, candidate := range candidates {
|
||||
if tc := parseToolCallsPayload(candidate); len(tc) > 0 {
|
||||
tc := parseToolCallsPayload(candidate)
|
||||
if len(tc) == 0 {
|
||||
tc = parseXMLToolCalls(candidate)
|
||||
}
|
||||
if len(tc) == 0 {
|
||||
tc = parseMarkupToolCalls(candidate)
|
||||
}
|
||||
if len(tc) > 0 {
|
||||
parsed = tc
|
||||
result.SawToolCallSyntax = true
|
||||
break
|
||||
@@ -72,17 +79,21 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string)
|
||||
if looksLikeToolExampleContext(trimmed) {
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = strings.Contains(strings.ToLower(trimmed), "tool_calls")
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(trimmed)
|
||||
candidates := []string{trimmed}
|
||||
for _, candidate := range candidates {
|
||||
candidate = strings.TrimSpace(candidate)
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(candidate, "{") && !strings.HasPrefix(candidate, "[") {
|
||||
continue
|
||||
parsed := parseToolCallsPayload(candidate)
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseXMLToolCalls(candidate)
|
||||
}
|
||||
if parsed := parseToolCallsPayload(candidate); len(parsed) > 0 {
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseMarkupToolCalls(candidate)
|
||||
}
|
||||
if len(parsed) > 0 {
|
||||
result.SawToolCallSyntax = true
|
||||
calls, rejectedNames := filterToolCallsDetailed(parsed, availableToolNames)
|
||||
result.Calls = calls
|
||||
@@ -110,27 +121,32 @@ func filterToolCallsDetailed(parsed []ParsedToolCall, availableToolNames []strin
|
||||
}
|
||||
if len(allowed) == 0 {
|
||||
rejectedSet := map[string]struct{}{}
|
||||
rejected := make([]string, 0, len(parsed))
|
||||
for _, tc := range parsed {
|
||||
if tc.Name == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := rejectedSet[tc.Name]; ok {
|
||||
continue
|
||||
}
|
||||
rejectedSet[tc.Name] = struct{}{}
|
||||
}
|
||||
rejected := make([]string, 0, len(rejectedSet))
|
||||
for name := range rejectedSet {
|
||||
rejected = append(rejected, name)
|
||||
rejected = append(rejected, tc.Name)
|
||||
}
|
||||
return nil, rejected
|
||||
}
|
||||
out := make([]ParsedToolCall, 0, len(parsed))
|
||||
rejectedSet := map[string]struct{}{}
|
||||
rejected := make([]string, 0)
|
||||
for _, tc := range parsed {
|
||||
if tc.Name == "" {
|
||||
continue
|
||||
}
|
||||
matchedName := resolveAllowedToolName(tc.Name, allowed, allowedCanonical)
|
||||
if matchedName == "" {
|
||||
rejectedSet[tc.Name] = struct{}{}
|
||||
if _, ok := rejectedSet[tc.Name]; !ok {
|
||||
rejectedSet[tc.Name] = struct{}{}
|
||||
rejected = append(rejected, tc.Name)
|
||||
}
|
||||
continue
|
||||
}
|
||||
tc.Name = matchedName
|
||||
@@ -139,10 +155,6 @@ func filterToolCallsDetailed(parsed []ParsedToolCall, availableToolNames []strin
|
||||
}
|
||||
out = append(out, tc)
|
||||
}
|
||||
rejected := make([]string, 0, len(rejectedSet))
|
||||
for name := range rejectedSet {
|
||||
rejected = append(rejected, name)
|
||||
}
|
||||
return out, rejected
|
||||
}
|
||||
|
||||
@@ -190,6 +202,14 @@ func parseToolCallsPayload(payload string) []ParsedToolCall {
|
||||
return nil
|
||||
}
|
||||
|
||||
// looksLikeToolCallSyntax reports whether text contains any tool-call marker.
// The check is a case-insensitive substring search for "tool_calls",
// "<tool_call", "<function_call" or "<invoke".
func looksLikeToolCallSyntax(text string) bool {
	lowered := strings.ToLower(text)
	for _, marker := range []string{"tool_calls", "<tool_call", "<function_call", "<invoke"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func parseToolCallList(v any) []ParsedToolCall {
|
||||
items, ok := v.([]any)
|
||||
if !ok {
|
||||
|
||||
@@ -93,6 +93,15 @@ func parseSingleXMLToolCall(block string) (ParsedToolCall, bool) {
|
||||
if err := dec.DecodeElement(&v, &t); err == nil && strings.TrimSpace(v) != "" {
|
||||
name = strings.TrimSpace(v)
|
||||
}
|
||||
case "input", "arguments", "argument", "args", "params":
|
||||
var v string
|
||||
if err := dec.DecodeElement(&v, &t); err == nil && strings.TrimSpace(v) != "" {
|
||||
if parsed := parseToolCallInput(strings.TrimSpace(v)); len(parsed) > 0 {
|
||||
for k, vv := range parsed {
|
||||
params[k] = vv
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
if inParams || inTool {
|
||||
var v string
|
||||
@@ -210,6 +219,13 @@ func parseInvokeFunctionCallStyle(text string) (ParsedToolCall, bool) {
|
||||
input[k] = v
|
||||
}
|
||||
}
|
||||
if len(input) == 0 {
|
||||
if argsRaw := findMarkupTagValue(m[2], toolCallMarkupArgsTagNames, toolCallMarkupArgsPatternByTag); argsRaw != "" {
|
||||
input = parseMarkupInput(argsRaw)
|
||||
} else if kv := parseMarkupKVObject(m[2]); len(kv) > 0 {
|
||||
input = kv
|
||||
}
|
||||
}
|
||||
return ParsedToolCall{Name: name, Input: input}, true
|
||||
}
|
||||
|
||||
|
||||
@@ -271,3 +271,11 @@ func TestParseToolCallsSupportsMultipleAntmlFunctionCalls(t *testing.T) {
|
||||
t.Fatalf("expected canonical names [bash read], got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsDoesNotAcceptMismatchedMarkupTags(t *testing.T) {
|
||||
text := `<tool_call><name>read_file</function><arguments>{"path":"README.md"}</arguments></tool_call>`
|
||||
calls := ParseToolCalls(text, []string{"read_file"})
|
||||
if len(calls) != 0 {
|
||||
t.Fatalf("expected mismatched tags to be rejected, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
{
|
||||
"calls": []
|
||||
}
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": true,
|
||||
"rejectedToolNames": [
|
||||
"unknown_tool"
|
||||
]
|
||||
}
|
||||
@@ -6,5 +6,8 @@
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,3 +1,6 @@
|
||||
{
|
||||
"calls": []
|
||||
}
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": false,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
13
tests/compat/expected/toolcalls_function_call_tag.json
Normal file
13
tests/compat/expected/toolcalls_function_call_tag.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
13
tests/compat/expected/toolcalls_invoke_attr.json
Normal file
13
tests/compat/expected/toolcalls_invoke_attr.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
13
tests/compat/expected/toolcalls_loose_normalize.json
Normal file
13
tests/compat/expected/toolcalls_loose_normalize.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,3 +1,6 @@
|
||||
{
|
||||
"calls": []
|
||||
}
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": false,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,3 +1,6 @@
|
||||
{
|
||||
"calls": []
|
||||
}
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -6,5 +6,8 @@
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
@@ -1,3 +1,8 @@
|
||||
{
|
||||
"calls": []
|
||||
}
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": true,
|
||||
"rejectedToolNames": [
|
||||
"unknown_tool"
|
||||
]
|
||||
}
|
||||
13
tests/compat/expected/toolcalls_xml_tool_call.json
Normal file
13
tests/compat/expected/toolcalls_xml_tool_call.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
}
|
||||
6
tests/compat/fixtures/toolcalls/function_call_tag.json
Normal file
6
tests/compat/fixtures/toolcalls/function_call_tag.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"text": "<function_call><function>read_file</function><parameters>{\"path\":\"README.MD\"}</parameters></function_call>",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
6
tests/compat/fixtures/toolcalls/invoke_attr.json
Normal file
6
tests/compat/fixtures/toolcalls/invoke_attr.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"text": "<invoke name=\"read_file\"><argument>{\"path\":\"README.MD\"}</argument></invoke>",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
6
tests/compat/fixtures/toolcalls/loose_normalize.json
Normal file
6
tests/compat/fixtures/toolcalls/loose_normalize.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"read-file\",\"input\":{\"path\":\"README.MD\"}}]}",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"text": "{\"tool_calls\":[{\"name\":\"company.fs.read_file\",\"input\":{\"path\":\"README.MD\"}}]}",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
6
tests/compat/fixtures/toolcalls/xml_tool_call.json
Normal file
6
tests/compat/fixtures/toolcalls/xml_tool_call.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"text": "<tool_call><name>read_file</name><arguments>{\"path\":\"README.MD\"}</arguments></tool_call>",
|
||||
"tool_names": [
|
||||
"read_file"
|
||||
]
|
||||
}
|
||||
@@ -6,7 +6,7 @@ const fs = require('node:fs');
|
||||
const path = require('node:path');
|
||||
|
||||
const chatStream = require('../../api/chat-stream.js');
|
||||
const { parseToolCalls, parseStandaloneToolCalls } = require('../../internal/js/helpers/stream-tool-sieve.js');
|
||||
const { parseToolCallsDetailed, parseStandaloneToolCallsDetailed } = require('../../internal/js/helpers/stream-tool-sieve.js');
|
||||
|
||||
const { parseChunkForContent, estimateTokens } = chatStream.__test;
|
||||
|
||||
@@ -44,9 +44,12 @@ test('js compat: toolcall fixtures', () => {
|
||||
const fixture = readJSON(path.join(fixtureDir, file));
|
||||
const expected = readJSON(path.join(expectedDir, `toolcalls_${name}.json`));
|
||||
const mode = typeof fixture.mode === 'string' ? fixture.mode.trim().toLowerCase() : '';
|
||||
const parser = mode === 'standalone' ? parseStandaloneToolCalls : parseToolCalls;
|
||||
const parser = mode === 'standalone' ? parseStandaloneToolCallsDetailed : parseToolCallsDetailed;
|
||||
const got = parser(fixture.text, fixture.tool_names || []);
|
||||
assert.deepEqual(got, expected.calls, `${name}: calls mismatch`);
|
||||
assert.deepEqual(got.calls, expected.calls, `${name}: calls mismatch`);
|
||||
assert.equal(got.sawToolCallSyntax, expected.sawToolCallSyntax, `${name}: sawToolCallSyntax mismatch`);
|
||||
assert.equal(got.rejectedByPolicy, expected.rejectedByPolicy, `${name}: rejectedByPolicy mismatch`);
|
||||
assert.deepEqual(got.rejectedToolNames, expected.rejectedToolNames, `${name}: rejectedToolNames mismatch`);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -264,3 +264,9 @@ test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
|
||||
assert.equal(second.length, 1);
|
||||
assert.equal(first[0].id, second[0].id);
|
||||
});
|
||||
|
||||
// An element opened as <name> but closed as </function> must not yield a call.
test('parseToolCalls rejects mismatched markup tags', () => {
  const malformed = '<tool_call><name>read_file</function><arguments>{"path":"README.md"}</arguments></tool_call>';
  const allowedTools = ['read_file'];
  assert.equal(parseToolCalls(malformed, allowedTools).length, 0);
});
|
||||
|
||||
Reference in New Issue
Block a user