package toolcall

import (
	"strings"
	"unicode/utf8"
)

// toolMarkupNameAlias maps a tag name as it may appear in text (raw) to the
// name reported on the parsed tag (canonical).
type toolMarkupNameAlias struct {
	raw       string
	canonical string
	// dsmlOnly restricts the alias to tags whose name prefix was recognised
	// as DSML-like (see matchToolMarkupName).
	dsmlOnly bool
}

// toolMarkupNames lists every tag name the scanner recognises. Order matters:
// matchToolMarkupName returns the first entry that matches.
var toolMarkupNames = []toolMarkupNameAlias{
	{raw: "tool_calls", canonical: "tool_calls"},
	{raw: "tool-calls", canonical: "tool_calls", dsmlOnly: true},
	{raw: "invoke", canonical: "invoke"},
	{raw: "parameter", canonical: "parameter"},
}

// ToolMarkupTag describes one tool-markup tag located by scanToolMarkupTagAt.
// All offsets are byte indices into the scanned text.
type ToolMarkupTag struct {
	Start       int    // index of the leading '<'
	End         int    // index of the last byte belonging to the tag (inclusive)
	NameStart   int    // index where the matched tag name begins
	NameEnd     int    // index just past the name and any trailing pipe markers
	Name        string // canonical name taken from toolMarkupNames
	Closing     bool   // a '/' directly follows the leading '<' run
	SelfClosing bool   // the trimmed tag text ends in "/>"
	DSMLLike    bool   // a name prefix was consumed before the name
	Canonical   bool   // always the negation of DSMLLike
}

// ContainsToolMarkupSyntaxOutsideIgnored reports whether text contains at
// least one DSML-like tool-markup tag and/or at least one canonical one,
// skipping every region that skipXMLIgnoredSection steps over. Scanning stops
// early when skipXMLIgnoredSection reports a blocked position or once both
// kinds have been seen.
func ContainsToolMarkupSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
	for i := 0; i < len(text); {
		next, advanced, blocked := skipXMLIgnoredSection(text, i)
		if blocked {
			return hasDSML, hasCanonical
		}
		if advanced {
			i = next
			continue
		}
		if tag, ok := scanToolMarkupTagAt(text, i); ok {
			if tag.DSMLLike {
				hasDSML = true
			} else {
				hasCanonical = true
			}
			if hasDSML && hasCanonical {
				return true, true
			}
			i = tag.End + 1
			continue
		}
		i++
	}
	return hasDSML, hasCanonical
}

// ContainsToolCallWrapperSyntaxOutsideIgnored behaves like
// ContainsToolMarkupSyntaxOutsideIgnored but only counts tags whose canonical
// name is "tool_calls"; other recognised tags are stepped over.
func ContainsToolCallWrapperSyntaxOutsideIgnored(text string) (hasDSML, hasCanonical bool) {
	for i := 0; i < len(text); {
		next, advanced, blocked := skipXMLIgnoredSection(text, i)
		if blocked {
			return hasDSML, hasCanonical
		}
		if advanced {
			i = next
			continue
		}
		if tag, ok := scanToolMarkupTagAt(text, i); ok {
			if tag.Name != "tool_calls" {
				i = tag.End + 1
				continue
			}
			if tag.DSMLLike {
				hasDSML = true
			} else {
				hasCanonical = true
			}
			if hasDSML && hasCanonical {
				return true, true
			}
			i = tag.End + 1
			continue
		}
		i++
	}
	return hasDSML, hasCanonical
}

// FindToolMarkupTagOutsideIgnored returns the first tool-markup tag at or
// after byte offset start (negative values are clamped to 0) that is not
// inside a region skipped by skipXMLIgnoredSection. It returns false when no
// tag is found or when skipXMLIgnoredSection reports a blocked position.
func FindToolMarkupTagOutsideIgnored(text string, start int) (ToolMarkupTag, bool) {
	for i := maxInt(start, 0); i < len(text); {
		next, advanced, blocked := skipXMLIgnoredSection(text, i)
		if blocked {
			return ToolMarkupTag{}, false
		}
		if advanced {
			i = next
			continue
		}
		if tag, ok := scanToolMarkupTagAt(text, i); ok {
			return tag, true
		}
		i++
	}
	return ToolMarkupTag{}, false
}

// FindMatchingToolMarkupClose returns the closing tag that balances open,
// counting nested same-named open tags and ignoring self-closing ones. It
// returns false when open is itself a closing tag, has no name, ends at or
// past the end of text, or no balancing close exists.
func FindMatchingToolMarkupClose(text string, open ToolMarkupTag) (ToolMarkupTag, bool) {
	if text == "" || open.Name == "" || open.Closing || open.End >= len(text) {
		return ToolMarkupTag{}, false
	}
	depth := 1
	for pos := open.End + 1; pos < len(text); {
		tag, ok := FindToolMarkupTagOutsideIgnored(text, pos)
		if !ok {
			return ToolMarkupTag{}, false
		}
		if tag.Name != open.Name {
			pos = tag.End + 1
			continue
		}
		if tag.Closing {
			depth--
			if depth == 0 {
				return tag, true
			}
		} else if !tag.SelfClosing {
			depth++
		}
		pos = tag.End + 1
	}
	return ToolMarkupTag{}, false
}

// scanToolMarkupTagAt tries to parse a tool-markup tag whose first byte is the
// '<' at text[start]. It accepts a run of extra '<', an optional '/', any
// number of name prefixes (pipes, whitespace, "dsml", arbitrary prefixes), one
// of the names in toolMarkupNames, and optional trailing pipe markers.
func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
	if start < 0 || start >= len(text) || text[start] != '<' {
		return ToolMarkupTag{}, false
	}
	i := start + 1
	for i < len(text) && text[i] == '<' {
		i++
	}
	closing := false
	if i < len(text) && text[i] == '/' {
		closing = true
		i++
	}
	prefixStart := i
	i, dsmlLike := consumeToolMarkupNamePrefix(text, i)
	name, nameLen := matchToolMarkupName(text, i, dsmlLike)
	if nameLen == 0 {
		// No name right after the consumed prefixes: retry by searching for a
		// name anywhere before the tag terminator. Such matches always count
		// as DSML-like.
		fallbackName, fallbackStart, fallbackLen, ok := matchToolMarkupNameAfterArbitraryPrefix(text, prefixStart)
		if !ok {
			return ToolMarkupTag{}, false
		}
		name = fallbackName
		i = fallbackStart
		nameLen = fallbackLen
		dsmlLike = true
	}
	nameEnd := i + nameLen
	nameEndBeforePipes := nameEnd
	for next, ok := consumeToolMarkupPipe(text, nameEnd); ok; next, ok = consumeToolMarkupPipe(text, nameEnd) {
		nameEnd = next
	}
	hasTrailingPipe := nameEnd > nameEndBeforePipes
	if !hasToolMarkupBoundary(text, nameEnd) {
		return ToolMarkupTag{}, false
	}
	end := findXMLTagEnd(text, nameEnd)
	if end < 0 {
		// Without a tag end the tag is only accepted when trailing pipes
		// followed the name; it then ends on the last pipe byte.
		if !hasTrailingPipe {
			return ToolMarkupTag{}, false
		}
		end = nameEnd - 1
	}
	if hasTrailingPipe {
		// If another '<' appears at or before the located end, cut this tag
		// off right after its trailing pipes instead of spanning that '<'.
		if nextLT := strings.IndexByte(text[nameEnd:], '<'); nextLT >= 0 && end >= nameEnd+nextLT {
			end = nameEnd - 1
		}
	}
	trimmed := strings.TrimSpace(text[start : end+1])
	return ToolMarkupTag{
		Start:       start,
		End:         end,
		NameStart:   i,
		NameEnd:     nameEnd,
		Name:        name,
		Closing:     closing,
		SelfClosing: strings.HasSuffix(trimmed, "/>"),
		DSMLLike:    dsmlLike,
		Canonical:   !dsmlLike,
	}, true
}

// IsPartialToolMarkupTagPrefix reports whether text, which must start with '<'
// and contain no tag terminator, could still grow into a tool-markup tag:
// everything after the '<' run, optional '/', and any consumable name
// prefixes is either empty or a (possibly partial) tag name or "dsml" marker.
func IsPartialToolMarkupTagPrefix(text string) bool {
	// A terminator means the tag is already complete. Both the ASCII '>' and
	// its fullwidth form U+FF1E are terminators (see isToolMarkupTagTerminator);
	// the fullwidth one is written as an escape so it cannot be folded to ASCII.
	if text == "" || text[0] != '<' || strings.Contains(text, ">") || strings.Contains(text, "\uFF1E") {
		return false
	}
	i := 1
	for i < len(text) && text[i] == '<' {
		i++
	}
	if i >= len(text) {
		return true
	}
	if text[i] == '/' {
		i++
	}
	for i <= len(text) {
		if i == len(text) {
			return true
		}
		if hasToolMarkupNamePrefix(text, i) {
			return true
		}
		if hasASCIIPartialPrefixFoldAt(text, i, "dsml") {
			return true
		}
		if hasPartialToolMarkupNameAfterArbitraryPrefix(text, i) {
			return true
		}
		next, ok := consumeToolMarkupNamePrefixOnce(text, i)
		if !ok {
			return false
		}
		i = next
	}
	return false
}

// consumeToolMarkupNamePrefix repeatedly applies
// consumeToolMarkupNamePrefixOnce starting at idx. It returns the index after
// the last consumed prefix and whether at least one prefix was consumed.
func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) {
	dsmlLike := false
	for {
		next, ok := consumeToolMarkupNamePrefixOnce(text, idx)
		if !ok {
			return idx, dsmlLike
		}
		idx = next
		dsmlLike = true
	}
}

// consumeToolMarkupNamePrefixOnce consumes a single name-prefix element at
// idx: a pipe marker, one ASCII whitespace byte, a case-folded "dsml" with an
// optional '-' or '_' separator, or an arbitrary prefix accepted by
// consumeArbitraryToolMarkupNamePrefix. It returns (idx, false) when none
// applies.
func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
	if next, ok := consumeToolMarkupPipe(text, idx); ok {
		return next, true
	}
	if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') {
		return idx + 1, true
	}
	if hasASCIIPrefixFoldAt(text, idx, "dsml") {
		dsmlLen, _ := matchASCIIPrefixFoldAt(text, idx, "dsml")
		next := idx + dsmlLen
		if sep, size := normalizedASCIIAt(text, next); sep == '-' || sep == '_' {
			next += size
		}
		return next, true
	}
	if next, ok := consumeArbitraryToolMarkupNamePrefix(text, idx); ok {
		return next, true
	}
	return idx, false
}

// consumeArbitraryToolMarkupNamePrefix consumes one or more alphanumeric
// characters, optional whitespace, a separator (pipe marker, '_' or '-'), and
// optional whitespace, but only when a full or partial tag name follows. On
// success it returns the index where that name starts.
func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) {
	nextSegment, ok := consumeToolMarkupPrefixSegment(text, idx)
	if !ok {
		return idx, false
	}
	j := nextSegment
	for {
		nextSegment, ok = consumeToolMarkupPrefixSegment(text, j)
		if !ok {
			break
		}
		j = nextSegment
	}
	k := j
	for k < len(text) && (text[k] == ' ' || text[k] == '\t' || text[k] == '\r' || text[k] == '\n') {
		k++
	}
	next, ok := consumeToolMarkupPipe(text, k)
	if !ok {
		if sep, size := normalizedASCIIAt(text, k); sep == '_' || sep == '-' {
			next = k + size
			ok = true
		}
	}
	if !ok {
		return idx, false
	}
	for next < len(text) && (text[next] == ' ' || text[next] == '\t' || text[next] == '\r' || text[next] == '\n') {
		next++
	}
	if !hasToolMarkupNamePrefix(text, next) {
		return idx, false
	}
	return next, true
}

// consumeToolMarkupPrefixSegment consumes one character at idx if, after
// fullwidth normalisation, it is an ASCII letter or digit.
func consumeToolMarkupPrefixSegment(text string, idx int) (int, bool) {
	ch, size := normalizedASCIIAt(text, idx)
	if size <= 0 {
		return idx, false
	}
	if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') {
		return idx + size, true
	}
	return idx, false
}

// hasASCIIPartialPrefixFoldAt reports whether text[start:] runs to the end of
// text and is a non-empty, strictly shorter, case-insensitive prefix of
// prefix (after fullwidth normalisation).
func hasASCIIPartialPrefixFoldAt(text string, start int, prefix string) bool {
	if start < 0 || start >= len(text) {
		return false
	}
	idx := start
	matched := 0
	for matched < len(prefix) && idx < len(text) {
		ch, size := normalizedASCIIAt(text, idx)
		if size <= 0 || asciiLower(ch) != asciiLower(prefix[matched]) {
			return false
		}
		idx += size
		matched++
	}
	return matched > 0 && matched < len(prefix) && idx == len(text)
}

// hasToolMarkupNamePrefix reports whether a raw name from toolMarkupNames
// starts at start, either in full or as a partial match that reaches the end
// of text.
func hasToolMarkupNamePrefix(text string, start int) bool {
	for _, name := range toolMarkupNames {
		if hasASCIIPrefixFoldAt(text, start, name.raw) {
			return true
		}
		if hasASCIIPartialPrefixFoldAt(text, start, name.raw) {
			return true
		}
	}
	return false
}

// matchToolMarkupName returns the canonical name and matched byte length of
// the first toolMarkupNames entry found at start, or ("", 0) if none matches.
// Entries marked dsmlOnly are skipped unless dsmlLike is true.
func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
	for _, name := range toolMarkupNames {
		if name.dsmlOnly && !dsmlLike {
			continue
		}
		if nameLen, ok := matchASCIIPrefixFoldAt(text, start, name.raw); ok {
			return name.canonical, nameLen
		}
	}
	return "", 0
}

// matchToolMarkupNameAfterArbitraryPrefix walks forward from start, rune by
// rune, until a tag terminator is reached, looking for a tag name preceded by
// a prefix that toolMarkupPrefixAllowsLocalName accepts. It returns the
// canonical name, the name's start index, and its matched byte length.
func matchToolMarkupNameAfterArbitraryPrefix(text string, start int) (string, int, int, bool) {
	for idx := start; idx < len(text); {
		if isToolMarkupTagTerminator(text, idx) {
			return "", 0, 0, false
		}
		for _, name := range toolMarkupNames {
			nameLen, ok := matchASCIIPrefixFoldAt(text, idx, name.raw)
			if !ok {
				continue
			}
			if !toolMarkupPrefixAllowsLocalName(text[start:idx]) {
				continue
			}
			return name.canonical, idx, nameLen, true
		}
		_, size := utf8.DecodeRuneInString(text[idx:])
		if size <= 0 {
			size = 1
		}
		idx += size
	}
	return "", 0, 0, false
}

// hasPartialToolMarkupNameAfterArbitraryPrefix is the streaming counterpart
// of matchToolMarkupNameAfterArbitraryPrefix: it reports whether, before any
// tag terminator, an accepted prefix is followed by a full or partial name.
func hasPartialToolMarkupNameAfterArbitraryPrefix(text string, start int) bool {
	for idx := start; idx < len(text); {
		if isToolMarkupTagTerminator(text, idx) {
			return false
		}
		if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasToolMarkupNamePrefix(text, idx) {
			return true
		}
		_, size := utf8.DecodeRuneInString(text[idx:])
		if size <= 0 {
			size = 1
		}
		idx += size
	}
	return false
}

// toolMarkupPrefixAllowsLocalName decides whether prefix may precede a tag
// name. A non-empty prefix is accepted when its normalised lower-case form
// contains "dsml"; otherwise it must contain none of '=', '"', '\” and must
// not end in an ASCII letter or digit (after fullwidth normalisation).
func toolMarkupPrefixAllowsLocalName(prefix string) bool {
	if prefix == "" {
		return false
	}
	if strings.Contains(normalizedASCIILowerString(prefix), "dsml") {
		return true
	}
	if strings.ContainsAny(prefix, "=\"'") {
		return false
	}
	r, _ := utf8.DecodeLastRuneInString(prefix)
	r = normalizeFullwidthASCII(r)
	return (r < 'a' || r > 'z') && (r < 'A' || r > 'Z') && (r < '0' || r > '9')
}

// normalizedASCIILowerString returns text with fullwidth forms folded to
// ASCII, upper-case ASCII letters lowered, and every rune above 0x7f dropped.
func normalizedASCIILowerString(text string) string {
	var b strings.Builder
	b.Grow(len(text))
	for _, r := range text {
		r = normalizeFullwidthASCII(r)
		if r >= 'A' && r <= 'Z' {
			r += 'a' - 'A'
		}
		if r <= 0x7f {
			b.WriteRune(r)
		}
	}
	return b.String()
}

// isToolMarkupTagTerminator reports whether the rune at idx is '>' or its
// fullwidth form.
func isToolMarkupTagTerminator(text string, idx int) bool {
	if idx >= len(text) {
		return false
	}
	if text[idx] == '>' {
		return true
	}
	r, _ := utf8.DecodeRuneInString(text[idx:])
	return normalizeFullwidthASCII(r) == '>'
}

// consumeToolMarkupPipe consumes one pipe-like marker at idx: ASCII '|', the
// control byte 0x02, the fullwidth vertical line U+FF5C, or the visible
// "symbol for start of text" glyph. It returns the index after the marker.
func consumeToolMarkupPipe(text string, idx int) (int, bool) {
	if idx >= len(text) {
		return idx, false
	}
	if text[idx] == '|' {
		return idx + 1, true
	}
	if text[idx] == '\x02' {
		return idx + 1, true
	}
	// Fullwidth vertical line, written as an escape so that it stays distinct
	// from the ASCII '|' already handled above.
	const fullwidthPipe = "\uFF5C"
	if strings.HasPrefix(text[idx:], fullwidthPipe) {
		return idx + len(fullwidthPipe), true
	}
	if strings.HasPrefix(text[idx:], "␂") {
		return idx + len("␂"), true
	}
	return idx, false
}

// hasToolMarkupBoundary reports whether the tag name may end at idx: at end
// of text, before ASCII whitespace, '/', '>', or a fullwidth '>'.
func hasToolMarkupBoundary(text string, idx int) bool {
	if idx >= len(text) {
		return true
	}
	switch text[idx] {
	case ' ', '\t', '\n', '\r', '>', '/':
		return true
	default:
		r, _ := utf8.DecodeRuneInString(text[idx:])
		return normalizeFullwidthASCII(r) == '>'
	}
}

// normalizedASCIIAt decodes the rune at idx, folds fullwidth forms to ASCII,
// and returns the resulting byte together with the encoded size of the
// original rune. It returns (0, 0) when idx is out of range or the rune is
// still outside ASCII after normalisation.
func normalizedASCIIAt(text string, idx int) (byte, int) {
	if idx < 0 || idx >= len(text) {
		return 0, 0
	}
	r, size := utf8.DecodeRuneInString(text[idx:])
	if r == utf8.RuneError && size == 0 {
		return 0, 0
	}
	normalized := normalizeFullwidthASCII(r)
	if normalized > 0x7f {
		return 0, 0
	}
	return byte(normalized), size
}

// normalizeFullwidthASCII maps a fullwidth form in U+FF01..U+FF5E onto its
// ASCII counterpart in '!'..'~' and returns every other rune unchanged.
func normalizeFullwidthASCII(r rune) rune {
	const (
		fullwidthFirst  = '\uFF01' // FULLWIDTH EXCLAMATION MARK
		fullwidthLast   = '\uFF5E' // FULLWIDTH TILDE
		fullwidthOffset = 0xFEE0   // fullwidthFirst - '!'
	)
	// The range test must be on the fullwidth block: applying the offset to
	// plain ASCII would produce negative runes and corrupt every comparison.
	if r >= fullwidthFirst && r <= fullwidthLast {
		return r - fullwidthOffset
	}
	return r
}