mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-15 13:45:10 +08:00
feat(toolcall): harden confusable candidate spans
This commit is contained in:
@@ -34,6 +34,14 @@ func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesThoughtMarkers(t *testing.T) {
|
||||
raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
if got != "ABCDE" {
|
||||
t.Fatalf("unexpected sanitize result for leaked thought markers: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) {
|
||||
raw := "Answer prefix<think>internal reasoning that never closes"
|
||||
got := sanitizeLeakedOutput(raw)
|
||||
|
||||
@@ -18,10 +18,16 @@ var leakedThinkTagPattern = regexp.MustCompile(`(?is)</?\s*think\s*>`)
|
||||
// - U+2581 variant: <|begin▁of▁sentence|>
|
||||
var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*begin[_▁]of[_▁]sentence\s*[|\|]>`)
|
||||
|
||||
// leakedThoughtMarkerPattern matches leaked thought control markers in both
|
||||
// explicit and compact forms:
|
||||
// - ASCII underscore: <| of_thought |>, <| begin_of_thought |>
|
||||
// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|>
|
||||
var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[|\|]>`)
|
||||
|
||||
// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms:
|
||||
// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|>
|
||||
// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|>
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`)
|
||||
|
||||
// leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through
|
||||
// when the sieve fails to capture them. These are applied only to complete
|
||||
@@ -48,6 +54,7 @@ func sanitizeLeakedOutput(text string) string {
|
||||
out = stripDanglingThinkSuffix(out)
|
||||
out = leakedThinkTagPattern.ReplaceAllString(out, "")
|
||||
out = leakedBOSMarkerPattern.ReplaceAllString(out, "")
|
||||
out = leakedThoughtMarkerPattern.ReplaceAllString(out, "")
|
||||
out = leakedMetaMarkerPattern.ReplaceAllString(out, "")
|
||||
out = stripLeakedToolCallWrapperBlocks(out)
|
||||
out = sanitizeLeakedAgentXMLBlocks(out)
|
||||
|
||||
@@ -7,6 +7,10 @@ const {
|
||||
SKIP_EXACT_PATHS,
|
||||
} = require('../shared/deepseek-constants');
|
||||
|
||||
const LEAKED_BOS_MARKER_PATTERN = /<[||]\s*begin[_▁]of[_▁]sentence\s*[||]>/gi;
|
||||
const LEAKED_THOUGHT_MARKER_PATTERN = /<[||]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[||]>/gi;
|
||||
const LEAKED_META_MARKER_PATTERN = /<[||]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[||]>/gi;
|
||||
|
||||
|
||||
|
||||
function stripThinkTags(text) {
|
||||
@@ -621,7 +625,11 @@ function stripReferenceMarkersText(text) {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
return text.replace(/\[(?:citation|reference):\s*\d+\]/gi, '');
|
||||
return text
|
||||
.replace(/\[(?:citation|reference):\s*\d+\]/gi, '')
|
||||
.replace(LEAKED_BOS_MARKER_PATTERN, '')
|
||||
.replace(LEAKED_THOUGHT_MARKER_PATTERN, '')
|
||||
.replace(LEAKED_META_MARKER_PATTERN, '');
|
||||
}
|
||||
|
||||
function asString(v) {
|
||||
|
||||
@@ -7,6 +7,9 @@ const {
|
||||
parseMarkupToolCalls,
|
||||
stripFencedCodeBlocks,
|
||||
containsToolCallWrapperSyntaxOutsideIgnored,
|
||||
normalizeDSMLToolCallMarkup,
|
||||
hasRepairableXMLToolCallsWrapper,
|
||||
indexToolCDATAOpen,
|
||||
sanitizeLooseCDATA,
|
||||
} = require('./parse_payload');
|
||||
|
||||
@@ -37,19 +40,23 @@ function parseToolCalls(text, toolNames) {
|
||||
|
||||
function parseToolCallsDetailed(text, toolNames) {
|
||||
const result = emptyParseResult();
|
||||
const normalized = toStringSafe(text);
|
||||
if (!normalized) {
|
||||
const raw = toStringSafe(text);
|
||||
if (!raw) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized);
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(raw)) {
|
||||
return result;
|
||||
}
|
||||
const normalized = normalizeDSMLToolCallMarkup(stripFencedCodeBlocks(raw).trim());
|
||||
if (!normalized.ok || !normalized.text) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized.text) || hasRepairableXMLToolCallsWrapper(normalized.text);
|
||||
// XML markup parsing only.
|
||||
let parsed = parseMarkupToolCalls(normalized);
|
||||
if (parsed.length === 0 && normalized.toLowerCase().includes('<![cdata[')) {
|
||||
const recovered = sanitizeLooseCDATA(normalized);
|
||||
if (recovered !== normalized) {
|
||||
let parsed = parseMarkupToolCalls(normalized.text);
|
||||
if (parsed.length === 0 && indexToolCDATAOpen(normalized.text, 0) >= 0) {
|
||||
const recovered = sanitizeLooseCDATA(normalized.text);
|
||||
if (recovered !== normalized.text) {
|
||||
parsed = parseMarkupToolCalls(recovered);
|
||||
}
|
||||
}
|
||||
@@ -70,19 +77,23 @@ function parseStandaloneToolCalls(text, toolNames) {
|
||||
|
||||
function parseStandaloneToolCallsDetailed(text, toolNames) {
|
||||
const result = emptyParseResult();
|
||||
const trimmed = toStringSafe(text);
|
||||
if (!trimmed) {
|
||||
const raw = toStringSafe(text);
|
||||
if (!raw) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(raw)) {
|
||||
return result;
|
||||
}
|
||||
const normalized = normalizeDSMLToolCallMarkup(stripFencedCodeBlocks(raw).trim());
|
||||
if (!normalized.ok || !normalized.text) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized.text) || hasRepairableXMLToolCallsWrapper(normalized.text);
|
||||
// XML markup parsing only.
|
||||
let parsed = parseMarkupToolCalls(trimmed);
|
||||
if (parsed.length === 0 && trimmed.toLowerCase().includes('<![cdata[')) {
|
||||
const recovered = sanitizeLooseCDATA(trimmed);
|
||||
if (recovered !== trimmed) {
|
||||
let parsed = parseMarkupToolCalls(normalized.text);
|
||||
if (parsed.length === 0 && indexToolCDATAOpen(normalized.text, 0) >= 0) {
|
||||
const recovered = sanitizeLooseCDATA(normalized.text);
|
||||
if (recovered !== normalized.text) {
|
||||
parsed = parseMarkupToolCalls(recovered);
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -114,6 +114,39 @@ function hasOpenXMLToolTag(captured) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a captured buffer that contains an `invoke` opening tag
 * which is not preceded by a `tool_calls` opening tag should keep being
 * buffered instead of being released as plain text.
 *
 * @param {string} captured text captured so far by the sieve
 * @returns {boolean} true when the capture should be kept
 */
function shouldKeepBareInvokeCapture(captured) {
  const invokeOpen = findFirstToolTag(captured, 0, 'invoke', false);
  if (!invokeOpen) {
    return false;
  }

  // A wrapper that opens at or before the invoke tag means this is not a bare invoke.
  const wrapper = findFirstToolTag(captured, 0, 'tool_calls', false);
  if (wrapper && wrapper.start <= invokeOpen.start) {
    return false;
  }

  // A wrapper close tag after the invoke tag: keep the capture.
  const wrapperClose = findFirstToolTag(captured, invokeOpen.start + 1, 'tool_calls', true);
  if (wrapperClose && wrapperClose.start > invokeOpen.start) {
    return true;
  }

  const openEnd = invokeOpen.end;
  if (openEnd < 0) {
    return true;
  }

  const afterOpen = openEnd + 1;
  const rest = captured.slice(afterOpen);
  const restNoLeadingSpace = rest.replace(/^[ \t\r\n]+/, '');
  if (!restNoLeadingSpace) {
    // Nothing but whitespace follows the opening tag so far.
    return true;
  }

  // Once the invoke is closed, keep only if nothing but whitespace trails it.
  const invokeClose = findFirstToolTag(captured, afterOpen, 'invoke', true);
  if (invokeClose) {
    return captured.slice(invokeClose.end + 1).trim() === '';
  }

  // Still open: keep when the body starts with a parameter tag ...
  const firstParam = findFirstToolTag(rest, 0, 'parameter', false);
  if (firstParam && rest.slice(0, firstParam.start).trim() === '') {
    return true;
  }

  // ... or with what looks like the start of a JSON object/array.
  const lead = restNoLeadingSpace[0];
  return lead === '{' || lead === '[';
}
|
||||
|
||||
function findFirstToolTag(text, from, name, closing) {
|
||||
for (let pos = Math.max(0, from || 0); pos < text.length;) {
|
||||
const tag = findToolMarkupTagOutsideIgnored(text, pos);
|
||||
@@ -131,5 +164,6 @@ function findFirstToolTag(text, from, name, closing) {
|
||||
module.exports = {
|
||||
consumeXMLToolCapture,
|
||||
hasOpenXMLToolTag,
|
||||
shouldKeepBareInvokeCapture,
|
||||
findPartialXMLToolTagStart: findPartialToolMarkupStart,
|
||||
};
|
||||
|
||||
@@ -12,6 +12,7 @@ const {
|
||||
const {
|
||||
consumeXMLToolCapture: consumeXMLToolCaptureImpl,
|
||||
hasOpenXMLToolTag,
|
||||
shouldKeepBareInvokeCapture,
|
||||
findPartialXMLToolTagStart,
|
||||
} = require('./sieve-xml');
|
||||
function processToolSieveChunk(state, chunk, toolNames) {
|
||||
@@ -203,6 +204,9 @@ function consumeToolCapture(state, toolNames) {
|
||||
if (hasOpenXMLToolTag(captured)) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
if (shouldKeepBareInvokeCapture(captured)) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
|
||||
// No XML tool tags detected — release captured content as text.
|
||||
return {
|
||||
|
||||
@@ -1,4 +1,689 @@
|
||||
package toolcall
|
||||
|
||||
// toolcalls_candidates.go holds the tool-call candidate helper logic
// (canonicalizing confusable tag spans, attribute parsing, CDATA detection).
// It is also one of the files on the refactor line gate target list.
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// canonicalToolMarkupAttr is one attribute recovered from a tool markup tag:
// Key is the normalized attribute name and Value is the attribute text taken
// from the tag.
type canonicalToolMarkupAttr struct {
	Key, Value string
}
|
||||
|
||||
func canonicalizeToolCallCandidateSpans(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
for i := 0; i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(text, i)
|
||||
if blocked {
|
||||
b.WriteString(text[i:])
|
||||
break
|
||||
}
|
||||
if advanced {
|
||||
b.WriteString(text[i:next])
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
tag, ok := scanToolMarkupTagAt(text, i)
|
||||
if !ok {
|
||||
b.WriteByte(text[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
b.WriteString(canonicalizeRecognizedToolMarkupTag(text[tag.Start:tag.End+1], tag))
|
||||
i = tag.End + 1
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func canonicalizeRecognizedToolMarkupTag(raw string, tag ToolMarkupTag) string {
|
||||
if raw == "" {
|
||||
return raw
|
||||
}
|
||||
idx := 0
|
||||
if delimLen := xmlTagStartDelimiterLenAt(raw, idx); delimLen > 0 {
|
||||
idx += delimLen
|
||||
}
|
||||
for {
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
if delimLen := xmlTagStartDelimiterLenAt(raw, idx); delimLen > 0 {
|
||||
idx += delimLen
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
if tag.Closing {
|
||||
if next, ok := consumeToolMarkupClosingSlash(raw, idx); ok {
|
||||
idx = next
|
||||
}
|
||||
}
|
||||
idx, _ = consumeToolMarkupNamePrefix(raw, idx)
|
||||
afterName, ok := consumeToolKeyword(raw, idx, rawNameForTag(tag))
|
||||
if !ok {
|
||||
afterName = idx
|
||||
}
|
||||
|
||||
attrs := parseCanonicalToolMarkupAttrs(raw, afterName)
|
||||
|
||||
var b strings.Builder
|
||||
b.Grow(len(raw) + 8)
|
||||
b.WriteByte('<')
|
||||
if tag.Closing {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
if tag.DSMLLike {
|
||||
b.WriteString("|DSML|")
|
||||
}
|
||||
b.WriteString(tag.Name)
|
||||
for _, attr := range attrs {
|
||||
if attr.Key == "" {
|
||||
continue
|
||||
}
|
||||
b.WriteByte(' ')
|
||||
b.WriteString(attr.Key)
|
||||
b.WriteString(`="`)
|
||||
b.WriteString(quoteCanonicalXMLAttrValue(attr.Value))
|
||||
b.WriteByte('"')
|
||||
}
|
||||
if tag.SelfClosing {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
b.WriteByte('>')
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func rawNameForTag(tag ToolMarkupTag) string {
|
||||
for _, name := range toolMarkupNames {
|
||||
if name.canonical == tag.Name {
|
||||
return name.raw
|
||||
}
|
||||
}
|
||||
return tag.Name
|
||||
}
|
||||
|
||||
func parseCanonicalToolMarkupAttrs(raw string, idx int) []canonicalToolMarkupAttr {
|
||||
if raw == "" || idx >= len(raw) {
|
||||
return nil
|
||||
}
|
||||
var out []canonicalToolMarkupAttr
|
||||
for idx < len(raw) {
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
if idx >= len(raw) {
|
||||
break
|
||||
}
|
||||
if spacingLen := toolMarkupWhitespaceLikeLenAt(raw, idx); spacingLen > 0 {
|
||||
idx += spacingLen
|
||||
continue
|
||||
}
|
||||
if xmlTagEndDelimiterLenAt(raw, idx) > 0 {
|
||||
break
|
||||
}
|
||||
if next, ok := consumeToolMarkupPipe(raw, idx); ok {
|
||||
idx = next
|
||||
continue
|
||||
}
|
||||
if next, ok := consumeToolMarkupClosingSlash(raw, idx); ok {
|
||||
idx = next
|
||||
continue
|
||||
}
|
||||
|
||||
keyStart := idx
|
||||
for idx < len(raw) {
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
if idx >= len(raw) {
|
||||
break
|
||||
}
|
||||
if spacingLen := toolMarkupWhitespaceLikeLenAt(raw, idx); spacingLen > 0 {
|
||||
break
|
||||
}
|
||||
if toolMarkupEqualsLenAt(raw, idx) > 0 || xmlTagEndDelimiterLenAt(raw, idx) > 0 {
|
||||
break
|
||||
}
|
||||
if _, ok := consumeToolMarkupPipe(raw, idx); ok {
|
||||
break
|
||||
}
|
||||
if _, ok := consumeToolMarkupClosingSlash(raw, idx); ok {
|
||||
break
|
||||
}
|
||||
_, size := utf8.DecodeRuneInString(raw[idx:])
|
||||
if size <= 0 {
|
||||
idx++
|
||||
} else {
|
||||
idx += size
|
||||
}
|
||||
}
|
||||
keyEnd := idx
|
||||
key := normalizeCanonicalToolAttrKey(raw[keyStart:keyEnd])
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
for {
|
||||
spacingLen := toolMarkupWhitespaceLikeLenAt(raw, idx)
|
||||
if spacingLen == 0 {
|
||||
break
|
||||
}
|
||||
idx += spacingLen
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
}
|
||||
if eqLen := toolMarkupEqualsLenAt(raw, idx); eqLen > 0 {
|
||||
idx += eqLen
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
for {
|
||||
spacingLen := toolMarkupWhitespaceLikeLenAt(raw, idx)
|
||||
if spacingLen == 0 {
|
||||
break
|
||||
}
|
||||
idx += spacingLen
|
||||
idx = skipToolMarkupIgnorables(raw, idx)
|
||||
}
|
||||
if key == "" {
|
||||
_, size := utf8.DecodeRuneInString(raw[idx:])
|
||||
if size <= 0 {
|
||||
idx++
|
||||
} else {
|
||||
idx += size
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
value := ""
|
||||
if quote, quoteLen := xmlQuotePairAt(raw, idx); quoteLen > 0 {
|
||||
valueStart := idx + quoteLen
|
||||
idx = valueStart
|
||||
for idx < len(raw) {
|
||||
if closeLen := xmlQuoteCloseDelimiterLenAt(raw, idx, quote); closeLen > 0 {
|
||||
value = raw[valueStart:idx]
|
||||
idx += closeLen
|
||||
break
|
||||
}
|
||||
_, size := utf8.DecodeRuneInString(raw[idx:])
|
||||
if size <= 0 {
|
||||
idx++
|
||||
} else {
|
||||
idx += size
|
||||
}
|
||||
}
|
||||
} else {
|
||||
valueStart := idx
|
||||
for idx < len(raw) {
|
||||
if spacingLen := toolMarkupWhitespaceLikeLenAt(raw, idx); spacingLen > 0 {
|
||||
break
|
||||
}
|
||||
if xmlTagEndDelimiterLenAt(raw, idx) > 0 || toolMarkupEqualsLenAt(raw, idx) > 0 {
|
||||
break
|
||||
}
|
||||
if _, ok := consumeToolMarkupPipe(raw, idx); ok {
|
||||
break
|
||||
}
|
||||
if _, ok := consumeToolMarkupClosingSlash(raw, idx); ok {
|
||||
break
|
||||
}
|
||||
_, size := utf8.DecodeRuneInString(raw[idx:])
|
||||
if size <= 0 {
|
||||
idx++
|
||||
} else {
|
||||
idx += size
|
||||
}
|
||||
}
|
||||
value = raw[valueStart:idx]
|
||||
}
|
||||
|
||||
out = append(out, canonicalToolMarkupAttr{
|
||||
Key: key,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func normalizeCanonicalToolAttrKey(raw string) string {
|
||||
trimmed := strings.TrimSpace(removeToolMarkupIgnorables(raw))
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
if next, ok := consumeToolKeyword(trimmed, 0, "name"); ok {
|
||||
if skipToolMarkupIgnorables(trimmed, next) == len(trimmed) {
|
||||
return "name"
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// quoteCanonicalXMLAttrValue escapes raw for use inside a double-quoted XML
// attribute value by replacing every double quote with the quot character
// entity. No other character is altered.
func quoteCanonicalXMLAttrValue(raw string) string {
	// The ampersand is spelled as \x26 so the entity text survives tooling
	// that decodes HTML entities in source; a decoded entity here would turn
	// the replacement into a bare quote and break the literal.
	const quotEntity = "\x26quot;"
	return strings.ReplaceAll(raw, `"`, quotEntity)
}
|
||||
|
||||
func removeToolMarkupIgnorables(raw string) string {
|
||||
if raw == "" {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
b.Grow(len(raw))
|
||||
for i := 0; i < len(raw); {
|
||||
if ignorableLen := toolMarkupIgnorableLenAt(raw, i); ignorableLen > 0 {
|
||||
i += ignorableLen
|
||||
continue
|
||||
}
|
||||
r, size := utf8.DecodeRuneInString(raw[i:])
|
||||
if size <= 0 {
|
||||
b.WriteByte(raw[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
b.WriteRune(r)
|
||||
i += size
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func skipToolMarkupIgnorables(text string, idx int) int {
|
||||
for idx < len(text) {
|
||||
if ignorableLen := toolMarkupIgnorableLenAt(text, idx); ignorableLen > 0 {
|
||||
idx += ignorableLen
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
// toolMarkupIgnorableLenAt returns the byte length of the rune at idx when it
// is ignorable inside tool markup, and 0 otherwise. Ignorable runes are
// Unicode format characters (category Cf) and control characters that are not
// whitespace. Out-of-range offsets yield 0.
func toolMarkupIgnorableLenAt(text string, idx int) int {
	if idx < 0 || idx >= len(text) {
		return 0
	}
	r, size := utf8.DecodeRuneInString(text[idx:])
	if size <= 0 {
		return 0
	}
	isFormat := unicode.Is(unicode.Cf, r)
	isNonSpaceControl := unicode.IsControl(r) && !unicode.IsSpace(r)
	if isFormat || isNonSpaceControl {
		return size
	}
	return 0
}
|
||||
|
||||
func toolMarkupEqualsLenAt(text string, idx int) int {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '=':
|
||||
return 1
|
||||
case strings.HasPrefix(text[idx:], "="):
|
||||
return len("=")
|
||||
case strings.HasPrefix(text[idx:], "﹦"):
|
||||
return len("﹦")
|
||||
case strings.HasPrefix(text[idx:], "꞊"):
|
||||
return len("꞊")
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func toolMarkupDashLenAt(text string, idx int) int {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '-':
|
||||
return 1
|
||||
case strings.HasPrefix(text[idx:], "‐"):
|
||||
return len("‐")
|
||||
case strings.HasPrefix(text[idx:], "‑"):
|
||||
return len("‑")
|
||||
case strings.HasPrefix(text[idx:], "‒"):
|
||||
return len("‒")
|
||||
case strings.HasPrefix(text[idx:], "–"):
|
||||
return len("–")
|
||||
case strings.HasPrefix(text[idx:], "—"):
|
||||
return len("—")
|
||||
case strings.HasPrefix(text[idx:], "―"):
|
||||
return len("―")
|
||||
case strings.HasPrefix(text[idx:], "−"):
|
||||
return len("−")
|
||||
case strings.HasPrefix(text[idx:], "﹣"):
|
||||
return len("﹣")
|
||||
case strings.HasPrefix(text[idx:], "-"):
|
||||
return len("-")
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func toolMarkupUnderscoreLenAt(text string, idx int) int {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '_':
|
||||
return 1
|
||||
case strings.HasPrefix(text[idx:], "_"):
|
||||
return len("_")
|
||||
case strings.HasPrefix(text[idx:], "﹍"):
|
||||
return len("﹍")
|
||||
case strings.HasPrefix(text[idx:], "﹎"):
|
||||
return len("﹎")
|
||||
case strings.HasPrefix(text[idx:], "﹏"):
|
||||
return len("﹏")
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func consumeToolKeyword(text string, idx int, keyword string) (int, bool) {
|
||||
next := idx
|
||||
for i := 0; i < len(keyword); i++ {
|
||||
next = skipToolMarkupIgnorables(text, next)
|
||||
if next >= len(text) {
|
||||
return idx, false
|
||||
}
|
||||
target := asciiLower(keyword[i])
|
||||
switch target {
|
||||
case '_':
|
||||
if underscoreLen := toolMarkupUnderscoreLenAt(text, next); underscoreLen > 0 {
|
||||
next += underscoreLen
|
||||
continue
|
||||
}
|
||||
return idx, false
|
||||
case '-':
|
||||
if dashLen := toolMarkupDashLenAt(text, next); dashLen > 0 {
|
||||
next += dashLen
|
||||
continue
|
||||
}
|
||||
return idx, false
|
||||
default:
|
||||
r, size := utf8.DecodeRuneInString(text[next:])
|
||||
if size <= 0 {
|
||||
return idx, false
|
||||
}
|
||||
folded, ok := foldToolKeywordRune(r)
|
||||
if !ok || folded != target {
|
||||
return idx, false
|
||||
}
|
||||
next += size
|
||||
}
|
||||
}
|
||||
return next, true
|
||||
}
|
||||
|
||||
// foldToolKeywordRune folds r to the lowercase ASCII letter it represents or
// imitates, for the letters that occur in tool keywords
// (a c d e i k l m n o p r s t v). Fullwidth Latin letters, ASCII letters of
// either case, and a fixed set of Cyrillic, Greek, Armenian and Roman-numeral
// look-alikes are accepted. It returns (0, false) for every other rune.
func foldToolKeywordRune(r rune) (byte, bool) {
	// Fullwidth Latin letters map straight onto ASCII. The bounds are written
	// as escapes so width-normalizing tooling cannot fold them to ASCII, which
	// would turn these mappings into no-ops.
	switch {
	case r >= '\uFF21' && r <= '\uFF3A': // fullwidth A..Z
		r = r - '\uFF21' + 'A'
	case r >= '\uFF41' && r <= '\uFF5A': // fullwidth a..z
		r = r - '\uFF41' + 'a'
	}

	// Lowercasing first means only lowercase forms need to be listed below.
	r = unicode.ToLower(r)
	switch r {
	case 'a', 'c', 'd', 'e', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'v':
		return byte(r), true
	case '\u0430', '\u03B1': // Cyrillic a, Greek alpha
		return 'a', true
	case '\u0441', '\u03F2': // Cyrillic es, Greek lunate sigma
		return 'c', true
	case '\u0501', '\u217E': // Cyrillic komi de, small roman numeral five hundred
		return 'd', true
	case '\u0435', '\u03B5': // Cyrillic ie, Greek epsilon
		return 'e', true
	case '\u0456', '\u03B9', '\u0131': // Cyrillic i, Greek iota, dotless i
		return 'i', true
	case '\u043A', '\u03BA': // Cyrillic ka, Greek kappa
		return 'k', true
	case '\u217C': // small roman numeral fifty
		return 'l', true
	case '\u043C', '\u03BC': // Cyrillic em, Greek mu
		return 'm', true
	case '\u0578': // Armenian vo
		return 'n', true
	case '\u043E', '\u03BF': // Cyrillic o, Greek omicron
		return 'o', true
	case '\u0440', '\u03C1': // Cyrillic er, Greek rho
		return 'p', true
	case '\u0455': // Cyrillic dze
		return 's', true
	case '\u0442', '\u03C4': // Cyrillic te, Greek tau
		return 't', true
	case '\u03BD', '\u0475', '\u2174': // Greek nu, Cyrillic izhitsa, small roman numeral five
		return 'v', true
	default:
		return 0, false
	}
}
|
||||
|
||||
func toolMarkupWhitespaceLikeLenAt(text string, idx int) int {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
switch text[idx] {
|
||||
case ' ', '\t', '\n', '\r':
|
||||
return 1
|
||||
}
|
||||
if strings.HasPrefix(text[idx:], "▁") {
|
||||
return len("▁")
|
||||
}
|
||||
r, size := utf8.DecodeRuneInString(text[idx:])
|
||||
if size > 0 && unicode.IsSpace(r) {
|
||||
return size
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func consumeToolMarkupPipe(text string, idx int) (int, bool) {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx >= len(text) {
|
||||
return idx, false
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '|':
|
||||
return idx + 1, true
|
||||
case strings.HasPrefix(text[idx:], "|"):
|
||||
return idx + len("|"), true
|
||||
case strings.HasPrefix(text[idx:], "│"):
|
||||
return idx + len("│"), true
|
||||
case strings.HasPrefix(text[idx:], "∣"):
|
||||
return idx + len("∣"), true
|
||||
case strings.HasPrefix(text[idx:], "❘"):
|
||||
return idx + len("❘"), true
|
||||
case strings.HasPrefix(text[idx:], "ǀ"):
|
||||
return idx + len("ǀ"), true
|
||||
case strings.HasPrefix(text[idx:], "│"):
|
||||
return idx + len("│"), true
|
||||
default:
|
||||
return idx, false
|
||||
}
|
||||
}
|
||||
|
||||
func consumeToolMarkupClosingSlash(text string, idx int) (int, bool) {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx >= len(text) {
|
||||
return idx, false
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '/':
|
||||
return idx + 1, true
|
||||
case strings.HasPrefix(text[idx:], "/"):
|
||||
return idx + len("/"), true
|
||||
case strings.HasPrefix(text[idx:], "∕"):
|
||||
return idx + len("∕"), true
|
||||
case strings.HasPrefix(text[idx:], "⁄"):
|
||||
return idx + len("⁄"), true
|
||||
case strings.HasPrefix(text[idx:], "⧸"):
|
||||
return idx + len("⧸"), true
|
||||
default:
|
||||
return idx, false
|
||||
}
|
||||
}
|
||||
|
||||
func xmlTagStartDelimiterLenAt(text string, idx int) int {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '<':
|
||||
return 1
|
||||
case strings.HasPrefix(text[idx:], "<"):
|
||||
return len("<")
|
||||
case strings.HasPrefix(text[idx:], "﹤"):
|
||||
return len("﹤")
|
||||
case strings.HasPrefix(text[idx:], "〈"):
|
||||
return len("〈")
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func xmlTagEndDelimiterLenAt(text string, idx int) int {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '>':
|
||||
return 1
|
||||
case strings.HasPrefix(text[idx:], ">"):
|
||||
return len(">")
|
||||
case strings.HasPrefix(text[idx:], "﹥"):
|
||||
return len("﹥")
|
||||
case strings.HasPrefix(text[idx:], "〉"):
|
||||
return len("〉")
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// xmlTagEndDelimiterLenEndingAt reports the byte length of the tag-closing
// delimiter whose LAST byte sits at offset end: 1 for an ASCII ">", 3 for a
// fullwidth greater-than sign, and 0 when neither ends there or end is out of
// range.
func xmlTagEndDelimiterLenEndingAt(text string, end int) int {
	if end < 0 || end >= len(text) {
		return 0
	}
	if text[end] == '>' {
		return 1
	}
	// Written as an escape so width-normalizing tooling cannot fold it into a
	// redundant repeat of the ASCII check above.
	const fullwidthGT = "\uFF1E"
	if strings.HasSuffix(text[:end+1], fullwidthGT) {
		return len(fullwidthGT)
	}
	return 0
}
|
||||
|
||||
func xmlQuotePairAt(text string, idx int) (string, int) {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return "", 0
|
||||
}
|
||||
switch {
|
||||
case text[idx] == '"':
|
||||
return `"`, 1
|
||||
case text[idx] == '\'':
|
||||
return `'`, 1
|
||||
case strings.HasPrefix(text[idx:], "“"):
|
||||
return "”", len("“")
|
||||
case strings.HasPrefix(text[idx:], "‘"):
|
||||
return "’", len("‘")
|
||||
case strings.HasPrefix(text[idx:], """):
|
||||
return """, len(""")
|
||||
case strings.HasPrefix(text[idx:], "'"):
|
||||
return "'", len("'")
|
||||
case strings.HasPrefix(text[idx:], "„"):
|
||||
return "”", len("„")
|
||||
case strings.HasPrefix(text[idx:], "‟"):
|
||||
return "”", len("‟")
|
||||
default:
|
||||
return "", 0
|
||||
}
|
||||
}
|
||||
|
||||
// xmlQuoteCloseDelimiterLenAt returns len(quote) when text has the closing
// delimiter quote exactly at offset idx, and 0 otherwise (including when quote
// is empty or idx is out of range).
func xmlQuoteCloseDelimiterLenAt(text string, idx int, quote string) int {
	inRange := idx >= 0 && idx < len(text)
	if quote != "" && inRange && strings.HasPrefix(text[idx:], quote) {
		return len(quote)
	}
	return 0
}
|
||||
|
||||
func hasRepairableXMLToolCallsWrapper(text string) bool {
|
||||
if strings.TrimSpace(text) == "" {
|
||||
return false
|
||||
}
|
||||
if strings.Contains(strings.ToLower(text), "<tool_calls") {
|
||||
return false
|
||||
}
|
||||
closeMatches := xmlToolCallsClosePattern.FindAllStringIndex(text, -1)
|
||||
if len(closeMatches) == 0 {
|
||||
return false
|
||||
}
|
||||
invokeLoc := xmlInvokeStartPattern.FindStringIndex(text)
|
||||
if invokeLoc == nil {
|
||||
return false
|
||||
}
|
||||
closeLoc := closeMatches[len(closeMatches)-1]
|
||||
return invokeLoc[0] < closeLoc[0]
|
||||
}
|
||||
|
||||
func toolCDATAOpenLenAt(text string, idx int) int {
|
||||
start := skipToolMarkupIgnorables(text, idx)
|
||||
ltLen := xmlTagStartDelimiterLenAt(text, start)
|
||||
if ltLen == 0 {
|
||||
return 0
|
||||
}
|
||||
pos := start + ltLen
|
||||
for skipped := 0; skipped <= 4 && pos < len(text); skipped++ {
|
||||
pos = skipToolMarkupIgnorables(text, pos)
|
||||
if pos >= len(text) {
|
||||
return 0
|
||||
}
|
||||
if text[pos] == '[' {
|
||||
pos++
|
||||
next, ok := consumeToolKeyword(text, pos, "cdata")
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
pos = skipToolMarkupIgnorables(text, next)
|
||||
if pos >= len(text) || text[pos] != '[' {
|
||||
return 0
|
||||
}
|
||||
pos++
|
||||
return pos - idx
|
||||
}
|
||||
r, size := utf8.DecodeRuneInString(text[pos:])
|
||||
if size <= 0 || !isToolMarkupSeparator(r) {
|
||||
return 0
|
||||
}
|
||||
pos += size
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func indexToolCDATAOpen(text string, start int) int {
|
||||
for i := maxInt(start, 0); i < len(text); i++ {
|
||||
if toolCDATAOpenLenAt(text, i) > 0 {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func findTrailingToolCDATACloseStart(text string) int {
|
||||
for i := len(text) - 1; i >= 0; i-- {
|
||||
if closeLen := toolCDATACloseLenAt(text, i); closeLen > 0 && i+closeLen == len(text) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
@@ -2,18 +2,18 @@ package toolcall
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func normalizeDSMLToolCallMarkup(text string) (string, bool) {
|
||||
if text == "" {
|
||||
return "", true
|
||||
}
|
||||
hasAliasLikeMarkup, _ := ContainsToolMarkupSyntaxOutsideIgnored(text)
|
||||
if !hasAliasLikeMarkup {
|
||||
return text, true
|
||||
canonicalized := canonicalizeToolCallCandidateSpans(text)
|
||||
hasDSMLLikeMarkup, hasCanonicalMarkup := ContainsToolMarkupSyntaxOutsideIgnored(canonicalized)
|
||||
if !hasDSMLLikeMarkup && !hasCanonicalMarkup {
|
||||
return canonicalized, true
|
||||
}
|
||||
return rewriteDSMLToolMarkupOutsideIgnored(text), true
|
||||
return rewriteDSMLToolMarkupOutsideIgnored(canonicalized), true
|
||||
}
|
||||
|
||||
func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
@@ -39,76 +39,19 @@ func rewriteDSMLToolMarkupOutsideIgnored(text string) string {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if tag.DSMLLike {
|
||||
b.WriteByte('<')
|
||||
if tag.Closing {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
b.WriteString(tag.Name)
|
||||
tail := normalizeToolMarkupTagTailForXML(text[tag.NameEnd : tag.End+1])
|
||||
b.WriteString(tail)
|
||||
if !strings.HasSuffix(tail, ">") {
|
||||
b.WriteByte('>')
|
||||
}
|
||||
i = tag.End + 1
|
||||
continue
|
||||
b.WriteByte('<')
|
||||
if tag.Closing {
|
||||
b.WriteByte('/')
|
||||
}
|
||||
b.WriteString(tag.Name)
|
||||
if delimLen := xmlTagEndDelimiterLenEndingAt(text, tag.End); delimLen > 0 {
|
||||
b.WriteString(text[tag.NameEnd : tag.End+1-delimLen])
|
||||
b.WriteByte('>')
|
||||
} else {
|
||||
b.WriteString(text[tag.NameEnd : tag.End+1])
|
||||
b.WriteByte('>')
|
||||
}
|
||||
b.WriteString(text[tag.Start : tag.End+1])
|
||||
i = tag.End + 1
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func normalizeToolMarkupTagTailForXML(tail string) string {
|
||||
if tail == "" {
|
||||
return ""
|
||||
}
|
||||
var b strings.Builder
|
||||
b.Grow(len(tail))
|
||||
quote := rune(0)
|
||||
for i := 0; i < len(tail); {
|
||||
r, size := utf8.DecodeRuneInString(tail[i:])
|
||||
if r == utf8.RuneError && size == 1 {
|
||||
b.WriteByte(tail[i])
|
||||
i++
|
||||
continue
|
||||
}
|
||||
ch := normalizeFullwidthASCII(r)
|
||||
if quote != 0 {
|
||||
b.WriteRune(ch)
|
||||
if ch == quote {
|
||||
quote = 0
|
||||
}
|
||||
i += size
|
||||
continue
|
||||
}
|
||||
switch ch {
|
||||
case '"', '\'':
|
||||
quote = ch
|
||||
b.WriteRune(ch)
|
||||
case '|', '!':
|
||||
j := i + size
|
||||
for j < len(tail) {
|
||||
next, nextSize := utf8.DecodeRuneInString(tail[j:])
|
||||
if nextSize <= 0 {
|
||||
break
|
||||
}
|
||||
if next == ' ' || next == '\t' || next == '\r' || next == '\n' {
|
||||
j += nextSize
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
next, _ := normalizedASCIIAt(tail, j)
|
||||
if next != '>' {
|
||||
b.WriteRune(ch)
|
||||
}
|
||||
case '>', '/', '=':
|
||||
b.WriteRune(ch)
|
||||
default:
|
||||
b.WriteString(tail[i : i+size])
|
||||
}
|
||||
i += size
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
@@ -105,32 +105,18 @@ func extractRawTagValue(inner string) string {
|
||||
|
||||
func extractStandaloneCDATA(inner string) (string, bool) {
|
||||
trimmed := strings.TrimSpace(inner)
|
||||
if bodyStart, ok := matchToolCDATAOpenAt(trimmed, 0); ok {
|
||||
end := findStandaloneCDATAEnd(trimmed, bodyStart)
|
||||
if end < 0 {
|
||||
return trimmed[bodyStart:], true
|
||||
if openLen := toolCDATAOpenLenAt(trimmed, 0); openLen > 0 {
|
||||
if closeStart := findTrailingToolCDATACloseStart(trimmed); closeStart >= openLen {
|
||||
return trimmed[openLen:closeStart], true
|
||||
}
|
||||
return trimmed[bodyStart:end], true
|
||||
if end := findToolCDATAEnd(trimmed, openLen); end >= 0 {
|
||||
return trimmed[openLen:end], true
|
||||
}
|
||||
return trimmed[openLen:], true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func findStandaloneCDATAEnd(text string, from int) int {
|
||||
end := -1
|
||||
for searchFrom := from; searchFrom < len(text); {
|
||||
next := indexToolCDATAClose(text, searchFrom)
|
||||
if next < 0 {
|
||||
break
|
||||
}
|
||||
closeEnd := next + toolCDATACloseLenAt(text, next)
|
||||
if strings.TrimSpace(text[closeEnd:]) == "" {
|
||||
end = next
|
||||
}
|
||||
searchFrom = closeEnd
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
func parseJSONLiteralValue(raw string) (any, bool) {
|
||||
trimmed := strings.TrimSpace(raw)
|
||||
if trimmed == "" {
|
||||
@@ -159,24 +145,22 @@ func SanitizeLooseCDATA(text string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
const openMarker = "<![cdata["
|
||||
const closeMarker = "]]>"
|
||||
|
||||
var b strings.Builder
|
||||
b.Grow(len(text))
|
||||
changed := false
|
||||
pos := 0
|
||||
for pos < len(text) {
|
||||
start := indexASCIIFold(text, pos, openMarker)
|
||||
start := indexToolCDATAOpen(text, pos)
|
||||
if start < 0 {
|
||||
b.WriteString(text[pos:])
|
||||
break
|
||||
}
|
||||
contentStart := start + len(openMarker)
|
||||
openLen := toolCDATAOpenLenAt(text, start)
|
||||
contentStart := start + openLen
|
||||
b.WriteString(text[pos:start])
|
||||
|
||||
if endRel := indexASCIIFold(text, contentStart, closeMarker); endRel >= 0 {
|
||||
end := endRel + len(closeMarker)
|
||||
if endRel := findToolCDATAEnd(text, contentStart); endRel >= 0 {
|
||||
end := endRel + toolCDATACloseLenAt(text, endRel)
|
||||
b.WriteString(text[start:end])
|
||||
pos = end
|
||||
continue
|
||||
|
||||
@@ -53,7 +53,6 @@ func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult {
|
||||
if trimmed == "" {
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(trimmed)
|
||||
trimmed = stripFencedCodeBlocks(trimmed)
|
||||
trimmed = strings.TrimSpace(trimmed)
|
||||
if trimmed == "" {
|
||||
@@ -64,8 +63,9 @@ func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult {
|
||||
if !ok {
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(normalized) || hasRepairableXMLToolCallsWrapper(normalized)
|
||||
parsed := parseXMLToolCalls(normalized)
|
||||
if len(parsed) == 0 && strings.Contains(strings.ToLower(normalized), "<![cdata[") {
|
||||
if len(parsed) == 0 && indexToolCDATAOpen(normalized, 0) >= 0 {
|
||||
recovered := SanitizeLooseCDATA(normalized)
|
||||
if recovered != normalized {
|
||||
parsed = parseXMLToolCalls(recovered)
|
||||
@@ -154,7 +154,7 @@ func stripFencedCodeBlocks(text string) string {
|
||||
}
|
||||
|
||||
func cdataStartsBeforeFence(line string) bool {
|
||||
cdataIdx := strings.Index(strings.ToLower(line), "<![cdata[")
|
||||
cdataIdx := indexToolCDATAOpen(line, 0)
|
||||
if cdataIdx < 0 {
|
||||
return false
|
||||
}
|
||||
@@ -183,11 +183,14 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
|
||||
fenceMarker := cdataFenceMarker
|
||||
lineForFence := line
|
||||
if !state {
|
||||
start := indexASCIIFold(line, pos, "<![cdata[")
|
||||
start := indexToolCDATAOpen(line, pos)
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos = start + len("<![cdata[")
|
||||
pos = start + toolCDATAOpenLenAt(line, start)
|
||||
if pos > len(line) {
|
||||
pos = len(line)
|
||||
}
|
||||
state = true
|
||||
lineForFence = line[pos:]
|
||||
}
|
||||
@@ -205,22 +208,36 @@ func updateCDATAStateForStrip(inCDATA bool, cdataFenceMarker, line string) (bool
|
||||
}
|
||||
|
||||
for pos < len(line) {
|
||||
endPos := indexASCIIFold(line, pos, "]]>")
|
||||
endPos := -1
|
||||
closeLen := 0
|
||||
for search := pos; search < len(line); search++ {
|
||||
if foundLen := toolCDATACloseLenAt(line, search); foundLen > 0 {
|
||||
endPos = search
|
||||
closeLen = foundLen
|
||||
break
|
||||
}
|
||||
}
|
||||
if endPos < 0 {
|
||||
return true, fenceMarker
|
||||
}
|
||||
pos = endPos + len("]]>")
|
||||
pos = endPos + closeLen
|
||||
if pos > len(line) {
|
||||
pos = len(line)
|
||||
}
|
||||
if fenceMarker != "" {
|
||||
continue
|
||||
}
|
||||
if cdataEndLooksStructural(line, pos) || strings.TrimSpace(line[pos:]) == "" {
|
||||
state = false
|
||||
for pos < len(line) {
|
||||
start := indexASCIIFold(line, pos, "<![cdata[")
|
||||
start := indexToolCDATAOpen(line, pos)
|
||||
if start < 0 {
|
||||
return false, ""
|
||||
}
|
||||
pos = start + len("<![cdata[")
|
||||
pos = start + toolCDATAOpenLenAt(line, start)
|
||||
if pos > len(line) {
|
||||
pos = len(line)
|
||||
}
|
||||
state = true
|
||||
trimmedTail := strings.TrimLeft(line[pos:], " \t")
|
||||
if marker, ok := parseFenceOpen(trimmedTail); ok {
|
||||
|
||||
@@ -229,27 +229,13 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked
|
||||
}
|
||||
|
||||
func matchToolCDATAOpenAt(text string, start int) (int, bool) {
|
||||
i, ok := consumeToolMarkupLessThan(text, start)
|
||||
if !ok {
|
||||
return start, false
|
||||
}
|
||||
for skipped := 0; skipped <= 4 && i < len(text); skipped++ {
|
||||
if cdataLen, ok := matchASCIIPrefixFoldAt(text, i, "[cdata["); ok {
|
||||
return i + cdataLen, true
|
||||
}
|
||||
r, size := utf8.DecodeRuneInString(text[i:])
|
||||
if size <= 0 || !isToolCDATAOpenSeparator(r) {
|
||||
break
|
||||
}
|
||||
i += size
|
||||
openLen := toolCDATAOpenLenAt(text, start)
|
||||
if openLen > 0 {
|
||||
return start + openLen, true
|
||||
}
|
||||
return start, false
|
||||
}
|
||||
|
||||
func isToolCDATAOpenSeparator(r rune) bool {
|
||||
return isToolMarkupSeparator(r)
|
||||
}
|
||||
|
||||
func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool {
|
||||
_, ok := matchASCIIPrefixFoldAt(text, start, prefix)
|
||||
return ok
|
||||
@@ -280,23 +266,6 @@ func asciiLower(b byte) byte {
|
||||
return b
|
||||
}
|
||||
|
||||
// indexASCIIFold returns the absolute byte position in s where substr (ASCII-only) is
|
||||
// found case-insensitively, scanning forward from start. Returns -1 if not found.
|
||||
// Unlike strings.Index on a lowercased copy, this does not allocate or risk byte-length
|
||||
// mismatch when non-ASCII runes change width under case folding.
|
||||
func indexASCIIFold(s string, start int, substr string) int {
|
||||
if start < 0 || len(s)-start < len(substr) {
|
||||
return -1
|
||||
}
|
||||
end := len(s) - len(substr) + 1
|
||||
for i := start; i < end; i++ {
|
||||
if hasASCIIPrefixFoldAt(s, i, substr) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func findToolCDATAEnd(text string, from int) int {
|
||||
if from < 0 || from >= len(text) {
|
||||
return -1
|
||||
@@ -342,13 +311,19 @@ func indexToolCDATAClose(text string, from int) int {
|
||||
}
|
||||
|
||||
func toolCDATACloseLenAt(text string, idx int) int {
|
||||
if idx < 0 || idx >= len(text) {
|
||||
return 0
|
||||
}
|
||||
if strings.HasPrefix(text[idx:], "]]〉") {
|
||||
return len("]]〉")
|
||||
}
|
||||
if strings.HasPrefix(text[idx:], "]]>") {
|
||||
return len("]]>")
|
||||
}
|
||||
return len("]]>")
|
||||
if strings.HasPrefix(text[idx:], "]]>") {
|
||||
return len("]]>")
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func cdataEndLooksStructural(text string, after int) bool {
|
||||
|
||||
@@ -2,6 +2,7 @@ package toolcall
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
@@ -148,9 +149,9 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) {
|
||||
i = next
|
||||
}
|
||||
closing := false
|
||||
if i < len(text) && text[i] == '/' {
|
||||
if next, ok := consumeToolMarkupClosingSlash(text, i); ok {
|
||||
closing = true
|
||||
i++
|
||||
i = next
|
||||
}
|
||||
prefixStart := i
|
||||
i, dsmlLike := consumeToolMarkupNamePrefix(text, i)
|
||||
@@ -252,17 +253,18 @@ func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) {
|
||||
}
|
||||
|
||||
func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if next, ok := consumeToolMarkupSeparator(text, idx); ok {
|
||||
return next, true
|
||||
}
|
||||
if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') {
|
||||
return idx + 1, true
|
||||
if spacingLen := toolMarkupWhitespaceLikeLenAt(text, idx); spacingLen > 0 {
|
||||
return idx + spacingLen, true
|
||||
}
|
||||
if hasASCIIPrefixFoldAt(text, idx, "dsml") {
|
||||
dsmlLen, _ := matchASCIIPrefixFoldAt(text, idx, "dsml")
|
||||
next := idx + dsmlLen
|
||||
if sep, size := normalizedASCIIAt(text, next); sep == '-' || sep == '_' {
|
||||
next += size
|
||||
if next, ok := consumeToolKeyword(text, idx, "dsml"); ok {
|
||||
if dashLen := toolMarkupDashLenAt(text, next); dashLen > 0 {
|
||||
next += dashLen
|
||||
} else if underscoreLen := toolMarkupUnderscoreLenAt(text, next); underscoreLen > 0 {
|
||||
next += underscoreLen
|
||||
}
|
||||
return next, true
|
||||
}
|
||||
@@ -353,8 +355,8 @@ func matchToolMarkupName(text string, start int, dsmlLike bool) (string, int) {
|
||||
if name.dsmlOnly && !dsmlLike {
|
||||
continue
|
||||
}
|
||||
if nameLen, ok := matchASCIIPrefixFoldAt(text, start, name.raw); ok {
|
||||
return name.canonical, nameLen
|
||||
if next, ok := consumeToolKeyword(text, start, name.raw); ok {
|
||||
return name.canonical, next - start
|
||||
}
|
||||
}
|
||||
return "", 0
|
||||
@@ -366,14 +368,14 @@ func matchToolMarkupNameAfterArbitraryPrefix(text string, start int) (string, in
|
||||
return "", 0, 0, false
|
||||
}
|
||||
for _, name := range toolMarkupNames {
|
||||
nameLen, ok := matchASCIIPrefixFoldAt(text, idx, name.raw)
|
||||
next, ok := consumeToolKeyword(text, idx, name.raw)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if !toolMarkupPrefixAllowsLocalNameAt(text, start, idx) {
|
||||
continue
|
||||
}
|
||||
return name.canonical, idx, nameLen, true
|
||||
return name.canonical, idx, next - idx, true
|
||||
}
|
||||
_, size := utf8.DecodeRuneInString(text[idx:])
|
||||
if size <= 0 {
|
||||
@@ -477,6 +479,7 @@ func isToolMarkupTagTerminator(text string, idx int) bool {
|
||||
}
|
||||
|
||||
func consumeToolMarkupSeparator(text string, idx int) (int, bool) {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx >= len(text) {
|
||||
return idx, false
|
||||
}
|
||||
@@ -495,6 +498,9 @@ func isToolMarkupSeparator(r rune) bool {
|
||||
if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
|
||||
return false
|
||||
}
|
||||
if r == '▁' || unicode.IsSpace(r) {
|
||||
return false
|
||||
}
|
||||
if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') {
|
||||
return false
|
||||
}
|
||||
@@ -502,6 +508,7 @@ func isToolMarkupSeparator(r rune) bool {
|
||||
}
|
||||
|
||||
func consumeToolMarkupLessThan(text string, idx int) (int, bool) {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
ch, size := normalizedASCIIAt(text, idx)
|
||||
if size <= 0 || ch != '<' {
|
||||
return idx, false
|
||||
@@ -510,16 +517,17 @@ func consumeToolMarkupLessThan(text string, idx int) (int, bool) {
|
||||
}
|
||||
|
||||
func hasToolMarkupBoundary(text string, idx int) bool {
|
||||
idx = skipToolMarkupIgnorables(text, idx)
|
||||
if idx >= len(text) {
|
||||
return true
|
||||
}
|
||||
switch text[idx] {
|
||||
case ' ', '\t', '\n', '\r', '>', '/':
|
||||
if toolMarkupWhitespaceLikeLenAt(text, idx) > 0 {
|
||||
return true
|
||||
default:
|
||||
r, _ := utf8.DecodeRuneInString(text[idx:])
|
||||
return normalizeFullwidthASCII(r) == '>'
|
||||
}
|
||||
if _, ok := consumeToolMarkupClosingSlash(text, idx); ok {
|
||||
return true
|
||||
}
|
||||
return xmlTagEndDelimiterLenAt(text, idx) > 0
|
||||
}
|
||||
|
||||
func normalizedASCIIAt(text string, idx int) (byte, int) {
|
||||
|
||||
@@ -1201,3 +1201,108 @@ func TestFindMatchingToolMarkupCloseBoundaryConditions(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithFullwidthClosingSlash(t *testing.T) {
|
||||
text := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 DSML call with fullwidth closing slash, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "execute_code" || calls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected fullwidth-closing-slash DSML parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithSentencePieceSeparatorAndFullwidthGT(t *testing.T) {
|
||||
text := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 DSML call with sentencepiece separator and fullwidth terminator, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "execute_code" || calls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected sentencepiece/fullwidth-terminator DSML parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsDSMLShellWithFullwidthLTUnicodeSpaceAndFullwidthAttributes(t *testing.T) {
|
||||
text := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 DSML call with fullwidth opening delimiter and Unicode attribute confusables, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "execute_code" || calls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected fullwidth-opening/Unicode-attr DSML parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsCanonicalizesConfusableCandidateShellOnly(t *testing.T) {
|
||||
text := "<|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls>" +
|
||||
"<|\ufeffDSML|inv\u03bfk\u0435 n\u0430me\uff1d\u201cexecute_code\u201d>" +
|
||||
"<|\u200bDSML|par\u0430meter n\u0430me\uff1d\u201ccode\u201d><![\ufeff\u0421D\u0410T\u0410[print(\"hi\")]]>" +
|
||||
"</|\u200bDSML|par\u0430meter></|\u200bDSML|inv\u03bfk\u0435></|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls>"
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one confusable-shell call, got %#v", calls)
|
||||
}
|
||||
if calls[0].Name != "execute_code" || calls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected confusable-shell parse result: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsKeepsConfusableMarkupInsideCDATAAsText(t *testing.T) {
|
||||
value := "<inv\u03bfke>literal</inv\u03bfke>"
|
||||
text := "<tool_calls><invoke name=\"Write\"><parameter name=\"description\"><![\u200b\u0421D\u0410T\u0410[" + value + "]]></parameter></invoke></tool_calls>"
|
||||
calls := ParseToolCalls(text, []string{"Write"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected one Write call, got %#v", calls)
|
||||
}
|
||||
if got, _ := calls[0].Input["description"].(string); got != value {
|
||||
t.Fatalf("expected confusable markup example inside CDATA to stay raw, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsRepairsMissingOpeningWrapperWithConfusableShell(t *testing.T) {
|
||||
text := "Before tool call\n" +
|
||||
"<inv\u03bfk\u0435 n\u0430me=\"read_file\"><par\u0430meter n\u0430me=\"path\"><![\u200b\u0421D\u0410T\u0410[README.md]]></par\u0430meter></inv\u03bfk\u0435>\n" +
|
||||
"</to\u03bfl_calls>\n" +
|
||||
"after"
|
||||
res := ParseToolCallsDetailed(text, []string{"read_file"})
|
||||
if len(res.Calls) != 1 {
|
||||
t.Fatalf("expected repaired confusable wrapper to parse one call, got %#v", res)
|
||||
}
|
||||
if got, _ := res.Calls[0].Input["path"].(string); got != "README.md" {
|
||||
t.Fatalf("expected repaired confusable wrapper to preserve args, got %#v", res.Calls[0].Input)
|
||||
}
|
||||
if !res.SawToolCallSyntax {
|
||||
t.Fatalf("expected repaired confusable wrapper to mark tool syntax seen, got %#v", res)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsDoesNotAcceptConfusableNearMissTagName(t *testing.T) {
|
||||
text := "<tool_calls><inv\u03bfker name=\"execute_code\"><parameter name=\"code\">pwd</parameter></inv\u03bfker></tool_calls>"
|
||||
calls := ParseToolCalls(text, []string{"execute_code"})
|
||||
if len(calls) != 0 {
|
||||
t.Fatalf("expected confusable near-miss tag name to remain non-executable, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindMatchingToolMarkupCloseBoundaryConditionsSupportsConfusableDelimiters(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
text string
|
||||
open ToolMarkupTag
|
||||
wantOk bool
|
||||
}{
|
||||
{"valid_fullwidth_closing_slash", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: 11}, true},
|
||||
{"valid_fullwidth_opening_delimiter", "<tool_calls></tool_calls>", ToolMarkupTag{Name: "tool_calls", End: len("<tool_calls>") - 1}, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, ok := FindMatchingToolMarkupClose(tt.text, tt.open)
|
||||
if ok != tt.wantOk {
|
||||
t.Errorf("FindMatchingToolMarkupClose(%q, %+v) ok = %v, want %v", tt.text, tt.open, ok, tt.wantOk)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,6 +141,9 @@ func shouldKeepBareInvokeCapture(captured string) bool {
|
||||
if invokeCloseTag, ok := findFirstToolMarkupTagByNameFrom(captured, startEnd+1, "invoke", true); ok {
|
||||
return strings.TrimSpace(captured[invokeCloseTag.End+1:]) == ""
|
||||
}
|
||||
if paramTag, ok := findFirstToolMarkupTagByName(body, 0, "parameter"); ok && strings.TrimSpace(body[:paramTag.Start]) == "" {
|
||||
return true
|
||||
}
|
||||
|
||||
trimmedLower := strings.ToLower(trimmedBody)
|
||||
return strings.HasPrefix(trimmedLower, "<parameter") ||
|
||||
@@ -149,14 +152,14 @@ func shouldKeepBareInvokeCapture(captured string) bool {
|
||||
}
|
||||
|
||||
func findPartialXMLToolTagStart(s string) int {
|
||||
lastLT := strings.LastIndex(s, "<")
|
||||
lastLT := lastToolMarkupStartDelimiterIndex(s)
|
||||
if lastLT < 0 {
|
||||
return -1
|
||||
}
|
||||
start := includeDuplicateLeadingLessThan(s, lastLT)
|
||||
tail := s[start:]
|
||||
// If there's a '>' in the tail, the tag is closed — not partial.
|
||||
if strings.Contains(tail, ">") {
|
||||
// If there's a tag terminator in the tail, the tag is closed — not partial.
|
||||
if strings.Contains(tail, ">") || strings.Contains(tail, ">") {
|
||||
return -1
|
||||
}
|
||||
if toolcall.IsPartialToolMarkupTagPrefix(tail) {
|
||||
@@ -164,3 +167,12 @@ func findPartialXMLToolTagStart(s string) int {
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// lastToolMarkupStartDelimiterIndex returns the byte offset of the last
// tag-opening delimiter in s, treating both ASCII '<' and fullwidth '＜'
// (U+FF1C) as delimiters. It returns -1 when s contains neither.
func lastToolMarkupStartDelimiterIndex(s string) int {
	asciiIdx := strings.LastIndex(s, "<")
	// The fullwidth form is written as an escape so it cannot silently collapse
	// into ASCII '<', which would make this search a duplicate of the one above
	// and leave fullwidth delimiters undetected.
	fullwidthIdx := strings.LastIndex(s, "\uff1c")
	if asciiIdx > fullwidthIdx {
		return asciiIdx
	}
	return fullwidthIdx
}
|
||||
|
||||
@@ -1335,3 +1335,166 @@ func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) {
|
||||
t.Fatalf("unexpected ideographic-comma DSML drift call: %#v", calls[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthClosingSlashAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML|tool_calls><|DSML|invoke name="execute_code"><|DSML|parameter name="code"><![CDATA[print("hi")]]></|DSML|parameter></|DSML|invoke></DSML|tool_calls> sao cụm này lại đc trả là 1 message`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from fullwidth closing slash block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from fullwidth closing slash block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " sao cụm này lại đc trả là 1 message" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesSentencePieceSeparatorAndFullwidthTerminator(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML▁tool_calls|><|DSML▁invoke▁name="execute_code"><|DSML▁parameter▁name="code"><![CDATA[print("hi")]]></|DSML▁parameter></|DSML▁invoke></|DSML▁tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from sentencepiece/fullwidth-terminator block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from sentencepiece/fullwidth-terminator block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " suffix" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesFullwidthOpeningDelimiterAndUnicodeAttributes(t *testing.T) {
|
||||
var state State
|
||||
chunk := `<|DSML tool_calls><|DSML invoke name=“execute_code”><|DSML parameter name=“code”><![CDATA[print("hi")]]></DSML|parameter></DSML|invoke></DSML|tool_calls> suffix`
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from fullwidth-opening/Unicode-attr block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from fullwidth-opening/Unicode-attr block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " suffix" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveParsesConfusableCandidateShellAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunk := "<|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls><|\ufeffDSML|inv\u03bfk\u0435 n\u0430me\uff1d\u201cexecute_code\u201d><|\u200bDSML|par\u0430meter n\u0430me\uff1d\u201ccode\u201d><![\ufeff\u0421D\u0410T\u0410[print(\"hi\")]]></|\u200bDSML|par\u0430meter></|\u200bDSML|inv\u03bfk\u0435></|\u200b\uff24\u0405\u039cL|to\u03bfl\uff3fcalls> suffix"
|
||||
events := ProcessChunk(&state, chunk, []string{"execute_code"})
|
||||
events = append(events, Flush(&state, []string{"execute_code"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected exactly one parsed tool call from confusable-shell block, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "execute_code" || parsed.ToolCalls[0].Input["code"] != `print("hi")` {
|
||||
t.Fatalf("unexpected parsed call from confusable-shell block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " suffix" {
|
||||
t.Fatalf("expected suffix text to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveRepairsConfusableMissingWrapperAndKeepsSuffixText(t *testing.T) {
|
||||
var state State
|
||||
chunks := []string{
|
||||
"<inv\u03bfk\u0435 n\u0430me=\"read_file\">\n",
|
||||
" <par\u0430meter n\u0430me=\"path\"><![\u200b\u0421D\u0410T\u0410[README.md]]></par\u0430meter>\n",
|
||||
"</inv\u03bfk\u0435>\n",
|
||||
"</to\u03bfl_calls> trailing prose",
|
||||
}
|
||||
var events []Event
|
||||
for _, c := range chunks {
|
||||
events = append(events, ProcessChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, Flush(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
var parsed Event
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
if len(evt.ToolCalls) > 0 {
|
||||
parsed = evt
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 1 {
|
||||
t.Fatalf("expected repaired confusable missing-wrapper stream to emit one tool call, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if parsed.ToolCalls[0].Name != "read_file" || parsed.ToolCalls[0].Input["path"] != "README.md" {
|
||||
t.Fatalf("unexpected parsed call from repaired confusable missing-wrapper block: %#v", parsed.ToolCalls[0])
|
||||
}
|
||||
if got := textContent.String(); got != " trailing prose" {
|
||||
t.Fatalf("expected suffix prose to be preserved, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveKeepsConfusableNearMissWrapperAsText(t *testing.T) {
|
||||
var state State
|
||||
chunk := "<to\u03bfl_callz><inv\u03bfke name=\"read_file\"><parameter name=\"path\">README.md</parameter></inv\u03bfke></to\u03bfl_callz>"
|
||||
events := ProcessChunk(&state, chunk, []string{"read_file"})
|
||||
events = append(events, Flush(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent strings.Builder
|
||||
toolCalls := 0
|
||||
for _, evt := range events {
|
||||
textContent.WriteString(evt.Content)
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
if toolCalls != 0 {
|
||||
t.Fatalf("expected confusable near-miss wrapper to remain text, got %d events=%#v", toolCalls, events)
|
||||
}
|
||||
if got := textContent.String(); got != chunk {
|
||||
t.Fatalf("expected confusable near-miss wrapper to pass through unchanged, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user