工具优化

This commit is contained in:
CJACK
2026-04-26 09:44:59 +08:00
parent 0bfddf7943
commit 0fb1bc6611
9 changed files with 631 additions and 48 deletions

View File

@@ -43,6 +43,9 @@ func parseMarkupKVObject(text string) map[string]any {
}
func parseMarkupValue(inner string) any {
if value, ok := extractStandaloneCDATA(inner); ok {
return value
}
value := strings.TrimSpace(extractRawTagValue(inner))
if value == "" {
return ""
@@ -89,8 +92,8 @@ func extractRawTagValue(inner string) string {
}
// 1. Check for CDATA - if present, it's the ultimate "safe" container.
if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 {
return cdataMatches[1] // Return raw content between CDATA brackets
if value, ok := extractStandaloneCDATA(trimmed); ok {
return value // Return raw content between CDATA brackets
}
// 2. If no CDATA, we still want to be robust.
@@ -102,3 +105,11 @@ func extractRawTagValue(inner string) string {
// but for KV objects we usually want the value.
return html.UnescapeString(inner)
}
// extractStandaloneCDATA trims surrounding whitespace from inner and matches
// the result against cdataPattern. When the pattern matches, the first capture
// group is returned verbatim (no unescaping, no further trimming) together
// with true. Otherwise it returns "" and false.
func extractStandaloneCDATA(inner string) (string, bool) {
	groups := cdataPattern.FindStringSubmatch(strings.TrimSpace(inner))
	if len(groups) < 2 {
		return "", false
	}
	return groups[1], true
}

View File

@@ -87,7 +87,13 @@ func stripFencedCodeBlocks(text string) string {
lines := strings.SplitAfter(text, "\n")
inFence := false
fenceMarker := ""
inCDATA := false
for _, line := range lines {
if inCDATA || cdataStartsBeforeFence(line) {
b.WriteString(line)
inCDATA = updateCDATAState(inCDATA, line)
continue
}
trimmed := strings.TrimLeft(line, " \t")
if !inFence {
if marker, ok := parseFenceOpen(trimmed); ok {
@@ -111,6 +117,54 @@ func stripFencedCodeBlocks(text string) string {
return b.String()
}
// cdataStartsBeforeFence reports whether line contains a CDATA opener
// ("<![CDATA[", matched case-insensitively) that begins before the first
// Markdown fence marker on the line, or on a line that has no fence marker at
// all. It returns false when the line contains no CDATA opener.
func cdataStartsBeforeFence(line string) bool {
	// Both offsets must be measured on the same string. strings.ToLower can
	// change the byte length of its input (some runes have a shorter or longer
	// lower-case encoding, and invalid UTF-8 bytes are replaced), so an offset
	// into the lowered copy is not comparable with an offset into the original
	// line. Lower-casing never adds or removes backticks or tildes, so running
	// the fence search on the lowered copy finds the same markers.
	lowered := strings.ToLower(line)
	cdataIdx := strings.Index(lowered, "<![cdata[")
	if cdataIdx < 0 {
		return false
	}
	fenceIdx := firstFenceMarkerIndex(lowered)
	return fenceIdx < 0 || cdataIdx < fenceIdx
}
// firstFenceMarkerIndex returns the byte offset of the earliest fence marker
// in line, where a marker is either three backticks or three tildes. It
// returns -1 when neither marker occurs.
func firstFenceMarkerIndex(line string) int {
	earliest := -1
	for _, marker := range [...]string{"```", "~~~"} {
		at := strings.Index(line, marker)
		if at < 0 {
			continue
		}
		if earliest < 0 || at < earliest {
			earliest = at
		}
	}
	return earliest
}
// updateCDATAState walks line and returns whether the scanner is inside a
// CDATA section once the whole line has been consumed.
//
// inCDATA is the state at the start of the line. The line is lower-cased so
// the "<![CDATA[" opener is matched case-insensitively. Openers are only
// looked for while outside a section, and "]]>" closers only while inside one,
// so several sections on one line are handled in order.
func updateCDATAState(inCDATA bool, line string) bool {
	remaining := strings.ToLower(line)
	inside := inCDATA
	for remaining != "" {
		// Look for whichever delimiter would flip the current state.
		delimiter := "<![cdata["
		if inside {
			delimiter = "]]>"
		}
		_, tail, found := strings.Cut(remaining, delimiter)
		if !found {
			// No further transition on this line: the state carries over.
			return inside
		}
		remaining = tail
		inside = !inside
	}
	return inside
}
func parseFenceOpen(line string) (string, bool) {
if len(line) < 3 {
return "", false

View File

@@ -7,19 +7,16 @@ import (
"strings"
)
// xmlToolCallsWrapperPattern matches a <tool_calls ...>...</tool_calls> element
// case-insensitively and across newlines. Group 1 is the body with surrounding
// whitespace excluded; the lazy match stops at the first closing tag.
var xmlToolCallsWrapperPattern = regexp.MustCompile(`(?is)<tool_calls\b[^>]*>\s*(.*?)\s*</tool_calls>`)
// xmlInvokePattern matches an <invoke ...>...</invoke> element. Group 1 is the
// raw attribute text of the start tag, group 2 the whitespace-trimmed body up
// to the first closing tag.
var xmlInvokePattern = regexp.MustCompile(`(?is)<invoke\b([^>]*)>\s*(.*?)\s*</invoke>`)
// xmlParameterPattern matches a <parameter ...>...</parameter> element. Group 1
// is the raw attribute text of the start tag, group 2 the whitespace-trimmed
// body up to the first closing tag.
var xmlParameterPattern = regexp.MustCompile(`(?is)<parameter\b([^>]*)>\s*(.*?)\s*</parameter>`)
// xmlAttrPattern matches one name=value attribute whose value is quoted.
// Group 1 is the attribute name, group 3 the content of a double-quoted value
// and group 4 the content of a single-quoted value.
var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`)
// xmlToolCallsClosePattern matches a closing </tool_calls> tag, case-insensitively.
var xmlToolCallsClosePattern = regexp.MustCompile(`(?is)</tool_calls>`)
// xmlInvokeStartPattern matches the beginning of an <invoke> start tag up to
// and including its quoted name attribute. Group 2 is the content of a
// double-quoted name and group 3 the content of a single-quoted name.
var xmlInvokeStartPattern = regexp.MustCompile(`(?is)<invoke\b[^>]*\bname\s*=\s*("([^"]*)"|'([^']*)')`)
func parseXMLToolCalls(text string) []ParsedToolCall {
wrappers := xmlToolCallsWrapperPattern.FindAllStringSubmatch(text, -1)
wrappers := findXMLElementBlocks(text, "tool_calls")
if len(wrappers) == 0 {
repaired := repairMissingXMLToolCallsOpeningWrapper(text)
if repaired != text {
wrappers = xmlToolCallsWrapperPattern.FindAllStringSubmatch(repaired, -1)
wrappers = findXMLElementBlocks(repaired, "tool_calls")
}
}
if len(wrappers) == 0 {
@@ -27,10 +24,7 @@ func parseXMLToolCalls(text string) []ParsedToolCall {
}
out := make([]ParsedToolCall, 0, len(wrappers))
for _, wrapper := range wrappers {
if len(wrapper) < 2 {
continue
}
for _, block := range xmlInvokePattern.FindAllStringSubmatch(wrapper[1], -1) {
for _, block := range findXMLElementBlocks(wrapper.Body, "invoke") {
call, ok := parseSingleXMLToolCall(block)
if !ok {
continue
@@ -66,17 +60,14 @@ func repairMissingXMLToolCallsOpeningWrapper(text string) string {
return text[:invokeLoc[0]] + "<tool_calls>" + text[invokeLoc[0]:closeLoc[0]] + "</tool_calls>" + text[closeLoc[1]:]
}
func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) {
if len(block) < 3 {
return ParsedToolCall{}, false
}
attrs := parseXMLTagAttributes(block[1])
func parseSingleXMLToolCall(block xmlElementBlock) (ParsedToolCall, bool) {
attrs := parseXMLTagAttributes(block.Attrs)
name := strings.TrimSpace(html.UnescapeString(attrs["name"]))
if name == "" {
return ParsedToolCall{}, false
}
inner := strings.TrimSpace(block[2])
inner := strings.TrimSpace(block.Body)
if strings.HasPrefix(inner, "{") {
var payload map[string]any
if err := json.Unmarshal([]byte(inner), &payload); err == nil {
@@ -94,16 +85,13 @@ func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) {
}
input := map[string]any{}
for _, paramMatch := range xmlParameterPattern.FindAllStringSubmatch(inner, -1) {
if len(paramMatch) < 3 {
continue
}
paramAttrs := parseXMLTagAttributes(paramMatch[1])
for _, paramMatch := range findXMLElementBlocks(inner, "parameter") {
paramAttrs := parseXMLTagAttributes(paramMatch.Attrs)
paramName := strings.TrimSpace(html.UnescapeString(paramAttrs["name"]))
if paramName == "" {
continue
}
value := parseInvokeParameterValue(paramMatch[2])
value := parseInvokeParameterValue(paramMatch.Body)
appendMarkupValue(input, paramName, value)
}
@@ -116,6 +104,168 @@ func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) {
return ParsedToolCall{Name: name, Input: input}, true
}
// xmlElementBlock describes one element located by findXMLElementBlocks.
type xmlElementBlock struct {
	// Attrs is the raw text between the tag name and the '>' that ends the
	// start tag. Body is the raw text between the start tag and its matching
	// end tag.
	Attrs, Body string
	// Start is the byte offset of the start tag's '<' in the scanned text.
	// End is the byte offset just past the '>' of the matching end tag.
	Start, End int
}
// findXMLElementBlocks returns every top-level <tag>...</tag> element found in
// text, in document order. Start tags are located with
// findXMLStartTagOutsideCDATA and paired with their end tags by
// findMatchingXMLEndTagOutsideCDATA, so elements of the same name nested
// inside a returned block stay part of that block's Body.
//
// A self-closing start tag (<tag .../>) yields a block with an empty Body
// whose End is the offset just past the tag; its Attrs has surrounding
// whitespace and the trailing '/' removed. Without this, the scanner would go
// looking for an end tag that does not exist and either swallow the following
// siblings or give up on the rest of the text.
//
// Scanning stops at the first start tag that has no matching end tag. It
// returns nil when text or tag is empty or when nothing is found.
func findXMLElementBlocks(text, tag string) []xmlElementBlock {
	if text == "" || tag == "" {
		return nil
	}
	var out []xmlElementBlock
	for pos := 0; pos < len(text); {
		start, bodyStart, attrs, ok := findXMLStartTagOutsideCDATA(text, tag, pos)
		if !ok {
			break
		}
		if isSelfClosingXMLTag(attrs) {
			// An empty-element tag is complete on its own: record it and
			// resume scanning right after it.
			out = append(out, xmlElementBlock{
				Attrs: strings.TrimSuffix(strings.TrimSpace(attrs), "/"),
				Start: start,
				End:   bodyStart,
			})
			pos = bodyStart
			continue
		}
		closeStart, closeEnd, ok := findMatchingXMLEndTagOutsideCDATA(text, tag, bodyStart)
		if !ok {
			break
		}
		out = append(out, xmlElementBlock{
			Attrs: attrs,
			Body:  text[bodyStart:closeStart],
			Start: start,
			End:   closeEnd,
		})
		pos = closeEnd
	}
	return out
}
// findXMLStartTagOutsideCDATA finds the first "<tag" start tag at or after
// byte offset from, ignoring anything that skipXMLIgnoredSection skips (CDATA
// sections and XML comments). The tag name is matched ASCII-case-insensitively
// and must be followed by a tag boundary, so "<invoker" does not match
// "invoke".
//
// On success it returns the offset of '<' (start), the offset just past the
// tag's closing '>' (bodyStart), the raw text between the tag name and that
// '>' (attrs), and ok=true. It returns -1, -1, "", false when no start tag is
// found, when the tag is never closed with '>', or when an ignored section is
// left unterminated.
func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart int, attrs string, ok bool) {
	// Fold ASCII letters only, byte for byte. strings.ToLower can change the
	// byte length of the input (for example U+212A shrinks from three bytes to
	// one, and invalid UTF-8 bytes grow into U+FFFD), which would make offsets
	// in the folded copy disagree with offsets in text and lead to wrong
	// slices or out-of-range panics.
	folded := []byte(text)
	for j, c := range folded {
		if 'A' <= c && c <= 'Z' {
			folded[j] = c + ('a' - 'A')
		}
	}
	lower := string(folded)
	target := "<" + strings.ToLower(tag)
	for i := maxInt(from, 0); i < len(text); {
		next, advanced, blocked := skipXMLIgnoredSection(lower, i)
		if blocked {
			return -1, -1, "", false
		}
		if advanced {
			i = next
			continue
		}
		if strings.HasPrefix(lower[i:], target) && hasXMLTagBoundary(text, i+len(target)) {
			end := findXMLTagEnd(text, i+len(target))
			if end < 0 {
				return -1, -1, "", false
			}
			return i, end + 1, text[i+len(target) : end], true
		}
		i++
	}
	return -1, -1, "", false
}
// findMatchingXMLEndTagOutsideCDATA finds the "</tag>" that closes an element
// whose start tag ends just before byte offset from. Anything that
// skipXMLIgnoredSection skips (CDATA sections and XML comments) is ignored.
// Nested start tags of the same name raise the nesting depth unless they are
// self-closing, and each end tag lowers it; the end tag that brings the depth
// back to zero is the match. Tag names are matched ASCII-case-insensitively.
//
// On success it returns the offset of the end tag's '<' (closeStart), the
// offset just past its '>' (closeEnd), and ok=true. It returns -1, -1, false
// when no matching end tag exists, when a tag is never closed with '>', or
// when an ignored section is left unterminated.
func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeEnd int, ok bool) {
	// Fold ASCII letters only, byte for byte. strings.ToLower can change the
	// byte length of the input (for example U+212A shrinks from three bytes to
	// one, and invalid UTF-8 bytes grow into U+FFFD), which would make offsets
	// in the folded copy disagree with offsets in text and lead to wrong
	// results or out-of-range panics.
	folded := []byte(text)
	for j, c := range folded {
		if 'A' <= c && c <= 'Z' {
			folded[j] = c + ('a' - 'A')
		}
	}
	lower := string(folded)
	openTarget := "<" + strings.ToLower(tag)
	closeTarget := "</" + strings.ToLower(tag)
	depth := 1
	for i := maxInt(from, 0); i < len(text); {
		next, advanced, blocked := skipXMLIgnoredSection(lower, i)
		if blocked {
			return -1, -1, false
		}
		if advanced {
			i = next
			continue
		}
		if strings.HasPrefix(lower[i:], closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
			end := findXMLTagEnd(text, i+len(closeTarget))
			if end < 0 {
				return -1, -1, false
			}
			depth--
			if depth == 0 {
				return i, end + 1, true
			}
			i = end + 1
			continue
		}
		if strings.HasPrefix(lower[i:], openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
			end := findXMLTagEnd(text, i+len(openTarget))
			if end < 0 {
				return -1, -1, false
			}
			// Only the nested start tag itself decides whether it is
			// self-closing; a self-closing tag opens nothing to close.
			if !isSelfClosingXMLTag(text[i:end]) {
				depth++
			}
			i = end + 1
			continue
		}
		i++
	}
	return -1, -1, false
}
// skipXMLIgnoredSection checks whether a CDATA section ("<![cdata[" ... "]]>")
// or an XML comment ("<!--" ... "-->") opens exactly at byte offset i of
// lower. The caller is expected to pass already lower-cased text, since the
// CDATA opener is compared in lower case.
//
// Results:
//   - a section opens at i and is terminated: next is the offset just past its
//     terminator and advanced is true;
//   - a section opens at i but is never terminated: blocked is true (next is 0);
//   - nothing to skip at i: next is i and both flags are false.
func skipXMLIgnoredSection(lower string, i int) (next int, advanced bool, blocked bool) {
	rest := lower[i:]
	for _, section := range [...]struct{ opener, closer string }{
		{"<![cdata[", "]]>"},
		{"<!--", "-->"},
	} {
		if !strings.HasPrefix(rest, section.opener) {
			continue
		}
		offset := strings.Index(rest[len(section.opener):], section.closer)
		if offset < 0 {
			return 0, false, true
		}
		return i + len(section.opener) + offset + len(section.closer), true, false
	}
	return i, false, false
}
// findXMLTagEnd returns the byte offset of the first '>' at or after from that
// is not inside a quoted attribute value, or -1 when there is none. A negative
// from is treated as 0. Both single and double quotes open a quoted run, which
// lasts until the same quote character appears again.
func findXMLTagEnd(text string, from int) int {
	if from < 0 {
		from = 0
	}
	var openQuote byte // 0 while outside a quoted value
	for idx := from; idx < len(text); idx++ {
		c := text[idx]
		switch {
		case openQuote != 0:
			if c == openQuote {
				openQuote = 0
			}
		case c == '"' || c == '\'':
			openQuote = c
		case c == '>':
			return idx
		}
	}
	return -1
}
// hasXMLTagBoundary reports whether a tag name that ends just before byte
// offset idx is properly terminated there: either the text ends at or before
// idx, or the byte at idx is whitespace (space, tab, newline, carriage
// return), '>' or '/'.
func hasXMLTagBoundary(text string, idx int) bool {
	if idx >= len(text) {
		return true
	}
	c := text[idx]
	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '>' || c == '/'
}
// isSelfClosingXMLTag reports whether startTag, given without its closing '>',
// ends in '/' once surrounding whitespace is ignored.
func isSelfClosingXMLTag(startTag string) bool {
	trimmed := strings.TrimSpace(startTag)
	return len(trimmed) > 0 && trimmed[len(trimmed)-1] == '/'
}
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
func parseXMLTagAttributes(raw string) map[string]string {
if strings.TrimSpace(raw) == "" {
return map[string]string{}
@@ -143,6 +293,9 @@ func parseInvokeParameterValue(raw string) any {
if trimmed == "" {
return ""
}
if value, ok := extractStandaloneCDATA(trimmed); ok {
return value
}
if parsed := parseStructuredToolCallInput(trimmed); len(parsed) > 0 {
if len(parsed) == 1 {
if rawValue, ok := parsed["_raw"].(string); ok {

View File

@@ -54,6 +54,32 @@ echo "hello"
}
}
// TestParseToolCallsKeepsToolSyntaxInsideCDATAAsParameterText feeds
// ParseToolCalls a Write call whose "content" parameter is a CDATA section
// containing a fenced Markdown block with its own <tool_calls> markup. It
// expects exactly one call, the CDATA payload returned unchanged as the
// content string, and the sibling file_path parameter still parsed.
func TestParseToolCallsKeepsToolSyntaxInsideCDATAAsParameterText(t *testing.T) {
	markdown := strings.Join([]string{
		"# Release notes",
		"",
		"```xml",
		"<tool_calls>",
		" <invoke name=\"demo\">",
		" <parameter name=\"value\">x</parameter>",
		" </invoke>",
		"</tool_calls>",
		"```",
	}, "\n")
	toolCallXML := `<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[` + markdown + `]]></parameter><parameter name="file_path">DS2API-4.0-Release-Notes.md</parameter></invoke></tool_calls>`

	calls := ParseToolCalls(toolCallXML, []string{"Write"})
	if got := len(calls); got != 1 {
		t.Fatalf("expected 1 call, got %#v", calls)
	}

	args := calls[0].Input
	// A non-string content value falls through as "" and fails the comparison.
	if content, _ := args["content"].(string); content != markdown {
		t.Fatalf("expected CDATA payload with nested tool syntax to survive intact, got %q", content)
	}
	if args["file_path"] != "DS2API-4.0-Release-Notes.md" {
		t.Fatalf("expected file_path parameter, got %#v", args)
	}
}
func TestParseToolCallsSupportsInvokeParameters(t *testing.T) {
text := `<tool_calls><invoke name="get_weather"><parameter name="city">beijing</parameter><parameter name="unit">c</parameter></invoke></tool_calls>`
calls := ParseToolCalls(text, []string{"get_weather"})