mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-22 08:57:42 +08:00
工具优化
This commit is contained in:
@@ -43,6 +43,9 @@ func parseMarkupKVObject(text string) map[string]any {
|
||||
}
|
||||
|
||||
func parseMarkupValue(inner string) any {
|
||||
if value, ok := extractStandaloneCDATA(inner); ok {
|
||||
return value
|
||||
}
|
||||
value := strings.TrimSpace(extractRawTagValue(inner))
|
||||
if value == "" {
|
||||
return ""
|
||||
@@ -89,8 +92,8 @@ func extractRawTagValue(inner string) string {
|
||||
}
|
||||
|
||||
// 1. Check for CDATA - if present, it's the ultimate "safe" container.
|
||||
if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 {
|
||||
return cdataMatches[1] // Return raw content between CDATA brackets
|
||||
if value, ok := extractStandaloneCDATA(trimmed); ok {
|
||||
return value // Return raw content between CDATA brackets
|
||||
}
|
||||
|
||||
// 2. If no CDATA, we still want to be robust.
|
||||
@@ -102,3 +105,11 @@ func extractRawTagValue(inner string) string {
|
||||
// but for KV objects we usually want the value.
|
||||
return html.UnescapeString(inner)
|
||||
}
|
||||
|
||||
func extractStandaloneCDATA(inner string) (string, bool) {
|
||||
trimmed := strings.TrimSpace(inner)
|
||||
if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 {
|
||||
return cdataMatches[1], true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
@@ -87,7 +87,13 @@ func stripFencedCodeBlocks(text string) string {
|
||||
lines := strings.SplitAfter(text, "\n")
|
||||
inFence := false
|
||||
fenceMarker := ""
|
||||
inCDATA := false
|
||||
for _, line := range lines {
|
||||
if inCDATA || cdataStartsBeforeFence(line) {
|
||||
b.WriteString(line)
|
||||
inCDATA = updateCDATAState(inCDATA, line)
|
||||
continue
|
||||
}
|
||||
trimmed := strings.TrimLeft(line, " \t")
|
||||
if !inFence {
|
||||
if marker, ok := parseFenceOpen(trimmed); ok {
|
||||
@@ -111,6 +117,54 @@ func stripFencedCodeBlocks(text string) string {
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func cdataStartsBeforeFence(line string) bool {
|
||||
cdataIdx := strings.Index(strings.ToLower(line), "<![cdata[")
|
||||
if cdataIdx < 0 {
|
||||
return false
|
||||
}
|
||||
fenceIdx := firstFenceMarkerIndex(line)
|
||||
return fenceIdx < 0 || cdataIdx < fenceIdx
|
||||
}
|
||||
|
||||
// firstFenceMarkerIndex returns the byte offset of the earliest fence marker
// in line, where a fence marker is either three backticks or three tildes.
// It returns -1 when the line contains neither.
func firstFenceMarkerIndex(line string) int {
	backtick := strings.Index(line, "```")
	tilde := strings.Index(line, "~~~")
	if backtick < 0 {
		// No backtick fence: the tilde result (possibly -1) is the answer.
		return tilde
	}
	if tilde < 0 || backtick < tilde {
		return backtick
	}
	return tilde
}
|
||||
|
||||
// updateCDATAState walks line from left to right and returns whether the
// scanner is inside a CDATA section once the line has been consumed.
//
// inCDATA is the state at the start of the line. While inside a section the
// scan looks for the next "]]>"; while outside it looks for the next
// "<![CDATA[" (matched case-insensitively). Each marker found flips the state
// and the scan resumes right after it.
func updateCDATAState(inCDATA bool, line string) bool {
	const (
		opener = "<![cdata["
		closer = "]]>"
	)
	inside := inCDATA
	rest := strings.ToLower(line)
	for rest != "" {
		marker := opener
		if inside {
			marker = closer
		}
		at := strings.Index(rest, marker)
		if at < 0 {
			// Nothing left on this line can change the state.
			return inside
		}
		rest = rest[at+len(marker):]
		inside = !inside
	}
	return inside
}
|
||||
|
||||
func parseFenceOpen(line string) (string, bool) {
|
||||
if len(line) < 3 {
|
||||
return "", false
|
||||
|
||||
@@ -7,19 +7,16 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
var xmlToolCallsWrapperPattern = regexp.MustCompile(`(?is)<tool_calls\b[^>]*>\s*(.*?)\s*</tool_calls>`)
|
||||
var xmlInvokePattern = regexp.MustCompile(`(?is)<invoke\b([^>]*)>\s*(.*?)\s*</invoke>`)
|
||||
var xmlParameterPattern = regexp.MustCompile(`(?is)<parameter\b([^>]*)>\s*(.*?)\s*</parameter>`)
|
||||
var xmlAttrPattern = regexp.MustCompile(`(?is)\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')`)
|
||||
var xmlToolCallsClosePattern = regexp.MustCompile(`(?is)</tool_calls>`)
|
||||
var xmlInvokeStartPattern = regexp.MustCompile(`(?is)<invoke\b[^>]*\bname\s*=\s*("([^"]*)"|'([^']*)')`)
|
||||
|
||||
func parseXMLToolCalls(text string) []ParsedToolCall {
|
||||
wrappers := xmlToolCallsWrapperPattern.FindAllStringSubmatch(text, -1)
|
||||
wrappers := findXMLElementBlocks(text, "tool_calls")
|
||||
if len(wrappers) == 0 {
|
||||
repaired := repairMissingXMLToolCallsOpeningWrapper(text)
|
||||
if repaired != text {
|
||||
wrappers = xmlToolCallsWrapperPattern.FindAllStringSubmatch(repaired, -1)
|
||||
wrappers = findXMLElementBlocks(repaired, "tool_calls")
|
||||
}
|
||||
}
|
||||
if len(wrappers) == 0 {
|
||||
@@ -27,10 +24,7 @@ func parseXMLToolCalls(text string) []ParsedToolCall {
|
||||
}
|
||||
out := make([]ParsedToolCall, 0, len(wrappers))
|
||||
for _, wrapper := range wrappers {
|
||||
if len(wrapper) < 2 {
|
||||
continue
|
||||
}
|
||||
for _, block := range xmlInvokePattern.FindAllStringSubmatch(wrapper[1], -1) {
|
||||
for _, block := range findXMLElementBlocks(wrapper.Body, "invoke") {
|
||||
call, ok := parseSingleXMLToolCall(block)
|
||||
if !ok {
|
||||
continue
|
||||
@@ -66,17 +60,14 @@ func repairMissingXMLToolCallsOpeningWrapper(text string) string {
|
||||
return text[:invokeLoc[0]] + "<tool_calls>" + text[invokeLoc[0]:closeLoc[0]] + "</tool_calls>" + text[closeLoc[1]:]
|
||||
}
|
||||
|
||||
func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) {
|
||||
if len(block) < 3 {
|
||||
return ParsedToolCall{}, false
|
||||
}
|
||||
attrs := parseXMLTagAttributes(block[1])
|
||||
func parseSingleXMLToolCall(block xmlElementBlock) (ParsedToolCall, bool) {
|
||||
attrs := parseXMLTagAttributes(block.Attrs)
|
||||
name := strings.TrimSpace(html.UnescapeString(attrs["name"]))
|
||||
if name == "" {
|
||||
return ParsedToolCall{}, false
|
||||
}
|
||||
|
||||
inner := strings.TrimSpace(block[2])
|
||||
inner := strings.TrimSpace(block.Body)
|
||||
if strings.HasPrefix(inner, "{") {
|
||||
var payload map[string]any
|
||||
if err := json.Unmarshal([]byte(inner), &payload); err == nil {
|
||||
@@ -94,16 +85,13 @@ func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) {
|
||||
}
|
||||
|
||||
input := map[string]any{}
|
||||
for _, paramMatch := range xmlParameterPattern.FindAllStringSubmatch(inner, -1) {
|
||||
if len(paramMatch) < 3 {
|
||||
continue
|
||||
}
|
||||
paramAttrs := parseXMLTagAttributes(paramMatch[1])
|
||||
for _, paramMatch := range findXMLElementBlocks(inner, "parameter") {
|
||||
paramAttrs := parseXMLTagAttributes(paramMatch.Attrs)
|
||||
paramName := strings.TrimSpace(html.UnescapeString(paramAttrs["name"]))
|
||||
if paramName == "" {
|
||||
continue
|
||||
}
|
||||
value := parseInvokeParameterValue(paramMatch[2])
|
||||
value := parseInvokeParameterValue(paramMatch.Body)
|
||||
appendMarkupValue(input, paramName, value)
|
||||
}
|
||||
|
||||
@@ -116,6 +104,168 @@ func parseSingleXMLToolCall(block []string) (ParsedToolCall, bool) {
|
||||
return ParsedToolCall{Name: name, Input: input}, true
|
||||
}
|
||||
|
||||
// xmlElementBlock describes one element located by findXMLElementBlocks.
type xmlElementBlock struct {
	// Attrs is the raw text between the tag name and the '>' that ends the
	// start tag. Body is the raw text between the start tag and the matching
	// end tag.
	Attrs, Body string
	// Start is the byte offset of the start tag's '<' in the scanned text.
	// End is the byte offset just past the '>' of the matching end tag.
	Start, End int
}
|
||||
|
||||
func findXMLElementBlocks(text, tag string) []xmlElementBlock {
|
||||
if text == "" || tag == "" {
|
||||
return nil
|
||||
}
|
||||
var out []xmlElementBlock
|
||||
pos := 0
|
||||
for pos < len(text) {
|
||||
start, bodyStart, attrs, ok := findXMLStartTagOutsideCDATA(text, tag, pos)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
closeStart, closeEnd, ok := findMatchingXMLEndTagOutsideCDATA(text, tag, bodyStart)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
out = append(out, xmlElementBlock{
|
||||
Attrs: attrs,
|
||||
Body: text[bodyStart:closeStart],
|
||||
Start: start,
|
||||
End: closeEnd,
|
||||
})
|
||||
pos = closeEnd
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func findXMLStartTagOutsideCDATA(text, tag string, from int) (start, bodyStart int, attrs string, ok bool) {
|
||||
lower := strings.ToLower(text)
|
||||
target := "<" + strings.ToLower(tag)
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
if blocked {
|
||||
return -1, -1, "", false
|
||||
}
|
||||
if advanced {
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], target) && hasXMLTagBoundary(text, i+len(target)) {
|
||||
end := findXMLTagEnd(text, i+len(target))
|
||||
if end < 0 {
|
||||
return -1, -1, "", false
|
||||
}
|
||||
return i, end + 1, text[i+len(target) : end], true
|
||||
}
|
||||
i++
|
||||
}
|
||||
return -1, -1, "", false
|
||||
}
|
||||
|
||||
func findMatchingXMLEndTagOutsideCDATA(text, tag string, from int) (closeStart, closeEnd int, ok bool) {
|
||||
lower := strings.ToLower(text)
|
||||
openTarget := "<" + strings.ToLower(tag)
|
||||
closeTarget := "</" + strings.ToLower(tag)
|
||||
depth := 1
|
||||
for i := maxInt(from, 0); i < len(text); {
|
||||
next, advanced, blocked := skipXMLIgnoredSection(lower, i)
|
||||
if blocked {
|
||||
return -1, -1, false
|
||||
}
|
||||
if advanced {
|
||||
i = next
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], closeTarget) && hasXMLTagBoundary(text, i+len(closeTarget)) {
|
||||
end := findXMLTagEnd(text, i+len(closeTarget))
|
||||
if end < 0 {
|
||||
return -1, -1, false
|
||||
}
|
||||
depth--
|
||||
if depth == 0 {
|
||||
return i, end + 1, true
|
||||
}
|
||||
i = end + 1
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(lower[i:], openTarget) && hasXMLTagBoundary(text, i+len(openTarget)) {
|
||||
end := findXMLTagEnd(text, i+len(openTarget))
|
||||
if end < 0 {
|
||||
return -1, -1, false
|
||||
}
|
||||
if !isSelfClosingXMLTag(text[:end]) {
|
||||
depth++
|
||||
}
|
||||
i = end + 1
|
||||
continue
|
||||
}
|
||||
i++
|
||||
}
|
||||
return -1, -1, false
|
||||
}
|
||||
|
||||
// skipXMLIgnoredSection checks whether a CDATA section or a comment opens at
// byte offset i of lower, which the caller is expected to have lowercased.
//
// When such a section opens at i and is terminated, it returns the offset just
// past the terminator with advanced == true. When it opens at i but is never
// terminated, it returns blocked == true. Otherwise it returns i unchanged
// with both flags false.
func skipXMLIgnoredSection(lower string, i int) (next int, advanced bool, blocked bool) {
	rest := lower[i:]
	sections := [...]struct{ opener, closer string }{
		{"<![cdata[", "]]>"},
		{"<!--", "-->"},
	}
	for _, sec := range sections {
		if !strings.HasPrefix(rest, sec.opener) {
			continue
		}
		off := strings.Index(rest[len(sec.opener):], sec.closer)
		if off < 0 {
			return 0, false, true
		}
		return i + len(sec.opener) + off + len(sec.closer), true, false
	}
	return i, false, false
}
|
||||
|
||||
func findXMLTagEnd(text string, from int) int {
|
||||
quote := byte(0)
|
||||
for i := maxInt(from, 0); i < len(text); i++ {
|
||||
ch := text[i]
|
||||
if quote != 0 {
|
||||
if ch == quote {
|
||||
quote = 0
|
||||
}
|
||||
continue
|
||||
}
|
||||
if ch == '"' || ch == '\'' {
|
||||
quote = ch
|
||||
continue
|
||||
}
|
||||
if ch == '>' {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// hasXMLTagBoundary reports whether byte offset idx in text is a position
// where a tag name may end: at or past the end of text, or on a space, tab,
// newline, carriage return, '>' or '/'.
func hasXMLTagBoundary(text string, idx int) bool {
	if idx >= len(text) {
		return true
	}
	c := text[idx]
	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '>' || c == '/'
}
|
||||
|
||||
// isSelfClosingXMLTag reports whether startTag, ignoring surrounding
// whitespace, ends with '/'. Callers pass the tag text without its final '>'.
func isSelfClosingXMLTag(startTag string) bool {
	trimmed := strings.TrimSpace(startTag)
	return len(trimmed) > 0 && trimmed[len(trimmed)-1] == '/'
}
|
||||
|
||||
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
|
||||
|
||||
func parseXMLTagAttributes(raw string) map[string]string {
|
||||
if strings.TrimSpace(raw) == "" {
|
||||
return map[string]string{}
|
||||
@@ -143,6 +293,9 @@ func parseInvokeParameterValue(raw string) any {
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
if value, ok := extractStandaloneCDATA(trimmed); ok {
|
||||
return value
|
||||
}
|
||||
if parsed := parseStructuredToolCallInput(trimmed); len(parsed) > 0 {
|
||||
if len(parsed) == 1 {
|
||||
if rawValue, ok := parsed["_raw"].(string); ok {
|
||||
|
||||
@@ -54,6 +54,32 @@ echo "hello"
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsKeepsToolSyntaxInsideCDATAAsParameterText(t *testing.T) {
|
||||
payload := strings.Join([]string{
|
||||
"# Release notes",
|
||||
"",
|
||||
"```xml",
|
||||
"<tool_calls>",
|
||||
" <invoke name=\"demo\">",
|
||||
" <parameter name=\"value\">x</parameter>",
|
||||
" </invoke>",
|
||||
"</tool_calls>",
|
||||
"```",
|
||||
}, "\n")
|
||||
text := `<tool_calls><invoke name="Write"><parameter name="content"><![CDATA[` + payload + `]]></parameter><parameter name="file_path">DS2API-4.0-Release-Notes.md</parameter></invoke></tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"Write"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected 1 call, got %#v", calls)
|
||||
}
|
||||
content, _ := calls[0].Input["content"].(string)
|
||||
if content != payload {
|
||||
t.Fatalf("expected CDATA payload with nested tool syntax to survive intact, got %q", content)
|
||||
}
|
||||
if calls[0].Input["file_path"] != "DS2API-4.0-Release-Notes.md" {
|
||||
t.Fatalf("expected file_path parameter, got %#v", calls[0].Input)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsSupportsInvokeParameters(t *testing.T) {
|
||||
text := `<tool_calls><invoke name="get_weather"><parameter name="city">beijing</parameter><parameter name="unit">c</parameter></invoke></tool_calls>`
|
||||
calls := ParseToolCalls(text, []string{"get_weather"})
|
||||
|
||||
Reference in New Issue
Block a user