package toolcall import ( "encoding/json" "html" "regexp" "strings" ) var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)`) // cdataPattern matches a standalone CDATA section. var cdataPattern = regexp.MustCompile(`(?is)^$`) func parseMarkupKVObject(text string) map[string]any { matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1) if len(matches) == 0 { return nil } out := map[string]any{} for _, m := range matches { if len(m) < 4 { continue } key := strings.TrimSpace(m[1]) endKey := strings.TrimSpace(m[3]) if key == "" { continue } if !strings.EqualFold(key, endKey) { continue } value := parseMarkupValue(m[2]) if value == nil { continue } appendMarkupValue(out, key, value) } if len(out) == 0 { return nil } return out } func parseMarkupValue(inner string) any { if value, ok := extractStandaloneCDATA(inner); ok { return value } value := strings.TrimSpace(extractRawTagValue(inner)) if value == "" { return "" } if strings.Contains(value, "<") && strings.Contains(value, ">") { if parsed := parseStructuredToolCallInput(value); len(parsed) > 0 { if len(parsed) == 1 { if raw, ok := parsed["_raw"].(string); ok { return raw } } return parsed } } var jsonValue any if json.Unmarshal([]byte(value), &jsonValue) == nil { return jsonValue } return value } func appendMarkupValue(out map[string]any, key string, value any) { if existing, ok := out[key]; ok { switch current := existing.(type) { case []any: out[key] = append(current, value) default: out[key] = []any{current, value} } return } out[key] = value } // extractRawTagValue treats the inner content of a tag robustly. // It detects CDATA and strips it, otherwise it unescapes standard HTML entities. // It avoids over-aggressive tag stripping that might break user content. func extractRawTagValue(inner string) string { trimmed := strings.TrimSpace(inner) if trimmed == "" { return "" } // 1. Check for CDATA - if present, it's the ultimate "safe" container. if value, ok := extractStandaloneCDATA(trimmed); ok { return value // Return raw content between CDATA brackets } // 2. If no CDATA, we still want to be robust. // We unescape standard HTML entities (like < > &) // but we DON'T recursively strip tags unless they are actually valid XML tags // at the start/end (which should have been handled by the outer matcher anyway). // If it contains what looks like a single tag and no other text, it might be nested XML // but for KV objects we usually want the value. return html.UnescapeString(inner) } func extractStandaloneCDATA(inner string) (string, bool) { trimmed := strings.TrimSpace(inner) if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 { return cdataMatches[1], true } if strings.HasPrefix(strings.ToLower(trimmed), "" var b strings.Builder b.Grow(len(text)) changed := false pos := 0 for pos < len(text) { startRel := strings.Index(lower[pos:], openMarker) if startRel < 0 { b.WriteString(text[pos:]) break } start := pos + startRel contentStart := start + len(openMarker) b.WriteString(text[pos:start]) if endRel := strings.Index(lower[contentStart:], closeMarker); endRel >= 0 { end := contentStart + endRel + len(closeMarker) b.WriteString(text[start:end]) pos = end continue } changed = true b.WriteString(text[contentStart:]) pos = len(text) } if !changed { return text } return b.String() }