mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-15 21:55:09 +08:00
feat: implement XML-based tool call extraction and refactor sieve utilities into dedicated modules
This commit is contained in:
@@ -159,6 +159,10 @@ func findSuspiciousPrefixStart(s string) int {
|
||||
start = idx
|
||||
}
|
||||
}
|
||||
// Also check for partial XML tool tag at end of string.
|
||||
if xmlIdx := findPartialXMLToolTagStart(s); xmlIdx >= 0 && xmlIdx > start {
|
||||
start = xmlIdx
|
||||
}
|
||||
return start
|
||||
}
|
||||
|
||||
@@ -175,9 +179,23 @@ func findToolSegmentStart(s string) int {
|
||||
bestKeyIdx = idx
|
||||
}
|
||||
}
|
||||
// Also detect XML tool call tags.
|
||||
for _, tag := range xmlToolTagsToDetect {
|
||||
idx := strings.Index(lower, tag)
|
||||
if idx >= 0 && (bestKeyIdx < 0 || idx < bestKeyIdx) {
|
||||
bestKeyIdx = idx
|
||||
}
|
||||
}
|
||||
if bestKeyIdx < 0 {
|
||||
return -1
|
||||
}
|
||||
// For XML tags, the '<' is itself the segment start.
|
||||
if bestKeyIdx < len(s) && s[bestKeyIdx] == '<' {
|
||||
if fenceStart, ok := openFenceStartBefore(s, bestKeyIdx); ok {
|
||||
return fenceStart
|
||||
}
|
||||
return bestKeyIdx
|
||||
}
|
||||
start := strings.LastIndex(s[:bestKeyIdx], "{")
|
||||
if start < 0 {
|
||||
start = bestKeyIdx
|
||||
@@ -193,6 +211,16 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
|
||||
if captured == "" {
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
// Try XML tool call extraction first.
|
||||
if xmlPrefix, xmlCalls, xmlSuffix, xmlReady := consumeXMLToolCapture(captured, toolNames); xmlReady {
|
||||
return xmlPrefix, xmlCalls, xmlSuffix, true
|
||||
}
|
||||
// If XML tags are present but block is incomplete, keep buffering.
|
||||
if hasOpenXMLToolTag(captured) {
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
lower := strings.ToLower(captured)
|
||||
keyIdx := -1
|
||||
keywords := []string{"tool_calls", "\"function\"", "function.name:", "[tool_call_history]", "[tool_result_history]"}
|
||||
@@ -234,67 +262,3 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
|
||||
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
|
||||
return prefixPart, parsed.Calls, suffixPart, true
|
||||
}
|
||||
|
||||
func extractToolHistoryBlock(captured string, keyIdx int) (start int, end int, ok bool) {
|
||||
if keyIdx < 0 || keyIdx >= len(captured) {
|
||||
return 0, 0, false
|
||||
}
|
||||
rest := strings.ToLower(captured[keyIdx:])
|
||||
switch {
|
||||
case strings.HasPrefix(rest, "[tool_call_history]"):
|
||||
closeTag := "[/tool_call_history]"
|
||||
closeIdx := strings.Index(rest, closeTag)
|
||||
if closeIdx < 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
return keyIdx, keyIdx + closeIdx + len(closeTag), true
|
||||
case strings.HasPrefix(rest, "[tool_result_history]"):
|
||||
closeTag := "[/tool_result_history]"
|
||||
closeIdx := strings.Index(rest, closeTag)
|
||||
if closeIdx < 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
return keyIdx, keyIdx + closeIdx + len(closeTag), true
|
||||
default:
|
||||
return 0, 0, false
|
||||
}
|
||||
}
|
||||
|
||||
func trimWrappingJSONFence(prefix, suffix string) (string, string) {
|
||||
trimmedPrefix := strings.TrimRight(prefix, " \t\r\n")
|
||||
fenceIdx := strings.LastIndex(trimmedPrefix, "```")
|
||||
if fenceIdx < 0 {
|
||||
return prefix, suffix
|
||||
}
|
||||
// Only strip when the trailing fence in prefix behaves like an opening fence.
|
||||
// A legitimate closing fence before a standalone tool JSON must be preserved.
|
||||
if strings.Count(trimmedPrefix[:fenceIdx+3], "```")%2 == 0 {
|
||||
return prefix, suffix
|
||||
}
|
||||
fenceHeader := strings.TrimSpace(trimmedPrefix[fenceIdx+3:])
|
||||
if fenceHeader != "" && !strings.EqualFold(fenceHeader, "json") {
|
||||
return prefix, suffix
|
||||
}
|
||||
|
||||
trimmedSuffix := strings.TrimLeft(suffix, " \t\r\n")
|
||||
if !strings.HasPrefix(trimmedSuffix, "```") {
|
||||
return prefix, suffix
|
||||
}
|
||||
consumedLeading := len(suffix) - len(trimmedSuffix)
|
||||
return trimmedPrefix[:fenceIdx], suffix[consumedLeading+3:]
|
||||
}
|
||||
|
||||
func openFenceStartBefore(s string, pos int) (int, bool) {
|
||||
if pos <= 0 || pos > len(s) {
|
||||
return -1, false
|
||||
}
|
||||
segment := s[:pos]
|
||||
lastFence := strings.LastIndex(segment, "```")
|
||||
if lastFence < 0 {
|
||||
return -1, false
|
||||
}
|
||||
if strings.Count(segment, "```")%2 == 1 {
|
||||
return lastFence, true
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package openai
|
||||
|
||||
import "strings"
|
||||
|
||||
func extractJSONObjectFrom(text string, start int) (string, int, bool) {
|
||||
if start < 0 || start >= len(text) || text[start] != '{' {
|
||||
return "", 0, false
|
||||
@@ -41,3 +43,67 @@ func extractJSONObjectFrom(text string, start int) (string, int, bool) {
|
||||
}
|
||||
return "", 0, false
|
||||
}
|
||||
|
||||
func extractToolHistoryBlock(captured string, keyIdx int) (start int, end int, ok bool) {
|
||||
if keyIdx < 0 || keyIdx >= len(captured) {
|
||||
return 0, 0, false
|
||||
}
|
||||
rest := strings.ToLower(captured[keyIdx:])
|
||||
switch {
|
||||
case strings.HasPrefix(rest, "[tool_call_history]"):
|
||||
closeTag := "[/tool_call_history]"
|
||||
closeIdx := strings.Index(rest, closeTag)
|
||||
if closeIdx < 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
return keyIdx, keyIdx + closeIdx + len(closeTag), true
|
||||
case strings.HasPrefix(rest, "[tool_result_history]"):
|
||||
closeTag := "[/tool_result_history]"
|
||||
closeIdx := strings.Index(rest, closeTag)
|
||||
if closeIdx < 0 {
|
||||
return 0, 0, false
|
||||
}
|
||||
return keyIdx, keyIdx + closeIdx + len(closeTag), true
|
||||
default:
|
||||
return 0, 0, false
|
||||
}
|
||||
}
|
||||
|
||||
func trimWrappingJSONFence(prefix, suffix string) (string, string) {
|
||||
trimmedPrefix := strings.TrimRight(prefix, " \t\r\n")
|
||||
fenceIdx := strings.LastIndex(trimmedPrefix, "```")
|
||||
if fenceIdx < 0 {
|
||||
return prefix, suffix
|
||||
}
|
||||
// Only strip when the trailing fence in prefix behaves like an opening fence.
|
||||
// A legitimate closing fence before a standalone tool JSON must be preserved.
|
||||
if strings.Count(trimmedPrefix[:fenceIdx+3], "```")%2 == 0 {
|
||||
return prefix, suffix
|
||||
}
|
||||
fenceHeader := strings.TrimSpace(trimmedPrefix[fenceIdx+3:])
|
||||
if fenceHeader != "" && !strings.EqualFold(fenceHeader, "json") {
|
||||
return prefix, suffix
|
||||
}
|
||||
|
||||
trimmedSuffix := strings.TrimLeft(suffix, " \t\r\n")
|
||||
if !strings.HasPrefix(trimmedSuffix, "```") {
|
||||
return prefix, suffix
|
||||
}
|
||||
consumedLeading := len(suffix) - len(trimmedSuffix)
|
||||
return trimmedPrefix[:fenceIdx], suffix[consumedLeading+3:]
|
||||
}
|
||||
|
||||
func openFenceStartBefore(s string, pos int) (int, bool) {
|
||||
if pos <= 0 || pos > len(s) {
|
||||
return -1, false
|
||||
}
|
||||
segment := s[:pos]
|
||||
lastFence := strings.LastIndex(segment, "```")
|
||||
if lastFence < 0 {
|
||||
return -1, false
|
||||
}
|
||||
if strings.Count(segment, "```")%2 == 1 {
|
||||
return lastFence, true
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
|
||||
109
internal/adapter/openai/tool_sieve_xml.go
Normal file
109
internal/adapter/openai/tool_sieve_xml.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"ds2api/internal/util"
|
||||
)
|
||||
|
||||
// --- XML tool call support for the streaming sieve ---
|
||||
|
||||
var xmlToolCallClosingTags = []string{"</tool_calls>", "</tool_call>", "</invoke>", "</function_call>", "</function_calls>", "</tool_use>"}
|
||||
var xmlToolCallOpeningTags = []string{"<tool_calls", "<tool_call", "<invoke", "<function_call", "<function_calls", "<tool_use"}
|
||||
|
||||
// xmlToolCallBlockPattern matches a complete XML tool call block (wrapper or standalone).
|
||||
var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)(<tool_calls>\s*(?:.*?)\s*</tool_calls>|<tool_call>\s*(?:.*?)\s*</tool_call>|<invoke\b[^>]*>(?:.*?)</invoke>|<function_calls?\b[^>]*>(?:.*?)</function_calls?>|<tool_use>(?:.*?)</tool_use>)`)
|
||||
|
||||
// xmlToolTagsToDetect is the set of XML tag prefixes used by findToolSegmentStart.
|
||||
var xmlToolTagsToDetect = []string{"<tool_calls>", "<tool_calls\n", "<tool_call>", "<tool_call\n",
|
||||
"<invoke ", "<invoke>", "<function_call", "<function_calls", "<tool_use>"}
|
||||
|
||||
// consumeXMLToolCapture tries to extract complete XML tool call blocks from captured text.
|
||||
func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
|
||||
lower := strings.ToLower(captured)
|
||||
// Find the earliest XML tool opening tag.
|
||||
openIdx := -1
|
||||
for _, tag := range xmlToolCallOpeningTags {
|
||||
idx := strings.Index(lower, tag)
|
||||
if idx >= 0 && (openIdx < 0 || idx < openIdx) {
|
||||
openIdx = idx
|
||||
}
|
||||
}
|
||||
if openIdx < 0 {
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
// Look for a matching closing tag.
|
||||
closeIdx := -1
|
||||
for _, tag := range xmlToolCallClosingTags {
|
||||
idx := strings.Index(lower[openIdx:], tag)
|
||||
if idx >= 0 {
|
||||
absEnd := openIdx + idx + len(tag)
|
||||
if closeIdx < 0 || absEnd > closeIdx {
|
||||
closeIdx = absEnd
|
||||
}
|
||||
}
|
||||
}
|
||||
if closeIdx <= 0 {
|
||||
return "", nil, "", false
|
||||
}
|
||||
|
||||
xmlBlock := captured[openIdx:closeIdx]
|
||||
prefixPart := captured[:openIdx]
|
||||
suffixPart := captured[closeIdx:]
|
||||
parsed := util.ParseToolCalls(xmlBlock, toolNames)
|
||||
if len(parsed) > 0 {
|
||||
prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
|
||||
return prefixPart, parsed, suffixPart, true
|
||||
}
|
||||
// Looks like XML tool syntax but failed to parse — consume it to avoid leak.
|
||||
return prefixPart, nil, suffixPart, true
|
||||
}
|
||||
|
||||
// hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag
|
||||
// but no corresponding closing tag yet.
|
||||
func hasOpenXMLToolTag(captured string) bool {
|
||||
lower := strings.ToLower(captured)
|
||||
for _, tag := range xmlToolCallOpeningTags {
|
||||
if strings.Contains(lower, tag) {
|
||||
hasClosed := false
|
||||
for _, ct := range xmlToolCallClosingTags {
|
||||
if strings.Contains(lower, ct) {
|
||||
hasClosed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasClosed {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// findPartialXMLToolTagStart checks if the string ends with a partial XML tool tag
|
||||
// (e.g., "<tool_ca" or "<inv") and returns the position of the '<'.
|
||||
func findPartialXMLToolTagStart(s string) int {
|
||||
lastLT := strings.LastIndex(s, "<")
|
||||
if lastLT < 0 {
|
||||
return -1
|
||||
}
|
||||
tail := s[lastLT:]
|
||||
// If there's a '>' in the tail, the tag is closed — not partial.
|
||||
if strings.Contains(tail, ">") {
|
||||
return -1
|
||||
}
|
||||
lowerTail := strings.ToLower(tail)
|
||||
// Check if the tail is a prefix of any known XML tool tag.
|
||||
for _, tag := range xmlToolCallOpeningTags {
|
||||
tagWithLT := tag
|
||||
if !strings.HasPrefix(tagWithLT, "<") {
|
||||
tagWithLT = "<" + tagWithLT
|
||||
}
|
||||
if strings.HasPrefix(tagWithLT, lowerTail) {
|
||||
return lastLT
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
155
internal/adapter/openai/tool_sieve_xml_test.go
Normal file
155
internal/adapter/openai/tool_sieve_xml_test.go
Normal file
@@ -0,0 +1,155 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestProcessToolSieveInterceptsXMLToolCallWithoutLeak(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// Simulate a model producing XML tool call output chunk by chunk.
|
||||
chunks := []string{
|
||||
"<tool_calls>\n",
|
||||
" <tool_call>\n",
|
||||
" <tool_name>read_file</tool_name>\n",
|
||||
` <parameters>{"path":"README.MD"}</parameters>` + "\n",
|
||||
" </tool_call>\n",
|
||||
"</tool_calls>",
|
||||
}
|
||||
var events []toolStreamEvent
|
||||
for _, c := range chunks {
|
||||
events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent string
|
||||
var toolCalls int
|
||||
for _, evt := range events {
|
||||
if evt.Content != "" {
|
||||
textContent += evt.Content
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
if strings.Contains(textContent, "<tool_call") {
|
||||
t.Fatalf("XML tool call content leaked to text: %q", textContent)
|
||||
}
|
||||
if strings.Contains(textContent, "read_file") {
|
||||
t.Fatalf("tool name leaked to text: %q", textContent)
|
||||
}
|
||||
if toolCalls == 0 {
|
||||
t.Fatal("expected tool calls to be extracted, got none")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSieveXMLWithLeadingText(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// Model outputs some prose then an XML tool call.
|
||||
chunks := []string{
|
||||
"Let me check the file.\n",
|
||||
"<tool_calls>\n <tool_call>\n <tool_name>read_file</tool_name>\n",
|
||||
` <parameters>{"path":"go.mod"}</parameters>` + "\n </tool_call>\n</tool_calls>",
|
||||
}
|
||||
var events []toolStreamEvent
|
||||
for _, c := range chunks {
|
||||
events = append(events, processToolSieveChunk(&state, c, []string{"read_file"})...)
|
||||
}
|
||||
events = append(events, flushToolSieve(&state, []string{"read_file"})...)
|
||||
|
||||
var textContent string
|
||||
var toolCalls int
|
||||
for _, evt := range events {
|
||||
if evt.Content != "" {
|
||||
textContent += evt.Content
|
||||
}
|
||||
toolCalls += len(evt.ToolCalls)
|
||||
}
|
||||
|
||||
// Leading text should be emitted.
|
||||
if !strings.Contains(textContent, "Let me check the file.") {
|
||||
t.Fatalf("expected leading text to be emitted, got %q", textContent)
|
||||
}
|
||||
// The XML itself should NOT leak.
|
||||
if strings.Contains(textContent, "<tool_call") {
|
||||
t.Fatalf("XML tool call content leaked to text: %q", textContent)
|
||||
}
|
||||
if toolCalls == 0 {
|
||||
t.Fatal("expected tool calls to be extracted, got none")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProcessToolSievePartialXMLTagHeldBack(t *testing.T) {
|
||||
var state toolStreamSieveState
|
||||
// Chunk ends with a partial XML tool tag.
|
||||
events := processToolSieveChunk(&state, "Hello <tool_ca", []string{"read_file"})
|
||||
|
||||
var textContent string
|
||||
for _, evt := range events {
|
||||
textContent += evt.Content
|
||||
}
|
||||
|
||||
// "Hello " should be emitted, but "<tool_ca" should be held back.
|
||||
if strings.Contains(textContent, "<tool_ca") {
|
||||
t.Fatalf("partial XML tag should not be emitted, got %q", textContent)
|
||||
}
|
||||
if !strings.Contains(textContent, "Hello") {
|
||||
t.Fatalf("expected 'Hello' text to be emitted, got %q", textContent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindToolSegmentStartDetectsXMLToolCalls(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
want int
|
||||
}{
|
||||
{"tool_calls_tag", "some text <tool_calls>\n", 10},
|
||||
{"tool_call_tag", "prefix <tool_call>\n", 7},
|
||||
{"invoke_tag", "text <invoke name=\"foo\">body</invoke>", 5},
|
||||
{"function_call_tag", "<function_call name=\"foo\">body</function_call>", 0},
|
||||
{"no_xml", "just plain text", -1},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := findToolSegmentStart(tc.input)
|
||||
if got != tc.want {
|
||||
t.Fatalf("findToolSegmentStart(%q) = %d, want %d", tc.input, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPartialXMLToolTagStart(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
want int
|
||||
}{
|
||||
{"partial_tool_call", "Hello <tool_ca", 6},
|
||||
{"partial_invoke", "Prefix <inv", 7},
|
||||
{"partial_lt_only", "Text <", 5},
|
||||
{"complete_tag", "Text <tool_call>done", -1},
|
||||
{"no_lt", "plain text", -1},
|
||||
{"closed_lt", "a < b > c", -1},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := findPartialXMLToolTagStart(tc.input)
|
||||
if got != tc.want {
|
||||
t.Fatalf("findPartialXMLToolTagStart(%q) = %d, want %d", tc.input, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasOpenXMLToolTag(t *testing.T) {
|
||||
if !hasOpenXMLToolTag("<tool_call>\n<tool_name>foo</tool_name>") {
|
||||
t.Fatal("should detect open XML tool tag without closing tag")
|
||||
}
|
||||
if hasOpenXMLToolTag("<tool_call>\n<tool_name>foo</tool_name></tool_call>") {
|
||||
t.Fatal("should return false when closing tag is present")
|
||||
}
|
||||
if hasOpenXMLToolTag("plain text without any XML") {
|
||||
t.Fatal("should return false for plain text")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user