Detect additional tool-call markers in the stream sieve and tolerate
unescaped backslashes in tool-call input.

* findToolSegmentStart (Go and JS): instead of searching only for
  "tool_calls", pick the earliest occurrence of any of "tool_calls",
  "function.name:" or "[tool_call_history]" at or after the current offset,
  and advance past the keyword that actually matched when the candidate
  lies inside a code fence.
* parseToolCallInput: when the raw string is not valid JSON, retry once
  after repairInvalidJSONBackslashes and fall back to {"_raw": raw} only if
  that also fails.
* tests/repair_json_tool.go: stand-alone copy of the repair routine with a
  PASS/FAIL table. Its first expectation matches the unit test in
  internal/util/toolcalls_test.go: "\n" is a valid JSON escape and is
  therefore left untouched.

NOTE(review): line breaks and indentation (tabs for Go, two spaces for JS)
were restored from a line-collapsed copy of this patch, and the "index"
blob ids are the ones recorded before the edits above; confirm with
`git apply --check` before applying.

diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/adapter/openai/tool_sieve_core.go
index e7e41f8..cdb2585 100644
--- a/internal/adapter/openai/tool_sieve_core.go
+++ b/internal/adapter/openai/tool_sieve_core.go
@@ -167,13 +167,28 @@ func findToolSegmentStart(s string) int {
 		return -1
 	}
 	lower := strings.ToLower(s)
+	keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
 	offset := 0
 	for {
-		keyRel := strings.Index(lower[offset:], "tool_calls")
-		if keyRel < 0 {
+		bestKeyIdx := -1
+		matchedKeyword := ""
+
+		for _, kw := range keywords {
+			idx := strings.Index(lower[offset:], kw)
+			if idx >= 0 {
+				absIdx := offset + idx
+				if bestKeyIdx < 0 || absIdx < bestKeyIdx {
+					bestKeyIdx = absIdx
+					matchedKeyword = kw
+				}
+			}
+		}
+
+		if bestKeyIdx < 0 {
 			return -1
 		}
-		keyIdx := offset + keyRel
+
+		keyIdx := bestKeyIdx
 		start := strings.LastIndex(s[:keyIdx], "{")
 		if start < 0 {
 			start = keyIdx
@@ -181,7 +196,7 @@ func findToolSegmentStart(s string) int {
 		if !insideCodeFence(s[:start]) {
 			return start
 		}
-		offset = keyIdx + len("tool_calls")
+		offset = keyIdx + len(matchedKeyword)
 	}
 }
 
diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js
index c1b92a8..ae25fd4 100644
--- a/internal/js/helpers/stream-tool-sieve/sieve.js
+++ b/internal/js/helpers/stream-tool-sieve/sieve.js
@@ -165,19 +165,34 @@ function findToolSegmentStart(s) {
     return -1;
   }
   const lower = s.toLowerCase();
+  const keywords = ['tool_calls', 'function.name:', '[tool_call_history]'];
   let offset = 0;
   // eslint-disable-next-line no-constant-condition
   while (true) {
-    const keyIdx = lower.indexOf('tool_calls', offset);
-    if (keyIdx < 0) {
+    let bestKeyIdx = -1;
+    let matchedKeyword = '';
+
+    for (const kw of keywords) {
+      const idx = lower.indexOf(kw, offset);
+      if (idx >= 0) {
+        if (bestKeyIdx < 0 || idx < bestKeyIdx) {
+          bestKeyIdx = idx;
+          matchedKeyword = kw;
+        }
+      }
+    }
+
+    if (bestKeyIdx < 0) {
       return -1;
     }
+
+    const keyIdx = bestKeyIdx;
     const start = s.slice(0, keyIdx).lastIndexOf('{');
     const candidateStart = start >= 0 ? start : keyIdx;
     if (!insideCodeFence(s.slice(0, candidateStart))) {
       return candidateStart;
     }
-    offset = keyIdx + 'tool_calls'.length;
+    offset = keyIdx + matchedKeyword.length;
   }
 }
 
diff --git a/internal/util/toolcalls_parse.go b/internal/util/toolcalls_parse.go
index fb6d459..8c1a905 100644
--- a/internal/util/toolcalls_parse.go
+++ b/internal/util/toolcalls_parse.go
@@ -264,6 +264,13 @@ func parseToolCallInput(v any) map[string]any {
 		if err := json.Unmarshal([]byte(raw), &parsed); err == nil && parsed != nil {
 			return parsed
 		}
+		// Try to repair invalid backslashes (common in Windows paths output by models)
+		repaired := repairInvalidJSONBackslashes(raw)
+		if repaired != raw {
+			if err := json.Unmarshal([]byte(repaired), &parsed); err == nil && parsed != nil {
+				return parsed
+			}
+		}
 		return map[string]any{"_raw": raw}
 	default:
 		b, err := json.Marshal(x)
@@ -277,3 +284,57 @@ func parseToolCallInput(v any) map[string]any {
 		return map[string]any{}
 	}
 }
+
+// repairInvalidJSONBackslashes doubles every backslash in s that does not
+// begin a valid JSON escape sequence (\", \\, \/, \b, \f, \n, \r, \t, or \u
+// followed by four hex digits), so that unescaped Windows paths emitted by
+// models can still be decoded. Valid escapes are copied through unchanged,
+// which means an ambiguous sequence such as the "\n" in `C:\Users\name` is
+// kept as a newline escape rather than doubled.
+func repairInvalidJSONBackslashes(s string) string {
+	if !strings.Contains(s, "\\") {
+		return s
+	}
+	var out strings.Builder
+	out.Grow(len(s) + 10)
+	runes := []rune(s)
+	for i := 0; i < len(runes); i++ {
+		if runes[i] == '\\' {
+			if i+1 < len(runes) {
+				next := runes[i+1]
+				switch next {
+				case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+					out.WriteRune('\\')
+					out.WriteRune(next)
+					i++
+					continue
+				case 'u':
+					if i+5 < len(runes) {
+						isHex := true
+						for j := 1; j <= 4; j++ {
+							r := runes[i+1+j]
+							if !((r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')) {
+								isHex = false
+								break
+							}
+						}
+						if isHex {
+							out.WriteRune('\\')
+							out.WriteRune('u')
+							for j := 1; j <= 4; j++ {
+								out.WriteRune(runes[i+1+j])
+							}
+							i += 5
+							continue
+						}
+					}
+				}
+			}
+			// Not a valid escape sequence, double it
+			out.WriteString("\\\\")
+		} else {
+			out.WriteRune(runes[i])
+		}
+	}
+	return out.String()
+}
diff --git a/internal/util/toolcalls_test.go b/internal/util/toolcalls_test.go
index 3ace015..94417f3 100644
--- a/internal/util/toolcalls_test.go
+++ b/internal/util/toolcalls_test.go
@@ -279,3 +279,45 @@ func TestParseToolCallsDoesNotAcceptMismatchedMarkupTags(t *testing.T) {
 		t.Fatalf("expected mismatched tags to be rejected, got %#v", calls)
 	}
 }
+
+func TestRepairInvalidJSONBackslashes(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\name"}`},
+		{`{"cmd": "cd D:\git_codes"}`, `{"cmd": "cd D:\\git_codes"}`},
+		{`{"text": "line1\nline2"}`, `{"text": "line1\nline2"}`},
+		{`{"path": "D:\\back\\slash"}`, `{"path": "D:\\back\\slash"}`},
+		{`{"unicode": "\u2705"}`, `{"unicode": "\u2705"}`},
+		{`{"invalid_u": "\u123"}`, `{"invalid_u": "\\u123"}`},
+	}
+
+	for _, tt := range tests {
+		got := repairInvalidJSONBackslashes(tt.input)
+		if got != tt.expected {
+			t.Errorf("repairInvalidJSONBackslashes(%s) = %s; want %s", tt.input, got, tt.expected)
+		}
+	}
+}
+
+func TestParseToolCallsWithInvalidBackslashes(t *testing.T) {
+	// DeepSeek sometimes outputs Windows paths with single backslashes in JSON strings
+	text := `好的,执行以下命令:{"name": "execute_command", "input": "{\"command\": \"cd D:\git_codes && dir\"}"}`
+	availableTools := []string{"execute_command"}
+
+	parsed := ParseToolCalls(text, availableTools)
+	if len(parsed) != 1 {
+		t.Fatalf("expected 1 tool call, got %d", len(parsed))
+	}
+
+	cmd, ok := parsed[0].Input["command"].(string)
+	if !ok {
+		t.Fatalf("expected command string in input, got %v", parsed[0].Input)
+	}
+
+	expected := "cd D:\\git_codes && dir"
+	if cmd != expected {
+		t.Errorf("expected command %q, got %q", expected, cmd)
+	}
+}
diff --git a/tests/repair_json_tool.go b/tests/repair_json_tool.go
new file mode 100644
index 0000000..7abf952
--- /dev/null
+++ b/tests/repair_json_tool.go
@@ -0,0 +1,77 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+)
+
+func repairInvalidJSONBackslashes(s string) string {
+	if !strings.Contains(s, "\\") {
+		return s
+	}
+	var out strings.Builder
+	out.Grow(len(s) + 10)
+	runes := []rune(s)
+	for i := 0; i < len(runes); i++ {
+		if runes[i] == '\\' {
+			if i+1 < len(runes) {
+				next := runes[i+1]
+				switch next {
+				case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+					out.WriteRune('\\')
+					out.WriteRune(next)
+					i++
+					continue
+				case 'u':
+					if i+5 < len(runes) {
+						isHex := true
+						for j := 1; j <= 4; j++ {
+							r := runes[i+1+j]
+							if !((r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')) {
+								isHex = false
+								break
+							}
+						}
+						if isHex {
+							out.WriteRune('\\')
+							out.WriteRune('u')
+							for j := 1; j <= 4; j++ {
+								out.WriteRune(runes[i+1+j])
+							}
+							i += 5
+							continue
+						}
+					}
+				}
+			}
+			// Not a valid escape sequence, double it
+			out.WriteString("\\\\")
+		} else {
+			out.WriteRune(runes[i])
+		}
+	}
+	return out.String()
+}
+
+func main() {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\name"}`},
+		{`{"cmd": "cd D:\git_codes"}`, `{"cmd": "cd D:\\git_codes"}`},
+		{`{"text": "line1\nline2"}`, `{"text": "line1\nline2"}`},
+		{`{"path": "D:\\back\\slash"}`, `{"path": "D:\\back\\slash"}`},
+		{`{"unicode": "\u2705"}`, `{"unicode": "\u2705"}`},
+		{`{"invalid_u": "\u123"}`, `{"invalid_u": "\\u123"}`},
+	}
+
+	for _, tt := range tests {
+		got := repairInvalidJSONBackslashes(tt.input)
+		if got != tt.expected {
+			fmt.Printf("FAIL: input=%s\n got=%s\n exp=%s\n", tt.input, got, tt.expected)
+		} else {
+			fmt.Printf("PASS: input=%s\n", tt.input)
+		}
+	}
+}