fix(toolcall): fix deepseek function calling bug and add json repair

- Fix: Expand stream sieve keywords to support function.name: and [TOOL_CALL_HISTORY]

- Fix: Add repairInvalidJSONBackslashes to handle unescaped backslashes in Windows paths

- Sync: Update JS stream sieve to match Go implementation

- Test: Add unit tests for backslash repair and deepseek format parsing

- Tool: Move repair json test tool to tests/repair_json_tool.go
This commit is contained in:
huangxun
2026-03-13 13:47:40 +08:00
parent f2674487c7
commit 7318d1f4a8
5 changed files with 211 additions and 7 deletions

View File

@@ -167,13 +167,28 @@ func findToolSegmentStart(s string) int {
return -1
}
lower := strings.ToLower(s)
keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
offset := 0
for {
keyRel := strings.Index(lower[offset:], "tool_calls")
if keyRel < 0 {
bestKeyIdx := -1
matchedKeyword := ""
for _, kw := range keywords {
idx := strings.Index(lower[offset:], kw)
if idx >= 0 {
absIdx := offset + idx
if bestKeyIdx < 0 || absIdx < bestKeyIdx {
bestKeyIdx = absIdx
matchedKeyword = kw
}
}
}
if bestKeyIdx < 0 {
return -1
}
keyIdx := offset + keyRel
keyIdx := bestKeyIdx
start := strings.LastIndex(s[:keyIdx], "{")
if start < 0 {
start = keyIdx
@@ -181,7 +196,7 @@ func findToolSegmentStart(s string) int {
if !insideCodeFence(s[:start]) {
return start
}
offset = keyIdx + len("tool_calls")
offset = keyIdx + len(matchedKeyword)
}
}

View File

@@ -165,19 +165,34 @@ function findToolSegmentStart(s) {
return -1;
}
const lower = s.toLowerCase();
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]'];
let offset = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
const keyIdx = lower.indexOf('tool_calls', offset);
if (keyIdx < 0) {
let bestKeyIdx = -1;
let matchedKeyword = '';
for (const kw of keywords) {
const idx = lower.indexOf(kw, offset);
if (idx >= 0) {
if (bestKeyIdx < 0 || idx < bestKeyIdx) {
bestKeyIdx = idx;
matchedKeyword = kw;
}
}
}
if (bestKeyIdx < 0) {
return -1;
}
const keyIdx = bestKeyIdx;
const start = s.slice(0, keyIdx).lastIndexOf('{');
const candidateStart = start >= 0 ? start : keyIdx;
if (!insideCodeFence(s.slice(0, candidateStart))) {
return candidateStart;
}
offset = keyIdx + 'tool_calls'.length;
offset = keyIdx + matchedKeyword.length;
}
}

View File

@@ -264,6 +264,13 @@ func parseToolCallInput(v any) map[string]any {
if err := json.Unmarshal([]byte(raw), &parsed); err == nil && parsed != nil {
return parsed
}
// Try to repair invalid backslashes (common in Windows paths output by models)
repaired := repairInvalidJSONBackslashes(raw)
if repaired != raw {
if err := json.Unmarshal([]byte(repaired), &parsed); err == nil && parsed != nil {
return parsed
}
}
return map[string]any{"_raw": raw}
default:
b, err := json.Marshal(x)
@@ -277,3 +284,51 @@ func parseToolCallInput(v any) map[string]any {
return map[string]any{}
}
}
func repairInvalidJSONBackslashes(s string) string {
if !strings.Contains(s, "\\") {
return s
}
var out strings.Builder
out.Grow(len(s) + 10)
runes := []rune(s)
for i := 0; i < len(runes); i++ {
if runes[i] == '\\' {
if i+1 < len(runes) {
next := runes[i+1]
switch next {
case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
out.WriteRune('\\')
out.WriteRune(next)
i++
continue
case 'u':
if i+5 < len(runes) {
isHex := true
for j := 1; j <= 4; j++ {
r := runes[i+1+j]
if !((r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')) {
isHex = false
break
}
}
if isHex {
out.WriteRune('\\')
out.WriteRune('u')
for j := 1; j <= 4; j++ {
out.WriteRune(runes[i+1+j])
}
i += 5
continue
}
}
}
}
// Not a valid escape sequence, double it
out.WriteString("\\\\")
} else {
out.WriteRune(runes[i])
}
}
return out.String()
}

View File

@@ -279,3 +279,45 @@ func TestParseToolCallsDoesNotAcceptMismatchedMarkupTags(t *testing.T) {
t.Fatalf("expected mismatched tags to be rejected, got %#v", calls)
}
}
func TestRepairInvalidJSONBackslashes(t *testing.T) {
tests := []struct {
input string
expected string
}{
{`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\name"}`},
{`{"cmd": "cd D:\git_codes"}`, `{"cmd": "cd D:\\git_codes"}`},
{`{"text": "line1\nline2"}`, `{"text": "line1\nline2"}`},
{`{"path": "D:\\back\\slash"}`, `{"path": "D:\\back\\slash"}`},
{`{"unicode": "\u2705"}`, `{"unicode": "\u2705"}`},
{`{"invalid_u": "\u123"}`, `{"invalid_u": "\\u123"}`},
}
for _, tt := range tests {
got := repairInvalidJSONBackslashes(tt.input)
if got != tt.expected {
t.Errorf("repairInvalidJSONBackslashes(%s) = %s; want %s", tt.input, got, tt.expected)
}
}
}
func TestParseToolCallsWithInvalidBackslashes(t *testing.T) {
// DeepSeek sometimes outputs Windows paths with single backslashes in JSON strings
text := `好的,执行以下命令:{"name": "execute_command", "input": "{\"command\": \"cd D:\git_codes && dir\"}"}`
availableTools := []string{"execute_command"}
parsed := ParseToolCalls(text, availableTools)
if len(parsed) != 1 {
t.Fatalf("expected 1 tool call, got %d", len(parsed))
}
cmd, ok := parsed[0].Input["command"].(string)
if !ok {
t.Fatalf("expected command string in input, got %v", parsed[0].Input)
}
expected := "cd D:\\git_codes && dir"
if cmd != expected {
t.Errorf("expected command %q, got %q", expected, cmd)
}
}

77
tests/repair_json_tool.go Normal file
View File

@@ -0,0 +1,77 @@
package main
import (
"fmt"
"strings"
)
func repairInvalidJSONBackslashes(s string) string {
if !strings.Contains(s, "\\") {
return s
}
var out strings.Builder
out.Grow(len(s) + 10)
runes := []rune(s)
for i := 0; i < len(runes); i++ {
if runes[i] == '\\' {
if i+1 < len(runes) {
next := runes[i+1]
switch next {
case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
out.WriteRune('\\')
out.WriteRune(next)
i++
continue
case 'u':
if i+5 < len(runes) {
isHex := true
for j := 1; j <= 4; j++ {
r := runes[i+1+j]
if !((r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')) {
isHex = false
break
}
}
if isHex {
out.WriteRune('\\')
out.WriteRune('u')
for j := 1; j <= 4; j++ {
out.WriteRune(runes[i+1+j])
}
i += 5
continue
}
}
}
}
// Not a valid escape sequence, double it
out.WriteString("\\\\")
} else {
out.WriteRune(runes[i])
}
}
return out.String()
}
func main() {
tests := []struct {
input string
expected string
}{
{`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\\name"}`},
{`{"cmd": "cd D:\git_codes"}`, `{"cmd": "cd D:\\git_codes"}`},
{`{"text": "line1\nline2"}`, `{"text": "line1\nline2"}`},
{`{"path": "D:\\back\\slash"}`, `{"path": "D:\\back\\slash"}`},
{`{"unicode": "\u2705"}`, `{"unicode": "\u2705"}`},
{`{"invalid_u": "\u123"}`, `{"invalid_u": "\\u123"}`},
}
for _, tt := range tests {
got := repairInvalidJSONBackslashes(tt.input)
if got != tt.expected {
fmt.Printf("FAIL: input=%s\n got=%s\n exp=%s\n", tt.input, got, tt.expected)
} else {
fmt.Printf("PASS: input=%s\n", tt.input)
}
}
}