mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-02 07:25:26 +08:00
Merge pull request #99 from CJackHwang/codex/refactor-toolcalls_parse.go-for-line-limits
Codex-generated pull request
This commit is contained in:
@@ -363,6 +363,8 @@ cp opencode.json.example opencode.json
|
||||
3. 未在 `tools` 声明中的工具名会被严格拒绝,不会下发为有效 tool call
|
||||
4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed`
|
||||
5. 仅在通过策略校验后才会发出有效工具调用事件,避免错误工具名进入客户端执行链
|
||||
6. strict 模式下采用“可解析即拦截”:即使 tool JSON 前后混有 prose,只要结构可提取仍会拦截 tool_calls,剩余文本继续透传
|
||||
7. 当参数字符串无法可靠修复为对象时,会保留 `{"_raw":"..."}` 回退,避免 silent corruption
|
||||
|
||||
## 本地开发抓包工具
|
||||
|
||||
|
||||
@@ -200,6 +200,13 @@ go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/
|
||||
# 2. 查看测试输出中的详细调试信息
|
||||
go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/ 2>&1
|
||||
|
||||
# 2.1 strict 模式(Go/JS)语义对齐检查:混合 prose + tool JSON 仍可拦截
|
||||
node --test tests/node/stream-tool-sieve.test.js
|
||||
|
||||
# 2.2 Windows 路径与文本换行语义回归
|
||||
go test -v -run TestParseToolCallsWithInvalidBackslashes ./internal/util/
|
||||
go test -v -run TestParseToolCallsWithPathEscapesAndTextNewlines ./internal/util/
|
||||
|
||||
# 3. 检查具体测试用例的修复效果
|
||||
# 测试用例位于 internal/util/toolcalls_test.go,包含:
|
||||
# - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
import "strings"
|
||||
|
||||
type ParsedToolCall struct {
|
||||
Name string `json:"name"`
|
||||
@@ -84,31 +80,26 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string)
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(trimmed)
|
||||
candidates := []string{trimmed}
|
||||
for _, candidate := range candidates {
|
||||
candidate = strings.TrimSpace(candidate)
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
parsed := parseToolCallsPayload(candidate)
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseXMLToolCalls(candidate)
|
||||
}
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseMarkupToolCalls(candidate)
|
||||
}
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseTextKVToolCalls(candidate)
|
||||
}
|
||||
if len(parsed) > 0 {
|
||||
result.SawToolCallSyntax = true
|
||||
calls, rejectedNames := filterToolCallsDetailed(parsed, availableToolNames)
|
||||
result.Calls = calls
|
||||
result.RejectedToolNames = rejectedNames
|
||||
result.RejectedByPolicy = len(rejectedNames) > 0 && len(calls) == 0
|
||||
return result
|
||||
}
|
||||
|
||||
parsed := parseToolCallsPayload(trimmed)
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseXMLToolCalls(trimmed)
|
||||
}
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseMarkupToolCalls(trimmed)
|
||||
}
|
||||
if len(parsed) == 0 {
|
||||
parsed = parseTextKVToolCalls(trimmed)
|
||||
}
|
||||
if len(parsed) == 0 {
|
||||
return result
|
||||
}
|
||||
|
||||
result.SawToolCallSyntax = true
|
||||
calls, rejectedNames := filterToolCallsDetailed(parsed, availableToolNames)
|
||||
result.Calls = calls
|
||||
result.RejectedToolNames = rejectedNames
|
||||
result.RejectedByPolicy = len(rejectedNames) > 0 && len(calls) == 0
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -141,6 +132,7 @@ func filterToolCallsDetailed(parsed []ParsedToolCall, availableToolNames []strin
|
||||
}
|
||||
return nil, rejected
|
||||
}
|
||||
|
||||
out := make([]ParsedToolCall, 0, len(parsed))
|
||||
rejectedSet := map[string]struct{}{}
|
||||
rejected := make([]string, 0)
|
||||
@@ -169,31 +161,6 @@ func resolveAllowedToolName(name string, allowed map[string]struct{}, allowedCan
|
||||
return resolveAllowedToolNameWithLooseMatch(name, allowed, allowedCanonical)
|
||||
}
|
||||
|
||||
func parseToolCallsPayload(payload string) []ParsedToolCall {
|
||||
var decoded any
|
||||
if err := json.Unmarshal([]byte(payload), &decoded); err != nil {
|
||||
// Try to repair backslashes first! Because LLMs often mix these two problems.
|
||||
repaired := repairInvalidJSONBackslashes(payload)
|
||||
// Try loose repair on top of that
|
||||
repaired = RepairLooseJSON(repaired)
|
||||
if err := json.Unmarshal([]byte(repaired), &decoded); err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
switch v := decoded.(type) {
|
||||
case map[string]any:
|
||||
if tc, ok := v["tool_calls"]; ok {
|
||||
return parseToolCallList(tc)
|
||||
}
|
||||
if parsed, ok := parseToolCallItem(v); ok {
|
||||
return []ParsedToolCall{parsed}
|
||||
}
|
||||
case []any:
|
||||
return parseToolCallList(v)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func looksLikeToolCallSyntax(text string) bool {
|
||||
lower := strings.ToLower(text)
|
||||
return strings.Contains(lower, "tool_calls") ||
|
||||
@@ -202,172 +169,3 @@ func looksLikeToolCallSyntax(text string) bool {
|
||||
strings.Contains(lower, "<invoke") ||
|
||||
strings.Contains(lower, "function.name:")
|
||||
}
|
||||
|
||||
func parseToolCallList(v any) []ParsedToolCall {
|
||||
items, ok := v.([]any)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
out := make([]ParsedToolCall, 0, len(items))
|
||||
for _, item := range items {
|
||||
m, ok := item.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if tc, ok := parseToolCallItem(m); ok {
|
||||
out = append(out, tc)
|
||||
}
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func parseToolCallItem(m map[string]any) (ParsedToolCall, bool) {
|
||||
name, _ := m["name"].(string)
|
||||
inputRaw, hasInput := m["input"]
|
||||
if fn, ok := m["function"].(map[string]any); ok {
|
||||
if name == "" {
|
||||
name, _ = fn["name"].(string)
|
||||
}
|
||||
if !hasInput {
|
||||
if v, ok := fn["arguments"]; ok {
|
||||
inputRaw = v
|
||||
hasInput = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !hasInput {
|
||||
for _, key := range []string{"arguments", "args", "parameters", "params"} {
|
||||
if v, ok := m[key]; ok {
|
||||
inputRaw = v
|
||||
hasInput = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if strings.TrimSpace(name) == "" {
|
||||
return ParsedToolCall{}, false
|
||||
}
|
||||
return ParsedToolCall{
|
||||
Name: strings.TrimSpace(name),
|
||||
Input: parseToolCallInput(inputRaw),
|
||||
}, true
|
||||
}
|
||||
|
||||
func parseToolCallInput(v any) map[string]any {
|
||||
switch x := v.(type) {
|
||||
case nil:
|
||||
return map[string]any{}
|
||||
case map[string]any:
|
||||
return x
|
||||
case string:
|
||||
raw := strings.TrimSpace(x)
|
||||
if raw == "" {
|
||||
return map[string]any{}
|
||||
}
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal([]byte(raw), &parsed); err == nil && parsed != nil {
|
||||
return parsed
|
||||
}
|
||||
// Try to repair invalid backslashes (common in Windows paths output by models)
|
||||
repaired := repairInvalidJSONBackslashes(raw)
|
||||
if repaired != raw {
|
||||
if err := json.Unmarshal([]byte(repaired), &parsed); err == nil && parsed != nil {
|
||||
return parsed
|
||||
}
|
||||
}
|
||||
// Try to repair loose JSON in string argument as well
|
||||
repairedLoose := RepairLooseJSON(raw)
|
||||
if repairedLoose != raw {
|
||||
if err := json.Unmarshal([]byte(repairedLoose), &parsed); err == nil && parsed != nil {
|
||||
return parsed
|
||||
}
|
||||
}
|
||||
return map[string]any{"_raw": raw}
|
||||
default:
|
||||
b, err := json.Marshal(x)
|
||||
if err != nil {
|
||||
return map[string]any{}
|
||||
}
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal(b, &parsed); err == nil && parsed != nil {
|
||||
return parsed
|
||||
}
|
||||
return map[string]any{}
|
||||
}
|
||||
}
|
||||
|
||||
// repairInvalidJSONBackslashes doubles every backslash that does not start a
// valid JSON escape sequence. Valid escapes (\" \\ \/ \b \f \n \r \t and
// \u followed by four hex digits) are copied through unchanged. Input
// without any backslash is returned as-is.
func repairInvalidJSONBackslashes(s string) string {
	if strings.IndexByte(s, '\\') < 0 {
		return s
	}

	isHexDigit := func(r rune) bool {
		return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')
	}

	chars := []rune(s)
	var b strings.Builder
	b.Grow(len(s) + 10)

	pos := 0
	for pos < len(chars) {
		cur := chars[pos]
		if cur != '\\' {
			b.WriteRune(cur)
			pos++
			continue
		}

		// keep is the number of runes after the backslash that form a valid
		// escape together with it; zero means the backslash is stray.
		keep := 0
		if pos+1 < len(chars) {
			switch chars[pos+1] {
			case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
				keep = 1
			case 'u':
				if pos+5 < len(chars) &&
					isHexDigit(chars[pos+2]) && isHexDigit(chars[pos+3]) &&
					isHexDigit(chars[pos+4]) && isHexDigit(chars[pos+5]) {
					keep = 5
				}
			}
		}

		if keep == 0 {
			b.WriteString(`\\`)
			pos++
			continue
		}
		for _, r := range chars[pos : pos+1+keep] {
			b.WriteRune(r)
		}
		pos += 1 + keep
	}
	return b.String()
}
|
||||
|
||||
var unquotedKeyPattern = regexp.MustCompile(`([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:`)
|
||||
|
||||
// missingArrayBracketsPattern identifies a sequence of two or more JSON objects separated by commas
|
||||
// that immediately follow a colon, which indicates a missing array bracket `[` `]`.
|
||||
// E.g., "key": {"a": 1}, {"b": 2} -> "key": [{"a": 1}, {"b": 2}]
|
||||
// NOTE: The pattern uses (?:[^{}]|\{[^{}]*\})* to support single-level nested {} objects,
|
||||
// which handles cases like {"content": "x", "input": {"q": "y"}}
|
||||
var missingArrayBracketsPattern = regexp.MustCompile(`(:\s*)(\{(?:[^{}]|\{[^{}]*\})*\}(?:\s*,\s*\{(?:[^{}]|\{[^{}]*\})*\})+)`)
|
||||
|
||||
func RepairLooseJSON(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return s
|
||||
}
|
||||
// 1. Replace unquoted keys: {key: -> {"key":
|
||||
s = unquotedKeyPattern.ReplaceAllString(s, `$1"$2":`)
|
||||
|
||||
// 2. Heuristic: Fix missing array brackets for list of objects
|
||||
// e.g., : {obj1}, {obj2} -> : [{obj1}, {obj2}]
|
||||
// This specifically addresses DeepSeek's "list hallucination"
|
||||
s = missingArrayBracketsPattern.ReplaceAllString(s, `$1[$2]`)
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
185
internal/util/toolcalls_parse_payload.go
Normal file
185
internal/util/toolcalls_parse_payload.go
Normal file
@@ -0,0 +1,185 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func parseToolCallsPayload(payload string) []ParsedToolCall {
|
||||
var decoded any
|
||||
if err := json.Unmarshal([]byte(payload), &decoded); err != nil {
|
||||
repaired := repairInvalidJSONBackslashesWithPathContext(payload)
|
||||
repaired = RepairLooseJSON(repaired)
|
||||
if err := json.Unmarshal([]byte(repaired), &decoded); err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
switch v := decoded.(type) {
|
||||
case map[string]any:
|
||||
if tc, ok := v["tool_calls"]; ok {
|
||||
return parseToolCallList(tc)
|
||||
}
|
||||
if parsed, ok := parseToolCallItem(v); ok {
|
||||
return []ParsedToolCall{parsed}
|
||||
}
|
||||
case []any:
|
||||
return parseToolCallList(v)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseToolCallList(v any) []ParsedToolCall {
|
||||
items, ok := v.([]any)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
out := make([]ParsedToolCall, 0, len(items))
|
||||
for _, item := range items {
|
||||
m, ok := item.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if tc, ok := parseToolCallItem(m); ok {
|
||||
out = append(out, tc)
|
||||
}
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func parseToolCallItem(m map[string]any) (ParsedToolCall, bool) {
|
||||
name, _ := m["name"].(string)
|
||||
inputRaw, hasInput := m["input"]
|
||||
|
||||
if fn, ok := m["function"].(map[string]any); ok {
|
||||
if name == "" {
|
||||
name, _ = fn["name"].(string)
|
||||
}
|
||||
if !hasInput {
|
||||
if v, ok := fn["arguments"]; ok {
|
||||
inputRaw = v
|
||||
hasInput = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !hasInput {
|
||||
for _, key := range []string{"arguments", "args", "parameters", "params"} {
|
||||
if v, ok := m[key]; ok {
|
||||
inputRaw = v
|
||||
hasInput = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if strings.TrimSpace(name) == "" {
|
||||
return ParsedToolCall{}, false
|
||||
}
|
||||
return ParsedToolCall{
|
||||
Name: strings.TrimSpace(name),
|
||||
Input: parseToolCallInput(inputRaw),
|
||||
}, true
|
||||
}
|
||||
|
||||
func parseToolCallInput(v any) map[string]any {
|
||||
switch x := v.(type) {
|
||||
case nil:
|
||||
return map[string]any{}
|
||||
case map[string]any:
|
||||
return x
|
||||
case string:
|
||||
raw := strings.TrimSpace(x)
|
||||
if raw == "" {
|
||||
return map[string]any{}
|
||||
}
|
||||
|
||||
if parsed := decodeJSONObject(raw); parsed != nil {
|
||||
if hasSuspiciousPathControlChars(parsed) {
|
||||
repaired := repairInvalidJSONBackslashesWithPathContext(raw)
|
||||
if repaired != raw {
|
||||
if reparsed := decodeJSONObject(repaired); reparsed != nil {
|
||||
return reparsed
|
||||
}
|
||||
}
|
||||
}
|
||||
return parsed
|
||||
}
|
||||
|
||||
repaired := repairInvalidJSONBackslashesWithPathContext(raw)
|
||||
if repaired != raw {
|
||||
if reparsed := decodeJSONObject(repaired); reparsed != nil {
|
||||
return reparsed
|
||||
}
|
||||
}
|
||||
|
||||
repairedLoose := RepairLooseJSON(raw)
|
||||
if repairedLoose != raw {
|
||||
if reparsed := decodeJSONObject(repairedLoose); reparsed != nil {
|
||||
return reparsed
|
||||
}
|
||||
}
|
||||
return map[string]any{"_raw": raw}
|
||||
default:
|
||||
b, err := json.Marshal(x)
|
||||
if err != nil {
|
||||
return map[string]any{}
|
||||
}
|
||||
var parsed map[string]any
|
||||
if err := json.Unmarshal(b, &parsed); err == nil && parsed != nil {
|
||||
return parsed
|
||||
}
|
||||
return map[string]any{}
|
||||
}
|
||||
}
|
||||
|
||||
// decodeJSONObject decodes raw as a JSON object. It returns nil when raw is
// not valid JSON, is not an object, or is the JSON literal null.
func decodeJSONObject(raw string) map[string]any {
	var obj map[string]any
	err := json.Unmarshal([]byte(raw), &obj)
	if err != nil || obj == nil {
		return nil
	}
	return obj
}
|
||||
|
||||
func hasSuspiciousPathControlChars(v any) bool {
|
||||
switch x := v.(type) {
|
||||
case map[string]any:
|
||||
for key, value := range x {
|
||||
if isPathLikeKey(key) && hasControlCharsInString(value) {
|
||||
return true
|
||||
}
|
||||
if hasSuspiciousPathControlChars(value) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case []any:
|
||||
for _, item := range x {
|
||||
if hasSuspiciousPathControlChars(item) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isPathLikeKey reports whether key names a filesystem location. After
// trimming and lower-casing, the key matches when it equals one of the
// known words (path, file, filepath, filename, cwd, dir, directory), ends
// with "_<word>", or ends with "<word>_path".
func isPathLikeKey(key string) bool {
	normalized := strings.ToLower(strings.TrimSpace(key))
	if normalized == "" {
		return false
	}
	for _, word := range [...]string{"path", "file", "filepath", "filename", "cwd", "dir", "directory"} {
		switch {
		case normalized == word,
			strings.HasSuffix(normalized, "_"+word),
			strings.HasSuffix(normalized, word+"_path"):
			return true
		}
	}
	return false
}
|
||||
|
||||
// hasControlCharsInString reports whether v is a string containing one of
// the control characters that a JSON single-letter escape can produce:
// newline, carriage return, tab, backspace or form feed. Non-string values
// yield false.
//
// Backspace and form feed are included because an unescaped Windows path
// such as "D:\bin" or "C:\foo" is valid JSON that decodes "\b" / "\f" into
// those characters, exactly as "\t" in "D:\tmp" decodes into a tab.
func hasControlCharsInString(v any) bool {
	s, ok := v.(string)
	if !ok {
		return false
	}
	return strings.ContainsAny(s, "\n\r\t\b\f")
}
|
||||
276
internal/util/toolcalls_repair.go
Normal file
276
internal/util/toolcalls_repair.go
Normal file
@@ -0,0 +1,276 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var unquotedKeyPattern = regexp.MustCompile(`([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:`)
|
||||
|
||||
// fallback pattern for shallow objects; scanner-based repair runs first.
|
||||
var missingArrayBracketsPattern = regexp.MustCompile(`(:\s*)(\{(?:[^{}]|\{[^{}]*\})*\}(?:\s*,\s*\{(?:[^{}]|\{[^{}]*\})*\})+)`)
|
||||
|
||||
func repairInvalidJSONBackslashes(s string) string {
|
||||
return repairInvalidJSONBackslashesWithPathContext(s)
|
||||
}
|
||||
|
||||
func repairInvalidJSONBackslashesWithPathContext(s string) string {
|
||||
if !strings.Contains(s, "\\") {
|
||||
return s
|
||||
}
|
||||
var out strings.Builder
|
||||
out.Grow(len(s) + 10)
|
||||
|
||||
runes := []rune(s)
|
||||
pathKeyContext := buildPathKeyStringMask(runes)
|
||||
inString := false
|
||||
escaped := false
|
||||
stringStart := -1
|
||||
|
||||
for i := 0; i < len(runes); i++ {
|
||||
r := runes[i]
|
||||
if r == '"' && !escaped {
|
||||
inString = !inString
|
||||
if inString {
|
||||
stringStart = i
|
||||
} else {
|
||||
stringStart = -1
|
||||
}
|
||||
out.WriteRune(r)
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
if r == '\\' && inString {
|
||||
if i+1 < len(runes) {
|
||||
next := runes[i+1]
|
||||
if next == 'u' {
|
||||
if i+5 < len(runes) && isHex4(runes[i+2:i+6]) {
|
||||
out.WriteRune('\\')
|
||||
out.WriteRune('u')
|
||||
for _, hx := range runes[i+2 : i+6] {
|
||||
out.WriteRune(hx)
|
||||
}
|
||||
i += 5
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
} else if shouldKeepEscape(next, pathKeyContext[stringStart]) {
|
||||
out.WriteRune('\\')
|
||||
out.WriteRune(next)
|
||||
i++
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
out.WriteString("\\\\")
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
out.WriteRune(r)
|
||||
escaped = r == '\\' && !escaped
|
||||
if r != '\\' {
|
||||
escaped = false
|
||||
}
|
||||
}
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// shouldKeepEscape reports whether a backslash followed by next should be
// preserved as a JSON escape sequence rather than doubled into a literal
// backslash.
//
// Structural escapes (\" \\ \/) and \u are always kept. The single-letter
// control escapes (\b \f \n \r \t) are kept only outside path context:
// inside the value of a path-like key they are almost certainly Windows
// path separators ("C:\bin", "C:\foo", "D:\tmp"), so keeping them would
// decode into a control character and corrupt the path. Any other rune
// does not form a valid escape.
func shouldKeepEscape(next rune, inPathContext bool) bool {
	switch next {
	case '"', '\\', '/', 'u':
		return true
	case 'b', 'f', 'n', 'r', 't':
		return !inPathContext
	default:
		return false
	}
}
|
||||
|
||||
func buildPathKeyStringMask(runes []rune) map[int]bool {
|
||||
mask := map[int]bool{}
|
||||
inString := false
|
||||
escaped := false
|
||||
stringStart := -1
|
||||
var lastKey string
|
||||
|
||||
for i := 0; i < len(runes); i++ {
|
||||
r := runes[i]
|
||||
if !inString {
|
||||
if r == '"' {
|
||||
inString = true
|
||||
stringStart = i
|
||||
}
|
||||
continue
|
||||
}
|
||||
if escaped {
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
if r == '\\' {
|
||||
escaped = true
|
||||
continue
|
||||
}
|
||||
if r != '"' {
|
||||
continue
|
||||
}
|
||||
|
||||
value := string(runes[stringStart+1 : i])
|
||||
j := i + 1
|
||||
for j < len(runes) && (runes[j] == ' ' || runes[j] == '\n' || runes[j] == '\r' || runes[j] == '\t') {
|
||||
j++
|
||||
}
|
||||
if j < len(runes) && runes[j] == ':' {
|
||||
lastKey = strings.ToLower(strings.TrimSpace(value))
|
||||
} else if isPathLikeKey(lastKey) {
|
||||
mask[stringStart] = true
|
||||
}
|
||||
|
||||
inString = false
|
||||
stringStart = -1
|
||||
}
|
||||
return mask
|
||||
}
|
||||
|
||||
func RepairLooseJSON(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return s
|
||||
}
|
||||
s = unquotedKeyPattern.ReplaceAllString(s, `$1"$2":`)
|
||||
s = repairMissingArrayBracketsByScanner(s)
|
||||
return missingArrayBracketsPattern.ReplaceAllString(s, `$1[$2]`)
|
||||
}
|
||||
|
||||
// repairMissingArrayBracketsByScanner wraps a comma-separated run of two or
// more JSON objects that directly follows a colon in array brackets, e.g.
//
//	"todos": {"a":1}, {"b":2}  ->  "todos": [{"a":1}, {"b":2}]
//
// Objects may be nested to any depth. Colons that appear inside string
// literals are ignored, so string content that merely looks like
// ": {a}, {b}" is never rewritten. A single object after a colon is copied
// through untouched. An object that never closes ends the scan and the
// remaining text is copied verbatim. Empty input and input longer than
// 200,000 bytes are returned unchanged.
func repairMissingArrayBracketsByScanner(s string) string {
	const maxScanLen = 200_000
	if len(s) == 0 || len(s) > maxScanLen {
		return s
	}

	var out strings.Builder
	out.Grow(len(s) + 8)

	inString := false
	i := 0
	for i < len(s) {
		c := s[i]

		// Inside a string literal: copy bytes verbatim, honoring escapes, so
		// that a ':' within the literal cannot trigger a repair.
		if inString {
			out.WriteByte(c)
			i++
			if c == '\\' && i < len(s) {
				out.WriteByte(s[i])
				i++
			} else if c == '"' {
				inString = false
			}
			continue
		}

		if c != ':' {
			inString = c == '"'
			out.WriteByte(c)
			i++
			continue
		}

		out.WriteByte(':')
		i++
		for i < len(s) && isJSONWhitespace(s[i]) {
			out.WriteByte(s[i])
			i++
		}
		if i >= len(s) || s[i] != '{' {
			continue
		}

		start := i
		end := scanJSONObjectEnd(s, start)
		if end < 0 {
			out.WriteString(s[start:])
			break
		}

		// Extend the run across every following ", {object}".
		seqEnd := end
		for {
			comma := skipJSONWhitespace(s, seqEnd)
			if comma >= len(s) || s[comma] != ',' {
				break
			}
			objStart := skipJSONWhitespace(s, comma+1)
			if objStart >= len(s) || s[objStart] != '{' {
				break
			}
			objEnd := scanJSONObjectEnd(s, objStart)
			if objEnd < 0 {
				break
			}
			seqEnd = objEnd
		}

		if seqEnd == end {
			// Only one object: a regular object value, leave it alone.
			out.WriteString(s[start:end])
			i = end
			continue
		}

		out.WriteByte('[')
		out.WriteString(s[start:seqEnd])
		out.WriteByte(']')
		i = seqEnd
	}
	return out.String()
}

// scanJSONObjectEnd returns the index just past the '}' that closes the
// object opening at s[start], or -1 when the braces never balance. Braces
// inside string literals are ignored and a backslash inside a literal skips
// the next byte.
func scanJSONObjectEnd(s string, start int) int {
	depth := 0
	inString := false
	escaped := false
	for i := start; i < len(s); i++ {
		c := s[i]
		if inString {
			switch {
			case escaped:
				escaped = false
			case c == '\\':
				escaped = true
			case c == '"':
				inString = false
			}
			continue
		}
		switch c {
		case '"':
			inString = true
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return i + 1
			}
		}
	}
	return -1
}

// skipJSONWhitespace returns the first index at or after i whose byte is not
// JSON whitespace, or len(s) when only whitespace remains.
func skipJSONWhitespace(s string, i int) int {
	for i < len(s) && isJSONWhitespace(s[i]) {
		i++
	}
	return i
}

// isJSONWhitespace reports whether b is a space, newline, carriage return
// or tab.
func isJSONWhitespace(b byte) bool {
	return b == ' ' || b == '\n' || b == '\r' || b == '\t'
}
|
||||
|
||||
// isHex4 reports whether seq consists of exactly four ASCII hexadecimal
// digits (0-9, a-f, A-F).
func isHex4(seq []rune) bool {
	if len(seq) != 4 {
		return false
	}
	for _, r := range seq {
		switch {
		case '0' <= r && r <= '9':
		case 'a' <= r && r <= 'f':
		case 'A' <= r && r <= 'F':
		default:
			return false
		}
	}
	return true
}
|
||||
@@ -288,7 +288,7 @@ func TestRepairInvalidJSONBackslashes(t *testing.T) {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\name"}`},
|
||||
{`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\\name"}`},
|
||||
{`{"cmd": "cd D:\git_codes"}`, `{"cmd": "cd D:\\git_codes"}`},
|
||||
{`{"text": "line1\nline2"}`, `{"text": "line1\nline2"}`},
|
||||
{`{"path": "D:\\back\\slash"}`, `{"path": "D:\\back\\slash"}`},
|
||||
@@ -419,9 +419,29 @@ func TestParseToolCallsWithMixedWindowsPaths(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsWithPathEscapesAndTextNewlines(t *testing.T) {
|
||||
text := `{"name":"write_file","input":"{\"content\":\"line1\\nline2\",\"path\":\"D:\\tmp\\a.txt\"}"}`
|
||||
availableTools := []string{"write_file"}
|
||||
parsed := ParseToolCalls(text, availableTools)
|
||||
if len(parsed) != 1 {
|
||||
t.Fatalf("expected 1 parsed tool call, got %d", len(parsed))
|
||||
}
|
||||
|
||||
content, _ := parsed[0].Input["content"].(string)
|
||||
path, _ := parsed[0].Input["path"].(string)
|
||||
if !strings.Contains(content, "line1\nline2") {
|
||||
t.Fatalf("expected content to preserve newline semantics, got %q", content)
|
||||
}
|
||||
if strings.ContainsAny(path, "\n\r\t") {
|
||||
t.Fatalf("expected path to avoid control chars, got %q", path)
|
||||
}
|
||||
if !strings.Contains(path, `D:\tmp\a.txt`) {
|
||||
t.Fatalf("expected path with literal backslashes, got %q", path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRepairLooseJSONWithNestedObjects(t *testing.T) {
|
||||
// 测试嵌套对象的修复:DeepSeek 幻觉输出,每个元素内部包含嵌套 {}
|
||||
// 注意:正则只支持单层嵌套,不支持更深层次的嵌套
|
||||
// 覆盖深层嵌套对象的方括号修复,避免 regex 单层能力带来的漂移。
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
@@ -487,6 +507,11 @@ func TestRepairLooseJSONWithNestedObjects(t *testing.T) {
|
||||
input: `"tasks": {"id":1}, {"id":2}, {"id":3}, {"id":4}, {"id":5}`,
|
||||
expected: `"tasks": [{"id":1}, {"id":2}, {"id":3}, {"id":4}, {"id":5}]`,
|
||||
},
|
||||
{
|
||||
name: "深层嵌套对象",
|
||||
input: `"todos": {"meta":{"a":{"b":1}},"content":"x"}, {"meta":{"a":{"b":2}},"content":"y"}`,
|
||||
expected: `"todos": [{"meta":{"a":{"b":1}},"content":"x"}, {"meta":{"a":{"b":2}},"content":"y"}]`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
@@ -259,28 +259,28 @@ test('sieve emits final tool_calls for split arguments payload without increment
|
||||
assert.deepEqual(finalCalls[0].input, { path: 'README.MD', mode: 'head' });
|
||||
});
|
||||
|
||||
test('sieve keeps tool json as text when leading prose exists (strict mode)', () => {
|
||||
test('sieve intercepts tool json even when leading prose exists (strict mode)', () => {
|
||||
const events = runSieve(
|
||||
['我将调用工具。', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0));
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, false);
|
||||
assert.equal(hasTool, true);
|
||||
assert.equal(leakedText.includes('我将调用工具。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve keeps same-chunk trailing prose payload as text in strict mode', () => {
|
||||
test('sieve intercepts same-chunk payload once tool json is complete in strict mode', () => {
|
||||
const events = runSieve(
|
||||
['{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}然后继续解释。'],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => (evt.type === 'tool_calls' && evt.calls?.length > 0) || (evt.type === 'tool_call_deltas' && evt.deltas?.length > 0));
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, false);
|
||||
assert.equal(leakedText.includes('然后继续解释。'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
assert.equal(hasTool, true);
|
||||
assert.equal(leakedText.includes('然后继续解释。'), false);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
|
||||
|
||||
Reference in New Issue
Block a user