From 4d549b710246b6f8d8d82e429fc08f2bece03b55 Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Fri, 20 Mar 2026 01:38:11 +0800 Subject: [PATCH] Revert "Merge branch 'dev' into codex/fix-issues-found-in-review" This reverts commit 33b0d1d144257fbf4bf341029d1f9d2d4a766f15, reversing changes made to efb484ba4fa725789d7d3634156de0843253617f. --- README.MD | 2 - TESTING.md | 7 - internal/util/toolcalls_parse.go | 69 ++++-- internal/util/toolcalls_parse_payload.go | 185 --------------- internal/util/toolcalls_repair.go | 276 ----------------------- internal/util/toolcalls_test.go | 31 +-- 6 files changed, 52 insertions(+), 518 deletions(-) delete mode 100644 internal/util/toolcalls_parse_payload.go delete mode 100644 internal/util/toolcalls_repair.go diff --git a/README.MD b/README.MD index 636e693..6b9383a 100644 --- a/README.MD +++ b/README.MD @@ -363,8 +363,6 @@ cp opencode.json.example opencode.json 3. 未在 `tools` 声明中的工具名会被严格拒绝,不会下发为有效 tool call 4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed` 5. 仅在通过策略校验后才会发出有效工具调用事件,避免错误工具名进入客户端执行链 -6. strict 模式下采用“可解析即拦截”:即使 tool JSON 前后混有 prose,只要结构可提取仍会拦截 tool_calls,剩余文本继续透传 -7. 当参数字符串无法可靠修复为对象时,会保留 `{"_raw":"..."}` 回退,避免 silent corruption ## 本地开发抓包工具 diff --git a/TESTING.md b/TESTING.md index bf821fe..8d1a309 100644 --- a/TESTING.md +++ b/TESTING.md @@ -200,13 +200,6 @@ go test -v -run 'TestParseToolCalls|TestRepair' ./internal/util/ # 2. 查看测试输出中的详细调试信息 go test -v -run TestParseToolCallsWithDeepSeekHallucination ./internal/util/ 2>&1 -# 2.1 strict 模式(Go/JS)语义对齐检查:混合 prose + tool JSON 仍可拦截 -node --test tests/node/stream-tool-sieve.test.js - -# 2.2 Windows 路径与文本换行语义回归 -go test -v -run TestParseToolCallsWithInvalidBackslashes ./internal/util/ -go test -v -run TestParseToolCallsWithPathEscapesAndTextNewlines ./internal/util/ - # 3. 检查具体测试用例的修复效果 # 测试用例位于 internal/util/toolcalls_test.go,包含: # - TestParseToolCallsWithDeepSeekHallucination: DeepSeek 典型幻觉输出 diff --git a/internal/util/toolcalls_parse.go b/internal/util/toolcalls_parse.go index 82bb079..726009c 100644 --- a/internal/util/toolcalls_parse.go +++ b/internal/util/toolcalls_parse.go @@ -83,26 +83,31 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string) return result } result.SawToolCallSyntax = looksLikeToolCallSyntax(trimmed) - - parsed := parseToolCallsPayload(trimmed) - if len(parsed) == 0 { - parsed = parseXMLToolCalls(trimmed) + candidates := []string{trimmed} + for _, candidate := range candidates { + candidate = strings.TrimSpace(candidate) + if candidate == "" { + continue + } + parsed := parseToolCallsPayload(candidate) + if len(parsed) == 0 { + parsed = parseXMLToolCalls(candidate) + } + if len(parsed) == 0 { + parsed = parseMarkupToolCalls(candidate) + } + if len(parsed) == 0 { + parsed = parseTextKVToolCalls(candidate) + } + if len(parsed) > 0 { + result.SawToolCallSyntax = true + calls, rejectedNames := filterToolCallsDetailed(parsed, availableToolNames) + result.Calls = calls + result.RejectedToolNames = rejectedNames + result.RejectedByPolicy = len(rejectedNames) > 0 && len(calls) == 0 + return result + } } - if len(parsed) == 0 { - parsed = parseMarkupToolCalls(trimmed) - } - if len(parsed) == 0 { - parsed = parseTextKVToolCalls(trimmed) - } - if len(parsed) == 0 { - return result - } - - result.SawToolCallSyntax = true - calls, rejectedNames := filterToolCallsDetailed(parsed, availableToolNames) - result.Calls = calls - result.RejectedToolNames = rejectedNames - result.RejectedByPolicy = len(rejectedNames) > 0 && len(calls) == 0 return result } @@ -135,7 +140,6 @@ func filterToolCallsDetailed(parsed []ParsedToolCall, availableToolNames []strin } return nil, rejected } - out := make([]ParsedToolCall, 0, len(parsed)) rejectedSet := map[string]struct{}{} rejected := make([]string, 0) @@ -164,6 +168,31 @@ func resolveAllowedToolName(name string, allowed map[string]struct{}, allowedCan return resolveAllowedToolNameWithLooseMatch(name, allowed, allowedCanonical) } +func parseToolCallsPayload(payload string) []ParsedToolCall { + var decoded any + if err := json.Unmarshal([]byte(payload), &decoded); err != nil { + // Try to repair backslashes first! Because LLMs often mix these two problems. + repaired := repairInvalidJSONBackslashes(payload) + // Try loose repair on top of that + repaired = RepairLooseJSON(repaired) + if err := json.Unmarshal([]byte(repaired), &decoded); err != nil { + return nil + } + } + switch v := decoded.(type) { + case map[string]any: + if tc, ok := v["tool_calls"]; ok { + return parseToolCallList(tc) + } + if parsed, ok := parseToolCallItem(v); ok { + return []ParsedToolCall{parsed} + } + case []any: + return parseToolCallList(v) + } + return nil +} + func looksLikeToolCallSyntax(text string) bool { lower := strings.ToLower(text) return strings.Contains(lower, "tool_calls") || diff --git a/internal/util/toolcalls_parse_payload.go b/internal/util/toolcalls_parse_payload.go deleted file mode 100644 index 5534e10..0000000 --- a/internal/util/toolcalls_parse_payload.go +++ /dev/null @@ -1,185 +0,0 @@ -package util - -import ( - "encoding/json" - "strings" -) - -func parseToolCallsPayload(payload string) []ParsedToolCall { - var decoded any - if err := json.Unmarshal([]byte(payload), &decoded); err != nil { - repaired := repairInvalidJSONBackslashesWithPathContext(payload) - repaired = RepairLooseJSON(repaired) - if err := json.Unmarshal([]byte(repaired), &decoded); err != nil { - return nil - } - } - - switch v := decoded.(type) { - case map[string]any: - if tc, ok := v["tool_calls"]; ok { - return parseToolCallList(tc) - } - if parsed, ok := parseToolCallItem(v); ok { - return []ParsedToolCall{parsed} - } - case []any: - return parseToolCallList(v) - } - return nil -} - -func parseToolCallList(v any) []ParsedToolCall { - items, ok := v.([]any) - if !ok { - return nil - } - out := make([]ParsedToolCall, 0, len(items)) - for _, item := range items { - m, ok := item.(map[string]any) - if !ok { - continue - } - if tc, ok := parseToolCallItem(m); ok { - out = append(out, tc) - } - } - if len(out) == 0 { - return nil - } - return out -} - -func parseToolCallItem(m map[string]any) (ParsedToolCall, bool) { - name, _ := m["name"].(string) - inputRaw, hasInput := m["input"] - - if fn, ok := m["function"].(map[string]any); ok { - if name == "" { - name, _ = fn["name"].(string) - } - if !hasInput { - if v, ok := fn["arguments"]; ok { - inputRaw = v - hasInput = true - } - } - } - if !hasInput { - for _, key := range []string{"arguments", "args", "parameters", "params"} { - if v, ok := m[key]; ok { - inputRaw = v - hasInput = true - break - } - } - } - if strings.TrimSpace(name) == "" { - return ParsedToolCall{}, false - } - return ParsedToolCall{ - Name: strings.TrimSpace(name), - Input: parseToolCallInput(inputRaw), - }, true -} - -func parseToolCallInput(v any) map[string]any { - switch x := v.(type) { - case nil: - return map[string]any{} - case map[string]any: - return x - case string: - raw := strings.TrimSpace(x) - if raw == "" { - return map[string]any{} - } - - if parsed := decodeJSONObject(raw); parsed != nil { - if hasSuspiciousPathControlChars(parsed) { - repaired := repairInvalidJSONBackslashesWithPathContext(raw) - if repaired != raw { - if reparsed := decodeJSONObject(repaired); reparsed != nil { - return reparsed - } - } - } - return parsed - } - - repaired := repairInvalidJSONBackslashesWithPathContext(raw) - if repaired != raw { - if reparsed := decodeJSONObject(repaired); reparsed != nil { - return reparsed - } - } - - repairedLoose := RepairLooseJSON(raw) - if repairedLoose != raw { - if reparsed := decodeJSONObject(repairedLoose); reparsed != nil { - return reparsed - } - } - return map[string]any{"_raw": raw} - default: - b, err := json.Marshal(x) - if err != nil { - return map[string]any{} - } - var parsed map[string]any - if err := json.Unmarshal(b, &parsed); err == nil && parsed != nil { - return parsed - } - return map[string]any{} - } -} - -func decodeJSONObject(raw string) map[string]any { - var parsed map[string]any - if err := json.Unmarshal([]byte(raw), &parsed); err == nil && parsed != nil { - return parsed - } - return nil -} - -func hasSuspiciousPathControlChars(v any) bool { - switch x := v.(type) { - case map[string]any: - for key, value := range x { - if isPathLikeKey(key) && hasControlCharsInString(value) { - return true - } - if hasSuspiciousPathControlChars(value) { - return true - } - } - case []any: - for _, item := range x { - if hasSuspiciousPathControlChars(item) { - return true - } - } - } - return false -} - -func isPathLikeKey(key string) bool { - lower := strings.ToLower(strings.TrimSpace(key)) - if lower == "" { - return false - } - for _, candidate := range []string{"path", "file", "filepath", "filename", "cwd", "dir", "directory"} { - if lower == candidate || strings.HasSuffix(lower, "_"+candidate) || strings.HasSuffix(lower, candidate+"_path") { - return true - } - } - return false -} - -func hasControlCharsInString(v any) bool { - s, ok := v.(string) - if !ok { - return false - } - return strings.ContainsAny(s, "\n\r\t") -} diff --git a/internal/util/toolcalls_repair.go b/internal/util/toolcalls_repair.go deleted file mode 100644 index 185cb45..0000000 --- a/internal/util/toolcalls_repair.go +++ /dev/null @@ -1,276 +0,0 @@ -package util - -import ( - "regexp" - "strings" -) - -var unquotedKeyPattern = regexp.MustCompile(`([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:`) - -// fallback pattern for shallow objects; scanner-based repair runs first. -var missingArrayBracketsPattern = regexp.MustCompile(`(:\s*)(\{(?:[^{}]|\{[^{}]*\})*\}(?:\s*,\s*\{(?:[^{}]|\{[^{}]*\})*\})+)`) - -func repairInvalidJSONBackslashes(s string) string { - return repairInvalidJSONBackslashesWithPathContext(s) -} - -func repairInvalidJSONBackslashesWithPathContext(s string) string { - if !strings.Contains(s, "\\") { - return s - } - var out strings.Builder - out.Grow(len(s) + 10) - - runes := []rune(s) - pathKeyContext := buildPathKeyStringMask(runes) - inString := false - escaped := false - stringStart := -1 - - for i := 0; i < len(runes); i++ { - r := runes[i] - if r == '"' && !escaped { - inString = !inString - if inString { - stringStart = i - } else { - stringStart = -1 - } - out.WriteRune(r) - escaped = false - continue - } - if r == '\\' && inString { - if i+1 < len(runes) { - next := runes[i+1] - if next == 'u' { - if i+5 < len(runes) && isHex4(runes[i+2:i+6]) { - out.WriteRune('\\') - out.WriteRune('u') - for _, hx := range runes[i+2 : i+6] { - out.WriteRune(hx) - } - i += 5 - escaped = false - continue - } - } else if shouldKeepEscape(next, pathKeyContext[stringStart]) { - out.WriteRune('\\') - out.WriteRune(next) - i++ - escaped = false - continue - } - } - out.WriteString("\\\\") - escaped = false - continue - } - out.WriteRune(r) - escaped = r == '\\' && !escaped - if r != '\\' { - escaped = false - } - } - return out.String() -} - -func shouldKeepEscape(next rune, inPathContext bool) bool { - switch next { - case '"', '\\', '/', 'b', 'f': - return true - case 'n', 'r', 't': - return !inPathContext - case 'u': - return true - default: - return false - } -} - -func buildPathKeyStringMask(runes []rune) map[int]bool { - mask := map[int]bool{} - inString := false - escaped := false - stringStart := -1 - var lastKey string - - for i := 0; i < len(runes); i++ { - r := runes[i] - if !inString { - if r == '"' { - inString = true - stringStart = i - } - continue - } - if escaped { - escaped = false - continue - } - if r == '\\' { - escaped = true - continue - } - if r != '"' { - continue - } - - value := string(runes[stringStart+1 : i]) - j := i + 1 - for j < len(runes) && (runes[j] == ' ' || runes[j] == '\n' || runes[j] == '\r' || runes[j] == '\t') { - j++ - } - if j < len(runes) && runes[j] == ':' { - lastKey = strings.ToLower(strings.TrimSpace(value)) - } else if isPathLikeKey(lastKey) { - mask[stringStart] = true - } - - inString = false - stringStart = -1 - } - return mask -} - -func RepairLooseJSON(s string) string { - s = strings.TrimSpace(s) - if s == "" { - return s - } - s = unquotedKeyPattern.ReplaceAllString(s, `$1"$2":`) - s = repairMissingArrayBracketsByScanner(s) - return missingArrayBracketsPattern.ReplaceAllString(s, `$1[$2]`) -} - -func repairMissingArrayBracketsByScanner(s string) string { - const maxScanLen = 200_000 - if len(s) == 0 || len(s) > maxScanLen { - return s - } - - var out strings.Builder - out.Grow(len(s) + 8) - i := 0 - for i < len(s) { - if s[i] != ':' { - out.WriteByte(s[i]) - i++ - continue - } - out.WriteByte(':') - i++ - for i < len(s) && isJSONWhitespace(s[i]) { - out.WriteByte(s[i]) - i++ - } - if i >= len(s) || s[i] != '{' { - continue - } - - start := i - end := scanJSONObjectEnd(s, start) - if end < 0 { - out.WriteString(s[start:]) - break - } - cursor := end - next := skipJSONWhitespace(s, cursor) - if next >= len(s) || s[next] != ',' { - out.WriteString(s[start:end]) - i = end - continue - } - - seqEnd := end - hasMultiple := false - for { - comma := skipJSONWhitespace(s, seqEnd) - if comma >= len(s) || s[comma] != ',' { - break - } - objStart := skipJSONWhitespace(s, comma+1) - if objStart >= len(s) || s[objStart] != '{' { - break - } - objEnd := scanJSONObjectEnd(s, objStart) - if objEnd < 0 { - break - } - hasMultiple = true - seqEnd = objEnd - } - if !hasMultiple { - out.WriteString(s[start:end]) - i = end - continue - } - - out.WriteByte('[') - out.WriteString(s[start:seqEnd]) - out.WriteByte(']') - i = seqEnd - } - return out.String() -} - -func scanJSONObjectEnd(s string, start int) int { - depth := 0 - inString := false - escaped := false - for i := start; i < len(s); i++ { - c := s[i] - if inString { - if escaped { - escaped = false - continue - } - if c == '\\' { - escaped = true - continue - } - if c == '"' { - inString = false - } - continue - } - if c == '"' { - inString = true - continue - } - if c == '{' { - depth++ - continue - } - if c == '}' { - depth-- - if depth == 0 { - return i + 1 - } - } - } - return -1 -} - -func skipJSONWhitespace(s string, i int) int { - for i < len(s) && isJSONWhitespace(s[i]) { - i++ - } - return i -} - -func isJSONWhitespace(b byte) bool { - return b == ' ' || b == '\n' || b == '\r' || b == '\t' -} - -func isHex4(seq []rune) bool { - if len(seq) != 4 { - return false - } - for _, r := range seq { - if !((r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')) { - return false - } - } - return true -} diff --git a/internal/util/toolcalls_test.go b/internal/util/toolcalls_test.go index 10458df..e3fae5d 100644 --- a/internal/util/toolcalls_test.go +++ b/internal/util/toolcalls_test.go @@ -288,7 +288,7 @@ func TestRepairInvalidJSONBackslashes(t *testing.T) { input string expected string }{ - {`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\\name"}`}, + {`{"path": "C:\Users\name"}`, `{"path": "C:\\Users\name"}`}, {`{"cmd": "cd D:\git_codes"}`, `{"cmd": "cd D:\\git_codes"}`}, {`{"text": "line1\nline2"}`, `{"text": "line1\nline2"}`}, {`{"path": "D:\\back\\slash"}`, `{"path": "D:\\back\\slash"}`}, @@ -419,29 +419,9 @@ func TestParseToolCallsWithMixedWindowsPaths(t *testing.T) { } } -func TestParseToolCallsWithPathEscapesAndTextNewlines(t *testing.T) { - text := `{"name":"write_file","input":"{\"content\":\"line1\\nline2\",\"path\":\"D:\\tmp\\a.txt\"}"}` - availableTools := []string{"write_file"} - parsed := ParseToolCalls(text, availableTools) - if len(parsed) != 1 { - t.Fatalf("expected 1 parsed tool call, got %d", len(parsed)) - } - - content, _ := parsed[0].Input["content"].(string) - path, _ := parsed[0].Input["path"].(string) - if !strings.Contains(content, "line1\nline2") { - t.Fatalf("expected content to preserve newline semantics, got %q", content) - } - if strings.ContainsAny(path, "\n\r\t") { - t.Fatalf("expected path to avoid control chars, got %q", path) - } - if !strings.Contains(path, `D:\tmp\a.txt`) { - t.Fatalf("expected path with literal backslashes, got %q", path) - } -} - func TestRepairLooseJSONWithNestedObjects(t *testing.T) { - // 覆盖深层嵌套对象的方括号修复,避免 regex 单层能力带来的漂移。 + // 测试嵌套对象的修复:DeepSeek 幻觉输出,每个元素内部包含嵌套 {} + // 注意:正则只支持单层嵌套,不支持更深层次的嵌套 tests := []struct { name string input string @@ -507,11 +487,6 @@ func TestRepairLooseJSONWithNestedObjects(t *testing.T) { input: `"tasks": {"id":1}, {"id":2}, {"id":3}, {"id":4}, {"id":5}`, expected: `"tasks": [{"id":1}, {"id":2}, {"id":3}, {"id":4}, {"id":5}]`, }, - { - name: "深层嵌套对象", - input: `"todos": {"meta":{"a":{"b":1}},"content":"x"}, {"meta":{"a":{"b":2}},"content":"y"}`, - expected: `"todos": [{"meta":{"a":{"b":1}},"content":"x"}, {"meta":{"a":{"b":2}},"content":"y"}]`, - }, } for _, tt := range tests {