From d27e700c4fa762dbb6daba248880717784656ca9 Mon Sep 17 00:00:00 2001
From: valkryhx <283306449@qq..com>
Date: Fri, 6 Mar 2026 23:22:11 +0800
Subject: [PATCH 1/2] =?UTF-8?q?update=20openai=20function=20calling=20?=
 =?UTF-8?q?=E6=88=90=E5=8A=9F=E7=8E=87=E9=AB=98=20=E6=98=AF=E5=9B=A0?=
 =?UTF-8?q?=E4=B8=BAchat=E5=86=85=E5=AE=B9=E5=92=8Ctool=E5=86=85=E5=AE=B9?=
 =?UTF-8?q?=E5=88=86=E5=BC=80=E4=BF=9D=E5=AD=98=EF=BC=8C=E8=80=8Cds?=
 =?UTF-8?q?=E5=88=99=E6=B7=B7=E5=90=88=E4=BA=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 misc/deepseek_functioncalling_bug/report.md | 101 ++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 misc/deepseek_functioncalling_bug/report.md

diff --git a/misc/deepseek_functioncalling_bug/report.md b/misc/deepseek_functioncalling_bug/report.md
new file mode 100644
index 0000000..f723578
--- /dev/null
+++ b/misc/deepseek_functioncalling_bug/report.md
@@ -0,0 +1,101 @@
+# DeepSeek Function Calling 缺陷分析与 ds2api 的增强修复策略
+
+> **相关 PR**: #74 (代码核心实现) 与 #75 (Merge to dev)
+> **问题背景**: 解决因包括 DeepSeek 在内的部分模型在函数调用（Function Calling/Tool Call）表现不够“规范”，从而导致工具调用失败的问题。
+
+## 一、底层架构对比：为什么会产生 Function Calling 缺陷？
+
+在探讨缺陷前，我们需要理解两种 Function Calling 的底层结构差异：
+
+### 1. OpenAI 的原生结构化返回 (API 级分离)
+在 OpenAI 的规范中，**聊天文字与工具调用是在底层的 JSON 结构中被硬性拆分的**：
+* 聊天废话存放在 `response.choices[0].message.content` 里。
+* 工具请求存放在单独的数组 `response.choices[0].message.tool_calls` 里。
+
+**优势：** 这种设计对客户端极其友好。客户端只需判断 `tool_calls` 是否为空，就能决定是执行代码还是渲染文字。它支持同时并发多个工具请求，且底层的生成引擎被严格训练和约束，极少抛出语法错误的 JSON。
+
+### 2. DeepSeek 等模型的“单文本流”机制
+相比之下，部分未经深度专门微调的模型（或者在特定的通信适配层中），它们依然倾向于把一切内容打包成一个纯文本流吐出。这就是为什么它们的输出往往不仅包含了本该属于 `tool_calls` 结构里的 JSON，还会像个“老实人”一样夹杂了属于 `content` 里的散文。
+
+---
+
+## 二、DeepSeek 在 Function Calling 上的特定缺陷表现
+
+相比于 OpenAI 严格遵循 API 约定的原生结构，DeepSeek 等开源/国产推理模型在工具调用时，经常会暴露出以下三种典型的“不守规矩”的输出行为：
+
+### 1. 混合输出：散文文本与工具 JSON 混杂 (Mixed Prose Streams)
+当应用要求模型直接返回工具请求时，DeepSeek 有时候会“**忍不住想和用户搭话**”。
+它常常前置一段解释性废话，中间插入工具调用的 JSON 参数，并在末尾再补上一句总结：
+```text
+好的，我这就帮你读取 README.md 的内容：
+{"tool_calls":[{"name":"read_file","input":{"path":"README.md"}}]}
+请稍等片刻，我马上把它读出来。
+```
+**旧版系统痛点：**
+原有的代码存在**严格模式**（Strict Mode）校验：
+```go
+// 如果解析到的 JSON 块前后存在任何非空字符串，就放弃当作工具调用！
+if strings.TrimSpace(state.recentTextTail) != "" || strings.TrimSpace(prefixPart) != "" ... {
+    return captured, nil, "", true
+}
+```
+这导致上述结构被网关认定为一段“普通聊天”，原封不动地返回给用户，后续的工具自动执行流程也就此中断。
+
+### 2. 工具名格式幻觉：擅自修改或前缀化工具名称
+由于 DeepSeek 的预训练数据中有大量的代码和不同的平台结构，它在回复工具名称时，常常无法忠实于 System Prompt 中提供的纯命名（也就是 `name: "read_file"`），而是加上前缀或者拼写变形，例如：
+* `{"name": "mcp.search_web"}` （自带命名空间）
+* `{"name": "tools.read_file"}`
+* `{"name": "search-web"}` （下划线变成了中划线）
+
+**旧版系统痛点：**
+旧版系统对于工具名的匹配几乎只有“绝对相等”的字典级比对，只要差了一个字符或加了前缀，就会由于找不到合法工具而直接失败。
+
+### 3. Role 角色的非标准返回
+在部分工具通信流的响应中，返回的内容其所属的 `role` 没有被标准化处理，可能携带意料之外的属性，或是与下游严格比对出现冲突。
+
+---
+
+## 三、PR #74 的代码增强修复方案
+
+为了解决大模型这种自身的不规范行为，PR #74 在系统的中间层网关中引入了一个**极其包容的容错引擎**。它并不强制要求模型“改过自新”，而是主动做了以下三块增强：
+
+### 1. 从流中分离混合内容（废除 Strict Mode）
+修改了 `internal/adapter/openai/tool_sieve_core.go`。
+取消了前后包裹文本的拦截逻辑。当系统扫描到流式结构中有完整的 `{"tool_calls":...}` 时，它会将废话和 JSON 分发到不同的事件流中：
+```go
+if prefix != "" {
+    // 将前面的“好的，帮你读文件”剥离出来作为常规文本输出
+    state.noteText(prefix)
+    events = append(events, toolStreamEvent{Content: prefix})
+}
+// 捕获并拦截中间的工具请求，进行背后执行
+state.pendingToolCalls = calls
+```
+**效果：** 用户的屏幕上只能看到正常的文字交流，而后端的工具也会立刻挂载。
+
+### 2. 多级宽容匹配引擎 (Resolve Allowed Tool Name)
+在 `internal/util/toolcalls_parse.go` 中，新增了一个由严到松降级匹配的强大漏斗策略函数 `resolveAllowedToolName`：
+
+1. **绝对匹配**：和以前一样，`read_file` == `read_file`。
+2. **忽略大小写**：`Read_File` 算作合法。
+3. **命名空间抹除**：通过寻找最后一个 `.` 来剥离前缀，强制将 `mcp.search_web` 还原出真实的 `search_web`。
+4. **终极正则清洗**：
+   引入 ``var toolNameLoosePattern = regexp.MustCompile(`[^a-z0-9]+`)``。
+   这个正则剥离了字符串里所有的符号、空格、格式符。
+   将传入的 `read-file` 洗除符号成为 `readfile`，并去和系统中所有合法工具同样清洗后的版本进行比较。只要核心字母一致，即算作匹配成功。
+
+### 3. Role 归一化 (Normalize OpenAIRoleForPrompt)
+在 `internal/adapter/openai/responses_input_items.go` 等处，引入了特定的 `normalizeOpenAIRoleForPrompt(role)` 清洗，保证输入和传递给上游的 Role 枚举始终受控，消除了因意外的身份字段传参而导致的崩溃。
+
+---
+
+## 报告总结与 tool_sieve 的本质作用
+
+PR #74 / #75 并没有从模型本身开刀，而是基于**网关应足够健壮**的设计哲学。
+
+**其实整个增强实现，本质上实现了一个名为 `tool_sieve` (工具筛子) 的中间层网关。**
+面对 DeepSeek 这种吐出一团混合了聊天文字与 JSON 面团的“不标准”数据流，`tool_sieve` 就像一个勤劳的高精度筛子，不仅人工揉开了面团：
+1. 它把散文分拣出来，塞回标准结构的 `content` 字段去展示；
+2. 剥离并清洗出有瑕疵的 JSON 块，按照 OpenAI 的标准格式小心翼翼地放进 `tool_calls` 结构里去等待执行。
+
+这意味着，即便 AI 被配置了奇怪的回复设定、加粗了强调语言，甚至是犯了标点符号拼写小失误，**只要它输出了可以拼凑成工具指令的 JSON 核心单元，整个中继层就能将其挽救，并把正确的工具结果呈现给模型和用户**。这不仅修复了缺陷，更极大地增强了工具网关的通用性和鲁棒性。

From 0f1985af4a3b38609fdd730705ba82aa45017dfa Mon Sep 17 00:00:00 2001
From: huangxun <huangxun@cmdi.chinamobile.com>
Date: Mon, 9 Mar 2026 15:00:16 +0800
Subject: [PATCH 2/2] =?UTF-8?q?feat(util):=20=E5=A2=9E=E5=8A=A0=E5=AF=B9?=
 =?UTF-8?q?=E6=B7=B7=E6=9D=82=E6=96=87=E6=9C=AC=E4=B8=AD=20Tool=20Call=20?=
 =?UTF-8?q?=E7=9A=84=20fallback=20=E8=A7=A3=E6=9E=90=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 引入 parseTextKVToolCalls 解析器以处理混杂文本或带历史记录套壳（如 [TOOL_CALL_HISTORY]）输出的函数调用提取。
- 将其作为 JSON 和 XML 的 fallback 解析手段集成到主流程。
- 添加单元测试用例且更新相关语义说明文档。
---
 docs/toolcall-semantics.md             |  3 +-
 internal/util/toolcalls_parse.go       | 14 ++++++-
 internal/util/toolcalls_textkv.go      | 55 ++++++++++++++++++++++++++
 internal/util/toolcalls_textkv_test.go | 52 ++++++++++++++++++++++++
 4 files changed, 121 insertions(+), 3 deletions(-)
 create mode 100644 internal/util/toolcalls_textkv.go
 create mode 100644 internal/util/toolcalls_textkv_test.go

diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md
index 50165a9..b386762 100644
--- a/docs/toolcall-semantics.md
+++ b/docs/toolcall-semantics.md
@@ -19,7 +19,8 @@ This document defines the cross-runtime contract for `ParseToolCallsDetailed` /
    - first `{` to last `}` object slice.
 3. Parse each candidate in order:
    - JSON payload parser (`tool_calls`, list, single call object),
-   - markup parser (`<tool_call>`, `<function_call>`, `<invoke>`; supports attributes + nested fields).
+   - XML/Markup parser (`<tool_call>`, `<function_call>`, `<invoke>`; supports attributes + nested fields),
+   - Text KV fallback parser (`function.name: <name>` ... `function.arguments: {json}`).
 4. Stop at first candidate that yields at least one call.
 
 ## Name normalization policy
diff --git a/internal/util/toolcalls_parse.go b/internal/util/toolcalls_parse.go
index 53eac8e..d08ffe7 100644
--- a/internal/util/toolcalls_parse.go
+++ b/internal/util/toolcalls_parse.go
@@ -45,6 +45,9 @@ func ParseToolCallsDetailed(text string, availableToolNames []string) ToolCallPa
 		if len(tc) == 0 {
 			tc = parseMarkupToolCalls(candidate)
 		}
+		if len(tc) == 0 {
+			tc = parseTextKVToolCalls(candidate)
+		}
 		if len(tc) > 0 {
 			parsed = tc
 			result.SawToolCallSyntax = true
@@ -54,7 +57,10 @@ func ParseToolCallsDetailed(text string, availableToolNames []string) ToolCallPa
 	if len(parsed) == 0 {
 		parsed = parseXMLToolCalls(text)
 		if len(parsed) == 0 {
-			return result
+			parsed = parseTextKVToolCalls(text)
+			if len(parsed) == 0 {
+				return result
+			}
 		}
 		result.SawToolCallSyntax = true
 	}
@@ -93,6 +99,9 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string)
 		if len(parsed) == 0 {
 			parsed = parseMarkupToolCalls(candidate)
 		}
+		if len(parsed) == 0 {
+			parsed = parseTextKVToolCalls(candidate)
+		}
 		if len(parsed) > 0 {
 			result.SawToolCallSyntax = true
 			calls, rejectedNames := filterToolCallsDetailed(parsed, availableToolNames)
@@ -207,7 +216,8 @@ func looksLikeToolCallSyntax(text string) bool {
 	return strings.Contains(lower, "tool_calls") ||
 		strings.Contains(lower, "<tool_call") ||
 		strings.Contains(lower, "<function_call") ||
-		strings.Contains(lower, "<invoke")
+		strings.Contains(lower, "<invoke") ||
+		strings.Contains(lower, "function.name:")
 }
 
 func parseToolCallList(v any) []ParsedToolCall {
diff --git a/internal/util/toolcalls_textkv.go b/internal/util/toolcalls_textkv.go
new file mode 100644
index 0000000..d487507
--- /dev/null
+++ b/internal/util/toolcalls_textkv.go
@@ -0,0 +1,55 @@
+package util
+
+import (
+	"regexp"
+	"strings"
+)
+
+var textKVNamePattern = regexp.MustCompile(`(?is)function\.name:\s*([a-zA-Z0-9_\-.]+)`)
+
+func parseTextKVToolCalls(text string) []ParsedToolCall {
+	var out []ParsedToolCall // calls recovered from "function.name: <name>" ... "function.arguments: {json}" pairs in text
+	matches := textKVNamePattern.FindAllStringSubmatchIndex(text, -1)
+	if len(matches) == 0 {
+		return nil
+	}
+	// Each name match opens one candidate call; a candidate with no arguments marker or no JSON object is skipped.
+	for i, match := range matches {
+		name := text[match[2]:match[3]] // capture group 1 of textKVNamePattern: the tool name
+		// The arguments are searched for between the end of this name match and the start of the next one (or end of text).
+		offset := match[1]
+		endSearch := len(text)
+		if i+1 < len(matches) {
+			endSearch = matches[i+1][0]
+		}
+		// NOTE(review): this marker lookup is case-sensitive, while textKVNamePattern matches "function.name:" case-insensitively.
+		searchArea := text[offset:endSearch]
+		argIdx := strings.Index(searchArea, "function.arguments:")
+		if argIdx < 0 {
+			continue
+		}
+		// The arguments object is taken to start at the first '{' after the marker, still inside this call's window.
+		startIdx := offset + argIdx + len("function.arguments:")
+		braceIdx := strings.IndexByte(text[startIdx:endSearch], '{')
+		if braceIdx < 0 {
+			continue
+		}
+		// NOTE(review): extractJSONObject is given the full text, not the window, so endSearch does not bound it here; confirm it cannot run into the next call.
+		actualStart := startIdx + braceIdx
+		objJson, _, ok := extractJSONObject(text, actualStart)
+		if !ok {
+			continue
+		}
+		// NOTE(review): parseToolCallInput presumably decodes the JSON object text into the Input value; confirm against its definition.
+		input := parseToolCallInput(objJson)
+		out = append(out, ParsedToolCall{
+			Name:  name,
+			Input: input,
+		})
+	}
+	// out is still nil when nothing was appended; the explicit nil return below only makes that outcome visible.
+	if len(out) == 0 {
+		return nil
+	}
+	return out
+}
diff --git a/internal/util/toolcalls_textkv_test.go b/internal/util/toolcalls_textkv_test.go
new file mode 100644
index 0000000..c32d9f2
--- /dev/null
+++ b/internal/util/toolcalls_textkv_test.go
@@ -0,0 +1,52 @@
+package util
+
+import (
+	"testing"
+)
+
+func TestParseTextKVToolCalls_Basic(t *testing.T) {
+	text := `
+[TOOL_CALL_HISTORY]
+status: already_called
+origin: assistant
+not_user_input: true
+tool_call_id: call_3fcd15235eb94f7eae3a8de5a9cfa36b
+function.name: execute_command
+function.arguments: {"command":"cd scripts && python check_syntax.py example.py","cwd":null,"timeout":30}
+[/TOOL_CALL_HISTORY]
+
+Some other text thinking...
+`
+	calls := ParseToolCalls(text, []string{"execute_command"}) // fixture wraps one KV pair in a [TOOL_CALL_HISTORY] block, with unrelated lines before it and prose after it
+	if len(calls) != 1 {
+		t.Fatalf("expected 1 call, got %d", len(calls))
+	}
+	if calls[0].Name != "execute_command" {
+		t.Fatalf("unexpected name: %s", calls[0].Name)
+	}
+	if calls[0].Input["command"] != "cd scripts && python check_syntax.py example.py" { // the arguments JSON must have been decoded, not just located
+		t.Fatalf("unexpected command arg: %v", calls[0].Input["command"])
+	}
+}
+
+func TestParseTextKVToolCalls_Multiple(t *testing.T) {
+	text := `
+function.name: read_file
+function.arguments: {
+	"path": "abc.txt"
+}
+
+function.name: bash
+function.arguments: {"command": "ls"}
+`
+	calls := ParseToolCalls(text, []string{"read_file", "bash"}) // fixture has two KV pairs: the first arguments object spans several lines, the second is on one line
+	if len(calls) != 2 {
+		t.Fatalf("expected 2 calls, got %d", len(calls))
+	}
+	if calls[0].Name != "read_file" {
+		t.Fatalf("unexpected 1st name: %s", calls[0].Name)
+	}
+	if calls[1].Name != "bash" {
+		t.Fatalf("unexpected 2nd name: %s", calls[1].Name)
+	}
+}