From 70467054c3d8b6453464721b2411638875d7e0ce Mon Sep 17 00:00:00 2001 From: CJACK Date: Mon, 27 Apr 2026 15:06:44 +0800 Subject: [PATCH] fix: preserve partial-update fields for current_input_file and thinking_injection, expand DSML space-separator aliases - Guard current_input_file.enabled / thinking_injection.{enabled,prompt} with hasNestedSettingsKey so partial updates don't overwrite omitted fields - Expand DSML alias support to tolerate space-separated tags (e.g. <|dsml invoke>) alongside pipe-separated forms - Sync Go sieve, Node sieve, toolcall parser, and tests for all new DSML variants - Update API.md and toolcall-semantics.md with expanded alias coverage Co-Authored-By: Claude Opus 4.7 --- API.md | 4 +- docs/toolcall-semantics.md | 5 +- .../httpapi/admin/handler_settings_test.go | 92 +++++++++++++++++++ .../admin/settings/handler_settings_write.go | 31 ++++++- .../js/helpers/stream-tool-sieve/parse.js | 2 +- .../stream-tool-sieve/parse_payload.js | 12 +++ .../js/helpers/stream-tool-sieve/sieve-xml.js | 13 ++- .../stream-tool-sieve/tool-keywords.js | 8 ++ internal/toolcall/toolcalls_dsml.go | 12 +++ internal/toolcall/toolcalls_parse.go | 2 + internal/toolcall/toolcalls_test.go | 34 +++++++ internal/toolstream/complex_edge_test.go | 61 ++++++++++++ internal/toolstream/tool_sieve_xml.go | 62 ++++++++++--- internal/toolstream/tool_sieve_xml_tags.go | 10 +- tests/node/stream-tool-sieve.test.js | 40 ++++++++ 15 files changed, 361 insertions(+), 27 deletions(-) diff --git a/API.md b/API.md index 17a2f1e..65d9cb6 100644 --- a/API.md +++ b/API.md @@ -37,7 +37,7 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受 DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>` 以及旧式 canonical XML `` → `` → ``,内部仍以 XML 解析语义为准,并在流式场景执行防泄漏筛分。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受 DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`),以及旧式 canonical XML `` → `` → ``,内部仍以 XML 解析语义为准,并在流式场景执行防泄漏筛分。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 --- @@ -344,7 +344,7 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前把 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;DSML 会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。 +- 解析器当前把 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;DSML 会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理。 - 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index fe38c72..3466ce0 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -39,6 +39,7 @@ 兼容修复: - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 +- 如果模型把 DSML 标签里的分隔符 `|` 写漏成空格(例如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`,或无 leading pipe 的 `` 形态),Go / Node 会在固定工具标签名范围内归一化;相似但非工具标签名(如 `tool_calls_extra`)仍按普通文本处理。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 ## 2) 非兼容内容 @@ -51,7 +52,7 @@ 在流式链路中(Go / Node 一致): -- DSML `<|DSML|tool_calls>` wrapper 及其兼容变体(``、`<|tool_calls>`、`<|tool_calls>`)和 canonical `` wrapper 都会进入结构化捕获 +- DSML `<|DSML|tool_calls>` wrapper、兼容变体(``、`<|tool_calls>`、`<|tool_calls>`)、窄容错空格分隔形态(如 `<|DSML tool_calls>`)和 canonical `` wrapper 都会进入结构化捕获 - 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 - 已识别成功的工具调用不会再次回流到普通文本 - 不符合新格式的块不会执行,并继续按原样文本透传 @@ -87,7 +88,7 @@ node --test tests/node/stream-tool-sieve.test.js - DSML `<|DSML|tool_calls>` wrapper 正常解析 - legacy canonical `` wrapper 正常解析 -- 别名变体(``、`<|tool_calls>`、`<|tool_calls>`)正常解析 +- 别名变体(``、`<|tool_calls>`、`<|tool_calls>`)和 DSML 空格分隔 typo(如 `<|DSML tool_calls>`)正常解析 - 混搭标签(DSML wrapper + canonical inner)归一化后正常解析 - 波浪线围栏 `~~~` 内的示例不执行 - 嵌套围栏(4 反引号嵌套 3 反引号)内的示例不执行 diff --git a/internal/httpapi/admin/handler_settings_test.go b/internal/httpapi/admin/handler_settings_test.go index 9ca5ba5..fba6bd1 100644 --- a/internal/httpapi/admin/handler_settings_test.go +++ b/internal/httpapi/admin/handler_settings_test.go @@ -244,6 +244,52 @@ func TestUpdateSettingsCurrentInputFile(t *testing.T) { } } +func TestUpdateSettingsCurrentInputFilePartialUpdatePreservesEnabled(t *testing.T) { + h := newAdminTestHandler(t, `{"keys":["k1"],"current_input_file":{"enabled":false,"min_chars":777}}`) + payload := map[string]any{ + "current_input_file": map[string]any{ + "min_chars": 5000, + }, + } + b, _ := json.Marshal(payload) + req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b)) + rec := httptest.NewRecorder() + h.updateSettings(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + snap := h.Store.Snapshot() + if snap.CurrentInputFile.Enabled == nil || *snap.CurrentInputFile.Enabled { + t.Fatalf("expected current_input_file.enabled to remain false, got %#v", snap.CurrentInputFile.Enabled) + } + if snap.CurrentInputFile.MinChars != 5000 { + t.Fatalf("expected current_input_file.min_chars=5000, got %#v", snap.CurrentInputFile) + } +} + +func TestUpdateSettingsCurrentInputFilePartialUpdatePreservesMinChars(t *testing.T) { + h := newAdminTestHandler(t, `{"keys":["k1"],"current_input_file":{"enabled":false,"min_chars":777}}`) + payload := map[string]any{ + "current_input_file": map[string]any{ + "enabled": true, + }, + } + b, _ := json.Marshal(payload) + req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b)) + rec := httptest.NewRecorder() + h.updateSettings(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + snap := h.Store.Snapshot() + if snap.CurrentInputFile.Enabled == nil || !*snap.CurrentInputFile.Enabled { + t.Fatalf("expected current_input_file.enabled=true, got %#v", snap.CurrentInputFile.Enabled) + } + if snap.CurrentInputFile.MinChars != 777 { + t.Fatalf("expected current_input_file.min_chars to remain 777, got %#v", snap.CurrentInputFile) + } +} + func TestUpdateSettingsRejectsTwoSplitModesEnabled(t *testing.T) { h := newAdminTestHandler(t, `{"keys":["k1"]}`) payload := map[string]any{ @@ -292,6 +338,52 @@ func TestUpdateSettingsThinkingInjection(t *testing.T) { } } +func TestUpdateSettingsThinkingInjectionPartialPromptPreservesEnabled(t *testing.T) { + h := newAdminTestHandler(t, `{"keys":["k1"],"thinking_injection":{"enabled":false,"prompt":"original prompt"}}`) + payload := map[string]any{ + "thinking_injection": map[string]any{ + "prompt": " updated prompt ", + }, + } + b, _ := json.Marshal(payload) + req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b)) + rec := httptest.NewRecorder() + h.updateSettings(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + snap := h.Store.Snapshot() + if snap.ThinkingInjection.Enabled == nil || *snap.ThinkingInjection.Enabled { + t.Fatalf("expected thinking_injection.enabled to remain false, got %#v", snap.ThinkingInjection.Enabled) + } + if got := h.Store.ThinkingInjectionPrompt(); got != "updated prompt" { + t.Fatalf("expected updated prompt, got %q", got) + } +} + +func TestUpdateSettingsThinkingInjectionPartialEnabledPreservesPrompt(t *testing.T) { + h := newAdminTestHandler(t, `{"keys":["k1"],"thinking_injection":{"enabled":false,"prompt":"original prompt"}}`) + payload := map[string]any{ + "thinking_injection": map[string]any{ + "enabled": true, + }, + } + b, _ := json.Marshal(payload) + req := httptest.NewRequest(http.MethodPut, "/admin/settings", bytes.NewReader(b)) + rec := httptest.NewRecorder() + h.updateSettings(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + snap := h.Store.Snapshot() + if snap.ThinkingInjection.Enabled == nil || !*snap.ThinkingInjection.Enabled { + t.Fatalf("expected thinking_injection.enabled=true, got %#v", snap.ThinkingInjection.Enabled) + } + if got := h.Store.ThinkingInjectionPrompt(); got != "original prompt" { + t.Fatalf("expected original prompt to be preserved, got %q", got) + } +} + func TestUpdateSettingsAutoDeleteMode(t *testing.T) { h := newAdminTestHandler(t, `{"keys":["k1"],"auto_delete":{"sessions":true}}`) diff --git a/internal/httpapi/admin/settings/handler_settings_write.go b/internal/httpapi/admin/settings/handler_settings_write.go index 1a28589..1958d5f 100644 --- a/internal/httpapi/admin/settings/handler_settings_write.go +++ b/internal/httpapi/admin/settings/handler_settings_write.go @@ -28,6 +28,10 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { return } } + currentInputEnabledSet := hasNestedSettingsKey(req, "current_input_file", "enabled") + currentInputMinCharsSet := hasNestedSettingsKey(req, "current_input_file", "min_chars") + thinkingInjectionEnabledSet := hasNestedSettingsKey(req, "thinking_injection", "enabled") + thinkingInjectionPromptSet := hasNestedSettingsKey(req, "thinking_injection", "prompt") if err := h.Store.Update(func(c *config.Config) error { if adminCfg != nil { @@ -80,16 +84,24 @@ func (h *Handler) updateSettings(w http.ResponseWriter, r *http.Request) { } } if currentInputCfg != nil { - c.CurrentInputFile.Enabled = currentInputCfg.Enabled - if currentInputCfg.Enabled != nil && *currentInputCfg.Enabled { + if currentInputEnabledSet { + c.CurrentInputFile.Enabled = currentInputCfg.Enabled + } + if currentInputEnabledSet && currentInputCfg.Enabled != nil && *currentInputCfg.Enabled { disabled := false c.HistorySplit.Enabled = &disabled } - c.CurrentInputFile.MinChars = currentInputCfg.MinChars + if currentInputMinCharsSet { + c.CurrentInputFile.MinChars = currentInputCfg.MinChars + } } if thinkingInjCfg != nil { - c.ThinkingInjection.Enabled = thinkingInjCfg.Enabled - c.ThinkingInjection.Prompt = thinkingInjCfg.Prompt + if thinkingInjectionEnabledSet { + c.ThinkingInjection.Enabled = thinkingInjCfg.Enabled + } + if thinkingInjectionPromptSet { + c.ThinkingInjection.Prompt = thinkingInjCfg.Prompt + } } if aliasMap != nil { c.ModelAliases = aliasMap @@ -144,3 +156,12 @@ func (h *Handler) updateSettingsPassword(w http.ResponseWriter, r *http.Request) "jwt_valid_after_unix": now, }) } + +func hasNestedSettingsKey(req map[string]any, section, key string) bool { + raw, ok := req[section].(map[string]any) + if !ok { + return false + } + _, exists := raw[key] + return exists +} diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index 1d37b60..d81661f 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -8,7 +8,7 @@ const { stripFencedCodeBlocks, } = require('./parse_payload'); -const TOOL_MARKUP_PREFIXES = ['', to: '' }, { from: '<|dsml|parameter', to: '', to: '' }, + { from: '<|dsml tool_calls', to: '', to: '' }, + { from: '<|dsml invoke', to: '', to: '' }, + { from: '<|dsml parameter', to: '', to: '' }, + { from: '', to: '' }, + { from: '', to: '' }, + { from: '', to: '' }, { from: '', to: '' }, { from: '' }, + { open: '<|dsml tool_calls', close: '' }, { open: '' }, + { open: '' }, { open: '<|tool_calls', close: '' }, { open: '<|tool_calls', close: '' }, { open: '' }, @@ -12,7 +14,7 @@ const XML_TOOL_TAG_PAIRS = [ const XML_TOOL_OPENING_TAGS = [ ...XML_TOOL_TAG_PAIRS.map(p => p.open), - '<|dsml|invoke', '= 0) { - if (findXMLCloseOutsideCDATA(captured, pair.close, openIdx + pair.open.length) < 0) { + if (findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx) < 0) { return true; } } @@ -203,7 +204,9 @@ function hasOpenXMLToolTag(captured) { function containsAnyToolCallWrapper(lower) { return lower.includes('', '<|dsml|tool_calls\n', '<|dsml|tool_calls ', '<|dsml|invoke ', '<|dsml|invoke\n', '<|dsml|invoke\t', '<|dsml|invoke\r', + '<|dsml tool_calls>', '<|dsml tool_calls\n', '<|dsml tool_calls ', + '<|dsml invoke ', '<|dsml invoke\n', '<|dsml invoke\t', '<|dsml invoke\r', '', '', '', '<|tool_calls\n', '<|tool_calls ', '<|invoke ', '<|invoke\n', '<|invoke\t', '<|invoke\r', '<|tool_calls>', '<|tool_calls\n', '<|tool_calls ', @@ -15,7 +19,9 @@ const XML_TOOL_SEGMENT_TAGS = [ const XML_TOOL_OPENING_TAGS = [ '<|dsml|tool_calls', + '<|dsml tool_calls', '', + '', '', + '', '', '', '', diff --git a/internal/toolcall/toolcalls_dsml.go b/internal/toolcall/toolcalls_dsml.go index df6cda2..4801a78 100644 --- a/internal/toolcall/toolcalls_dsml.go +++ b/internal/toolcall/toolcalls_dsml.go @@ -26,6 +26,18 @@ var dsmlToolMarkupAliases = []struct { {"", ""}, {"<|dsml|parameter", "", ""}, + {"<|dsml tool_calls", "", ""}, + {"<|dsml invoke", "", ""}, + {"<|dsml parameter", "", ""}, + {"", ""}, + {"", ""}, + {"", ""}, {"", ""}, {"", + "<|DSML invoke name=\"Read\">", + "<|DSML parameter name=\"file_path\">", + "", + "", + }, "\n") + calls := ParseToolCalls(text, []string{"Read"}) + if len(calls) != 1 { + t.Fatalf("expected one call from DSML space-separator typo, got %#v", calls) + } + if calls[0].Name != "Read" { + t.Fatalf("expected Read call, got %#v", calls[0]) + } + if got, _ := calls[0].Input["file_path"].(string); got != "/tmp/input.txt" { + t.Fatalf("expected file_path to parse, got %q", got) + } +} + +func TestParseToolCallsDoesNotAcceptDSMLSpaceLookalikeTagName(t *testing.T) { + text := strings.Join([]string{ + "<|DSML tool_calls_extra>", + "<|DSML invoke name=\"Read\">", + "<|DSML parameter name=\"file_path\">/tmp/input.txt", + "", + "", + }, "\n") + calls := ParseToolCalls(text, []string{"Read"}) + if len(calls) != 0 { + t.Fatalf("expected no calls from lookalike tag, got %#v", calls) + } +} + func TestParseToolCallsSkipsProseMentionOfSameWrapperVariant(t *testing.T) { text := strings.Join([]string{ "Summary: support canonical and DSML <|DSML|tool_calls> wrappers.", diff --git a/internal/toolstream/complex_edge_test.go b/internal/toolstream/complex_edge_test.go index ec5664d..c1c6488 100644 --- a/internal/toolstream/complex_edge_test.go +++ b/internal/toolstream/complex_edge_test.go @@ -554,3 +554,64 @@ func TestSieve_ChineseReviewSamplePreservesInlineDSMLMention(t *testing.T) { t.Fatalf("真实工具块不应泄漏到正文, got %q", text.String()) } } + +func TestSieve_ToleratesDSMLSpaceSeparatorTypo(t *testing.T) { + var state State + chunks := []string{ + "准备读取文件。\n", + "<|DSML tool_calls>\n", + "<|DSML invoke name=\"Read\">\n", + "<|DSML parameter name=\"file_path\">\n", + "\n", + "", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Read"})...) + } + events = append(events, Flush(&state, []string{"Read"})...) + + var text strings.Builder + var filePath string + callCount := 0 + for _, e := range events { + text.WriteString(e.Content) + for _, call := range e.ToolCalls { + callCount++ + filePath, _ = call.Input["file_path"].(string) + } + } + + if callCount != 1 { + t.Fatalf("应解析出 1 个工具调用,got %d, text=%q", callCount, text.String()) + } + if filePath != "/tmp/input.txt" { + t.Fatalf("应解析出 file_path,got %q", filePath) + } + if !strings.Contains(text.String(), "准备读取文件") { + t.Fatalf("前置正文应保留, got %q", text.String()) + } + if strings.Contains(text.String(), "<|DSML invoke") { + t.Fatalf("真实工具块不应泄漏到正文, got %q", text.String()) + } +} + +func TestSieve_DSMLSpaceLookalikeTagNameStaysText(t *testing.T) { + var state State + input := "<|DSML tool_calls_extra><|DSML invoke name=\"Read\"><|DSML parameter name=\"file_path\">/tmp/input.txt" + events := ProcessChunk(&state, input, []string{"Read"}) + events = append(events, Flush(&state, []string{"Read"})...) + + var text strings.Builder + callCount := 0 + for _, e := range events { + text.WriteString(e.Content) + callCount += len(e.ToolCalls) + } + if callCount != 0 { + t.Fatalf("相似标签名不应触发工具调用,got %d", callCount) + } + if text.String() != input { + t.Fatalf("相似标签名应作为正文透传, got %q", text.String()) + } +} diff --git a/internal/toolstream/tool_sieve_xml.go b/internal/toolstream/tool_sieve_xml.go index b755200..06fc469 100644 --- a/internal/toolstream/tool_sieve_xml.go +++ b/internal/toolstream/tool_sieve_xml.go @@ -99,11 +99,10 @@ func consumeXMLToolCapture(captured string, toolNames []string) (prefix string, // hasOpenXMLToolTag returns true if captured text contains an XML tool opening tag // whose SPECIFIC closing tag has not appeared yet. func hasOpenXMLToolTag(captured string) bool { - lower := strings.ToLower(captured) for _, pair := range xmlToolCallTagPairs { - openIdx := strings.Index(lower, pair.open) + openIdx := findXMLOpenOutsideCDATA(captured, pair.open, 0) if openIdx >= 0 { - if findXMLCloseOutsideCDATA(captured, pair.close, openIdx+len(pair.open)) < 0 { + if findMatchingXMLToolWrapperClose(captured, pair.open, pair.close, openIdx) < 0 { return true } } @@ -117,17 +116,25 @@ func shouldKeepBareInvokeCapture(captured string) bool { if invokeIdx < 0 || containsAnyToolCallWrapper(lower) { return false } - wrapperClose := "" invokeOpenLen := len(" invokeIdx { + if dsml && strings.HasPrefix(lower[invokeIdx:], "<|dsml invoke") { + invokeOpenLen = len("<|dsml invoke") + parameterOpen = "<|dsml parameter" + } + if dsml && strings.HasPrefix(lower[invokeIdx:], " invokeIdx { return true } @@ -141,9 +148,15 @@ func shouldKeepBareInvokeCapture(captured string) bool { return true } - invokeCloseIdx := findXMLCloseOutsideCDATA(captured, invokeClose, startEnd+1) + invokeCloseIdx := findAnyXMLCloseOutsideCDATA(captured, possibleInvokeCloseTags(dsml), startEnd+1) if invokeCloseIdx >= 0 { - afterClose := captured[invokeCloseIdx+len(invokeClose):] + afterClose := captured[invokeCloseIdx:] + for _, closeTag := range possibleInvokeCloseTags(dsml) { + if strings.HasPrefix(strings.ToLower(afterClose), closeTag) { + afterClose = afterClose[len(closeTag):] + break + } + } return strings.TrimSpace(afterClose) == "" } @@ -156,15 +169,42 @@ func shouldKeepBareInvokeCapture(captured string) bool { func containsAnyToolCallWrapper(lower string) bool { return strings.Contains(lower, ""} + } + return []string{"", "", "", "", "", ""} +} + +func possibleInvokeCloseTags(dsml bool) []string { + if !dsml { + return []string{""} + } + return []string{"", "", "", "", "", ""} +} + +func findAnyXMLCloseOutsideCDATA(s string, closeTags []string, start int) int { + best := -1 + for _, closeTag := range closeTags { + idx := findXMLCloseOutsideCDATA(s, closeTag, start) + if idx >= 0 && (best < 0 || idx < best) { + best = idx + } + } + return best +} + func firstInvokeIndex(lower string) (int, bool) { xmlIdx := strings.Index(lower, "", "", "", "", ""} +var xmlToolCallClosingTags = []string{"", "", "", "", "", "", ""} var xmlToolCallOpeningTags = []string{ ""}, + {"<|dsml tool_calls", ""}, {""}, + {""}, {"<|tool_calls", ""}, {"<|tool_calls", ""}, {""}, @@ -33,8 +37,12 @@ var xmlToolCallBlockPattern = regexp.MustCompile(`(?is)((?:", "<|dsml|tool_calls\n", "<|dsml|tool_calls ", "<|dsml|invoke ", "<|dsml|invoke\n", "<|dsml|invoke\t", "<|dsml|invoke\r", + "<|dsml tool_calls>", "<|dsml tool_calls\n", "<|dsml tool_calls ", + "<|dsml invoke ", "<|dsml invoke\n", "<|dsml invoke\t", "<|dsml invoke\r", "", "", "", "<|tool_calls\n", "<|tool_calls ", "<|invoke ", "<|invoke\n", "<|invoke\t", "<|invoke\r", "<|tool_calls>", "<|tool_calls\n", "<|tool_calls ", diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index d870d73..dabaae2 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -57,6 +57,20 @@ test('parseToolCalls parses DSML shell as XML-compatible tool call', () => { assert.deepEqual(calls[0].input, { path: 'README.MD' }); }); +test('parseToolCalls tolerates DSML space-separator typo', () => { + const payload = '<|DSML tool_calls><|DSML invoke name="Read"><|DSML parameter name="file_path">'; + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Read'); + assert.deepEqual(calls[0].input, { file_path: '/tmp/input.txt' }); +}); + +test('parseToolCalls ignores DSML space lookalike tag names', () => { + const payload = '<|DSML tool_calls_extra><|DSML invoke name="Read"><|DSML parameter name="file_path">/tmp/input.txt'; + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 0); +}); + test('parseToolCalls keeps canonical XML examples inside DSML CDATA', () => { const content = 'x'; const payload = `<|DSML|tool_calls><|DSML|invoke name="write_file"><|DSML|parameter name="path">notes.md<|DSML|parameter name="content">`; @@ -107,6 +121,32 @@ test('sieve emits tool_calls after prose mentions same wrapper variant', () => { assert.equal(collectText(events).includes('Summary:'), true); }); +test('sieve emits tool_calls for DSML space-separator typo', () => { + const events = runSieve([ + '准备读取文件。\n', + '<|DSML tool_calls>\n', + '<|DSML invoke name="Read">\n', + '<|DSML parameter name="file_path">\n', + '\n', + '', + ], ['Read']); + const text = collectText(events); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Read'); + assert.equal(finalCalls[0].input.file_path, '/tmp/input.txt'); + assert.equal(text.includes('准备读取文件'), true); + assert.equal(text.includes('<|DSML invoke'), false); +}); + +test('sieve keeps DSML space lookalike tag names as text', () => { + const input = '<|DSML tool_calls_extra><|DSML invoke name="Read"><|DSML parameter name="file_path">/tmp/input.txt'; + const events = runSieve([input], ['Read']); + const finalCalls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []); + assert.equal(finalCalls.length, 0); + assert.equal(collectText(events), input); +}); + test('sieve preserves review body with alias mentions before real DSML tool calls', () => { const events = runSieve([ "Done reviewing the diff. Here's my analysis before we commit:\n\n",