From 7a28b9e26566cc7a35b7b1aa4f7196418079bc2e Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 10 May 2026 03:41:55 +0800 Subject: [PATCH 1/3] feat: improve CDATA and DSML tag parsing robustness with support for fullwidth-bang, ideographic-comma, and extended quote/separator normalization. --- API.en.md | 4 +- API.md | 4 +- docs/prompt-compatibility.md | 4 +- docs/toolcall-semantics.md | 5 +- .../stream-tool-sieve/parse_payload.js | 112 +++++++++++++++--- internal/promptcompat/message_normalize.go | 38 ++++++ .../promptcompat/message_normalize_test.go | 36 ++++++ internal/toolcall/tool_prompt.go | 1 + internal/toolcall/tool_prompt_test.go | 13 ++ internal/toolcall/toolcalls_dsml.go | 2 +- internal/toolcall/toolcalls_markup.go | 29 ++++- internal/toolcall/toolcalls_parse_markup.go | 39 +++++- internal/toolcall/toolcalls_scan.go | 19 +++ internal/toolcall/toolcalls_test.go | 43 +++++++ internal/toolstream/tool_sieve_xml_test.go | 73 ++++++++++++ tests/node/stream-tool-sieve.test.js | 70 +++++++++++ 16 files changed, 460 insertions(+), 32 deletions(-) diff --git a/API.en.md b/API.en.md index 19368ec..07fbf3d 100644 --- a/API.en.md +++ b/API.en.md @@ -40,7 +40,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. 
-- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket and trailing attribute separator drift such as `...〈/DSM|parameter〉`, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) back to XML before parsing; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. +- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, or `<、DSML、tool_calls>`, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. 
The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) back to XML before parsing, and also tolerates CDATA opener drift such as `<！[CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `<invoke>` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. - When upstream returns a thinking-only response with no visible text, the Go main path for both streaming and non-streaming completions retries once in the same DeepSeek session: it appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429. - Citation/reference marker boundary: streaming output hides upstream `[citation:N]` / `[reference:N]` placeholders by default; non-stream output converts DeepSeek search reference markers into Markdown links. 
@@ -355,7 +355,7 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket and trailing attribute separator drift (`...〈/DSM|parameter〉`), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize back to XML first, while internal parsing remains XML-based. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. +- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>`), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. 
These shells normalize back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<！[CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. - The parser no longer drops tool calls solely because parameter values are empty; explicit empty strings or whitespace-only parameters become empty strings in structured `tool_calls`. Prompting still tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation. - If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`. - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls. 
diff --git a/API.md b/API.md index 8d8c827..5ad14c0 100644 --- a/API.md +++ b/API.md @@ -40,7 +40,7 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉`)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,前缀壳会在解析入口归一化;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>`)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,前缀壳会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 - `Admin API` 
将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 - 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径的流式与非流式补全都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。 - 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。 @@ -357,7 +357,7 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉`)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些前缀壳会先归一化回 XML,内部仍以 XML 解析语义为准。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 +- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>`)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些前缀壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON 
`tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 - 解析层不会因为参数值为空而丢弃工具调用;显式空字符串或纯空白参数会按空字符串进入结构化 `tool_calls`。Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。 - 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index dd8196a..9d2494a 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -168,7 +168,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 4. 把这整段内容并入 system prompt。 工具调用正例现在优先示范全角分隔符 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 -兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前任意协议前缀壳都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser。但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现。解析器会先截获非代码块中的疑似工具 wrapper,完整解析失败或工具语义无效时再按普通文本放行。 +兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前任意协议前缀壳都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser。CDATA 开头也使用同一类扫描式容错,`...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... 
}` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]>` 或 `` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。 Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。 在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。 @@ -222,6 +222,8 @@ assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 ``` +如果客户端历史里没有结构化 `tool_calls` 字段、却把一个可独立解析的 assistant 工具块放进了普通 `content`,兼容层会在写入后续 prompt 前先按工具调用解析它,再重渲染为规范 DSML 历史外壳。这样可以避免一次 malformed 工具块未被结构化保存后,作为普通 assistant 文本回灌,继续污染后续模型的 few-shot 工具格式。 + 解析层同时兼容旧式纯 XML 形态:`` / `` / ``。两者都会先归一到现有 XML 解析语义;其他旧格式都会作为普通文本保留,不会作为可执行调用语法。 例外是 parser 会对一个非常窄的模型失误做修复:如果 assistant 输出了 `` ... 
``(或 DSML 对应标签),但漏掉最前面的 opening wrapper,解析阶段会补回 wrapper 后再尝试识别。 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index f2c148f..409b942 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -39,7 +39,7 @@ 兼容修复: - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 -- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉` 等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、``、``、`...〈/DSM|tool_calls〉` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。 +- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、全角感叹号 `!`、顿号 `、`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉`、弯引号属性值等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、``、``、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`、`<、DSML、tool_calls>...<、/DSML、tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。 - 如果模型在固定工具标签名后多输出一个尾部管道符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`,或在带属性标签的结束符前多输出一个尾部管道符(如 ``),兼容层会把这个尾部 `|` / `|` 当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 - 裸 `` / `` 不会被当成“已支持的工具语法”;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。 @@ -61,6 +61,7 @@ - fenced code block(反引号 `` ``` `` 和波浪线 `~~~`)中的 XML 示例始终按普通文本处理 - 支持嵌套围栏(如 4 反引号嵌套 3 反引号)和 CDATA 内围栏保护 - 对 `command` / `content` 等长文本参数,CDATA 内部如果包含 Markdown fenced DSML / XML 示例,即使示例里出现 `]]>` / `` 这类看起来像外层结束标签的片段,也会继续按参数原文保留,直到真正位于围栏外的外层结束标签 +- CDATA 开头也按扫描式识别,除了标准 `` 或 Markdown inline code 里的 `<|DSML|tool_calls>`)而后面紧跟真正工具调用时,sieve 会跳过不可解析的 mention 候选并继续匹配后续真实工具块,不会因 mention 导致工具调用丢失,也不会截断 mention 后的正文 - Go 侧 SSE 读取不再使用 `bufio.Scanner` 的固定 token 上限;单个 `data:` 行中包含很长的写文件参数时,非流式收集、流式解析与 auto-continue 
透传都应保留完整行,再交给 tool parser 处理 @@ -102,7 +103,7 @@ go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./ - DSML `<|DSML|tool_calls>` wrapper 正常解析 - legacy canonical `` wrapper 正常解析 -- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、`...〈/DSM|tool_calls〉`)正常解析 +- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`)正常解析 - 混搭标签(DSML wrapper + canonical inner)归一化后正常解析 - 波浪线围栏 `~~~` 内的示例不执行 - 嵌套围栏(4 反引号嵌套 3 反引号)内的示例不执行 diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index ffd7742..175223c 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -1,6 +1,6 @@ 'use strict'; -const CDATA_PATTERN = /^(?:<|〈)!\[CDATA\[([\s\S]*?)]](?:>|>|〉)$/i; +const CDATA_PATTERN = /^(?:<|〈)(?:!|!)\[CDATA\[([\s\S]*?)]](?:>|>|〉)$/i; const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; const TOOL_MARKUP_NAMES = [ { raw: 'tool_calls', canonical: 'tool_calls' }, @@ -87,7 +87,8 @@ function isFenceCloseLine(trimmed, fenceChar, fenceLen) { } function cdataStartsBeforeFence(line) { - const cdataIdx = line.toLowerCase().indexOf('= 0 ? 
line.indexOf('```') : Infinity, @@ -109,9 +110,9 @@ function updateCDATAStateLine(inCDATA, line) { state = false; continue; } - const start = lower.indexOf('= raw.length || normalizeFullwidthASCIIChar(raw[start]) !== '<') { + return { ok: false, bodyStart: start }; + } + let i = start + 1; + for (let skipped = 0; skipped <= 4 && i < raw.length; skipped += 1) { + const matched = matchNormalizedASCII(raw, i, '[cdata['); + if (matched.ok) { + return { ok: true, bodyStart: i + matched.len }; + } + if (!isCDATAOpenSeparator(raw[i])) { + break; + } + i += 1; + } + return { ok: false, bodyStart: start }; +} + +function isCDATAOpenSeparator(ch) { + const normalized = normalizeFullwidthASCIIChar(ch || ''); + if (!normalized || ['<', '>', '/', '=', '"', "'", '['].includes(normalized)) { + return false; + } + if ([' ', '\t', '\n', '\r'].includes(normalized)) { + return false; + } + return !/^[A-Za-z0-9]$/.test(normalized); +} + function findCDATAEnd(text, from) { const ascii = text.indexOf(']]>', from); const fullwidth = text.indexOf(']]>', from); @@ -458,7 +504,7 @@ function scanToolMarkupTagAt(text, start) { while (i < raw.length && normalizeFullwidthASCIIChar(raw[i]) === '<') { i += 1; } - const closing = raw[i] === '/'; + let closing = raw[i] === '/'; if (closing) { i += 1; } @@ -472,6 +518,9 @@ function scanToolMarkupTagAt(text, start) { if (!fallback.ok) { return null; } + if (!closing && toolMarkupPrefixContainsSlash(raw.slice(prefixStart, fallback.start))) { + closing = true; + } name = fallback.name; i = fallback.start; len = fallback.len; @@ -692,6 +741,15 @@ function toolMarkupPrefixAllowsLocalName(prefix) { return !/^[A-Za-z0-9]$/.test(previous); } +function toolMarkupPrefixContainsSlash(prefix) { + for (const ch of toStringSafe(prefix)) { + if (normalizeFullwidthASCIIChar(ch) === '/') { + return true; + } + } + return false; +} + function isToolMarkupTagTerminator(raw, idx) { return raw[idx] === '>' || normalizeFullwidthASCIIChar(raw[idx] || '') === '>'; } @@ 
-833,6 +891,12 @@ function normalizeFullwidthASCIIChar(ch) { if (ch === '〉') { return '>'; } + if (ch === '“' || ch === '”') { + return '"'; + } + if (ch === '‘' || ch === '’') { + return "'"; + } const code = ch.charCodeAt(0); if (code >= 0xff01 && code <= 0xff5e) { return String.fromCharCode(code - 0xfee0); @@ -882,7 +946,7 @@ function normalizeToolMarkupTagTailForXML(tail) { } else if (normalized === '"' || normalized === "'") { quote = normalized; out += normalized; - } else if (normalized === '|') { + } else if (normalized === '|' || normalized === '!') { let j = i + 1; while (j < raw.length && [' ', '\t', '\r', '\n'].includes(raw[j])) { j += 1; @@ -1239,16 +1303,34 @@ function unescapeHtml(safe) { function extractStandaloneCDATA(inner) { const s = toStringSafe(inner).trim(); - const cdataMatch = s.match(CDATA_PATTERN); - if (cdataMatch && cdataMatch[1] !== undefined) { - return { ok: true, value: cdataMatch[1] }; - } - if (s.toLowerCase().startsWith(' + <!DSML!invoke name=“Bash”> + <!DSML!parameter name=“command”><![CDATA[lsof -i :4321 -t]]><!/DSML!parameter> + <!DSML!parameter name=“description”><![CDATA[Verify port 4321 is free]]><!/DSML!parameter> + <!/DSML!invoke> + <!/DSML!tool_calls>`, + }, + } + + normalized := NormalizeOpenAIMessagesForPrompt(raw, "") + if len(normalized) != 1 { + t.Fatalf("expected one normalized assistant message, got %#v", normalized) + } + content, _ := normalized[0]["content"].(string) + for _, want := range []string{ + "<|DSML|tool_calls>", + `<|DSML|invoke name="Bash">`, + `<|DSML|parameter name="command">`, + `<|DSML|parameter name="description">`, + "", + } { + if !strings.Contains(content, want) { + t.Fatalf("expected canonicalized assistant tool markup to contain %q, got %q", want, content) + } + } + for _, bad := range []string{"<!DSML", "!tool_calls", "“", "”"} { + if strings.Contains(content, bad) { + t.Fatalf("expected malformed assistant tool markup to be removed from prompt history, found %q in %q", bad, content) + } 
+ } +} + func TestNormalizeOpenAIMessagesForPrompt_DeveloperRoleMapsToSystem(t *testing.T) { raw := []any{ map[string]any{"role": "developer", "content": "必须先走工具调用"}, diff --git a/internal/toolcall/tool_prompt.go b/internal/toolcall/tool_prompt.go index c506344..9f278e5 100644 --- a/internal/toolcall/tool_prompt.go +++ b/internal/toolcall/tool_prompt.go @@ -21,6 +21,7 @@ RULES: 1) Use the <|DSML|tool_calls> wrapper format. 2) Put one or more <|DSML|invoke> entries under a single <|DSML|tool_calls> root. 3) Put the tool name in the invoke name attribute: <|DSML|invoke name="TOOL_NAME">. +3a) Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar |. 4) All string values must use , even short ones. This includes code, scripts, file contents, prompts, paths, names, and queries. 5) Every top-level argument must be a <|DSML|parameter name="ARG_NAME">... node. 6) Objects use nested XML elements inside the parameter body. Arrays may repeat children. diff --git a/internal/toolcall/tool_prompt_test.go b/internal/toolcall/tool_prompt_test.go index 96fdf56..1c3757c 100644 --- a/internal/toolcall/tool_prompt_test.go +++ b/internal/toolcall/tool_prompt_test.go @@ -133,6 +133,19 @@ func TestBuildToolCallInstructions_RejectsEmptyParametersInPrompt(t *testing.T) } } +func TestBuildToolCallInstructions_UsesPositiveTagPunctuationAlphabet(t *testing.T) { + out := BuildToolCallInstructions([]string{"Bash"}) + want := `Tag punctuation alphabet: ASCII < > / = " plus the fullwidth vertical bar |.` + if !strings.Contains(out, want) { + t.Fatalf("expected positive tag punctuation alphabet %q, got: %s", want, out) + } + for _, bad := range []string{"lookalike", "substitute", "!", "〈", "〉", "“", "”", "、"} { + if strings.Contains(out, bad) { + t.Fatalf("tool prompt should not include negative punctuation examples %q, got: %s", bad, out) + } + } +} + func findInvokeBlocks(text, name string) []string { open := `<|DSML|invoke name="` + name + `">` remaining := text diff --git 
a/internal/toolcall/toolcalls_dsml.go b/internal/toolcall/toolcalls_dsml.go index e57a5ac..a5d9c4a 100644 --- a/internal/toolcall/toolcalls_dsml.go +++ b/internal/toolcall/toolcalls_dsml.go @@ -86,7 +86,7 @@ func normalizeToolMarkupTagTailForXML(tail string) string { case '"', '\'': quote = ch b.WriteRune(ch) - case '|': + case '|', '!': j := i + size for j < len(tail) { next, nextSize := utf8.DecodeRuneInString(tail[j:]) diff --git a/internal/toolcall/toolcalls_markup.go b/internal/toolcall/toolcalls_markup.go index 03a6d6b..cc94256 100644 --- a/internal/toolcall/toolcalls_markup.go +++ b/internal/toolcall/toolcalls_markup.go @@ -10,7 +10,7 @@ import ( var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)`) // cdataPattern matches a standalone CDATA section. -var cdataPattern = regexp.MustCompile(`(?is)^(?:<|〈)!\[CDATA\[(.*?)]](?:>|>|〉)$`) +var cdataPattern = regexp.MustCompile(`(?is)^(?:<|〈)(?:!|!)\[CDATA\[(.*?)]](?:>|>|〉)$`) func parseMarkupKVObject(text string) map[string]any { matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1) @@ -108,15 +108,32 @@ func extractRawTagValue(inner string) string { func extractStandaloneCDATA(inner string) (string, bool) { trimmed := strings.TrimSpace(inner) - if cdataMatches := cdataPattern.FindStringSubmatch(trimmed); len(cdataMatches) >= 2 { - return cdataMatches[1], true - } - if strings.HasPrefix(strings.ToLower(trimmed), "= len(text) { return i, false, false } - switch { - case hasASCIIPrefixFoldAt(text, i, "") if end < 0 { @@ -227,6 +228,38 @@ func skipXMLIgnoredSection(text string, i int) (next int, advanced bool, blocked } } +func matchToolCDATAOpenAt(text string, start int) (int, bool) { + i, ok := consumeToolMarkupLessThan(text, start) + if !ok { + return start, false + } + for skipped := 0; skipped <= 4 && i < len(text); skipped++ { + if cdataLen, ok := matchASCIIPrefixFoldAt(text, i, "[cdata["); ok { + return i + cdataLen, true + } + 
r, size := utf8.DecodeRuneInString(text[i:]) + if size <= 0 || !isToolCDATAOpenSeparator(r) { + break + } + i += size + } + return start, false +} + +func isToolCDATAOpenSeparator(r rune) bool { + ch := normalizeFullwidthASCII(r) + if ch == 0 || ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '"' || ch == '\'' || ch == '[' { + return false + } + if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { + return false + } + if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') { + return false + } + return true +} + func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool { _, ok := matchASCIIPrefixFoldAt(text, start, prefix) return ok diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go index f8001fd..30b4e6a 100644 --- a/internal/toolcall/toolcalls_scan.go +++ b/internal/toolcall/toolcalls_scan.go @@ -159,6 +159,9 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) { if !ok { return ToolMarkupTag{}, false } + if !closing && toolMarkupPrefixContainsSlash(text[prefixStart:fallbackStart]) { + closing = true + } name = fallbackName i = fallbackStart nameLen = fallbackLen @@ -461,6 +464,9 @@ func consumeToolMarkupPipe(text string, idx int) (int, bool) { if strings.HasPrefix(text[idx:], "␂") { return idx + len("␂"), true } + if ch, size := normalizedASCIIAt(text, idx); ch == '!' { + return idx + size, true + } return idx, false } @@ -506,9 +512,22 @@ func normalizeFullwidthASCII(r rune) rune { return '<' case '〉': return '>' + case '“', '”': + return '"' + case '‘', '’': + return '\'' } if r >= '!' 
&& r <= '~' { return r - 0xFEE0 } return r } + +func toolMarkupPrefixContainsSlash(prefix string) bool { + for _, r := range prefix { + if normalizeFullwidthASCII(r) == '/' { + return true + } + } + return false +} diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index b66f047..1171b8d 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -163,6 +163,49 @@ func TestParseToolCallsSupportsCJKAngleDSMDrift(t *testing.T) { } } +func TestParseToolCallsSupportsFullwidthBangDSMLDrift(t *testing.T) { + text := `<!DSML!tool_calls> + <!DSML!invoke name=“Bash”> + <!DSML!parameter name=“command”><![CDATA[lsof -i :4321 -t]]><!/DSML!parameter> + <!DSML!parameter name=“description”><![CDATA[Verify port 4321 is free]]><!/DSML!parameter> + <!/DSML!invoke> + <!/DSML!tool_calls>` + + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 1 { + t.Fatalf("expected one fullwidth-bang DSML drift call, got %#v", calls) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "lsof -i :4321 -t" || calls[0].Input["description"] != "Verify port 4321 is free" { + t.Fatalf("unexpected fullwidth-bang DSML drift call: %#v", calls[0]) + } +} + +func TestParseToolCallsSupportsIdeographicCommaDSMLDrift(t *testing.T) { + text := `<、DSML、tool_calls> + <、DSML、invoke name="Bash"> + <、DSML、parameter name="command"><、[CDATA[git commit -m "$(cat <<'EOF' +feat: expand fullwidth bang separator and curly quote tolerance in DSML tool parsing + +Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com +EOF +)"]]><、/DSML、parameter> + <、DSML、parameter name="description"><、[CDATA[Create commit with staged changes]]><、/DSML、parameter> + <、/DSML、invoke> +<、/DSML、tool_calls>` + + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 1 { + t.Fatalf("expected one ideographic-comma DSML drift call, got %#v", calls) + } + command, _ := calls[0].Input["command"].(string) + if calls[0].Name != "Bash" || 
!strings.Contains(command, `git commit -m "$(cat <<'EOF'`) || !strings.Contains(command, "Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com") { + t.Fatalf("unexpected ideographic-comma DSML drift call: %#v", calls[0]) + } + if calls[0].Input["description"] != "Create commit with staged changes" { + t.Fatalf("unexpected ideographic-comma description: %#v", calls[0]) + } +} + func TestParseToolCallsIgnoresBareHyphenatedToolCallsLookalike(t *testing.T) { text := `pwd` calls := ParseToolCalls(text, []string{"Bash"}) diff --git a/internal/toolstream/tool_sieve_xml_test.go b/internal/toolstream/tool_sieve_xml_test.go index e207969..780dc1b 100644 --- a/internal/toolstream/tool_sieve_xml_test.go +++ b/internal/toolstream/tool_sieve_xml_test.go @@ -1262,3 +1262,76 @@ func TestProcessToolSieveCJKAngleDSMDriftDoesNotLeak(t *testing.T) { t.Fatalf("unexpected CJK-angle DSM drift call: %#v", calls[0]) } } + +func TestProcessToolSieveFullwidthBangDSMLDriftDoesNotLeak(t *testing.T) { + var state State + chunks := []string{ + "<!DSML!tool_calls>\n", + " <!DSML!invoke name=“Bash”>\n", + " <!DSML!parameter name=“command”><![CDATA[lsof -i :4321 -t]]><!/DSML!parameter>\n", + " <!DSML!parameter name=“description”><![CDATA[Verify port 4321 is free]]><!/DSML!parameter>\n", + " <!/DSML!invoke>\n", + " <!/DSML!tool_calls>", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Bash"})...) + } + events = append(events, Flush(&state, []string{"Bash"})...) + + var textContent string + var calls []toolcall.ParsedToolCall + for _, evt := range events { + textContent += evt.Content + calls = append(calls, evt.ToolCalls...) 
+ } + + if strings.Contains(textContent, "DSML") || strings.Contains(textContent, "lsof") { + t.Fatalf("fullwidth-bang DSML drift leaked to text: %q events=%#v", textContent, events) + } + if len(calls) != 1 { + t.Fatalf("expected one fullwidth-bang DSML drift tool call, got %d events=%#v", len(calls), events) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "lsof -i :4321 -t" { + t.Fatalf("unexpected fullwidth-bang DSML drift call: %#v", calls[0]) + } +} + +func TestProcessToolSieveIdeographicCommaDSMLDriftDoesNotLeak(t *testing.T) { + var state State + chunks := []string{ + "<、DSML、tool_calls>\n", + " <、DSML、invoke name=\"Bash\">\n", + " <、DSML、parameter name=\"command\"><、[CDATA[git commit -m \"$(cat <<'EOF'\n", + "feat: expand fullwidth bang separator and curly quote tolerance in DSML tool parsing\n\n", + "Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com\n", + "EOF\n", + ")\"]]><、/DSML、parameter>\n", + " <、DSML、parameter name=\"description\"><、[CDATA[Create commit with staged changes]]><、/DSML、parameter>\n", + " <、/DSML、invoke>\n", + "<、/DSML、tool_calls>", + } + var events []Event + for _, c := range chunks { + events = append(events, ProcessChunk(&state, c, []string{"Bash"})...) + } + events = append(events, Flush(&state, []string{"Bash"})...) + + var textContent string + var calls []toolcall.ParsedToolCall + for _, evt := range events { + textContent += evt.Content + calls = append(calls, evt.ToolCalls...) 
+ } + + if strings.Contains(textContent, "DSML") || strings.Contains(textContent, "git commit") { + t.Fatalf("ideographic-comma DSML drift leaked to text: %q events=%#v", textContent, events) + } + if len(calls) != 1 { + t.Fatalf("expected one ideographic-comma DSML drift tool call, got %d events=%#v", len(calls), events) + } + command, _ := calls[0].Input["command"].(string) + if calls[0].Name != "Bash" || !strings.Contains(command, "git commit -m") { + t.Fatalf("unexpected ideographic-comma DSML drift call: %#v", calls[0]) + } +} diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index e6a07e5..22ec81f 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -152,6 +152,40 @@ test('parseToolCalls parses CJK-angle DSM drift', () => { assert.equal(calls[2].input.command, 'git status -b --short'); }); +test('parseToolCalls parses fullwidth-bang DSML drift', () => { + const payload = `<!DSML!tool_calls> + <!DSML!invoke name=“Bash”> + <!DSML!parameter name=“command”><![CDATA[lsof -i :4321 -t]]><!/DSML!parameter> + <!DSML!parameter name=“description”><![CDATA[Verify port 4321 is free]]><!/DSML!parameter> + <!/DSML!invoke> + <!/DSML!tool_calls>`; + const calls = parseToolCalls(payload, ['Bash']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Bash'); + assert.equal(calls[0].input.command, 'lsof -i :4321 -t'); + assert.equal(calls[0].input.description, 'Verify port 4321 is free'); +}); + +test('parseToolCalls parses ideographic-comma DSML drift', () => { + const payload = `<、DSML、tool_calls> + <、DSML、invoke name="Bash"> + <、DSML、parameter name="command"><、[CDATA[git commit -m "$(cat <<'EOF' +feat: expand fullwidth bang separator and curly quote tolerance in DSML tool parsing + +Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com +EOF +)"]]><、/DSML、parameter> + <、DSML、parameter name="description"><、[CDATA[Create commit with staged changes]]><、/DSML、parameter> + <、/DSML、invoke> 
+<、/DSML、tool_calls>`; + const calls = parseToolCalls(payload, ['Bash']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Bash'); + assert.equal(calls[0].input.command.includes('git commit -m "$(cat <<\'EOF\''), true); + assert.equal(calls[0].input.command.includes('Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com'), true); + assert.equal(calls[0].input.description, 'Create commit with staged changes'); +}); + test('parseToolCalls parses DSML control separator drift', () => { for (const sep of ['␂', '\x02']) { const payload = ` @@ -562,6 +596,42 @@ test('sieve emits tool_calls for CJK-angle DSM drift', () => { assert.equal(collectText(events), ''); }); +test('sieve emits tool_calls for fullwidth-bang DSML drift', () => { + const events = runSieve([ + '<!DSML!tool_calls>\n', + ' <!DSML!invoke name=“Bash”>\n', + ' <!DSML!parameter name=“command”><![CDATA[lsof -i :4321 -t]]><!/DSML!parameter>\n', + ' <!DSML!parameter name=“description”><![CDATA[Verify port 4321 is free]]><!/DSML!parameter>\n', + ' <!/DSML!invoke>\n', + ' <!/DSML!tool_calls>', + ], ['Bash']); + const finalCalls = events.flatMap((evt) => (evt.type === 'tool_calls' ? 
evt.calls : [])); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Bash'); + assert.equal(finalCalls[0].input.command, 'lsof -i :4321 -t'); + assert.equal(collectText(events), ''); +}); + +test('sieve emits tool_calls for ideographic-comma DSML drift', () => { + const events = runSieve([ + '<、DSML、tool_calls>\n', + ' <、DSML、invoke name="Bash">\n', + " <、DSML、parameter name=\"command\"><、[CDATA[git commit -m \"$(cat <<'EOF'\n", + 'feat: expand fullwidth bang separator and curly quote tolerance in DSML tool parsing\n\n', + 'Co-Authored-By: Claude Opus 4.6 noreply@anthropic.com\n', + 'EOF\n', + ')"]]><、/DSML、parameter>\n', + ' <、DSML、parameter name="description"><、[CDATA[Create commit with staged changes]]><、/DSML、parameter>\n', + ' <、/DSML、invoke>\n', + '<、/DSML、tool_calls>', + ], ['Bash']); + const finalCalls = events.flatMap((evt) => (evt.type === 'tool_calls' ? evt.calls : [])); + assert.equal(finalCalls.length, 1); + assert.equal(finalCalls[0].name, 'Bash'); + assert.equal(finalCalls[0].input.command.includes('git commit -m'), true); + assert.equal(collectText(events), ''); +}); + test('sieve emits all-empty arbitrary-prefixed tool tags without leaking text', () => { const payload = [ '\n', From 247fc7c788b94c6f278e4f35966d96a04562351d Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 10 May 2026 04:24:10 +0800 Subject: [PATCH 2/3] refactor: unify tool markup pipe and CDATA separator into general-purpose separator detector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the hardcoded isToolMarkupPipe (matching |, |, ␂, \x02, !) and isToolCDATAOpenSeparator (exclusion-based) with a single isToolMarkupSeparator that treats any character other than structural characters (<, >, /, =, quotes, [), whitespace, and ASCII letters/digits as a valid DSML separator. This eliminates the need for a per-character allowlist — novel separators like ※ are automatically supported without code changes. 
Also removes the unused cdataPattern regexp and updates docs to use "non-structural separator" terminology. Co-Authored-By: Claude Opus 4.6 --- API.en.md | 4 +- API.md | 4 +- docs/prompt-compatibility.md | 2 +- docs/toolcall-semantics.md | 4 +- .../stream-tool-sieve/parse_payload.js | 32 +++++++------- internal/toolcall/toolcalls_markup.go | 3 -- internal/toolcall/toolcalls_parse_markup.go | 12 +----- internal/toolcall/toolcalls_scan.go | 43 ++++++++++--------- internal/toolcall/toolcalls_test.go | 17 ++++++++ tests/node/stream-tool-sieve.test.js | 14 ++++++ 10 files changed, 78 insertions(+), 57 deletions(-) diff --git a/API.en.md b/API.en.md index 07fbf3d..16f3f99 100644 --- a/API.en.md +++ b/API.en.md @@ -40,7 +40,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. -- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, or `<、DSML、tool_calls>`, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. 
The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) back to XML before parsing, and also tolerates CDATA opener drift such as `<![CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. +- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, `<、DSML、tool_calls>`, or ``, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) with non-structural separators before or after them back to XML before parsing, and also tolerates CDATA opener drift such as `<![CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. 
JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. - When upstream returns a thinking-only response with no visible text, the Go main path for both streaming and non-streaming completions retries once in the same DeepSeek session: it appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429. - Citation/reference marker boundary: streaming output hides upstream `[citation:N]` / `[reference:N]` placeholders by default; non-stream output converts DeepSeek search reference markers into Markdown links. 
@@ -355,7 +355,7 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>`), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<![CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. 
+- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / ``), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize non-structural separators back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<![CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. - The parser no longer drops tool calls solely because parameter values are empty; explicit empty strings or whitespace-only parameters become empty strings in structured `tool_calls`. Prompting still tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation. - If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`. - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... 
```) are treated as examples, not executable calls. diff --git a/API.md b/API.md index 5ad14c0..c39c496 100644 --- a/API.md +++ b/API.md @@ -40,7 +40,7 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>`)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,前缀壳会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / ``)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,标签名前或标签名后的非结构性分隔符会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 
`123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 - 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径的流式与非流式补全都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。 - 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。 @@ -357,7 +357,7 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>`)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些前缀壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 +- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` 
/ 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / ``)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些非结构性分隔符壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 - 解析层不会因为参数值为空而丢弃工具调用;显式空字符串或纯空白参数会按空字符串进入结构化 `tool_calls`。Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。 - 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index 9d2494a..e0cb4a3 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -168,7 +168,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 4. 把这整段内容并入 system prompt。 工具调用正例现在优先示范全角分隔符 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 -兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前任意协议前缀壳都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser。CDATA 开头也使用同一类扫描式容错,`` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`、``。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前或标签名后的非结构性协议分隔符都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 
parser;结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。CDATA 开头也使用同一类扫描式容错,`...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]>` 或 `` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。 Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。 在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index 409b942..23358e2 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -40,7 +40,7 @@ - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 - Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、全角感叹号 `!`、顿号 `、`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉`、弯引号属性值等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、``、``、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`、`<、DSML、tool_calls>...<、/DSML、tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。 -- 
如果模型在固定工具标签名后多输出一个尾部管道符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|`,或在带属性标签的结束符前多输出一个尾部管道符(如 ``),兼容层会把这个尾部 `|` / `|` 当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。 +- 如果模型在固定工具标签名后多输出一个非结构性分隔符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|` / ``,或在带属性标签的结束符前多输出一个尾部分隔符(如 ``),兼容层会把这个尾部分隔符当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 - 裸 `` / `` 不会被当成“已支持的工具语法”;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。 @@ -54,7 +54,7 @@ 在流式链路中(Go / Node 一致): -- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `` / `` / ``)、基于固定本地标签名的 DSML 噪声容错形态、尾部管道符形态(如 `<|DSML|tool_calls|`)和 canonical `` wrapper 都会进入结构化捕获 +- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `` / `` / ``)、基于固定本地标签名的 DSML 噪声容错形态、尾部非结构性分隔符形态(如 `<|DSML|tool_calls|` / ``)和 canonical `` wrapper 都会进入结构化捕获 - 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 - 已识别成功的工具调用不会再次回流到普通文本 - 不符合新格式的块不会执行,并继续按原样文本透传 diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 175223c..11b31a4 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -464,14 +464,7 @@ function matchCDATAOpenAt(text, start) { } function isCDATAOpenSeparator(ch) { - const normalized = normalizeFullwidthASCIIChar(ch || ''); - if (!normalized || ['<', '>', '/', '=', '"', "'", '['].includes(normalized)) { - return false; - } - if ([' ', '\t', '\n', '\r'].includes(normalized)) { - return false; - } - return !/^[A-Za-z0-9]$/.test(normalized); + return isToolMarkupSeparator(ch); } function findCDATAEnd(text, from) { @@ -528,21 +521,21 @@ function scanToolMarkupTagAt(text, start) { } const originalNameEnd = i + len; let nameEnd = originalNameEnd; - while (nameEnd < raw.length && isToolMarkupPipe(raw[nameEnd])) { + while 
(nameEnd < raw.length && isToolMarkupSeparator(raw[nameEnd])) { nameEnd += 1; } - const hasTrailingPipe = nameEnd > originalNameEnd; + const hasTrailingSeparator = nameEnd > originalNameEnd; if (!hasXmlTagBoundary(raw, nameEnd)) { return null; } let end = findXmlTagEnd(raw, nameEnd); if (end < 0) { - if (!hasTrailingPipe) { + if (!hasTrailingSeparator) { return null; } end = nameEnd - 1; } - if (hasTrailingPipe) { + if (hasTrailingSeparator) { const nextLT = raw.indexOf('<', nameEnd); if (nextLT >= 0 && end >= nextLT) { end = nameEnd - 1; @@ -635,8 +628,15 @@ function includeDuplicateLeadingLessThan(text, idx) { return out; } -function isToolMarkupPipe(ch) { - return ch === '|' || ch === '|' || ch === '␂' || ch === '\x02'; +function isToolMarkupSeparator(ch) { + const normalized = normalizeFullwidthASCIIChar(ch || ''); + if (!normalized || ['<', '>', '/', '=', '"', "'", '['].includes(normalized)) { + return false; + } + if ([' ', '\t', '\n', '\r'].includes(normalized)) { + return false; + } + return !/^[A-Za-z0-9]$/.test(normalized); } function isPartialToolMarkupTagPrefix(text) { @@ -755,7 +755,7 @@ function isToolMarkupTagTerminator(raw, idx) { } function consumeToolMarkupNamePrefixOnce(raw, lower, idx) { - if (idx < raw.length && isToolMarkupPipe(raw[idx])) { + if (idx < raw.length && isToolMarkupSeparator(raw[idx])) { return { next: idx + 1, ok: true }; } if (idx < raw.length && [' ', '\t', '\r', '\n'].includes(raw[idx])) { @@ -794,7 +794,7 @@ function consumeArbitraryToolMarkupNamePrefix(raw, lower, idx) { } let next = k; let ok = false; - if (next < raw.length && isToolMarkupPipe(raw[next])) { + if (next < raw.length && isToolMarkupSeparator(raw[next])) { next += 1; ok = true; } else if (next < raw.length && ['_', '-'].includes(normalizeFullwidthASCIIChar(raw[next]))) { diff --git a/internal/toolcall/toolcalls_markup.go b/internal/toolcall/toolcalls_markup.go index cc94256..08cf07e 100644 --- a/internal/toolcall/toolcalls_markup.go +++ 
b/internal/toolcall/toolcalls_markup.go @@ -9,9 +9,6 @@ import ( var toolCallMarkupKVPattern = regexp.MustCompile(`(?is)<(?:[a-z0-9_:-]+:)?([a-z0-9_\-.]+)\b[^>]*>(.*?)`) -// cdataPattern matches a standalone CDATA section. -var cdataPattern = regexp.MustCompile(`(?is)^(?:<|〈)(?:!|!)\[CDATA\[(.*?)]](?:>|>|〉)$`) - func parseMarkupKVObject(text string) map[string]any { matches := toolCallMarkupKVPattern.FindAllStringSubmatch(strings.TrimSpace(text), -1) if len(matches) == 0 { diff --git a/internal/toolcall/toolcalls_parse_markup.go b/internal/toolcall/toolcalls_parse_markup.go index 8c72f74..4660c50 100644 --- a/internal/toolcall/toolcalls_parse_markup.go +++ b/internal/toolcall/toolcalls_parse_markup.go @@ -247,17 +247,7 @@ func matchToolCDATAOpenAt(text string, start int) (int, bool) { } func isToolCDATAOpenSeparator(r rune) bool { - ch := normalizeFullwidthASCII(r) - if ch == 0 || ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '"' || ch == '\'' || ch == '[' { - return false - } - if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { - return false - } - if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') { - return false - } - return true + return isToolMarkupSeparator(r) } func hasASCIIPrefixFoldAt(text string, start int, prefix string) bool { diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go index 30b4e6a..e331e43 100644 --- a/internal/toolcall/toolcalls_scan.go +++ b/internal/toolcall/toolcalls_scan.go @@ -168,22 +168,22 @@ func scanToolMarkupTagAt(text string, start int) (ToolMarkupTag, bool) { dsmlLike = true } nameEnd := i + nameLen - nameEndBeforePipes := nameEnd - for next, ok := consumeToolMarkupPipe(text, nameEnd); ok; next, ok = consumeToolMarkupPipe(text, nameEnd) { + nameEndBeforeSeparators := nameEnd + for next, ok := consumeToolMarkupSeparator(text, nameEnd); ok; next, ok = consumeToolMarkupSeparator(text, nameEnd) { nameEnd = next } - hasTrailingPipe := nameEnd > 
nameEndBeforePipes + hasTrailingSeparator := nameEnd > nameEndBeforeSeparators if !hasToolMarkupBoundary(text, nameEnd) { return ToolMarkupTag{}, false } end := findXMLTagEnd(text, nameEnd) if end < 0 { - if !hasTrailingPipe { + if !hasTrailingSeparator { return ToolMarkupTag{}, false } end = nameEnd - 1 } - if hasTrailingPipe { + if hasTrailingSeparator { if nextLT := strings.IndexByte(text[nameEnd:], '<'); nextLT >= 0 && end >= nameEnd+nextLT { end = nameEnd - 1 } @@ -251,7 +251,7 @@ func consumeToolMarkupNamePrefix(text string, idx int) (int, bool) { } func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) { - if next, ok := consumeToolMarkupPipe(text, idx); ok { + if next, ok := consumeToolMarkupSeparator(text, idx); ok { return next, true } if idx < len(text) && (text[idx] == ' ' || text[idx] == '\t' || text[idx] == '\r' || text[idx] == '\n') { @@ -288,7 +288,7 @@ func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) { for k < len(text) && (text[k] == ' ' || text[k] == '\t' || text[k] == '\r' || text[k] == '\n') { k++ } - next, ok := consumeToolMarkupPipe(text, k) + next, ok := consumeToolMarkupSeparator(text, k) if !ok { if sep, size := normalizedASCIIAt(text, k); sep == '_' || sep == '-' { next = k + size @@ -448,26 +448,29 @@ func isToolMarkupTagTerminator(text string, idx int) bool { return normalizeFullwidthASCII(r) == '>' } -func consumeToolMarkupPipe(text string, idx int) (int, bool) { +func consumeToolMarkupSeparator(text string, idx int) (int, bool) { if idx >= len(text) { return idx, false } - if text[idx] == '|' { - return idx + 1, true + r, size := utf8.DecodeRuneInString(text[idx:]) + if size <= 0 || !isToolMarkupSeparator(r) { + return idx, false } - if text[idx] == '\x02' { - return idx + 1, true + return idx + size, true +} + +func isToolMarkupSeparator(r rune) bool { + ch := normalizeFullwidthASCII(r) + if ch == 0 || ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '"' || ch == '\'' || ch == '[' { 
+ return false } - if strings.HasPrefix(text[idx:], "|") { - return idx + len("|"), true + if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { + return false } - if strings.HasPrefix(text[idx:], "␂") { - return idx + len("␂"), true + if (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') { + return false } - if ch, size := normalizedASCIIAt(text, idx); ch == '!' { - return idx + size, true - } - return idx, false + return true } func consumeToolMarkupLessThan(text string, idx int) (int, bool) { diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 1171b8d..01d7d10 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -238,6 +238,23 @@ func TestParseToolCallsToleratesDSMLTrailingPipeTagTerminator(t *testing.T) { } } +func TestParseToolCallsToleratesDSMLTrailingNovelSeparatorTagTerminator(t *testing.T) { + text := strings.Join([]string{ + ``, + ` `, + ` `, + ` `, + ``, + }, "\n") + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 1 { + t.Fatalf("expected one trailing-separator DSML call, got %#v", calls) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "pwd" { + t.Fatalf("unexpected trailing-separator DSML parse result: %#v", calls[0]) + } +} + func TestParseToolCallsToleratesExtraLeadingLessThanBeforeDSML(t *testing.T) { text := `<<|DSML|tool_calls><<|DSML|invoke name="Bash"><<|DSML|parameter name="command">` calls := ParseToolCalls(text, []string{"Bash"}) diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index 22ec81f..0e55fca 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -247,6 +247,20 @@ test('parseToolCalls tolerates DSML trailing pipe tag terminator', () => { assert.deepEqual(calls[0].input, { command: 'find "/home" -type d', timeout: 10 }); }); +test('parseToolCalls tolerates DSML trailing novel separator tag terminator', () => 
{ + const payload = [ + '', + ' ', + ' ', + ' ', + '', + ].join('\n'); + const calls = parseToolCalls(payload, ['Bash']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Bash'); + assert.deepEqual(calls[0].input, { command: 'pwd' }); +}); + test('parseToolCalls tolerates extra leading less-than before DSML tags', () => { const payload = [ '<<|DSML|tool_calls>', From 1aa791ec3a1fcb0ea7a7263cac3b8fc33939afaf Mon Sep 17 00:00:00 2001 From: CJACK Date: Sun, 10 May 2026 04:52:19 +0800 Subject: [PATCH 3/3] feat: support PascalCase local-name drift in DSML tool markup parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detect camelCase→PascalCase boundaries between arbitrary prefixes and fixed local names (tool_calls/invoke/parameter), so that fused forms like are recognized without explicit separator characters. Also add the underscore-free alias "toolcalls" as a valid DSML local name. Includes lookalike rejection tests to ensure near-matches like are not falsely accepted. Co-Authored-By: Claude Opus 4.6 --- API.en.md | 4 +-- API.md | 4 +-- docs/prompt-compatibility.md | 2 +- docs/toolcall-semantics.md | 4 +-- .../stream-tool-sieve/parse_payload.js | 23 +++++++++++-- internal/toolcall/toolcalls_scan.go | 34 +++++++++++++++++-- internal/toolcall/toolcalls_test.go | 19 +++++++++++ tests/node/stream-tool-sieve.test.js | 17 ++++++++++ 8 files changed, 94 insertions(+), 13 deletions(-) diff --git a/API.en.md b/API.en.md index 16f3f99..4660188 100644 --- a/API.en.md +++ b/API.en.md @@ -40,7 +40,7 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. 
-- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, `<、DSML、tool_calls>`, or ``, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) with non-structural separators before or after them back to XML before parsing, and also tolerates CDATA opener drift such as `<![CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. 
+- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, PascalCase local-name drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, `<、DSML、tool_calls>`, ``, or ``, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) with non-structural separators before or after them back to XML before parsing, and also tolerates CDATA opener drift such as `<![CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. - When upstream returns a thinking-only response with no visible text, the Go main path for both streaming and non-streaming completions retries once in the same DeepSeek session: it appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. 
If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429. - Citation/reference marker boundary: streaming output hides upstream `[citation:N]` / `[reference:N]` placeholders by default; non-stream output converts DeepSeek search reference markers into Markdown links. @@ -355,7 +355,7 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / ``), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize non-structural separators back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<![CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. 
+- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, PascalCase local-name drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize non-structural separators back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<![CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. - The parser no longer drops tool calls solely because parameter values are empty; explicit empty strings or whitespace-only parameters become empty strings in structured `tool_calls`. Prompting still tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation. - If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`. 
- `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls. diff --git a/API.md b/API.md index c39c496..9809eca 100644 --- a/API.md +++ b/API.md @@ -40,7 +40,7 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / ``)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,标签名前或标签名后的非结构性分隔符会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、PascalCase 本地名、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,标签名前或标签名后的非结构性分隔符会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 
`tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 - 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径的流式与非流式补全都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。 - 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。 @@ -357,7 +357,7 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / ``)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些非结构性分隔符壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 +- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 
`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、PascalCase 本地名、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些非结构性分隔符壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 - 解析层不会因为参数值为空而丢弃工具调用;显式空字符串或纯空白参数会按空字符串进入结构化 `tool_calls`。Prompt 会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。 - 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index e0cb4a3..fb03021 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -168,7 +168,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 4. 
把这整段内容并入 system prompt。 工具调用正例现在优先示范全角分隔符 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 -兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`、``。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前或标签名后的非结构性协议分隔符都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser;结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。CDATA 开头也使用同一类扫描式容错,`` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值、PascalCase 本地名和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`、``、``。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前或标签名后的非结构性协议分隔符都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser;结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。CDATA 开头也使用同一类扫描式容错,`...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... 
}` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]>` 或 `` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。 Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。 在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。 diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index 23358e2..4deb80d 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -39,7 +39,7 @@ 兼容修复: - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 -- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、全角感叹号 `!`、顿号 `、`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉`、弯引号属性值等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、``、``、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`、`<、DSML、tool_calls>...<、/DSML、tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra`)仍按普通文本处理。 +- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、全角感叹号 `!`、顿号 `、`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / 
`〉`、弯引号属性值、PascalCase 本地名等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、``、`<`、``、``、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`、`<、DSML、tool_calls>...<、/DSML、tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra` / `ToolCallsExtra`)仍按普通文本处理。 - 如果模型在固定工具标签名后多输出一个非结构性分隔符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|` / ``,或在带属性标签的结束符前多输出一个尾部分隔符(如 ``),兼容层会把这个尾部分隔符当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 - 裸 `` / `` 不会被当成“已支持的工具语法”;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。 @@ -103,7 +103,7 @@ go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./ - DSML `<|DSML|tool_calls>` wrapper 正常解析 - legacy canonical `` wrapper 正常解析 -- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、`<`、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`)正常解析 +- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、``、`<`、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`)正常解析 - 混搭标签(DSML wrapper + canonical inner)归一化后正常解析 - 波浪线围栏 `~~~` 内的示例不执行 - 嵌套围栏(4 反引号嵌套 3 反引号)内的示例不执行 diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 11b31a4..a24bd62 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -5,6 +5,7 @@ const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; const TOOL_MARKUP_NAMES = [ { raw: 'tool_calls', canonical: 'tool_calls' }, { raw: 'tool-calls', canonical: 'tool_calls', dsmlOnly: true }, + { raw: 'toolcalls', canonical: 'tool_calls', dsmlOnly: true }, { raw: 'invoke', canonical: 'invoke' }, { raw: 'parameter', canonical: 'parameter' }, ]; @@ -698,7 +699,7 @@ function matchToolMarkupNameAfterArbitraryPrefix(raw, start) { for (const 
name of TOOL_MARKUP_NAMES) { const matched = matchNormalizedASCII(raw, idx, name.raw); if (!matched.ok) continue; - if (!toolMarkupPrefixAllowsLocalName(raw.slice(start, idx))) continue; + if (!toolMarkupPrefixAllowsLocalNameAt(raw, start, idx)) continue; return { ok: true, name: name.canonical, start: idx, len: matched.len }; } idx += 1; @@ -711,10 +712,10 @@ function hasPartialToolMarkupNameAfterArbitraryPrefix(raw, start) { if (isToolMarkupTagTerminator(raw, idx)) { return false; } - if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasToolMarkupNamePrefix(raw, idx)) { + if (toolMarkupPrefixAllowsLocalNameAt(raw, start, idx) && hasToolMarkupNamePrefix(raw, idx)) { return true; } - if (toolMarkupPrefixAllowsLocalName(raw.slice(start, idx)) && hasDSMLNamePrefixOrPartial(raw, idx)) { + if (toolMarkupPrefixAllowsLocalNameAt(raw, start, idx) && hasDSMLNamePrefixOrPartial(raw, idx)) { return true; } idx += 1; @@ -741,6 +742,22 @@ function toolMarkupPrefixAllowsLocalName(prefix) { return !/^[A-Za-z0-9]$/.test(previous); } +function toolMarkupPrefixAllowsLocalNameAt(raw, start, localStart) { + if (start < 0 || localStart <= start || localStart > raw.length) { + return false; + } + const prefix = raw.slice(start, localStart); + if (toolMarkupPrefixAllowsLocalName(prefix)) { + return true; + } + if (/[="'"]/.test(prefix)) { + return false; + } + const previous = normalizeFullwidthASCIIChar(prefix[prefix.length - 1] || ''); + const next = normalizeFullwidthASCIIChar(raw[localStart] || ''); + return /^[A-Za-z0-9]$/.test(previous) && /^[A-Z]$/.test(next); +} + function toolMarkupPrefixContainsSlash(prefix) { for (const ch of toStringSafe(prefix)) { if (normalizeFullwidthASCIIChar(ch) === '/') { diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go index e331e43..39727d1 100644 --- a/internal/toolcall/toolcalls_scan.go +++ b/internal/toolcall/toolcalls_scan.go @@ -14,6 +14,7 @@ type toolMarkupNameAlias struct { var toolMarkupNames 
= []toolMarkupNameAlias{ {raw: "tool_calls", canonical: "tool_calls"}, {raw: "tool-calls", canonical: "tool_calls", dsmlOnly: true}, + {raw: "toolcalls", canonical: "tool_calls", dsmlOnly: true}, {raw: "invoke", canonical: "invoke"}, {raw: "parameter", canonical: "parameter"}, } @@ -369,7 +370,7 @@ func matchToolMarkupNameAfterArbitraryPrefix(text string, start int) (string, in if !ok { continue } - if !toolMarkupPrefixAllowsLocalName(text[start:idx]) { + if !toolMarkupPrefixAllowsLocalNameAt(text, start, idx) { continue } return name.canonical, idx, nameLen, true @@ -388,10 +389,10 @@ func hasPartialToolMarkupNameAfterArbitraryPrefix(text string, start int) bool { if isToolMarkupTagTerminator(text, idx) { return false } - if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasToolMarkupNamePrefix(text, idx) { + if toolMarkupPrefixAllowsLocalNameAt(text, start, idx) && hasToolMarkupNamePrefix(text, idx) { return true } - if toolMarkupPrefixAllowsLocalName(text[start:idx]) && hasDSMLNamePrefixOrPartial(text, idx) { + if toolMarkupPrefixAllowsLocalNameAt(text, start, idx) && hasDSMLNamePrefixOrPartial(text, idx) { return true } _, size := utf8.DecodeRuneInString(text[idx:]) @@ -403,6 +404,25 @@ func hasPartialToolMarkupNameAfterArbitraryPrefix(text string, start int) bool { return toolMarkupPrefixAllowsLocalName(text[start:]) } +func toolMarkupPrefixAllowsLocalNameAt(text string, start, localStart int) bool { + if start < 0 || localStart <= start || localStart > len(text) { + return false + } + prefix := text[start:localStart] + if toolMarkupPrefixAllowsLocalName(prefix) { + return true + } + if strings.ContainsAny(prefix, "=\"'") { + return false + } + prev, prevSize := utf8.DecodeLastRuneInString(prefix) + next, _ := utf8.DecodeRuneInString(text[localStart:]) + if prevSize <= 0 || next == utf8.RuneError { + return false + } + return isASCIIAlphaNumeric(normalizeFullwidthASCII(prev)) && isASCIIUpper(normalizeFullwidthASCII(next)) +} + func 
hasDSMLNamePrefixOrPartial(text string, start int) bool { return hasASCIIPrefixFoldAt(text, start, "dsml") || hasASCIIPartialPrefixFoldAt(text, start, "dsml") } @@ -437,6 +457,14 @@ func normalizedASCIILowerString(text string) string { return b.String() } +func isASCIIAlphaNumeric(r rune) bool { + return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') +} + +func isASCIIUpper(r rune) bool { + return r >= 'A' && r <= 'Z' +} + func isToolMarkupTagTerminator(text string, idx int) bool { if idx >= len(text) { return false diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 01d7d10..f706f9c 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -111,6 +111,25 @@ func TestParseToolCallsSupportsArbitraryPrefixedToolMarkup(t *testing.T) { } } +func TestParseToolCallsSupportsCamelPrefixedToolMarkup(t *testing.T) { + text := `` + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 1 { + t.Fatalf("expected one camel-prefixed tool call, got %#v", calls) + } + if calls[0].Name != "Bash" || calls[0].Input["command"] != "git push" || calls[0].Input["description"] != "Push dev branch to origin" { + t.Fatalf("unexpected camel-prefixed tool call: %#v", calls[0]) + } +} + +func TestParseToolCallsRejectsCamelPrefixedToolMarkupLookalike(t *testing.T) { + text := `git push` + calls := ParseToolCalls(text, []string{"Bash"}) + if len(calls) != 0 { + t.Fatalf("expected camel-prefixed lookalike to be ignored, got %#v", calls) + } +} + func TestParseToolCallsSupportsFullwidthDSMLShell(t *testing.T) { text := `<dSML|tool_calls> <dSML|invoke name="Read"> diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index 0e55fca..1de053d 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -112,6 +112,23 @@ test('parseToolCalls parses arbitrary-prefixed tool markup shells', () => { } }); 
+test('parseToolCalls parses camel-prefixed tool markup shell', () => { + const payload = ''; + const calls = parseToolCalls(payload, ['Bash']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Bash'); + assert.deepEqual(calls[0].input, { + command: 'git push', + description: 'Push dev branch to origin', + }); +}); + +test('parseToolCalls ignores camel-prefixed tool markup lookalike', () => { + const payload = 'git push'; + const calls = parseToolCalls(payload, ['Bash']); + assert.equal(calls.length, 0); +}); + test('parseToolCalls parses fullwidth DSML shell drift', () => { const payload = `<dSML|tool_calls> <dSML|invoke name="Read">