From 067cf465bba24ad6723ccd815da31625b2b98757 Mon Sep 17 00:00:00 2001 From: CJACK Date: Sat, 9 May 2026 23:16:07 +0800 Subject: [PATCH] feat: integrate reasoning content into assistant tool-call messages and improve tool markup parsing for prompt compatibility --- VERSION | 2 +- docs/prompt-compatibility.md | 6 +- internal/format/openai/render_responses.go | 12 +++ internal/format/openai/render_test.go | 20 ++++- internal/httpapi/claude/handler_util_test.go | 37 +++++++++ internal/httpapi/claude/handler_utils.go | 70 +++++++++++++++- internal/httpapi/gemini/convert_messages.go | 46 ++++++++++- .../httpapi/gemini/convert_messages_test.go | 41 ++++++++++ ...onses_stream_runtime_toolcalls_finalize.go | 16 ++++ .../stream-tool-sieve/parse_payload.js | 43 +++++++++- .../responses_input_items_test.go | 46 ++++++++++- .../promptcompat/responses_input_normalize.go | 79 +++++++++++++++++++ internal/toolcall/toolcalls_scan.go | 38 +++++++++ internal/toolcall/toolcalls_test.go | 39 +++++++++ tests/node/stream-tool-sieve.test.js | 32 ++++++++ 15 files changed, 513 insertions(+), 14 deletions(-) diff --git a/VERSION b/VERSION index b98ff4c..a84947d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.4.6 +4.5.0 diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index ee97b2a..f64d57b 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -168,7 +168,7 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 4. 把这整段内容并入 system prompt。 工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 -兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``;但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;DSML 标签会在解析入口归一化回现有 XML 标签后继续走同一套 parser。 +兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。需要注意:这是“兼容 DSML 外壳,内部仍以 XML 解析语义为准”,不是原生 DSML 全链路实现;这些别名会在解析入口归一化回现有 XML 标签后继续走同一套 parser。解析器会先截获非代码块中的疑似工具 wrapper,完整解析失败或工具语义无效时再按普通文本放行。 数组参数使用 `...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]>` 或 `` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。 Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。 在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。 @@ -205,6 +205,10 @@ assistant 的 reasoning 会变成一个显式标签块: 然后再接可见回答正文。 +对最终返回给客户端的 assistant 轮次,reasoning 不会因为本轮输出了工具调用而被丢弃。OpenAI Chat 会在同一个 assistant message 上同时返回 `reasoning_content` 和 `tool_calls`;OpenAI Responses 会先返回一个包含 `reasoning` content 的 assistant message item,再返回后续 `function_call` item;Claude / Gemini 也会在各自原生 thinking / thought 结构后继续返回 tool_use / functionCall。 + +对进入后续 prompt / `DS2API_HISTORY.txt` 的历史轮次,兼容层也会把同一轮工具调用前的 reasoning 绑定到 assistant tool call 历史上。OpenAI Chat 原生 `reasoning_content + tool_calls` 会直接保留;OpenAI Responses 若以 `reasoning` message item 后接 `function_call` item 的形式回放历史,会在归一化时合并为同一个 assistant 历史块;Claude 的 `thinking` block 会绑定到后续 `tool_use`;Gemini 的 `thought: true` part 会绑定到后续 `functionCall`。最终 prompt 中的顺序固定为 `[reasoning_content]...[/reasoning_content]`,再接 DSML tool call 外壳。 + ### 7.2 历史 tool_calls 保留方式 assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 DSML 外壳: diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go index 0c7dfbe..5635f96 100644 --- a/internal/format/openai/render_responses.go +++ b/internal/format/openai/render_responses.go @@ -21,6 +21,18 @@ func BuildResponseObjectWithToolCalls(responseID, model, finalPrompt, finalThink output := make([]any, 0, 2) if len(detected) > 0 { exposedOutputText = "" + if strings.TrimSpace(finalThinking) != "" { + output = append(output, map[string]any{ + "type": "message", + "id": "msg_" + strings.ReplaceAll(uuid.NewString(), "-", ""), + "role": "assistant", + "status": "completed", + "content": []any{map[string]any{ + "type": "reasoning", + "text": finalThinking, + }}, + }) + } output = append(output, toResponsesFunctionCallItems(detected, toolsRaw)...) } else { content := make([]any, 0, 2) diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go index e174bd6..1c14f51 100644 --- a/internal/format/openai/render_test.go +++ b/internal/format/openai/render_test.go @@ -85,12 +85,24 @@ func TestBuildResponseObjectPromotesToolCallFromThinkingWhenTextEmpty(t *testing ) output, _ := obj["output"].([]any) - if len(output) != 1 { - t.Fatalf("expected one output item, got %#v", obj["output"]) + if len(output) != 2 { + t.Fatalf("expected reasoning message plus function_call output, got %#v", obj["output"]) } first, _ := output[0].(map[string]any) - if first["type"] != "function_call" { - t.Fatalf("expected function_call output, got %#v", first["type"]) + if first["type"] != "message" { + t.Fatalf("expected reasoning message output first, got %#v", first["type"]) + } + content, _ := first["content"].([]any) + if len(content) != 1 { + t.Fatalf("expected reasoning content, got %#v", first["content"]) + } + block0, _ := content[0].(map[string]any) + if block0["type"] != "reasoning" { + t.Fatalf("expected reasoning block, got %#v", block0["type"]) + } + second, _ := output[1].(map[string]any) + if second["type"] != "function_call" { + t.Fatalf("expected function_call output, got %#v", second["type"]) } } diff --git a/internal/httpapi/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go index d69dc25..12572ae 100644 --- a/internal/httpapi/claude/handler_util_test.go +++ b/internal/httpapi/claude/handler_util_test.go @@ -101,6 +101,43 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) { } } +func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T) { + msgs := []any{ + map[string]any{ + "role": "assistant", + "content": []any{ + map[string]any{"type": "thinking", "thinking": "need live search before answering"}, + map[string]any{ + "type": "tool_use", + "id": "call_1", + "name": "search_web", + "input": map[string]any{"query": "latest"}, + }, + }, + }, + } + + got := normalizeClaudeMessages(msgs) + if len(got) != 1 { + t.Fatalf("expected one normalized tool-call message, got %#v", got) + } + m := got[0].(map[string]any) + if m["reasoning_content"] != "need live search before answering" { + t.Fatalf("expected thinking preserved as reasoning_content, got %#v", m) + } + tc, _ := m["tool_calls"].([]any) + if len(tc) != 1 { + t.Fatalf("expected one tool call, got %#v", m["tool_calls"]) + } + prompt := buildClaudePromptTokenText(got, true) + if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") { + t.Fatalf("expected thinking in prompt history, got %q", prompt) + } + if !containsStr(prompt, `<|DSML|invoke name="search_web">`) { + t.Fatalf("expected tool call in prompt history, got %q", prompt) + } +} + func TestNormalizeClaudeMessagesDoesNotPromoteUserToolUse(t *testing.T) { msgs := []any{ map[string]any{ diff --git a/internal/httpapi/claude/handler_utils.go b/internal/httpapi/claude/handler_utils.go index 5c53958..e3537b4 100644 --- a/internal/httpapi/claude/handler_utils.go +++ b/internal/httpapi/claude/handler_utils.go @@ -25,14 +25,21 @@ func normalizeClaudeMessages(messages []any) []any { switch content := msg["content"].(type) { case []any: textParts := make([]string, 0, len(content)) + pendingThinking := "" flushText := func() { if len(textParts) == 0 { return } - out = append(out, map[string]any{ + message := map[string]any{ "role": role, "content": strings.Join(textParts, "\n"), - }) + } + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + message["reasoning_content"] = pendingThinking + message["content"] = prependClaudeReasoningForPrompt(pendingThinking, safeStringValue(message["content"])) + pendingThinking = "" + } + out = append(out, message) textParts = textParts[:0] } for _, block := range content { @@ -46,10 +53,29 @@ func normalizeClaudeMessages(messages []any) []any { if t, ok := b["text"].(string); ok { textParts = append(textParts, t) } + case "thinking": + if role == "assistant" { + if thinking := extractClaudeThinkingBlockText(b); thinking != "" { + if pendingThinking == "" { + pendingThinking = thinking + } else { + pendingThinking += "\n" + thinking + } + } + continue + } + if raw := strings.TrimSpace(formatClaudeUnknownBlockForPrompt(b)); raw != "" { + textParts = append(textParts, raw) + } case "tool_use": if role == "assistant" { flushText() if toolMsg := normalizeClaudeToolUseToAssistant(b, state); toolMsg != nil { + if strings.TrimSpace(pendingThinking) != "" { + toolMsg["reasoning_content"] = pendingThinking + toolMsg["content"] = prependClaudeReasoningForPrompt(pendingThinking, safeStringValue(toolMsg["content"])) + pendingThinking = "" + } out = append(out, toolMsg) } continue @@ -69,6 +95,13 @@ func normalizeClaudeMessages(messages []any) []any { } } flushText() + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + out = append(out, map[string]any{ + "role": "assistant", + "reasoning_content": pendingThinking, + "content": formatClaudeReasoningForPrompt(pendingThinking), + }) + } default: copied := cloneMap(msg) out = append(out, copied) @@ -77,6 +110,39 @@ func normalizeClaudeMessages(messages []any) []any { return out } +func prependClaudeReasoningForPrompt(reasoning, content string) string { + reasoning = strings.TrimSpace(reasoning) + content = strings.TrimSpace(content) + if reasoning == "" { + return content + } + block := formatClaudeReasoningForPrompt(reasoning) + if content == "" { + return block + } + return block + "\n\n" + content +} + +func formatClaudeReasoningForPrompt(reasoning string) string { + reasoning = strings.TrimSpace(reasoning) + if reasoning == "" { + return "" + } + return "[reasoning_content]\n" + reasoning + "\n[/reasoning_content]" +} + +func extractClaudeThinkingBlockText(block map[string]any) string { + if block == nil { + return "" + } + for _, key := range []string{"thinking", "text", "content"} { + if text := strings.TrimSpace(safeStringValue(block[key])); text != "" { + return text + } + } + return "" +} + func buildClaudeToolPrompt(tools []any) string { toolSchemas := make([]string, 0, len(tools)) names := make([]string, 0, len(tools)) diff --git a/internal/httpapi/gemini/convert_messages.go b/internal/httpapi/gemini/convert_messages.go index f6af145..6dd8f50 100644 --- a/internal/httpapi/gemini/convert_messages.go +++ b/internal/httpapi/gemini/convert_messages.go @@ -44,14 +44,20 @@ func geminiMessagesFromRequest(req map[string]any) []any { } textParts := make([]string, 0, len(parts)) + pendingThinking := "" flushText := func() { if len(textParts) == 0 { return } - out = append(out, map[string]any{ + msg := map[string]any{ "role": role, "content": strings.Join(textParts, "\n"), - }) + } + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + msg["reasoning_content"] = pendingThinking + pendingThinking = "" + } + out = append(out, msg) textParts = textParts[:0] } @@ -61,6 +67,14 @@ func geminiMessagesFromRequest(req map[string]any) []any { continue } if text := strings.TrimSpace(asString(part["text"])); text != "" { + if role == "assistant" && isGeminiThoughtPart(part) { + if pendingThinking == "" { + pendingThinking = text + } else { + pendingThinking += "\n" + text + } + continue + } textParts = append(textParts, text) continue } @@ -75,7 +89,7 @@ func geminiMessagesFromRequest(req map[string]any) []any { } } lastToolCallIDByName[strings.ToLower(name)] = callID - out = append(out, map[string]any{ + msg := map[string]any{ "role": "assistant", "tool_calls": []any{ map[string]any{ @@ -87,7 +101,12 @@ func geminiMessagesFromRequest(req map[string]any) []any { }, }, }, - }) + } + if strings.TrimSpace(pendingThinking) != "" { + msg["reasoning_content"] = pendingThinking + pendingThinking = "" + } + out = append(out, msg) } continue } @@ -132,10 +151,29 @@ func geminiMessagesFromRequest(req map[string]any) []any { } } flushText() + if role == "assistant" && strings.TrimSpace(pendingThinking) != "" { + out = append(out, map[string]any{ + "role": "assistant", + "reasoning_content": pendingThinking, + }) + } } return out } +func isGeminiThoughtPart(part map[string]any) bool { + if part == nil { + return false + } + if v, ok := part["thought"].(bool); ok { + return v + } + if v, ok := part["thoughtSignature"].(string); ok && strings.TrimSpace(v) != "" { + return true + } + return false +} + func normalizeGeminiSystemInstruction(raw any) string { switch v := raw.(type) { case string: diff --git a/internal/httpapi/gemini/convert_messages_test.go b/internal/httpapi/gemini/convert_messages_test.go index a5191b9..6f0890f 100644 --- a/internal/httpapi/gemini/convert_messages_test.go +++ b/internal/httpapi/gemini/convert_messages_test.go @@ -1,6 +1,7 @@ package gemini import ( + "ds2api/internal/promptcompat" "strings" "testing" ) @@ -53,6 +54,46 @@ func TestGeminiMessagesFromRequestPreservesFunctionRoundtrip(t *testing.T) { } } +func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testing.T) { + req := map[string]any{ + "contents": []any{ + map[string]any{ + "role": "model", + "parts": []any{ + map[string]any{"text": "need current state before answering", "thought": true}, + map[string]any{ + "functionCall": map[string]any{ + "id": "call_g1", + "name": "search_web", + "args": map[string]any{"query": "ai"}, + }, + }, + }, + }, + }, + } + + got := geminiMessagesFromRequest(req) + if len(got) != 1 { + t.Fatalf("expected one normalized message, got %#v", got) + } + assistant, _ := got[0].(map[string]any) + if assistant["reasoning_content"] != "need current state before answering" { + t.Fatalf("expected thought preserved as reasoning_content, got %#v", assistant) + } + tc, _ := assistant["tool_calls"].([]any) + if len(tc) != 1 { + t.Fatalf("expected one tool call, got %#v", assistant["tool_calls"]) + } + prompt, _ := promptcompat.BuildOpenAIPromptForAdapter(got, nil, "", true) + if !strings.Contains(prompt, "[reasoning_content]\nneed current state before answering\n[/reasoning_content]") { + t.Fatalf("expected thought in prompt history, got %q", prompt) + } + if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) { + t.Fatalf("expected tool call in prompt history, got %q", prompt) + } +} + func TestGeminiMessagesFromRequestPreservesUnknownPartAsRawJSONText(t *testing.T) { req := map[string]any{ "contents": []any{ diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go index 2f03dd3..06d3673 100644 --- a/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_toolcalls_finalize.go @@ -81,6 +81,22 @@ func (s *responsesStreamRuntime) buildCompletedResponseObject(finalThinking, fin }, }, }) + } else if len(calls) > 0 && strings.TrimSpace(finalThinking) != "" { + indexed = append(indexed, indexedItem{ + index: s.ensureMessageOutputIndex(), + item: map[string]any{ + "id": s.ensureMessageItemID(), + "type": "message", + "role": "assistant", + "status": "completed", + "content": []map[string]any{ + { + "type": "reasoning", + "text": finalThinking, + }, + }, + }, + }) } else if len(calls) == 0 { content := make([]map[string]any, 0, 2) if finalThinking != "" { diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index 6b8077e..658db88 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -616,14 +616,55 @@ function consumeToolMarkupNamePrefixOnce(raw, lower, idx) { } if (lower.startsWith('dsml', idx)) { let next = idx + 'dsml'.length; - if (next < raw.length && raw[next] === '-') { + if (next < raw.length && (raw[next] === '-' || raw[next] === '_')) { next += 1; } return { next, ok: true }; } + const arbitrary = consumeArbitraryToolMarkupNamePrefix(raw, lower, idx); + if (arbitrary.ok) { + return arbitrary; + } return { next: idx, ok: false }; } +function consumeArbitraryToolMarkupNamePrefix(raw, lower, idx) { + if (idx < 0 || idx >= raw.length || !isToolMarkupPrefixSegmentChar(raw[idx])) { + return { next: idx, ok: false }; + } + let j = idx + 1; + while (j < raw.length && isToolMarkupPrefixSegmentChar(raw[j])) { + j += 1; + } + let k = j; + while (k < raw.length && [' ', '\t', '\r', '\n'].includes(raw[k])) { + k += 1; + } + let next = k; + let ok = false; + if (next < raw.length && isToolMarkupPipe(raw[next])) { + next += 1; + ok = true; + } else if (next < raw.length && (raw[next] === '_' || raw[next] === '-')) { + next += 1; + ok = true; + } + if (!ok) { + return { next: idx, ok: false }; + } + while (next < raw.length && [' ', '\t', '\r', '\n'].includes(raw[next])) { + next += 1; + } + if (!hasToolMarkupNamePrefix(lower.slice(next))) { + return { next: idx, ok: false }; + } + return { next, ok: true }; +} + +function isToolMarkupPrefixSegmentChar(ch) { + return /^[A-Za-z0-9]$/.test(ch); +} + function hasToolMarkupNamePrefix(lowerTail) { for (const name of TOOL_MARKUP_NAMES) { if (lowerTail.startsWith(name.raw) || name.raw.startsWith(lowerTail)) { diff --git a/internal/promptcompat/responses_input_items_test.go b/internal/promptcompat/responses_input_items_test.go index 4a782f2..81c2157 100644 --- a/internal/promptcompat/responses_input_items_test.go +++ b/internal/promptcompat/responses_input_items_test.go @@ -1,6 +1,9 @@ package promptcompat -import "testing" +import ( + "strings" + "testing" +) func TestNormalizeResponsesInputItemPreservesAssistantReasoningContent(t *testing.T) { item := map[string]any{ @@ -48,3 +51,44 @@ func TestNormalizeResponsesInputItemAssistantMessageWithReasoningBlocks(t *testi t.Fatalf("expected content blocks preserved, got %#v", got["content"]) } } + +func TestNormalizeResponsesInputArrayMergesReasoningMessageIntoFunctionCallHistory(t *testing.T) { + input := []any{ + map[string]any{ + "type": "message", + "role": "assistant", + "content": []any{ + map[string]any{"type": "reasoning", "text": "need fresh docs before answering"}, + }, + }, + map[string]any{ + "type": "function_call", + "call_id": "call_search", + "name": "search_web", + "arguments": `{"query":"docs"}`, + }, + } + + got := NormalizeResponsesInputAsMessages(input) + if len(got) != 1 { + t.Fatalf("expected reasoning and function_call merged into one assistant message, got %#v", got) + } + msg, _ := got[0].(map[string]any) + if msg["role"] != "assistant" { + t.Fatalf("expected assistant message, got %#v", msg) + } + if msg["reasoning_content"] != "need fresh docs before answering" { + t.Fatalf("expected reasoning_content on tool-call message, got %#v", msg) + } + toolCalls, _ := msg["tool_calls"].([]any) + if len(toolCalls) != 1 { + t.Fatalf("expected one tool call, got %#v", msg["tool_calls"]) + } + history := BuildOpenAIHistoryTranscript(got) + if !strings.Contains(history, "[reasoning_content]\nneed fresh docs before answering\n[/reasoning_content]") { + t.Fatalf("expected reasoning in history transcript, got %q", history) + } + if !strings.Contains(history, `<|DSML|invoke name="search_web">`) { + t.Fatalf("expected tool call in history transcript, got %q", history) + } +} diff --git a/internal/promptcompat/responses_input_normalize.go b/internal/promptcompat/responses_input_normalize.go index e362d0e..1e099e3 100644 --- a/internal/promptcompat/responses_input_normalize.go +++ b/internal/promptcompat/responses_input_normalize.go @@ -61,19 +61,52 @@ func normalizeResponsesInputArray(items []any) []any { out := make([]any, 0, len(items)) callNameByID := map[string]string{} fallbackParts := make([]string, 0, len(items)) + pendingAssistantReasoning := "" flushFallback := func() { if len(fallbackParts) == 0 { return } + if pendingAssistantReasoning != "" { + out = append(out, map[string]any{"role": "assistant", "reasoning_content": pendingAssistantReasoning}) + pendingAssistantReasoning = "" + } out = append(out, map[string]any{"role": "user", "content": strings.Join(fallbackParts, "\n")}) fallbackParts = fallbackParts[:0] } + flushPendingReasoning := func() { + if pendingAssistantReasoning == "" { + return + } + out = append(out, map[string]any{"role": "assistant", "reasoning_content": pendingAssistantReasoning}) + pendingAssistantReasoning = "" + } for _, item := range items { switch x := item.(type) { case map[string]any: if msg := normalizeResponsesInputItemWithState(x, callNameByID); msg != nil { + if reasoning := assistantReasoningOnlyContent(msg); reasoning != "" { + if pendingAssistantReasoning == "" { + pendingAssistantReasoning = reasoning + } else { + pendingAssistantReasoning += "\n" + reasoning + } + continue + } + if isAssistantToolCallMessage(msg) && pendingAssistantReasoning != "" { + if strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(msg["reasoning_content"])) == "" { + msg["reasoning_content"] = pendingAssistantReasoning + } + pendingAssistantReasoning = "" + } else { + flushPendingReasoning() + } flushFallback() + if isAssistantToolCallMessage(msg) && len(out) > 0 { + if merged := mergeResponsesAssistantToolCalls(out[len(out)-1], msg); merged { + continue + } + } out = append(out, msg) continue } @@ -86,9 +119,55 @@ func normalizeResponsesInputArray(items []any) []any { } } } + flushPendingReasoning() flushFallback() if len(out) == 0 { return nil } return out } + +func assistantReasoningOnlyContent(msg map[string]any) string { + if !isAssistantMessage(msg) || isAssistantToolCallMessage(msg) { + return "" + } + if _, hasContent := msg["content"]; hasContent { + normalizedContent := strings.TrimSpace(NormalizeOpenAIContentForPrompt(msg["content"])) + reasoningFromContent := strings.TrimSpace(extractOpenAIReasoningContentFromMessage(msg["content"])) + if normalizedContent != "" && normalizedContent != reasoningFromContent { + return "" + } + if reasoningFromContent != "" { + return reasoningFromContent + } + } + return strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(msg["reasoning_content"])) +} + +func isAssistantMessage(msg map[string]any) bool { + return strings.EqualFold(strings.TrimSpace(asString(msg["role"])), "assistant") +} + +func isAssistantToolCallMessage(msg map[string]any) bool { + if !isAssistantMessage(msg) { + return false + } + toolCalls, ok := msg["tool_calls"].([]any) + return ok && len(toolCalls) > 0 +} + +func mergeResponsesAssistantToolCalls(prev any, next map[string]any) bool { + prevMsg, ok := prev.(map[string]any) + if !ok || !isAssistantToolCallMessage(prevMsg) || !isAssistantToolCallMessage(next) { + return false + } + prevCalls, _ := prevMsg["tool_calls"].([]any) + nextCalls, _ := next["tool_calls"].([]any) + prevMsg["tool_calls"] = append(prevCalls, nextCalls...) + if strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(prevMsg["reasoning_content"])) == "" { + if reasoning := strings.TrimSpace(normalizeOpenAIReasoningContentForPrompt(next["reasoning_content"])); reasoning != "" { + prevMsg["reasoning_content"] = reasoning + } + } + return true +} diff --git a/internal/toolcall/toolcalls_scan.go b/internal/toolcall/toolcalls_scan.go index 6acff6e..a28a188 100644 --- a/internal/toolcall/toolcalls_scan.go +++ b/internal/toolcall/toolcalls_scan.go @@ -242,9 +242,47 @@ func consumeToolMarkupNamePrefixOnce(text string, idx int) (int, bool) { } return next, true } + if next, ok := consumeArbitraryToolMarkupNamePrefix(text, idx); ok { + return next, true + } return idx, false } +func consumeArbitraryToolMarkupNamePrefix(text string, idx int) (int, bool) { + if idx < 0 || idx >= len(text) || !isToolMarkupPrefixSegmentByte(text[idx]) { + return idx, false + } + j := idx + 1 + for j < len(text) && isToolMarkupPrefixSegmentByte(text[j]) { + j++ + } + k := j + for k < len(text) && (text[k] == ' ' || text[k] == '\t' || text[k] == '\r' || text[k] == '\n') { + k++ + } + next, ok := consumeToolMarkupPipe(text, k) + if !ok { + if k < len(text) && (text[k] == '_' || text[k] == '-') { + next = k + 1 + ok = true + } + } + if !ok { + return idx, false + } + for next < len(text) && (text[next] == ' ' || text[next] == '\t' || text[next] == '\r' || text[next] == '\n') { + next++ + } + if !hasToolMarkupNamePrefix(text, next) { + return idx, false + } + return next, true +} + +func isToolMarkupPrefixSegmentByte(b byte) bool { + return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9') +} + func hasASCIIPartialPrefixFoldAt(text string, start int, prefix string) bool { remain := len(text) - start if remain <= 0 || remain > len(prefix) { diff --git a/internal/toolcall/toolcalls_test.go b/internal/toolcall/toolcalls_test.go index 3cad720..e19c318 100644 --- a/internal/toolcall/toolcalls_test.go +++ b/internal/toolcall/toolcalls_test.go @@ -72,6 +72,45 @@ EOF } } +func TestParseToolCallsSupportsUnderscoredDSMLShell(t *testing.T) { + text := ` + + + + + + + +` + calls := ParseToolCalls(text, []string{"search_web", "eval_javascript"}) + if len(calls) != 2 { + t.Fatalf("expected two underscored DSML calls, got %#v", calls) + } + if calls[0].Name != "search_web" || calls[0].Input["query"] != "2026年5月 热点事件" || calls[0].Input["topic"] != "news" { + t.Fatalf("unexpected first underscored DSML call: %#v", calls[0]) + } + if calls[1].Name != "eval_javascript" || calls[1].Input["code"] != "1 + 1" { + t.Fatalf("unexpected second underscored DSML call: %#v", calls[1]) + } +} + +func TestParseToolCallsSupportsArbitraryPrefixedToolMarkup(t *testing.T) { + cases := []string{ + `README.md`, + `README.md`, + `README.md`, + } + for _, text := range cases { + calls := ParseToolCalls(text, []string{"Read"}) + if len(calls) != 1 { + t.Fatalf("expected one arbitrary-prefixed tool call for %q, got %#v", text, calls) + } + if calls[0].Name != "Read" || calls[0].Input["file_path"] != "README.md" { + t.Fatalf("unexpected arbitrary-prefixed parse result: %#v", calls[0]) + } + } +} + func TestParseToolCallsIgnoresBareHyphenatedToolCallsLookalike(t *testing.T) { text := `pwd` calls := ParseToolCalls(text, []string{"Bash"}) diff --git a/tests/node/stream-tool-sieve.test.js b/tests/node/stream-tool-sieve.test.js index eb9b5f3..ccc5e5d 100644 --- a/tests/node/stream-tool-sieve.test.js +++ b/tests/node/stream-tool-sieve.test.js @@ -80,6 +80,38 @@ EOF assert.equal(calls[0].input.command.includes('Co-Authored-By: Claude Opus 4.7'), true); }); +test('parseToolCalls parses underscored DSML shell (Vercel parity)', () => { + const payload = ` + + + + + + + +`; + const calls = parseToolCalls(payload, ['search_web', 'eval_javascript']); + assert.equal(calls.length, 2); + assert.equal(calls[0].name, 'search_web'); + assert.deepEqual(calls[0].input, { query: '2026年5月 热点事件', topic: 'news' }); + assert.equal(calls[1].name, 'eval_javascript'); + assert.deepEqual(calls[1].input, { code: '1 + 1' }); +}); + +test('parseToolCalls parses arbitrary-prefixed tool markup shells', () => { + const samples = [ + 'README.md', + 'README.md', + 'README.md', + ]; + for (const payload of samples) { + const calls = parseToolCalls(payload, ['Read']); + assert.equal(calls.length, 1); + assert.equal(calls[0].name, 'Read'); + assert.deepEqual(calls[0].input, { file_path: 'README.md' }); + } +}); + test('parseToolCalls ignores bare hyphenated tool_calls lookalike', () => { const payload = 'pwd'; const calls = parseToolCalls(payload, ['Bash']);