From a505f2cb969c29b47cd7b4e79b148c65a3e31b3e Mon Sep 17 00:00:00 2001 From: MiY Date: Sun, 26 Apr 2026 17:45:12 +0800 Subject: [PATCH] fix: fallback tool calls from thinking on empty output --- docs/prompt-compatibility.md | 7 +- internal/format/openai/render_chat.go | 2 +- internal/format/openai/render_responses.go | 2 +- internal/format/openai/render_test.go | 10 +-- .../openai/chat/chat_stream_runtime.go | 2 +- internal/httpapi/openai/chat/handler_chat.go | 4 +- .../openai/chat/handler_toolcall_test.go | 68 +++++++++++++++++++ .../openai/responses/responses_handler.go | 4 +- .../responses_stream_runtime_core.go | 2 +- .../openai/responses/responses_stream_test.go | 61 ++++++++++++++++- internal/toolcall/toolcalls_parse.go | 17 ++++- 11 files changed, 162 insertions(+), 17 deletions(-) diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index 495d1cc..95cd697 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -97,7 +97,8 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` - `ref_file_ids` 只承载文件引用,不承载普通文本消息。 - `tools` 不会作为“原生工具 schema”直接下发给下游,而是被改写进 `prompt`。 - OpenAI Chat / Responses 原生走统一 OpenAI 标准化与 DeepSeek payload 组装;Claude / Gemini 会尽量复用 OpenAI prompt/tool 语义,其中 Gemini 直接复用 `promptcompat.BuildOpenAIPromptForAdapter`,Claude 消息接口在可代理场景会转换为 OpenAI chat 形态再执行。 -- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Claude surface 没有 `thinking` 字段时按 Anthropic 语义视为关闭;Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。 +- 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。Claude surface 在流式请求且未显式声明 `thinking` 时,仍按 Anthropic 语义默认关闭;但在非流式代理场景,兼容层会内部开启一次下游 thinking,用于捕获“正文为空、工具调用落在 thinking 里”的情况,随后在回包前剥离用户不可见的 thinking block。 +- 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 `...` 结构当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 检测,但不会因为思维链内容去中途拦截或改写流式输出;thinking / reasoning 增量仍按原样先发,只有在结束收尾时才可能补发最终工具调用结果。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 ## 5. prompt 是怎么拼出来的 @@ -147,8 +148,8 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` 3. 再附上统一的 XML tool call 格式约束。 4. 把这整段内容并入 system prompt。 -工具调用正例仍只示范 canonical XML:`` → `` → ``。 -提示词会额外强调:如果要调用工具,工具块的首个非空白字符必须就是 ``,不能只输出 `` 而漏掉 opening tag。 +工具调用正例现在优先示范官方 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="..." string="true|false">`。 +兼容层仍接受旧式纯 `` wrapper,但提示词会优先要求模型输出官方 DSML 标签,并强调不能只输出 closing wrapper 而漏掉 opening tag。 正例中的工具名只会来自当前请求实际声明的工具;如果当前请求没有足够的已知工具形态,就省略对应的单工具、多工具或嵌套示例,避免把不可用工具名写进 prompt。 对执行类工具,脚本内容必须进入执行参数本身:`Bash` / `execute_command` 使用 `command`,`exec_command` 使用 `cmd`;不要把脚本示范成 `path` / `content` 文件写入参数。 diff --git a/internal/format/openai/render_chat.go b/internal/format/openai/render_chat.go index c09e870..764f151 100644 --- a/internal/format/openai/render_chat.go +++ b/internal/format/openai/render_chat.go @@ -7,7 +7,7 @@ import ( ) func BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { - detected := toolcall.ParseStandaloneToolCallsDetailed(finalText, toolNames) + detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames) finishReason := "stop" messageObj := map[string]any{"role": "assistant", "content": finalText} if strings.TrimSpace(finalThinking) != "" { diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go index 8fc4dbe..2a58bf5 100644 --- a/internal/format/openai/render_responses.go +++ b/internal/format/openai/render_responses.go @@ -12,7 +12,7 @@ import ( func BuildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { // Strict mode: only standalone, structured tool-call payloads are treated // as executable tool calls. - detected := toolcall.ParseStandaloneToolCallsDetailed(finalText, toolNames) + detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames) exposedOutputText := finalText output := make([]any, 0, 2) if len(detected.Calls) > 0 { diff --git a/internal/format/openai/render_test.go b/internal/format/openai/render_test.go index 8a252de..2f22a98 100644 --- a/internal/format/openai/render_test.go +++ b/internal/format/openai/render_test.go @@ -67,22 +67,22 @@ func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) { } } -func TestBuildResponseObjectIgnoresToolCallFromThinkingChannel(t *testing.T) { +func TestBuildResponseObjectPromotesToolCallFromThinkingWhenTextEmpty(t *testing.T) { obj := BuildResponseObject( "resp_test", "gpt-4o", "prompt", - `{"tool_calls":[{"name":"search","input":{"q":"from-thinking"}}]}`, + `from-thinking`, "", []string{"search"}, ) output, _ := obj["output"].([]any) if len(output) != 1 { - t.Fatalf("expected one message output item, got %#v", obj["output"]) + t.Fatalf("expected one output item, got %#v", obj["output"]) } first, _ := output[0].(map[string]any) - if first["type"] != "message" { - t.Fatalf("expected output message, got %#v", first["type"]) + if first["type"] != "function_call" { + t.Fatalf("expected function_call output, got %#v", first["type"]) } } diff --git a/internal/httpapi/openai/chat/chat_stream_runtime.go b/internal/httpapi/openai/chat/chat_stream_runtime.go index 0f65fd0..8ea0546 100644 --- a/internal/httpapi/openai/chat/chat_stream_runtime.go +++ b/internal/httpapi/openai/chat/chat_stream_runtime.go @@ -133,7 +133,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) { finalText := cleanVisibleOutput(s.text.String(), s.stripReferenceMarkers) s.finalThinking = finalThinking s.finalText = finalText - detected := toolcall.ParseStandaloneToolCallsDetailed(finalText, s.toolNames) + detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, s.toolNames) if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted { finishReason = "tool_calls" delta := map[string]any{ diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go index 4a6d01a..3f97a50 100644 --- a/internal/httpapi/openai/chat/handler_chat.go +++ b/internal/httpapi/openai/chat/handler_chat.go @@ -15,6 +15,7 @@ import ( "ds2api/internal/promptcompat" "ds2api/internal/sse" streamengine "ds2api/internal/stream" + "ds2api/internal/toolcall" ) func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { @@ -162,7 +163,8 @@ func (h *Handler) handleNonStream(w http.ResponseWriter, resp *http.Response, co if searchEnabled { finalText = replaceCitationMarkersWithLinks(finalText, result.CitationLinks) } - if shouldWriteUpstreamEmptyOutputError(finalText) { + detected := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, toolNames) + if shouldWriteUpstreamEmptyOutputError(finalText) && len(detected.Calls) == 0 { status, message, code := upstreamEmptyOutputDetail(result.ContentFilter, finalText, finalThinking) if historySession != nil { historySession.error(status, message, code, finalThinking, finalText) diff --git a/internal/httpapi/openai/chat/handler_toolcall_test.go b/internal/httpapi/openai/chat/handler_toolcall_test.go index f949a46..bfff08a 100644 --- a/internal/httpapi/openai/chat/handler_toolcall_test.go +++ b/internal/httpapi/openai/chat/handler_toolcall_test.go @@ -142,6 +142,37 @@ func TestHandleNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testing.T) { } } +func TestHandleNonStreamPromotesThinkingToolCallsWhenTextEmpty(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/thinking_content","v":"from-thinking"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + + h.handleNonStream(rec, resp, "cid-thinking-tool", "deepseek-v4-pro", "prompt", true, false, []string{"search"}, nil) + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for thinking tool calls, got %d body=%s", rec.Code, rec.Body.String()) + } + out := decodeJSONBody(t, rec.Body.String()) + choices, _ := out["choices"].([]any) + if len(choices) == 0 { + t.Fatalf("expected choices, got %#v", out) + } + choice, _ := choices[0].(map[string]any) + if got := asString(choice["finish_reason"]); got != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"]) + } + message, _ := choice["message"].(map[string]any) + toolCalls, _ := message["tool_calls"].([]any) + if len(toolCalls) != 1 { + t.Fatalf("expected one tool call, got %#v", message["tool_calls"]) + } + if content, exists := message["content"]; !exists || content != nil { + t.Fatalf("expected content nil when tool call promoted, got %#v", message["content"]) + } +} + func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( @@ -214,6 +245,43 @@ func TestHandleStreamIncompleteCapturedToolJSONFlushesAsTextOnFinalize(t *testin } } +func TestHandleStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstreamIntercept(t *testing.T) { + h := &Handler{} + resp := makeSSEHTTPResponse( + `data: {"p":"response/thinking_content","v":"from-thinking"}`, + `data: [DONE]`, + ) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil) + + h.handleStream(rec, req, resp, "cid-thinking-stream", "deepseek-v4-pro", "prompt", true, false, []string{"search"}, nil) + + frames, done := parseSSEDataFrames(t, rec.Body.String()) + if !done { + t.Fatalf("expected [DONE], body=%s", rec.Body.String()) + } + if !streamHasToolCallsDelta(frames) { + t.Fatalf("expected tool_calls delta from finalize fallback, body=%s", rec.Body.String()) + } + reasoningSeen := false + for _, frame := range frames { + choices, _ := frame["choices"].([]any) + for _, item := range choices { + choice, _ := item.(map[string]any) + delta, _ := choice["delta"].(map[string]any) + if asString(delta["reasoning_content"]) != "" { + reasoningSeen = true + } + } + } + if !reasoningSeen { + t.Fatalf("expected reasoning_content to stream before finalize fallback, body=%s", rec.Body.String()) + } + if streamFinishReason(frames) != "tool_calls" { + t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String()) + } +} + func TestHandleStreamEmitsDistinctToolCallIDsAcrossSeparateToolBlocks(t *testing.T) { h := &Handler{} resp := makeSSEHTTPResponse( diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go index 8913322..f142388 100644 --- a/internal/httpapi/openai/responses/responses_handler.go +++ b/internal/httpapi/openai/responses/responses_handler.go @@ -135,10 +135,10 @@ func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Res if searchEnabled { sanitizedText = replaceCitationMarkersWithLinks(sanitizedText, result.CitationLinks) } - if writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) { + textParsed := toolcall.ParseAssistantToolCallsDetailed(sanitizedText, sanitizedThinking, toolNames) + if len(textParsed.Calls) == 0 && writeUpstreamEmptyOutputError(w, sanitizedText, sanitizedThinking, result.ContentFilter) { return } - textParsed := toolcall.ParseStandaloneToolCallsDetailed(sanitizedText, toolNames) logResponsesToolPolicyRejection(traceID, toolChoice, textParsed, "text") callCount := len(textParsed.Calls) diff --git a/internal/httpapi/openai/responses/responses_stream_runtime_core.go b/internal/httpapi/openai/responses/responses_stream_runtime_core.go index 1bd81e6..f49bcaa 100644 --- a/internal/httpapi/openai/responses/responses_stream_runtime_core.go +++ b/internal/httpapi/openai/responses/responses_stream_runtime_core.go @@ -133,7 +133,7 @@ func (s *responsesStreamRuntime) finalize() { s.processToolStreamEvents(toolstream.Flush(&s.sieve, s.toolNames), true, true) } - textParsed := toolcall.ParseStandaloneToolCallsDetailed(finalText, s.toolNames) + textParsed := toolcall.ParseAssistantToolCallsDetailed(finalText, finalThinking, s.toolNames) detected := textParsed.Calls s.logToolPolicyRejections(textParsed) diff --git a/internal/httpapi/openai/responses/responses_stream_test.go b/internal/httpapi/openai/responses/responses_stream_test.go index c19f311..4562951 100644 --- a/internal/httpapi/openai/responses/responses_stream_test.go +++ b/internal/httpapi/openai/responses/responses_stream_test.go @@ -232,6 +232,39 @@ func TestHandleResponsesStreamFailsWhenUpstreamHasOnlyThinking(t *testing.T) { } } +func TestHandleResponsesStreamPromotesThinkingToolCallsOnFinalizeWithoutMidstreamIntercept(t *testing.T) { + h := &Handler{} + req := httptest.NewRequest(http.MethodPost, "/v1/responses", nil) + rec := httptest.NewRecorder() + + sseLine := func(path, value string) string { + b, _ := json.Marshal(map[string]any{ + "p": path, + "v": value, + }) + return "data: " + string(b) + "\n" + } + + streamBody := sseLine("response/thinking_content", `README.MD`) + "data: [DONE]\n" + resp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader(streamBody)), + } + + h.handleResponsesStream(rec, req, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, []string{"read_file"}, promptcompat.DefaultToolChoicePolicy(), "") + + body := rec.Body.String() + if !strings.Contains(body, "event: response.reasoning.delta") { + t.Fatalf("expected reasoning delta in stream body, got %s", body) + } + if !strings.Contains(body, "event: response.function_call_arguments.done") { + t.Fatalf("expected finalize fallback function call event, got %s", body) + } + if strings.Contains(body, "event: response.failed") { + t.Fatalf("did not expect response.failed, body=%s", body) + } +} + func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) { h := &Handler{} rec := httptest.NewRecorder() @@ -258,7 +291,7 @@ func TestHandleResponsesNonStreamRequiredToolChoiceViolation(t *testing.T) { } } -func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayload(t *testing.T) { +func TestHandleResponsesNonStreamRequiredToolChoiceIgnoresThinkingToolPayloadWhenTextExists(t *testing.T) { h := &Handler{} rec := httptest.NewRecorder() resp := &http.Response{ @@ -351,6 +384,32 @@ func TestHandleResponsesNonStreamReturns429WhenUpstreamHasOnlyThinking(t *testin } } +func TestHandleResponsesNonStreamPromotesThinkingToolCallsWhenTextEmpty(t *testing.T) { + h := &Handler{} + rec := httptest.NewRecorder() + resp := &http.Response{ + StatusCode: http.StatusOK, + Body: io.NopCloser(strings.NewReader( + `data: {"p":"response/thinking_content","v":"README.MD"}` + "\n" + + `data: [DONE]` + "\n", + )), + } + + h.handleResponsesNonStream(rec, resp, "owner-a", "resp_test", "deepseek-v4-pro", "prompt", true, false, []string{"read_file"}, promptcompat.DefaultToolChoicePolicy(), "") + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 for thinking tool calls, got %d body=%s", rec.Code, rec.Body.String()) + } + out := decodeJSONBody(t, rec.Body.String()) + output, _ := out["output"].([]any) + if len(output) != 1 { + t.Fatalf("expected one output item, got %#v", out["output"]) + } + first, _ := output[0].(map[string]any) + if got := asString(first["type"]); got != "function_call" { + t.Fatalf("expected function_call output, got %#v", first["type"]) + } +} + func extractSSEEventPayload(body, targetEvent string) (map[string]any, bool) { scanner := bufio.NewScanner(strings.NewReader(body)) matched := false diff --git a/internal/toolcall/toolcalls_parse.go b/internal/toolcall/toolcalls_parse.go index a950c2c..3dc8c25 100644 --- a/internal/toolcall/toolcalls_parse.go +++ b/internal/toolcall/toolcalls_parse.go @@ -32,6 +32,21 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string) return parseToolCallsDetailedXMLOnly(text) } +func ParseAssistantToolCallsDetailed(text, thinking string, availableToolNames []string) ToolCallParseResult { + textParsed := ParseStandaloneToolCallsDetailed(text, availableToolNames) + if len(textParsed.Calls) > 0 { + return textParsed + } + if strings.TrimSpace(text) != "" { + return textParsed + } + thinkingParsed := ParseStandaloneToolCallsDetailed(thinking, availableToolNames) + if len(thinkingParsed.Calls) > 0 { + return thinkingParsed + } + return textParsed +} + func parseToolCallsDetailedXMLOnly(text string) ToolCallParseResult { result := ToolCallParseResult{} trimmed := strings.TrimSpace(text) @@ -74,7 +89,7 @@ func filterToolCallsDetailed(parsed []ParsedToolCall) ([]ParsedToolCall, []strin func looksLikeToolCallSyntax(text string) bool { lower := strings.ToLower(text) - return strings.Contains(lower, "