From c9c59f24906a139e4beddb8b82e17323d1342e8e Mon Sep 17 00:00:00 2001 From: huangxun Date: Tue, 17 Mar 2026 16:28:27 +0800 Subject: [PATCH] refactor(toolcall): enhance tool call extraction with multiple keywords and safety limits - Add support for multiple keywords: tool_calls, function.name:, [tool_call_history] - Add OOM protection with search limits in extractToolCallObjects - Add max scan length limit in extractJSONObject to prevent OOM on unclosed objects - Update tool_sieve to handle more tool call patterns - Add loose JSON repair in parseToolCallPayload for better error recovery This improves DeepSeek tool call parsing robustness. --- .../adapter/openai/chat_stream_runtime.go | 8 +-- .../adapter/openai/handler_toolcall_format.go | 2 +- internal/adapter/openai/tool_sieve_core.go | 16 ++++- internal/format/openai/render_chat.go | 6 +- internal/format/openai/render_responses.go | 6 +- .../js/helpers/stream-tool-sieve/sieve.js | 31 +++++---- internal/util/toolcalls_candidates.go | 65 ++++++++++++++++--- 7 files changed, 95 insertions(+), 39 deletions(-) diff --git a/internal/adapter/openai/chat_stream_runtime.go b/internal/adapter/openai/chat_stream_runtime.go index 5cd16da..1a81660 100644 --- a/internal/adapter/openai/chat_stream_runtime.go +++ b/internal/adapter/openai/chat_stream_runtime.go @@ -98,11 +98,11 @@ func (s *chatStreamRuntime) sendDone() { func (s *chatStreamRuntime) finalize(finishReason string) { finalThinking := s.thinking.String() finalText := s.text.String() - detected := util.ParseStandaloneToolCalls(finalText, s.toolNames) - if len(detected) > 0 && !s.toolCallsDoneEmitted { + detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames) + if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted { finishReason = "tool_calls" delta := map[string]any{ - "tool_calls": formatFinalStreamToolCallsWithStableIDs(detected, s.streamToolCallIDs), + "tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs), } if !s.firstChunkSent { delta["role"] = "assistant" @@ -158,7 +158,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) { } } - if len(detected) > 0 || s.toolCallsEmitted { + if len(detected.Calls) > 0 || s.toolCallsEmitted { finishReason = "tool_calls" } s.sendChunk(openaifmt.BuildChatStreamChunk( diff --git a/internal/adapter/openai/handler_toolcall_format.go b/internal/adapter/openai/handler_toolcall_format.go index 37ebaf9..3adfd15 100644 --- a/internal/adapter/openai/handler_toolcall_format.go +++ b/internal/adapter/openai/handler_toolcall_format.go @@ -53,7 +53,7 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolCh if len(toolSchemas) == 0 { return messages, names } - toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY this JSON format (no other text):\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON. The response must start with { and end with }.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block." + toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY a JSON code block like this:\n```json\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n```\n\n【EXAMPLE】\nUser: Please check the weather in Beijing and Shanghai, and update my todo list.\nAssistant:\n```json\n{\"tool_calls\": [\n {\"name\": \"get_weather\", \"input\": {\"city\": \"Beijing\"}},\n {\"name\": \"get_weather\", \"input\": {\"city\": \"Shanghai\"}},\n {\"name\": \"update_todo\", \"input\": {\"todos\": [{\"content\": \"Buy milk\"}, {\"content\": \"Write report\"}]}}\n]}\n```\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON code block. The response must start with ```json and end with ```.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block.\n5) JSON SYNTAX STRICTLY REQUIRED: All property names MUST be enclosed in double quotes (e.g., \"name\", not name).\n6) ARRAY FORMAT: If providing a list of items, you MUST enclose them in square brackets `[]` (e.g., \"todos\": [{\"item\": \"a\"}, {\"item\": \"b\"}]). DO NOT output comma-separated objects without brackets." if policy.Mode == util.ToolChoiceRequired { toolPrompt += "\n5) For this response, you MUST call at least one tool from the allowed list." } diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/adapter/openai/tool_sieve_core.go index cdb2585..72628e9 100644 --- a/internal/adapter/openai/tool_sieve_core.go +++ b/internal/adapter/openai/tool_sieve_core.go @@ -206,13 +206,22 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix return "", nil, "", false } lower := strings.ToLower(captured) - keyIdx := strings.Index(lower, "tool_calls") + + keyIdx := -1 + keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"} + for _, kw := range keywords { + idx := strings.Index(lower, kw) + if idx >= 0 && (keyIdx < 0 || idx < keyIdx) { + keyIdx = idx + } + } + if keyIdx < 0 { return "", nil, "", false } start := strings.LastIndex(captured[:keyIdx], "{") if start < 0 { - return "", nil, "", false + start = keyIdx } obj, end, ok := extractJSONObjectFrom(captured, start) if !ok { @@ -230,6 +239,9 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix // consume it to avoid leaking raw tool_calls JSON to user content. return prefixPart, nil, suffixPart, true } + // If it has obvious keywords but failed to parse even after loose repair, + // we still might want to intercept it if it looks like an attempt at tool call. + // For now, keep the original logic but rely on loose JSON repair. return captured, nil, "", true } return prefixPart, parsed.Calls, suffixPart, true diff --git a/internal/format/openai/render_chat.go b/internal/format/openai/render_chat.go index 181e8b9..bdea9b5 100644 --- a/internal/format/openai/render_chat.go +++ b/internal/format/openai/render_chat.go @@ -8,15 +8,15 @@ import ( ) func BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { - detected := util.ParseStandaloneToolCalls(finalText, toolNames) + detected := util.ParseStandaloneToolCallsDetailed(finalText, toolNames) finishReason := "stop" messageObj := map[string]any{"role": "assistant", "content": finalText} if strings.TrimSpace(finalThinking) != "" { messageObj["reasoning_content"] = finalThinking } - if len(detected) > 0 { + if len(detected.Calls) > 0 { finishReason = "tool_calls" - messageObj["tool_calls"] = util.FormatOpenAIToolCalls(detected) + messageObj["tool_calls"] = util.FormatOpenAIToolCalls(detected.Calls) messageObj["content"] = nil } diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go index 21df584..a3b37f0 100644 --- a/internal/format/openai/render_responses.go +++ b/internal/format/openai/render_responses.go @@ -13,12 +13,12 @@ import ( func BuildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any { // Strict mode: only standalone, structured tool-call payloads are treated // as executable tool calls. - detected := util.ParseStandaloneToolCalls(finalText, toolNames) + detected := util.ParseStandaloneToolCallsDetailed(finalText, toolNames) exposedOutputText := finalText output := make([]any, 0, 2) - if len(detected) > 0 { + if len(detected.Calls) > 0 { exposedOutputText = "" - output = append(output, toResponsesFunctionCallItems(detected)...) + output = append(output, toResponsesFunctionCallItems(detected.Calls)...) } else { content := make([]any, 0, 2) if finalThinking != "" { diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js index ae25fd4..a3b7fd8 100644 --- a/internal/js/helpers/stream-tool-sieve/sieve.js +++ b/internal/js/helpers/stream-tool-sieve/sieve.js @@ -202,20 +202,28 @@ function consumeToolCapture(state, toolNames) { return { ready: false, prefix: '', calls: [], suffix: '' }; } const lower = captured.toLowerCase(); - const keyIdx = lower.indexOf('tool_calls'); + + let keyIdx = -1; + const keywords = ['tool_calls', 'function.name:', '[tool_call_history]']; + for (const kw of keywords) { + const idx = lower.indexOf(kw); + if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) { + keyIdx = idx; + } + } + if (keyIdx < 0) { return { ready: false, prefix: '', calls: [], suffix: '' }; } const start = captured.slice(0, keyIdx).lastIndexOf('{'); - if (start < 0) { - return { ready: false, prefix: '', calls: [], suffix: '' }; - } - const obj = extractJSONObjectFrom(captured, start); + const actualStart = start >= 0 ? start : keyIdx; + + const obj = extractJSONObjectFrom(captured, actualStart); if (!obj.ok) { return { ready: false, prefix: '', calls: [], suffix: '' }; } - const prefixPart = captured.slice(0, start); + const prefixPart = captured.slice(0, actualStart); const suffixPart = captured.slice(obj.end); if (insideCodeFence((state.recentTextTail || '') + prefixPart)) { @@ -227,16 +235,7 @@ function consumeToolCapture(state, toolNames) { }; } - if ((state.recentTextTail || '').trim() !== '' || prefixPart.trim() !== '' || suffixPart.trim() !== '') { - return { - ready: true, - prefix: captured, - calls: [], - suffix: '', - }; - } - - const parsed = parseStandaloneToolCallsDetailed(captured.slice(start, obj.end), toolNames); + const parsed = parseStandaloneToolCallsDetailed(captured.slice(actualStart, obj.end), toolNames); if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) { if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) { return { diff --git a/internal/util/toolcalls_candidates.go b/internal/util/toolcalls_candidates.go index 4e8afc4..49db011 100644 --- a/internal/util/toolcalls_candidates.go +++ b/internal/util/toolcalls_candidates.go @@ -20,7 +20,7 @@ func buildToolCallCandidates(text string) []string { } } - // best-effort extraction around "tool_calls" key in mixed text payloads. + // best-effort extraction around tool call keywords in mixed text payloads. candidates = append(candidates, extractToolCallObjects(trimmed)...) // best-effort object slice: from first '{' to last '}' @@ -57,25 +57,65 @@ func extractToolCallObjects(text string) []string { lower := strings.ToLower(text) out := []string{} offset := 0 + keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"} for { - idx := strings.Index(lower[offset:], "tool_calls") - if idx < 0 { + bestIdx := -1 + matchedKeyword := "" + for _, kw := range keywords { + idx := strings.Index(lower[offset:], kw) + if idx >= 0 { + absIdx := offset + idx + if bestIdx < 0 || absIdx < bestIdx { + bestIdx = absIdx + matchedKeyword = kw + } + } + } + + if bestIdx < 0 { break } - idx += offset - start := strings.LastIndex(text[:idx], "{") - for start >= 0 { + + idx := bestIdx + // Avoid backtracking too far to prevent OOM on malicious or very long strings + searchLimit := idx - 2000 + if searchLimit < offset { + searchLimit = offset + } + + start := strings.LastIndex(text[searchLimit:idx], "{") + if start >= 0 { + start += searchLimit + } + + if start < 0 { + offset = idx + len(matchedKeyword) + continue + } + + foundObj := false + for start >= searchLimit { candidate, end, ok := extractJSONObject(text, start) if ok { // Move forward to avoid repeatedly matching the same object. offset = end out = append(out, strings.TrimSpace(candidate)) + foundObj = true break } - start = strings.LastIndex(text[:start], "{") + // Try previous '{' + if start > searchLimit { + prevStart := strings.LastIndex(text[searchLimit:start], "{") + if prevStart >= 0 { + start = searchLimit + prevStart + continue + } + } + break } - if start < 0 { - offset = idx + len("tool_calls") + + if !foundObj { + offset = idx + len(matchedKeyword) } } return out @@ -88,7 +128,12 @@ func extractJSONObject(text string, start int) (string, int, bool) { depth := 0 quote := byte(0) escaped := false - for i := start; i < len(text); i++ { + // Limit scan length to avoid OOM on unclosed objects + maxLen := start + 50000 + if maxLen > len(text) { + maxLen = len(text) + } + for i := start; i < maxLen; i++ { ch := text[i] if quote != 0 { if escaped {