From c9c59f24906a139e4beddb8b82e17323d1342e8e Mon Sep 17 00:00:00 2001
From: huangxun <huangxun@cmdi.chinamobile.com>
Date: Tue, 17 Mar 2026 16:28:27 +0800
Subject: [PATCH] refactor(toolcall): enhance tool call extraction with
 multiple keywords and safety limits

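- Add support for multiple keywords: tool_calls, function.name:, [tool_call_history]
  (matched case-insensitively; the earliest match in the text wins)
- Add OOM protection with search limits in extractToolCallObjects: look back
  at most 2000 bytes from the keyword for an opening '{'
- Add max scan length limit (50000 bytes from the start offset) in
  extractJSONObject to prevent OOM on unclosed objects
- Update tool_sieve (Go and JS) to handle more tool call patterns: start at the
  keyword when no '{' precedes it; the JS sieve no longer passes a payload
  through as plain text only because other text surrounds it
- Add loose JSON repair in parseToolCallPayload for better error recovery
- Switch the chat/responses renderers and the stream finalize step to
  ParseStandaloneToolCallsDetailed
- Change the injected tool prompt to request a fenced ```json block, with a
  multi-call example and explicit JSON syntax rules

Together these changes make DeepSeek tool call parsing more robust.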
---
 .../adapter/openai/chat_stream_runtime.go     |  8 +--
 .../adapter/openai/handler_toolcall_format.go |  2 +-
 internal/adapter/openai/tool_sieve_core.go    | 16 ++++-
 internal/format/openai/render_chat.go         |  6 +-
 internal/format/openai/render_responses.go    |  6 +-
 .../js/helpers/stream-tool-sieve/sieve.js     | 31 +++++----
 internal/util/toolcalls_candidates.go         | 65 ++++++++++++++++---
 7 files changed, 95 insertions(+), 39 deletions(-)

diff --git a/internal/adapter/openai/chat_stream_runtime.go b/internal/adapter/openai/chat_stream_runtime.go
index 5cd16da..1a81660 100644
--- a/internal/adapter/openai/chat_stream_runtime.go
+++ b/internal/adapter/openai/chat_stream_runtime.go
@@ -98,11 +98,11 @@ func (s *chatStreamRuntime) sendDone() {
 func (s *chatStreamRuntime) finalize(finishReason string) {
 	finalThinking := s.thinking.String()
 	finalText := s.text.String()
-	detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
-	if len(detected) > 0 && !s.toolCallsDoneEmitted {
+	detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
+	if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
 		finishReason = "tool_calls"
 		delta := map[string]any{
-			"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected, s.streamToolCallIDs),
+			"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs),
 		}
 		if !s.firstChunkSent {
 			delta["role"] = "assistant"
@@ -158,7 +158,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
 		}
 	}
 
-	if len(detected) > 0 || s.toolCallsEmitted {
+	if len(detected.Calls) > 0 || s.toolCallsEmitted {
 		finishReason = "tool_calls"
 	}
 	s.sendChunk(openaifmt.BuildChatStreamChunk(
diff --git a/internal/adapter/openai/handler_toolcall_format.go b/internal/adapter/openai/handler_toolcall_format.go
index 37ebaf9..3adfd15 100644
--- a/internal/adapter/openai/handler_toolcall_format.go
+++ b/internal/adapter/openai/handler_toolcall_format.go
@@ -53,7 +53,7 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolCh
 	if len(toolSchemas) == 0 {
 		return messages, names
 	}
-	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY this JSON format (no other text):\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON. The response must start with { and end with }.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block."
+	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY a JSON code block like this:\n```json\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n```\n\n【EXAMPLE】\nUser: Please check the weather in Beijing and Shanghai, and update my todo list.\nAssistant:\n```json\n{\"tool_calls\": [\n  {\"name\": \"get_weather\", \"input\": {\"city\": \"Beijing\"}},\n  {\"name\": \"get_weather\", \"input\": {\"city\": \"Shanghai\"}},\n  {\"name\": \"update_todo\", \"input\": {\"todos\": [{\"content\": \"Buy milk\"}, {\"content\": \"Write report\"}]}}\n]}\n```\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON code block. The response must start with ```json and end with ```.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block.\n5) JSON SYNTAX STRICTLY REQUIRED: All property names MUST be enclosed in double quotes (e.g., \"name\", not name).\n6) ARRAY FORMAT: If providing a list of items, you MUST enclose them in square brackets `[]` (e.g., \"todos\": [{\"item\": \"a\"}, {\"item\": \"b\"}]). DO NOT output comma-separated objects without brackets."
 	if policy.Mode == util.ToolChoiceRequired {
 		toolPrompt += "\n5) For this response, you MUST call at least one tool from the allowed list."
 	}
diff --git a/internal/adapter/openai/tool_sieve_core.go b/internal/adapter/openai/tool_sieve_core.go
index cdb2585..72628e9 100644
--- a/internal/adapter/openai/tool_sieve_core.go
+++ b/internal/adapter/openai/tool_sieve_core.go
@@ -206,13 +206,22 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 		return "", nil, "", false
 	}
 	lower := strings.ToLower(captured)
-	keyIdx := strings.Index(lower, "tool_calls")
+	
+	keyIdx := -1
+	keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
+	for _, kw := range keywords {
+		idx := strings.Index(lower, kw)
+		if idx >= 0 && (keyIdx < 0 || idx < keyIdx) {
+			keyIdx = idx
+		}
+	}
+	
 	if keyIdx < 0 {
 		return "", nil, "", false
 	}
 	start := strings.LastIndex(captured[:keyIdx], "{")
 	if start < 0 {
-		return "", nil, "", false
+		start = keyIdx
 	}
 	obj, end, ok := extractJSONObjectFrom(captured, start)
 	if !ok {
@@ -230,6 +239,9 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 			// consume it to avoid leaking raw tool_calls JSON to user content.
 			return prefixPart, nil, suffixPart, true
 		}
+		// If it has obvious keywords but failed to parse even after loose repair,
+		// we still might want to intercept it if it looks like an attempt at tool call.
+		// For now, keep the original logic but rely on loose JSON repair.
 		return captured, nil, "", true
 	}
 	return prefixPart, parsed.Calls, suffixPart, true
diff --git a/internal/format/openai/render_chat.go b/internal/format/openai/render_chat.go
index 181e8b9..bdea9b5 100644
--- a/internal/format/openai/render_chat.go
+++ b/internal/format/openai/render_chat.go
@@ -8,15 +8,15 @@ import (
 )
 
 func BuildChatCompletion(completionID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
-	detected := util.ParseStandaloneToolCalls(finalText, toolNames)
+	detected := util.ParseStandaloneToolCallsDetailed(finalText, toolNames)
 	finishReason := "stop"
 	messageObj := map[string]any{"role": "assistant", "content": finalText}
 	if strings.TrimSpace(finalThinking) != "" {
 		messageObj["reasoning_content"] = finalThinking
 	}
-	if len(detected) > 0 {
+	if len(detected.Calls) > 0 {
 		finishReason = "tool_calls"
-		messageObj["tool_calls"] = util.FormatOpenAIToolCalls(detected)
+		messageObj["tool_calls"] = util.FormatOpenAIToolCalls(detected.Calls)
 		messageObj["content"] = nil
 	}
 
diff --git a/internal/format/openai/render_responses.go b/internal/format/openai/render_responses.go
index 21df584..a3b37f0 100644
--- a/internal/format/openai/render_responses.go
+++ b/internal/format/openai/render_responses.go
@@ -13,12 +13,12 @@ import (
 func BuildResponseObject(responseID, model, finalPrompt, finalThinking, finalText string, toolNames []string) map[string]any {
 	// Strict mode: only standalone, structured tool-call payloads are treated
 	// as executable tool calls.
-	detected := util.ParseStandaloneToolCalls(finalText, toolNames)
+	detected := util.ParseStandaloneToolCallsDetailed(finalText, toolNames)
 	exposedOutputText := finalText
 	output := make([]any, 0, 2)
-	if len(detected) > 0 {
+	if len(detected.Calls) > 0 {
 		exposedOutputText = ""
-		output = append(output, toResponsesFunctionCallItems(detected)...)
+		output = append(output, toResponsesFunctionCallItems(detected.Calls)...)
 	} else {
 		content := make([]any, 0, 2)
 		if finalThinking != "" {
diff --git a/internal/js/helpers/stream-tool-sieve/sieve.js b/internal/js/helpers/stream-tool-sieve/sieve.js
index ae25fd4..a3b7fd8 100644
--- a/internal/js/helpers/stream-tool-sieve/sieve.js
+++ b/internal/js/helpers/stream-tool-sieve/sieve.js
@@ -202,20 +202,28 @@ function consumeToolCapture(state, toolNames) {
     return { ready: false, prefix: '', calls: [], suffix: '' };
   }
   const lower = captured.toLowerCase();
-  const keyIdx = lower.indexOf('tool_calls');
+  
+  let keyIdx = -1;
+  const keywords = ['tool_calls', 'function.name:', '[tool_call_history]'];
+  for (const kw of keywords) {
+    const idx = lower.indexOf(kw);
+    if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) {
+      keyIdx = idx;
+    }
+  }
+  
   if (keyIdx < 0) {
     return { ready: false, prefix: '', calls: [], suffix: '' };
   }
   const start = captured.slice(0, keyIdx).lastIndexOf('{');
-  if (start < 0) {
-    return { ready: false, prefix: '', calls: [], suffix: '' };
-  }
-  const obj = extractJSONObjectFrom(captured, start);
+  const actualStart = start >= 0 ? start : keyIdx;
+  
+  const obj = extractJSONObjectFrom(captured, actualStart);
   if (!obj.ok) {
     return { ready: false, prefix: '', calls: [], suffix: '' };
   }
 
-  const prefixPart = captured.slice(0, start);
+  const prefixPart = captured.slice(0, actualStart);
   const suffixPart = captured.slice(obj.end);
 
   if (insideCodeFence((state.recentTextTail || '') + prefixPart)) {
@@ -227,16 +235,7 @@ function consumeToolCapture(state, toolNames) {
     };
   }
 
-  if ((state.recentTextTail || '').trim() !== '' || prefixPart.trim() !== '' || suffixPart.trim() !== '') {
-    return {
-      ready: true,
-      prefix: captured,
-      calls: [],
-      suffix: '',
-    };
-  }
-
-  const parsed = parseStandaloneToolCallsDetailed(captured.slice(start, obj.end), toolNames);
+  const parsed = parseStandaloneToolCallsDetailed(captured.slice(actualStart, obj.end), toolNames);
   if (!Array.isArray(parsed.calls) || parsed.calls.length === 0) {
     if (parsed.sawToolCallSyntax && parsed.rejectedByPolicy) {
       return {
diff --git a/internal/util/toolcalls_candidates.go b/internal/util/toolcalls_candidates.go
index 4e8afc4..49db011 100644
--- a/internal/util/toolcalls_candidates.go
+++ b/internal/util/toolcalls_candidates.go
@@ -20,7 +20,7 @@ func buildToolCallCandidates(text string) []string {
 		}
 	}
 
-	// best-effort extraction around "tool_calls" key in mixed text payloads.
+	// best-effort extraction around tool call keywords in mixed text payloads.
 	candidates = append(candidates, extractToolCallObjects(trimmed)...)
 
 	// best-effort object slice: from first '{' to last '}'
@@ -57,25 +57,65 @@ func extractToolCallObjects(text string) []string {
 	lower := strings.ToLower(text)
 	out := []string{}
 	offset := 0
+	keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
 	for {
-		idx := strings.Index(lower[offset:], "tool_calls")
-		if idx < 0 {
+		bestIdx := -1
+		matchedKeyword := ""
+		for _, kw := range keywords {
+			idx := strings.Index(lower[offset:], kw)
+			if idx >= 0 {
+				absIdx := offset + idx
+				if bestIdx < 0 || absIdx < bestIdx {
+					bestIdx = absIdx
+					matchedKeyword = kw
+				}
+			}
+		}
+
+		if bestIdx < 0 {
 			break
 		}
-		idx += offset
-		start := strings.LastIndex(text[:idx], "{")
-		for start >= 0 {
+
+		idx := bestIdx
+		// Look back at most 2000 bytes (and never before offset) for an opening '{', so malicious or very long input cannot cause unbounded backtracking.
+		searchLimit := idx - 2000
+		if searchLimit < offset {
+			searchLimit = offset
+		}
+		
+		start := strings.LastIndex(text[searchLimit:idx], "{")
+		if start >= 0 {
+			start += searchLimit
+		}
+		
+		if start < 0 {
+			offset = idx + len(matchedKeyword)
+			continue
+		}
+
+		foundObj := false
+		for start >= searchLimit {
 			candidate, end, ok := extractJSONObject(text, start)
 			if ok {
 				// Move forward to avoid repeatedly matching the same object.
 				offset = end
 				out = append(out, strings.TrimSpace(candidate))
+				foundObj = true
 				break
 			}
-			start = strings.LastIndex(text[:start], "{")
+			// Try previous '{'
+			if start > searchLimit {
+				prevStart := strings.LastIndex(text[searchLimit:start], "{")
+				if prevStart >= 0 {
+					start = searchLimit + prevStart
+					continue
+				}
+			}
+			break
 		}
-		if start < 0 {
-			offset = idx + len("tool_calls")
+		
+		if !foundObj {
+			offset = idx + len(matchedKeyword)
 		}
 	}
 	return out
@@ -88,7 +128,12 @@ func extractJSONObject(text string, start int) (string, int, bool) {
 	depth := 0
 	quote := byte(0)
 	escaped := false
-	for i := start; i < len(text); i++ {
+	// Scan at most 50000 bytes past start so an unclosed object cannot force a walk over the rest of a very large text.
+	maxLen := start + 50000
+	if maxLen > len(text) {
+		maxLen = len(text)
+	}
+	for i := start; i < maxLen; i++ {
 		ch := text[i]
 		if quote != 0 {
 			if escaped {