refactor(toolcall): enhance tool call extraction with multiple keywords and safety limits

- Add support for multiple keywords: tool_calls, function.name:, [tool_call_history]
- Add OOM protection with search limits in extractToolCallObjects
- Add max scan length limit in extractJSONObject to prevent OOM on unclosed objects
- Update tool_sieve to handle more tool call patterns
- Add loose JSON repair in parseToolCallPayload for better error recovery

This improves DeepSeek tool call parsing robustness.
This commit is contained in:
huangxun
2026-03-17 16:28:27 +08:00
parent 16216cc2ca
commit c9c59f2490
7 changed files with 95 additions and 39 deletions

View File

@@ -98,11 +98,11 @@ func (s *chatStreamRuntime) sendDone() {
func (s *chatStreamRuntime) finalize(finishReason string) {
finalThinking := s.thinking.String()
finalText := s.text.String()
detected := util.ParseStandaloneToolCalls(finalText, s.toolNames)
if len(detected) > 0 && !s.toolCallsDoneEmitted {
detected := util.ParseStandaloneToolCallsDetailed(finalText, s.toolNames)
if len(detected.Calls) > 0 && !s.toolCallsDoneEmitted {
finishReason = "tool_calls"
delta := map[string]any{
"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected, s.streamToolCallIDs),
"tool_calls": formatFinalStreamToolCallsWithStableIDs(detected.Calls, s.streamToolCallIDs),
}
if !s.firstChunkSent {
delta["role"] = "assistant"
@@ -158,7 +158,7 @@ func (s *chatStreamRuntime) finalize(finishReason string) {
}
}
if len(detected) > 0 || s.toolCallsEmitted {
if len(detected.Calls) > 0 || s.toolCallsEmitted {
finishReason = "tool_calls"
}
s.sendChunk(openaifmt.BuildChatStreamChunk(

View File

@@ -53,7 +53,7 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolCh
if len(toolSchemas) == 0 {
return messages, names
}
toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY this JSON format (no other text):\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON. The response must start with { and end with }.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block."
toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY a JSON code block like this:\n```json\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n```\n\n【EXAMPLE】\nUser: Please check the weather in Beijing and Shanghai, and update my todo list.\nAssistant:\n```json\n{\"tool_calls\": [\n {\"name\": \"get_weather\", \"input\": {\"city\": \"Beijing\"}},\n {\"name\": \"get_weather\", \"input\": {\"city\": \"Shanghai\"}},\n {\"name\": \"update_todo\", \"input\": {\"todos\": [{\"content\": \"Buy milk\"}, {\"content\": \"Write report\"}]}}\n]}\n```\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON code block. The response must start with ```json and end with ```.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block.\n5) JSON SYNTAX STRICTLY REQUIRED: All property names MUST be enclosed in double quotes (e.g., \"name\", not name).\n6) ARRAY FORMAT: If providing a list of items, you MUST enclose them in square brackets `[]` (e.g., \"todos\": [{\"item\": \"a\"}, {\"item\": \"b\"}]). DO NOT output comma-separated objects without brackets."
if policy.Mode == util.ToolChoiceRequired {
toolPrompt += "\n5) For this response, you MUST call at least one tool from the allowed list."
}

View File

@@ -206,13 +206,22 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
return "", nil, "", false
}
lower := strings.ToLower(captured)
keyIdx := strings.Index(lower, "tool_calls")
keyIdx := -1
keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
for _, kw := range keywords {
idx := strings.Index(lower, kw)
if idx >= 0 && (keyIdx < 0 || idx < keyIdx) {
keyIdx = idx
}
}
if keyIdx < 0 {
return "", nil, "", false
}
start := strings.LastIndex(captured[:keyIdx], "{")
if start < 0 {
return "", nil, "", false
start = keyIdx
}
obj, end, ok := extractJSONObjectFrom(captured, start)
if !ok {
@@ -230,6 +239,9 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
// consume it to avoid leaking raw tool_calls JSON to user content.
return prefixPart, nil, suffixPart, true
}
// If it has obvious keywords but failed to parse even after loose repair,
// we still might want to intercept it if it looks like an attempt at tool call.
// For now, keep the original logic but rely on loose JSON repair.
return captured, nil, "", true
}
return prefixPart, parsed.Calls, suffixPart, true