Merge pull request #132 from CJackHwang/codex/toolcallhistory-6t7271

Preserve code fences around standalone tool JSON and add marker-output guards
2026-05-05 08:55:28 +08:00 · 2026-03-21 17:44:05 +08:00
parent 492c603300 7061094964
commit 67787d9c99
8 changed files with 138 additions and 5 deletions
--- a/internal/adapter/claude/handler_util_test.go
+++ b/internal/adapter/claude/handler_util_test.go
@@ -128,6 +128,9 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) {
 	if !containsStr(prompt, "tool_use") {
 		t.Fatalf("expected tool_use instruction in prompt")
 	}
+	if !containsStr(prompt, "Never output [TOOL_CALL_HISTORY] or [TOOL_RESULT_HISTORY] markers yourself") {
+		t.Fatalf("expected marker guard instruction in prompt")
+	}
 	if containsStr(prompt, "tool_calls") {
 		t.Fatalf("expected prompt to avoid tool_calls JSON instruction")
 	}
--- a/internal/adapter/claude/handler_utils.go
+++ b/internal/adapter/claude/handler_utils.go
@@ -54,6 +54,7 @@ func buildClaudeToolPrompt(tools []any) string {
 		"When you need a tool, respond with Claude-native tool use (tool_use) using the provided tool schema. Do not print tool-call JSON in text.",
 		"History markers in conversation: [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] are your previous tool calls; [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] are runtime tool outputs, not user input.",
 		"After a valid [TOOL_RESULT_HISTORY], continue with final answer instead of repeating the same call unless required fields are still missing.",
+		"Never output [TOOL_CALL_HISTORY] or [TOOL_RESULT_HISTORY] markers yourself; they are system-side context only.",
 	)
 	return strings.Join(parts, "\n\n")
 }
--- a/internal/adapter/openai/handler_toolcall_format.go
+++ b/internal/adapter/openai/handler_toolcall_format.go
@@ -53,7 +53,7 @@ func injectToolPrompt(messages []map[string]any, tools []any, policy util.ToolCh
 	if len(toolSchemas) == 0 {
 		return messages, names
 	}
-	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY a JSON code block like this:\n```json\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n```\n\n【EXAMPLE】\nUser: Please check the weather in Beijing and Shanghai, and update my todo list.\nAssistant:\n```json\n{\"tool_calls\": [\n  {\"name\": \"get_weather\", \"input\": {\"city\": \"Beijing\"}},\n  {\"name\": \"get_weather\", \"input\": {\"city\": \"Shanghai\"}},\n  {\"name\": \"update_todo\", \"input\": {\"todos\": [{\"content\": \"Buy milk\"}, {\"content\": \"Write report\"}]}}\n]}\n```\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON code block. The response must start with ```json and end with ```.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block.\n5) JSON SYNTAX STRICTLY REQUIRED: All property names MUST be enclosed in double quotes (e.g., \"name\", not name).\n6) ARRAY FORMAT: If providing a list of items, you MUST enclose them in square brackets `[]` (e.g., \"todos\": [{\"item\": \"a\"}, {\"item\": \"b\"}]). DO NOT output comma-separated objects without brackets."
+	toolPrompt := "You have access to these tools:\n\n" + strings.Join(toolSchemas, "\n\n") + "\n\nWhen you need to use tools, output ONLY a JSON code block like this:\n```json\n{\"tool_calls\": [{\"name\": \"tool_name\", \"input\": {\"param\": \"value\"}}]}\n```\n\n【EXAMPLE】\nUser: Please check the weather in Beijing and Shanghai, and update my todo list.\nAssistant:\n```json\n{\"tool_calls\": [\n  {\"name\": \"get_weather\", \"input\": {\"city\": \"Beijing\"}},\n  {\"name\": \"get_weather\", \"input\": {\"city\": \"Shanghai\"}},\n  {\"name\": \"update_todo\", \"input\": {\"todos\": [{\"content\": \"Buy milk\"}, {\"content\": \"Write report\"}]}}\n]}\n```\n\nHistory markers in conversation:\n- [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY] means a tool call you already made earlier.\n- [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] means the runtime returned a tool result (not user input).\n\nIMPORTANT:\n1) If calling tools, output ONLY the JSON code block. The response must start with ```json and end with ```.\n2) After receiving a tool result, you MUST use it to produce the final answer.\n3) Only call another tool when the previous result is missing required data or returned an error.\n4) Do not repeat a tool call that is already satisfied by an existing [TOOL_RESULT_HISTORY] block.\n5) Never output [TOOL_CALL_HISTORY] or [TOOL_RESULT_HISTORY] markers in your answer; these markers are system-side context only.\n6) JSON SYNTAX STRICTLY REQUIRED: All property names MUST be enclosed in double quotes (e.g., \"name\", not name).\n7) ARRAY FORMAT: If providing a list of items, you MUST enclose them in square brackets `[]` (e.g., \"todos\": [{\"item\": \"a\"}, {\"item\": \"b\"}]). DO NOT output comma-separated objects without brackets."
 	if policy.Mode == util.ToolChoiceRequired {
 		toolPrompt += "\n5) For this response, you MUST call at least one tool from the allowed list."
 	}
--- a/internal/adapter/openai/handler_toolcall_test.go
+++ b/internal/adapter/openai/handler_toolcall_test.go
@@ -651,6 +651,48 @@ func TestHandleStreamFencedToolCallSnippetPromotesToolCall(t *testing.T) {
 	if strings.Contains(strings.ToLower(got), "tool_calls") {
 		t.Fatalf("expected raw fenced tool_calls snippet stripped from content, got=%q", got)
 	}
+	if strings.Contains(strings.ToLower(got), "```json") || strings.Contains(got, "\n```\n") {
+		t.Fatalf("expected consumed fenced tool payload to not leave empty code fence, got=%q", got)
+	}
+	if streamFinishReason(frames) != "tool_calls" {
+		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
+	}
+}
+
+func TestHandleStreamStandaloneToolCallAfterClosedFenceKeepsFence(t *testing.T) {
+	h := &Handler{}
+	resp := makeSSEHTTPResponse(
+		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "先给一个代码示例：\n```text\nhello\n```\n"),
+		fmt.Sprintf(`data: {"p":"response/content","v":%q}`, "{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"),
+		`data: [DONE]`,
+	)
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
+
+	h.handleStream(rec, req, resp, "cid7g", "deepseek-chat", "prompt", false, false, []string{"search"})
+
+	frames, done := parseSSEDataFrames(t, rec.Body.String())
+	if !done {
+		t.Fatalf("expected [DONE], body=%s", rec.Body.String())
+	}
+	if !streamHasToolCallsDelta(frames) {
+		t.Fatalf("expected tool_calls delta for standalone payload, body=%s", rec.Body.String())
+	}
+	content := strings.Builder{}
+	for _, frame := range frames {
+		choices, _ := frame["choices"].([]any)
+		for _, item := range choices {
+			choice, _ := item.(map[string]any)
+			delta, _ := choice["delta"].(map[string]any)
+			if c, ok := delta["content"].(string); ok {
+				content.WriteString(c)
+			}
+		}
+	}
+	got := content.String()
+	if !strings.Contains(got, "```") {
+		t.Fatalf("expected closed fence before standalone tool json to be preserved, got=%q", got)
+	}
 	if streamFinishReason(frames) != "tool_calls" {
 		t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
 	}
--- a/internal/adapter/openai/prompt_build_test.go
+++ b/internal/adapter/openai/prompt_build_test.go
@@ -80,4 +80,7 @@ func TestBuildOpenAIFinalPrompt_VercelPreparePathKeepsFinalAnswerInstruction(t *
 	if !strings.Contains(finalPrompt, "[TOOL_RESULT_HISTORY]") {
 		t.Fatalf("vercel prepare finalPrompt missing history marker instruction: %q", finalPrompt)
 	}
+	if !strings.Contains(finalPrompt, "Never output [TOOL_CALL_HISTORY] or [TOOL_RESULT_HISTORY] markers in your answer") {
+		t.Fatalf("vercel prepare finalPrompt missing marker-output guard instruction: %q", finalPrompt)
+	}
 }
--- a/internal/adapter/openai/tool_sieve_core.go
+++ b/internal/adapter/openai/tool_sieve_core.go
@@ -182,6 +182,9 @@ func findToolSegmentStart(s string) int {
 	if start < 0 {
 		start = bestKeyIdx
 	}
+	if fenceStart, ok := openFenceStartBefore(s, start); ok {
+		return fenceStart
+	}
 	return start
 }

@@ -191,7 +194,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 		return "", nil, "", false
 	}
 	lower := strings.ToLower(captured)
-	
+
 	keyIdx := -1
 	keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
 	for _, kw := range keywords {
@@ -200,7 +203,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 			keyIdx = idx
 		}
 	}
-	
+
 	if keyIdx < 0 {
 		return "", nil, "", false
 	}
@@ -226,5 +229,45 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
 		// For now, keep the original logic but rely on loose JSON repair.
 		return captured, nil, "", true
 	}
+	prefixPart, suffixPart = trimWrappingJSONFence(prefixPart, suffixPart)
 	return prefixPart, parsed.Calls, suffixPart, true
 }
+
+// trimWrappingJSONFence strips a trailing opening ``` / ```json fence from prefix and the matching leading ``` from suffix; if either is absent both are returned unchanged.
+func trimWrappingJSONFence(prefix, suffix string) (string, string) {
+	trimmedPrefix := strings.TrimRight(prefix, " \t\r\n")
+	fenceIdx := strings.LastIndex(trimmedPrefix, "```")
+	if fenceIdx < 0 {
+		return prefix, suffix
+	}
+	// Odd ``` count up to this fence means it opens a block (strip candidate); even means it closes one and must be preserved.
+	if strings.Count(trimmedPrefix[:fenceIdx+3], "```")%2 == 0 {
+		return prefix, suffix
+	}
+	fenceHeader := strings.TrimSpace(trimmedPrefix[fenceIdx+3:]) // text after the fence; only empty or "json" (any case) qualifies
+	if fenceHeader != "" && !strings.EqualFold(fenceHeader, "json") {
+		return prefix, suffix
+	}
+
+	trimmedSuffix := strings.TrimLeft(suffix, " \t\r\n")
+	if !strings.HasPrefix(trimmedSuffix, "```") {
+		return prefix, suffix
+	}
+	consumedLeading := len(suffix) - len(trimmedSuffix) // bytes of leading whitespace skipped before the closing fence
+	return trimmedPrefix[:fenceIdx], suffix[consumedLeading+3:]
+}
+
+func openFenceStartBefore(s string, pos int) (int, bool) { // reports the index of the last ``` in s[:pos] when that fence is still open
+	if pos <= 0 || pos > len(s) {
+		return -1, false
+	}
+	segment := s[:pos]
+	lastFence := strings.LastIndex(segment, "```")
+	if lastFence < 0 {
+		return -1, false
+	}
+	if strings.Count(segment, "```")%2 == 1 { // odd count: the last fence has no closing partner before pos
+		return lastFence, true
+	}
+	return -1, false // even count: every fence before pos is already closed
+}
--- a/internal/js/helpers/stream-tool-sieve/sieve.js
+++ b/internal/js/helpers/stream-tool-sieve/sieve.js
@@ -256,11 +256,40 @@ function consumeToolCapture(state, toolNames) {
    };
  }

+  const trimmedFence = trimWrappingJSONFence(prefixPart, suffixPart);
  return {
    ready: true,
-    prefix: prefixPart,
+    prefix: trimmedFence.prefix,
    calls: parsed.calls,
-    suffix: suffixPart,
+    suffix: trimmedFence.suffix,
+  };
+}
+
+// Strips a trailing opening ``` / ```json fence from prefix and the matching leading ``` from suffix; otherwise returns both unchanged.
+function trimWrappingJSONFence(prefix, suffix) {
+  const rightTrimmedPrefix = (prefix || '').replace(/[ \t\r\n]+$/g, '');
+  const fenceIdx = rightTrimmedPrefix.lastIndexOf('```');
+  if (fenceIdx < 0) {
+    return { prefix, suffix };
+  }
+  // Odd ``` count up to this fence => it opens a block (strip candidate); even => it closes one, so keep it.
+  const fenceCount = (rightTrimmedPrefix.slice(0, fenceIdx + 3).match(/```/g) || []).length;
+  if (fenceCount % 2 === 0) {
+    return { prefix, suffix };
+  }
+  const header = rightTrimmedPrefix.slice(fenceIdx + 3).trim().toLowerCase(); // text after the fence; only empty or "json" qualifies
+  if (header && header !== 'json') {
+    return { prefix, suffix };
+  }
+
+  const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
+  if (!leftTrimmedSuffix.startsWith('```')) {
+    return { prefix, suffix };
+  }
+  const consumed = (suffix || '').length - leftTrimmedSuffix.length; // leading whitespace skipped before the closing fence
+  return {
+    prefix: rightTrimmedPrefix.slice(0, fenceIdx),
+    suffix: (suffix || '').slice(consumed + 3),
   };
 }

--- a/tests/node/stream-tool-sieve.test.js
+++ b/tests/node/stream-tool-sieve.test.js
@@ -286,6 +286,18 @@ test('sieve emits tool_calls and keeps trailing prose when payload and prose sha
  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
 });

+test('sieve preserves closed fence before standalone tool payload', () => {
+  const events = runSieve(
+    ['先给一个代码示例：\n```text\nhello\n```\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}'],
+    ['read_file'],
+  );
+  const hasTool = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
+  const leakedText = collectText(events);
+  assert.equal(hasTool, true); // the standalone JSON after the closed fence must yield a tool_calls event
+  assert.equal(leakedText.includes('```'), true); // the already-closed ```text fence must remain in the collected text
+  assert.equal(leakedText.toLowerCase().includes('tool_calls'), false); // the raw payload must not appear in the collected text
+});
+
 test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
  const idStore = new Map();
  const calls = [{ name: 'read_file', input: { path: 'README.MD' } }];