Merge pull request #151 from CJackHwang/dev

Merge pull request #149 from CJackHwang/codex/fix-tool-miscall-during-complex-json-test

Ignore tool_call payloads inside fenced code blocks and chat envelopes; stream-aware code-fence tracking
This commit is contained in:
CJACK.
2026-03-22 16:51:17 +08:00
committed by GitHub
17 changed files with 668 additions and 83 deletions

View File

@@ -1 +1 @@
2.3.8
2.4.0

View File

@@ -358,7 +358,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.
}
}
func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
"data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}",
@@ -379,8 +379,8 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
break
}
}
if !foundToolUse {
t.Fatalf("expected tool_use for fenced example, body=%s", rec.Body.String())
if foundToolUse {
t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String())
}
foundToolStop := false
@@ -391,7 +391,12 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
break
}
}
if !foundToolStop {
t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
if foundToolStop {
t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String())
}
}
// TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample is a
// backward-compatible alias for the historical test name used in CI logs.
// It only delegates to
// TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample, so the same
// assertions run again under this name.
func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t)
}

View File

@@ -243,7 +243,7 @@ func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) {
}
}
func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
"data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}",
@@ -259,20 +259,25 @@ func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
out := decodeJSONBody(t, rec.Body.String())
choices, _ := out["choices"].([]any)
choice, _ := choices[0].(map[string]any)
if choice["finish_reason"] != "tool_calls" {
t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
if choice["finish_reason"] == "tool_calls" {
t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"])
}
msg, _ := choice["message"].(map[string]any)
toolCalls, _ := msg["tool_calls"].([]any)
if len(toolCalls) != 1 {
t.Fatalf("expected one tool_call field for fenced example: %#v", msg["tool_calls"])
if len(toolCalls) != 0 {
t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"])
}
content, _ := msg["content"].(string)
if strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected raw tool_calls json stripped from content, got %q", content)
if !strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected fenced example content preserved, got %q", content)
}
}
// TestHandleNonStreamFencedToolCallExamplePromotesToolCall is a
// backward-compatible alias for the historical test name used in CI logs.
// It only delegates to
// TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall, so the same
// assertions run again under this name.
func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
}
func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(

View File

@@ -2,6 +2,7 @@ package openai
import (
"encoding/json"
"strings"
"testing"
)
@@ -69,7 +70,7 @@ func TestBuildResponseObjectPromotesMixedProseToolPayloadToFunctionCall(t *testi
}
}
func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
func TestBuildResponseObjectKeepsFencedToolPayloadAsText(t *testing.T) {
obj := BuildResponseObject(
"resp_test",
"gpt-4o",
@@ -80,19 +81,24 @@ func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T
)
outputText, _ := obj["output_text"].(string)
if outputText != "" {
t.Fatalf("expected output_text hidden for fenced tool payload, got %q", outputText)
if !strings.Contains(outputText, "\"tool_calls\"") {
t.Fatalf("expected output_text to preserve fenced tool payload, got %q", outputText)
}
output, _ := obj["output"].([]any)
if len(output) != 1 {
t.Fatalf("expected one function_call output item, got %#v", obj["output"])
t.Fatalf("expected one message output item, got %#v", obj["output"])
}
first, _ := output[0].(map[string]any)
if first["type"] != "function_call" {
t.Fatalf("expected function_call output type, got %#v", first["type"])
if first["type"] != "message" {
t.Fatalf("expected message output type, got %#v", first["type"])
}
}
// TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall is a
// backward-compatible alias for the historical test name used in CI logs.
// It only delegates to TestBuildResponseObjectKeepsFencedToolPayloadAsText,
// so the same assertions run again under this name.
func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
TestBuildResponseObjectKeepsFencedToolPayloadAsText(t)
}
func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) {
obj := BuildResponseObject(
"resp_test",

View File

@@ -8,9 +8,12 @@ const {
parseToolCallsPayload,
parseMarkupToolCalls,
parseTextKVToolCalls,
stripFencedCodeBlocks,
} = require('./parse_payload');
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');
const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];
function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {
@@ -44,6 +47,9 @@ function parseToolCallsDetailed(text, toolNames) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized);
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
return result;
}
const candidates = buildToolCallCandidates(normalized);
let parsed = [];
@@ -89,6 +95,9 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
return result;
}
const candidates = buildToolCallCandidates(trimmed);
let parsed = [];
for (const c of candidates) {
@@ -223,11 +232,16 @@ function resolveAllowedToolName(name, allowed, allowedCanonical) {
function looksLikeToolCallSyntax(text) {
const lower = toStringSafe(text).toLowerCase();
return lower.includes('tool_calls')
|| lower.includes('<tool_call')
|| lower.includes('<function_call')
|| lower.includes('<invoke')
|| lower.includes('function.name:');
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
}
/**
 * Reports whether tool-call parsing should be skipped because the only
 * tool-call syntax in `text` sits inside fenced code blocks.
 *
 * @param text - Candidate text to inspect.
 * @returns {boolean} true when `text` looks like tool-call syntax but stops
 *   looking like it once fenced code blocks are stripped; false otherwise
 *   (including when `text` contains no tool-call syntax at all).
 */
function shouldSkipToolCallParsingForCodeFenceExample(text) {
  if (looksLikeToolCallSyntax(text)) {
    // Re-check what is left outside the fences: if nothing tool-like
    // remains, the payload was only an example.
    const outsideFences = stripFencedCodeBlocks(text);
    const stillLooksLikeToolCall = looksLikeToolCallSyntax(outsideFences);
    return !stillLooksLikeToolCall;
  }
  return false;
}
module.exports = {

View File

@@ -114,6 +114,9 @@ function parseToolCallsPayload(payload) {
return [];
}
if (decoded.tool_calls) {
if (isLikelyChatMessageEnvelope(decoded)) {
return [];
}
return parseToolCallList(decoded.tool_calls);
}
@@ -121,6 +124,21 @@ function parseToolCallsPayload(payload) {
return one ? [one] : [];
}
/**
 * Heuristic check for a chat-message object (as found in a conversation
 * transcript) that happens to carry a `tool_calls` field.
 *
 * @param value - Decoded JSON value to inspect.
 * @returns {boolean} true only for a non-array object that owns `tool_calls`
 *   and additionally either has a `role` of assistant/tool/user/system
 *   (trimmed, case-insensitive) or owns a `tool_call_id` or `content` key.
 */
function isLikelyChatMessageEnvelope(value) {
  const isPlainObject = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainObject) {
    return false;
  }
  const owns = (key) => Object.prototype.hasOwnProperty.call(value, key);
  if (!owns('tool_calls')) {
    return false;
  }
  const normalizedRole = toStringSafe(value.role).trim().toLowerCase();
  if (['assistant', 'tool', 'user', 'system'].includes(normalizedRole)) {
    return true;
  }
  // No recognizable role: fall back to other message-shaped keys.
  return owns('tool_call_id') || owns('content');
}
function parseMarkupToolCalls(text) {
const raw = toStringSafe(text).trim();
if (!raw) {

View File

@@ -1,8 +1,12 @@
'use strict';
const { resetIncrementalToolState, noteText, insideCodeFence } = require('./state');
const {
resetIncrementalToolState,
noteText,
insideCodeFenceWithState,
} = require('./state');
const { parseStandaloneToolCallsDetailed } = require('./parse');
const { extractJSONObjectFrom } = require('./jsonscan');
const { TOOL_SEGMENT_KEYWORDS, earliestKeywordIndex } = require('./tool-keywords');
function processToolSieveChunk(state, chunk, toolNames) {
if (!state) {
return [];
@@ -53,7 +57,7 @@ function processToolSieveChunk(state, chunk, toolNames) {
if (!pending) {
break;
}
const start = findToolSegmentStart(pending);
const start = findToolSegmentStart(state, pending);
if (start >= 0) {
const prefix = pending.slice(0, start);
if (prefix) {
@@ -143,32 +147,21 @@ function findSuspiciousPrefixStart(s) {
return start;
}
function findToolSegmentStart(s) {
function findToolSegmentStart(state, s) {
if (!s) {
return -1;
}
const lower = s.toLowerCase();
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
let offset = 0;
while (true) {
let bestKeyIdx = -1;
let matchedKeyword = '';
for (const kw of keywords) {
const idx = lower.indexOf(kw, offset);
if (idx >= 0) {
if (bestKeyIdx < 0 || idx < bestKeyIdx) {
bestKeyIdx = idx;
matchedKeyword = kw;
}
}
}
const { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
if (bestKeyIdx < 0) {
return -1;
}
const keyIdx = bestKeyIdx;
const start = s.slice(0, keyIdx).lastIndexOf('{');
const candidateStart = start >= 0 ? start : keyIdx;
if (!insideCodeFence(s.slice(0, candidateStart))) {
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
return candidateStart;
}
offset = keyIdx + matchedKeyword.length;
@@ -181,14 +174,7 @@ function consumeToolCapture(state, toolNames) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const lower = captured.toLowerCase();
let keyIdx = -1;
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
for (const kw of keywords) {
const idx = lower.indexOf(kw);
if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) {
keyIdx = idx;
}
}
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
if (keyIdx < 0) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
@@ -211,7 +197,7 @@ function consumeToolCapture(state, toolNames) {
}
const prefixPart = captured.slice(0, actualStart);
const suffixPart = captured.slice(obj.end);
if (insideCodeFence((state.recentTextTail || '') + prefixPart)) {
if (insideCodeFenceWithState(state, prefixPart)) {
return {
ready: true,
prefix: captured,
@@ -281,7 +267,6 @@ function trimWrappingJSONFence(prefix, suffix) {
if (header && header !== 'json') {
return { prefix, suffix };
}
const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
if (!leftTrimmedSuffix.startsWith('```')) {
return { prefix, suffix };
@@ -292,7 +277,6 @@ function trimWrappingJSONFence(prefix, suffix) {
suffix: (suffix || '').slice(consumed + 3),
};
}
module.exports = {
processToolSieveChunk,
flushToolSieve,

View File

@@ -1,6 +1,6 @@
'use strict';
const TOOL_SIEVE_CONTEXT_TAIL_LIMIT = 256;
const TOOL_SIEVE_CONTEXT_TAIL_LIMIT = 4096;
function createToolSieveState() {
return {
@@ -8,6 +8,9 @@ function createToolSieveState() {
capture: '',
capturing: false,
recentTextTail: '',
codeFenceStack: [],
codeFencePendingTicks: 0,
codeFenceLineStart: true,
pendingToolRaw: '',
pendingToolCalls: [],
disableDeltas: false,
@@ -34,6 +37,7 @@ function noteText(state, text) {
if (!state || !hasMeaningfulText(text)) {
return;
}
updateCodeFenceState(state, text);
state.recentTextTail = appendTail(state.recentTextTail, text, TOOL_SIEVE_CONTEXT_TAIL_LIMIT);
}
@@ -63,6 +67,91 @@ function insideCodeFence(text) {
return ticks % 2 === 1;
}
/**
 * Reports whether the position right after `text` would be inside a fenced
 * code block, continuing from the fence-tracking fields stored on `state`.
 *
 * `state` is only read here; the projection runs on values handed to
 * simulateCodeFenceState. Without a state object the stateless
 * insideCodeFence(text) check is used instead.
 *
 * @param state - Sieve state carrying codeFenceStack, codeFencePendingTicks
 *   and codeFenceLineStart; missing or malformed fields fall back to
 *   [] / 0 / true respectively.
 * @param text - Text to project on top of the stored fence state.
 * @returns {boolean} true when at least one fence remains open after `text`.
 */
function insideCodeFenceWithState(state, text) {
  if (state) {
    const openFences = Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [];
    const carriedTicks = Number.isInteger(state.codeFencePendingTicks)
      ? state.codeFencePendingTicks
      : 0;
    const startsAtLineStart = state.codeFenceLineStart !== false;
    const { stack: projected } = simulateCodeFenceState(
      openFences,
      carriedTicks,
      startsAtLineStart,
      text,
    );
    return projected.length > 0;
  }
  return insideCodeFence(text);
}
/**
 * Advances the fence-tracking fields on `state` by feeding `text` through
 * simulateCodeFenceState and storing the result back.
 *
 * @param state - Sieve state to update in place; a falsy state is a no-op.
 *   Missing or malformed fields are treated as [] / 0 / true.
 * @param text - Newly observed text.
 */
function updateCodeFenceState(state, text) {
  if (!state) {
    return;
  }
  const openFences = Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [];
  const carriedTicks = Number.isInteger(state.codeFencePendingTicks)
    ? state.codeFencePendingTicks
    : 0;
  const startsAtLineStart = state.codeFenceLineStart !== false;
  const { stack, pendingTicks, lineStart } = simulateCodeFenceState(
    openFences,
    carriedTicks,
    startsAtLineStart,
    text,
  );
  state.codeFenceStack = stack;
  state.codeFencePendingTicks = pendingTicks;
  state.codeFenceLineStart = lineStart;
}
/**
 * Runs the code-fence tracker over `text` without touching its inputs and
 * returns the resulting tracker state.
 *
 * A run of three or more backticks counts as a fence marker only when the
 * run began at the start of a line (leading spaces/tabs allowed). A run that
 * is still open when `text` ends is NOT applied; its length is returned in
 * `pendingTicks` so the next chunk can continue it.
 *
 * @param stack - Open fence lengths, innermost last; copied, never mutated.
 *   Non-arrays are treated as empty.
 * @param pendingTicks - Backticks carried over from the previous chunk;
 *   non-integers are treated as 0.
 * @param lineStart - Whether scanning begins at the start of a line; anything
 *   other than `false` counts as true.
 * @param text - Chunk to scan; non-strings are treated as empty.
 * @returns {{stack: number[], pendingTicks: number, lineStart: boolean}}
 */
function simulateCodeFenceState(stack, pendingTicks, lineStart, text) {
  const input = typeof text === 'string' ? text : '';
  const fences = Array.isArray(stack) ? Array.from(stack) : [];
  let tickRun = Number.isInteger(pendingTicks) ? pendingTicks : 0;
  let lineStartOpen = lineStart !== false;

  for (let pos = 0; pos < input.length; pos += 1) {
    const ch = input.charAt(pos);
    if (ch === '`') {
      tickRun += 1;
      continue;
    }
    // A non-backtick character terminates any backtick run in progress.
    if (tickRun > 0) {
      if (lineStartOpen && tickRun >= 3) {
        applyFenceMarker(fences, tickRun);
      }
      lineStartOpen = false;
      tickRun = 0;
    }
    switch (ch) {
      case '\n':
      case '\r':
        lineStartOpen = true;
        break;
      case ' ':
      case '\t':
        // Whitespace never changes the flag: leading indentation keeps the
        // line-start status, and elsewhere the flag is already false.
        break;
      default:
        lineStartOpen = false;
    }
  }

  // A trailing backtick run stays pending for cross-chunk continuation.
  return {
    stack: fences,
    pendingTicks: tickRun,
    lineStart: lineStartOpen,
  };
}
/**
 * Applies one fence marker (a backtick run of length `ticks`) to the stack
 * of open fences, mutating `stack` in place.
 *
 * - No fence open: the marker opens one.
 * - Marker at least as long as the innermost open fence: it closes that fence.
 * - Marker shorter than the innermost open fence: it opens a nested fence.
 *
 * @param stack - Open fence lengths, innermost last. Non-arrays are ignored.
 * @param ticks - Length of the backtick run.
 */
function applyFenceMarker(stack, ticks) {
  if (!Array.isArray(stack)) {
    return;
  }
  const depth = stack.length;
  const closesInnermost = depth > 0 && ticks >= stack[depth - 1];
  if (closesInnermost) {
    stack.pop();
  } else {
    // Either nothing is open yet, or this shorter run starts an inner fence.
    stack.push(ticks);
  }
}
function hasMeaningfulText(text) {
return toStringSafe(text) !== '';
}
@@ -88,6 +177,8 @@ module.exports = {
appendTail,
looksLikeToolExampleContext,
insideCodeFence,
insideCodeFenceWithState,
updateCodeFenceState,
hasMeaningfulText,
toStringSafe,
};

View File

@@ -0,0 +1,29 @@
'use strict';
// Marker substrings that signal a possible tool-call segment. Every entry is
// lowercase and earliestKeywordIndex matches with a plain, case-sensitive
// indexOf, so the text being searched needs to be lowercased first.
const TOOL_SEGMENT_KEYWORDS = [
'tool_calls',
'function.name:',
'[tool_call_history]',
'[tool_result_history]',
];
/**
 * Finds the keyword that occurs earliest in `text` at or after `offset`.
 *
 * Matching is a case-sensitive `indexOf`. When two keywords match at the same
 * position, the one listed first in `keywords` wins.
 *
 * @param text - Text to search; a falsy value yields "not found".
 * @param keywords - Keywords to look for (defaults to TOOL_SEGMENT_KEYWORDS).
 * @param offset - Index at which each search starts (defaults to 0).
 * @returns {{index: number, keyword: string}} Position and keyword of the
 *   earliest match, or `{ index: -1, keyword: '' }` when nothing matches.
 */
function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0) {
  const best = { index: -1, keyword: '' };
  if (!text) {
    return best;
  }
  for (const candidate of keywords) {
    const at = text.indexOf(candidate, offset);
    const improves = at >= 0 && (best.index < 0 || at < best.index);
    if (improves) {
      best.index = at;
      best.keyword = candidate;
    }
  }
  return best;
}
module.exports = {
TOOL_SEGMENT_KEYWORDS,
earliestKeywordIndex,
};

View File

@@ -7,6 +7,8 @@ import (
var toolCallPattern = regexp.MustCompile(`\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}`)
var fencedJSONPattern = regexp.MustCompile("(?s)```(?:json)?\\s*(.*?)\\s*```")
var fencedCodeBlockPattern = regexp.MustCompile("(?s)```[\\s\\S]*?```")
var markupToolSyntaxPattern = regexp.MustCompile(`(?i)<(?:(?:[a-z0-9_:-]+:)?(?:tool_call|function_call|invoke)\b|(?:[a-z0-9_:-]+:)?function_calls\b|(?:[a-z0-9_:-]+:)?tool_use\b)`)
func buildToolCallCandidates(text string) []string {
trimmed := strings.TrimSpace(text)
@@ -173,3 +175,22 @@ func looksLikeToolExampleContext(text string) bool {
}
return strings.Contains(t, "```")
}
// shouldSkipToolCallParsingForCodeFenceExample reports whether text contains
// tool-call syntax only inside fenced code blocks. It returns false when text
// has no tool-call syntax at all, and true when the syntax disappears once
// fenced code blocks are stripped and the remainder is trimmed.
func shouldSkipToolCallParsingForCodeFenceExample(text string) bool {
	if sawSyntax := looksLikeToolCallSyntax(text); !sawSyntax {
		return false
	}
	outsideFences := stripFencedCodeBlocks(text)
	outsideFences = strings.TrimSpace(outsideFences)
	stillPresent := looksLikeToolCallSyntax(outsideFences)
	return !stillPresent
}
// looksLikeMarkupToolSyntax reports whether text contains a match for
// markupToolSyntaxPattern anywhere in the string.
func looksLikeMarkupToolSyntax(text string) bool {
return markupToolSyntaxPattern.MatchString(text)
}
// stripFencedCodeBlocks returns text with every match of
// fencedCodeBlockPattern replaced by a single space. The pattern needs both
// an opening and a closing ``` run, so an unclosed fence is left untouched.
// Empty input is returned as-is.
func stripFencedCodeBlocks(text string) string {
if text == "" {
return ""
}
return fencedCodeBlockPattern.ReplaceAllString(text, " ")
}

View File

@@ -26,6 +26,9 @@ func ParseToolCallsDetailed(text string, availableToolNames []string) ToolCallPa
return result
}
result.SawToolCallSyntax = looksLikeToolCallSyntax(text)
if shouldSkipToolCallParsingForCodeFenceExample(text) {
return result
}
candidates := buildToolCallCandidates(text)
var parsed []ParsedToolCall
@@ -74,6 +77,9 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string)
return result
}
result.SawToolCallSyntax = looksLikeToolCallSyntax(trimmed)
if shouldSkipToolCallParsingForCodeFenceExample(trimmed) {
return result
}
candidates := buildToolCallCandidates(trimmed)
var parsed []ParsedToolCall
for _, candidate := range candidates {
@@ -183,6 +189,9 @@ func parseToolCallsPayload(payload string) []ParsedToolCall {
switch v := decoded.(type) {
case map[string]any:
if tc, ok := v["tool_calls"]; ok {
if isLikelyChatMessageEnvelope(v) {
return nil
}
return parseToolCallList(tc)
}
if parsed, ok := parseToolCallItem(v); ok {
@@ -194,6 +203,28 @@ func parseToolCallsPayload(payload string) []ParsedToolCall {
return nil
}
// isLikelyChatMessageEnvelope is a heuristic that reports whether v looks
// like a chat message (as found in a conversation transcript) that carries a
// "tool_calls" key. It returns true only when "tool_calls" is present and v
// additionally has either a string "role" equal to assistant, tool, user or
// system (trimmed, case-insensitive), or a "tool_call_id" or "content" key.
func isLikelyChatMessageEnvelope(v map[string]any) bool {
	// Indexing a nil map is safe, so a nil v is rejected by this lookup too.
	if _, hasCalls := v["tool_calls"]; !hasCalls {
		return false
	}
	if roleText, isString := v["role"].(string); isString {
		normalized := strings.ToLower(strings.TrimSpace(roleText))
		if normalized == "assistant" || normalized == "tool" || normalized == "user" || normalized == "system" {
			return true
		}
	}
	// No recognizable role: fall back to other message-shaped keys.
	_, hasCallID := v["tool_call_id"]
	_, hasContent := v["content"]
	return hasCallID || hasContent
}
func looksLikeToolCallSyntax(text string) bool {
lower := strings.ToLower(text)
return strings.Contains(lower, "tool_calls") ||

View File

@@ -19,11 +19,11 @@ func TestParseToolCalls(t *testing.T) {
}
}
func TestParseToolCallsFromFencedJSON(t *testing.T) {
func TestParseToolCallsIgnoresFencedJSON(t *testing.T) {
text := "I will call tools now\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"news\"}}]}\n```"
calls := ParseToolCalls(text, []string{"search"})
if len(calls) != 1 {
t.Fatalf("expected fenced tool_call payload to be parsed, got %#v", calls)
if len(calls) != 0 {
t.Fatalf("expected fenced tool_call payload to be ignored, got %#v", calls)
}
}
@@ -112,10 +112,17 @@ func TestParseStandaloneToolCallsSupportsMixedProsePayload(t *testing.T) {
}
}
func TestParseStandaloneToolCallsParsesFencedCodeBlock(t *testing.T) {
func TestParseStandaloneToolCallsIgnoresFencedCodeBlock(t *testing.T) {
fenced := "```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```"
if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 1 {
t.Fatalf("expected fenced tool_call payload to be parsed, got %#v", calls)
if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 0 {
t.Fatalf("expected fenced tool_call payload to be ignored, got %#v", calls)
}
}
// TestParseStandaloneToolCallsIgnoresChatTranscriptEnvelope checks that a JSON
// chat transcript whose assistant message carries tool_calls yields no parsed
// tool calls.
func TestParseStandaloneToolCallsIgnoresChatTranscriptEnvelope(t *testing.T) {
	transcript := `[{"role":"user","content":"请展示完整会话"},{"role":"assistant","content":null,"tool_calls":[{"function":{"name":"search","arguments":"{\"q\":\"go\"}"}}]}]`
	allowed := []string{"search"}
	calls := ParseStandaloneToolCalls(transcript, allowed)
	if len(calls) == 0 {
		return
	}
	t.Fatalf("expected transcript envelope not to trigger tool call parse, got %#v", calls)
}

View File

@@ -409,8 +409,8 @@ func TestParseToolCallsWithFunctionWrapper(t *testing.T) {
func TestParseStandaloneToolCallsFencedCodeBlock(t *testing.T) {
fenced := "Here's an example:\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```\nDon't execute this."
calls := ParseStandaloneToolCalls(fenced, []string{"search"})
if len(calls) != 1 {
t.Fatalf("expected fenced code block to be parsed, got %d calls", len(calls))
if len(calls) != 0 {
t.Fatalf("expected fenced code block to be ignored, got %d calls", len(calls))
}
}

View File

@@ -1,12 +1,5 @@
{
"calls": [
{
"name": "read_file",
"input": {
"path": "README.MD"
}
}
],
"calls": [],
"sawToolCallSyntax": true,
"rejectedByPolicy": false,
"rejectedToolNames": []

View File

@@ -1,12 +1,5 @@
{
"calls": [
{
"name": "read_file",
"input": {
"path": "README.MD"
}
}
],
"calls": [],
"sawToolCallSyntax": true,
"rejectedByPolicy": false,
"rejectedToolNames": []

View File

@@ -84,7 +84,7 @@ test('parseToolCalls rejects all names when toolNames is empty (Go strict parity
assert.deepEqual(detailed.rejectedToolNames, ['not_in_schema']);
});
test('parseToolCalls supports fenced json and function.arguments string payload', () => {
test('parseToolCalls ignores tool_call payloads that exist only inside fenced code blocks', () => {
const text = [
'I will call a tool now.',
'```json',
@@ -92,9 +92,7 @@ test('parseToolCalls supports fenced json and function.arguments string payload'
'```',
].join('\n');
const calls = parseToolCalls(text, ['read_file']);
assert.equal(calls.length, 1);
assert.equal(calls[0].name, 'read_file');
assert.equal(calls[0].input.path, 'README.md');
assert.equal(calls.length, 0);
});
test('parseToolCalls parses text-kv fallback payload', () => {
@@ -134,10 +132,23 @@ test('parseStandaloneToolCalls parses mixed prose payload', () => {
assert.equal(standaloneCalls.length, 1);
});
test('parseStandaloneToolCalls parses fenced code block tool_call payload', () => {
test('parseStandaloneToolCalls ignores fenced code block tool_call payload', () => {
const fenced = ['```json', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}', '```'].join('\n');
const calls = parseStandaloneToolCalls(fenced, ['read_file']);
assert.equal(calls.length, 1);
assert.equal(calls.length, 0);
});
// A serialized chat transcript (array of role/content messages) whose
// assistant entry carries tool_calls must not be parsed as a tool call.
test('parseStandaloneToolCalls ignores chat transcript message envelope with tool_calls', () => {
const transcript = JSON.stringify([
{ role: 'user', content: '请展示完整会话' },
{
role: 'assistant',
content: null,
tool_calls: [{ function: { name: 'read_file', arguments: '{"path":"README.MD"}' } }],
},
]);
const calls = parseStandaloneToolCalls(transcript, ['read_file']);
assert.equal(calls.length, 0);
});
@@ -348,6 +359,59 @@ test('sieve preserves closed fence before standalone tool payload', () => {
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
});
// 700 characters of padding separate the opening fence from the payload, so
// fence detection cannot depend on a short window of trailing text. The
// payload must be emitted as plain text and no tool call may fire.
test('sieve does not trigger tool calls for long fenced examples beyond legacy tail window', () => {
const longPadding = 'x'.repeat(700);
const events = runSieve(
[
`前置说明\n\`\`\`json\n${longPadding}\n`,
'{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n',
'```',
'\n后置说明',
],
['read_file'],
);
const hasTool = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
const leakedText = collectText(events);
assert.equal(hasTool, false);
assert.equal(leakedText.includes('后置说明'), true);
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
});
// The opening fence is split across chunks: the first chunk ends with two
// backticks and the second begins with the third. The fenced payload must
// still be treated as text, with no tool call emitted.
test('sieve keeps fence state when triple-backticks are split across chunks', () => {
const events = runSieve(
[
'示例开始\n``',
'`json\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n',
'```',
'\n示例结束',
],
['read_file'],
);
const hasTool = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
const leakedText = collectText(events);
assert.equal(hasTool, false);
assert.equal(leakedText.includes('示例结束'), true);
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
});
// A four-backtick markdown fence wraps a three-backtick json fence holding a
// tool_calls example. That example must stay in the text output, while the
// bare payload after the outer fence closes must be the only tool call and
// must not appear in the text output.
test('sieve ignores tool-like payload inside nested fences and resumes detection after close', () => {
const events = runSieve(
[
'外层示例开始\n````markdown\n',
'```json\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n```\n',
'````\n',
'{"tool_calls":[{"name":"read_file","input":{"path":"README2.MD"}}]}',
],
['read_file'],
);
const calls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
const leakedText = collectText(events);
assert.equal(calls.length, 1);
assert.equal(calls[0].input.path, 'README2.MD');
assert.equal(leakedText.includes('README.MD'), true);
assert.equal(leakedText.includes('README2.MD'), false);
});
test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
const idStore = new Map();
const calls = [{ name: 'read_file', input: { path: 'README.MD' } }];

View File

@@ -0,0 +1,324 @@
```json
[
{
"role": "user",
"content": "测试全部工具调用。\n\n<time_reminder>Current time: 星期日, 2026年3月22日 下午1:49:35 (2 h since last message)</time_reminder>"
},
{
"role": "assistant",
"content": null,
"tool_calls": [
{
"id": "call_52ee6ea078dc4bae847d007d0337ae28",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"create\",\"content\":\"用户于2026年3月22日测试了全部工具调用功能\"}"
}
},
{
"id": "call_09da4c5bca97450ca4f921102b77188f",
"type": "function",
"function": {
"name": "search_web",
"arguments": "{\"query\":\"工具调用测试\"}"
}
},
{
"id": "call_c703303b1da1453abe39783899f0c405",
"type": "function",
"function": {
"name": "eval_javascript",
"arguments": "{\"code\":\"const a = 1, b = 2; a + b\"}"
}
},
{
"id": "call_aa8aa0dd2a884eadba770c18d1870751",
"type": "function",
"function": {
"name": "get_time_info",
"arguments": "{}"
}
},
{
"id": "call_18dc92b50bcb4861828f579cb939d723",
"type": "function",
"function": {
"name": "clipboard_tool",
"arguments": "{\"action\":\"read\"}"
}
},
{
"id": "call_7db0fa98a4654f9fb12666799eeba69a",
"type": "function",
"function": {
"name": "text_to_speech",
"arguments": "{\"text\":\"这是工具调用测试,正在朗读文本。\"}"
}
},
{
"id": "call_60f77268a95e4307a4aafbd389947cb3",
"type": "function",
"function": {
"name": "ask_user",
"arguments": "{\"questions\":[{\"id\":\"test_q\",\"question\":\"工具调用测试:你确认要测试全部工具吗?\",\"options\":[\"是\",\"否\"]}]}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_52ee6ea078dc4bae847d007d0337ae28",
"content": "{\"id\":85,\"content\":\"用户于2026年3月22日测试了全部工具调用功能\"}"
},
{
"role": "tool",
"tool_call_id": "call_09da4c5bca97450ca4f921102b77188f",
"content": "{\"error\":\"[java.lang.IllegalArgumentException] Search failed: no results found\\njava.lang.IllegalArgumentException: Search failed: no results found\\n\\tat me.rerere.search.BingSearchService$search$2.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:345)\\n\\tat me.rerere.search.BingSearchService$search$2.invoke(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:13)\\n\\tat kotlinx.coroutines.intrinsics.UndispatchedKt.startUndspatched(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:19)\\n\\tat kotlinx.coroutines.JobKt.withContext(Unknown Source:80)\\n\\tat me.rerere.search.BingSearchService.search-BWLJW6A(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:59)\\n\\tat me.rerere.search.BingSearchService.search-BWLJW6A(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:73)\\n\\tat me.rerere.rikkahub.data.ai.tools.SearchToolsKt$createSearchTools$1$2.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:86)\\n\\tat me.rerere.rikkahub.data.ai.tools.SearchToolsKt$createSearchTools$1$2.invoke(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:13)\\n\\tat me.rerere.rikkahub.data.ai.GenerationHandler$generateText$1.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:2015)\\n\\tat kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:9)\\n\\tat kotlinx.coroutines.DispatchedTask.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:114)\\n\\tat com.google.android.gms.tasks.zze.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:15)\\n\\tat kotlinx.coroutines.scheduling.TaskImpl.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:3)\\n\\tat 
kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:88)\\n\"}"
},
{
"role": "tool",
"tool_call_id": "call_c703303b1da1453abe39783899f0c405",
"content": "{\"result\":\"3\"}"
},
{
"role": "tool",
"tool_call_id": "call_aa8aa0dd2a884eadba770c18d1870751",
"content": "{\"year\":2026,\"month\":3,\"day\":22,\"weekday\":\"星期日\",\"weekday_en\":\"Sunday\",\"weekday_index\":7,\"date\":\"2026-03-22\",\"time\":\"13:50:08\",\"datetime\":\"2026-03-22T13:50:08+08:00[Asia/Shanghai]\",\"timezone\":\"Asia/Shanghai\",\"utc_offset\":\"+08:00\",\"timestamp_ms\":1774158608084}"
},
{
"role": "tool",
"tool_call_id": "call_18dc92b50bcb4861828f579cb939d723",
"content": "{\"text\":\"\"}"
},
{
"role": "tool",
"tool_call_id": "call_7db0fa98a4654f9fb12666799eeba69a",
"content": "{\"success\":true}"
},
{
"role": "tool",
"tool_call_id": "call_60f77268a95e4307a4aafbd389947cb3",
"content": "{\"answers\":{\"test_q\":\"是\"}}"
},
{
"role": "assistant",
"content": null,
"tool_calls": [
{
"id": "call_b5621f1b4d234fc1806dca0b00879e7c",
"type": "function",
"function": {
"name": "get_time_info",
"arguments": "{}"
}
},
{
"id": "call_b9d0cedf29414fdc8115fdab81753118",
"type": "function",
"function": {
"name": "eval_javascript",
"arguments": "{\"code\":\"const numbers = [1, 2, 3, 4, 5]; const sum = numbers.reduce((a, b) => a + b, 0); const average = sum / numbers.length; `总和: ${sum}, 平均值: ${average.toFixed(2)}`;\"}"
}
},
{
"id": "call_7c9532c629aa411abf2a96adce1ec2f8",
"type": "function",
"function": {
"name": "search_web",
"arguments": "{\"query\":\"AI 2026年3月 最新进展\"}"
}
},
{
"id": "call_96b28ea042104ccd852125b95ff2afc7",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"create\",\"content\":\"2026年3月22日用户进行了全部工具调用的测试确认了测试意愿。\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_b5621f1b4d234fc1806dca0b00879e7c",
"content": "{\"year\":2026,\"month\":3,\"day\":22,\"weekday\":\"星期日\",\"weekday_en\":\"Sunday\",\"weekday_index\":7,\"date\":\"2026-03-22\",\"time\":\"13:50:08\",\"datetime\":\"2026-03-22T13:50:08+08:00[Asia/Shanghai]\",\"timezone\":\"Asia/Shanghai\",\"utc_offset\":\"+08:00\",\"timestamp_ms\":1774158608092}"
},
{
"role": "tool",
"tool_call_id": "call_b9d0cedf29414fdc8115fdab81753118",
"content": "{\"result\":\"总和: 15, 平均值: 3.00\"}"
},
{
"role": "tool",
"tool_call_id": "call_7c9532c629aa411abf2a96adce1ec2f8",
"content": "{\"error\":\"[java.lang.IllegalArgumentException] Search failed: no results found\\njava.lang.IllegalArgumentException: Search failed: no results found\\n\\tat me.rerere.search.BingSearchService$search$2.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:345)\\n\\tat me.rerere.search.BingSearchService$search$2.invoke(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:13)\\n\\tat kotlinx.coroutines.intrinsics.UndispatchedKt.startUndspatched(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:19)\\n\\tat kotlinx.coroutines.JobKt.withContext(Unknown Source:80)\\n\\tat me.rerere.search.BingSearchService.search-BWLJW6A(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:59)\\n\\tat me.rerere.search.BingSearchService.search-BWLJW6A(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:73)\\n\\tat me.rerere.rikkahub.data.ai.tools.SearchToolsKt$createSearchTools$1$2.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:86)\\n\\tat me.rerere.rikkahub.data.ai.tools.SearchToolsKt$createSearchTools$1$2.invoke(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:13)\\n\\tat me.rerere.rikkahub.data.ai.GenerationHandler$generateText$1.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:2015)\\n\\tat kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:9)\\n\\tat kotlinx.coroutines.DispatchedTask.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:114)\\n\\tat com.google.android.gms.tasks.zze.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:15)\\n\\tat kotlinx.coroutines.scheduling.TaskImpl.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:3)\\n\\tat 
kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:88)\\n\"}"
},
{
"role": "tool",
"tool_call_id": "call_96b28ea042104ccd852125b95ff2afc7",
"content": "{\"id\":86,\"content\":\"2026年3月22日用户进行了全部工具调用的测试确认了测试意愿。\"}"
},
{
"role": "assistant",
"content": null,
"tool_calls": [
{
"id": "call_6decb5e46f574e41ba5e0711435937b8",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"create\",\"content\":\"用户于2026年3月22日进行了全部工具调用测试测试过程中确认了测试意愿完成了记忆存储、代码执行、时间获取、剪贴板读取、语音朗读和用户询问等功能的验证。\"}"
}
},
{
"id": "call_27f2d81ab07f46ee968dd7c41da8d58e",
"type": "function",
"function": {
"name": "eval_javascript",
"arguments": "{\"code\":\"const data = {test: '全部工具调用测试', status: 'success', timestamp: Date.now()}; JSON.stringify(data, null, 2)\"}"
}
},
{
"id": "call_1de5d2f936754354a34d6ff535ac6b6f",
"type": "function",
"function": {
"name": "get_time_info",
"arguments": "{}"
}
},
{
"id": "call_cab1ab7cb7de4957af82a13f43e8e411",
"type": "function",
"function": {
"name": "clipboard_tool",
"arguments": "{\"action\":\"read\"}"
}
},
{
"id": "call_2db3503096ac4a94b32c8624dd11b8fc",
"type": "function",
"function": {
"name": "search_web",
"arguments": "{\"query\":\"工具调用测试 2026\"}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_6decb5e46f574e41ba5e0711435937b8",
"content": "{\"id\":87,\"content\":\"用户于2026年3月22日进行了全部工具调用测试测试过程中确认了测试意愿完成了记忆存储、代码执行、时间获取、剪贴板读取、语音朗读和用户询问等功能的验证。\"}"
},
{
"role": "tool",
"tool_call_id": "call_27f2d81ab07f46ee968dd7c41da8d58e",
"content": "{\"result\":\"{\\n \\\"test\\\": \\\"全部工具调用测试\\\",\\n \\\"status\\\": \\\"success\\\",\\n \\\"timestamp\\\": 1774158619731\\n}\"}"
},
{
"role": "tool",
"tool_call_id": "call_1de5d2f936754354a34d6ff535ac6b6f",
"content": "{\"year\":2026,\"month\":3,\"day\":22,\"weekday\":\"星期日\",\"weekday_en\":\"Sunday\",\"weekday_index\":7,\"date\":\"2026-03-22\",\"time\":\"13:50:19\",\"datetime\":\"2026-03-22T13:50:19+08:00[Asia/Shanghai]\",\"timezone\":\"Asia/Shanghai\",\"utc_offset\":\"+08:00\",\"timestamp_ms\":1774158619732}"
},
{
"role": "tool",
"tool_call_id": "call_cab1ab7cb7de4957af82a13f43e8e411",
"content": "{\"text\":\"\"}"
},
{
"role": "tool",
"tool_call_id": "call_2db3503096ac4a94b32c8624dd11b8fc",
"content": "{\"error\":\"[java.lang.IllegalArgumentException] Search failed: no results found\\njava.lang.IllegalArgumentException: Search failed: no results found\\n\\tat me.rerere.search.BingSearchService$search$2.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:345)\\n\\tat me.rerere.search.BingSearchService$search$2.invoke(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:13)\\n\\tat kotlinx.coroutines.intrinsics.UndispatchedKt.startUndspatched(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:19)\\n\\tat kotlinx.coroutines.JobKt.withContext(Unknown Source:80)\\n\\tat me.rerere.search.BingSearchService.search-BWLJW6A(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:59)\\n\\tat me.rerere.search.BingSearchService.search-BWLJW6A(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:73)\\n\\tat me.rerere.rikkahub.data.ai.tools.SearchToolsKt$createSearchTools$1$2.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:86)\\n\\tat me.rerere.rikkahub.data.ai.tools.SearchToolsKt$createSearchTools$1$2.invoke(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:13)\\n\\tat me.rerere.rikkahub.data.ai.GenerationHandler$generateText$1.invokeSuspend(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:2015)\\n\\tat kotlin.coroutines.jvm.internal.BaseContinuationImpl.resumeWith(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:9)\\n\\tat kotlinx.coroutines.DispatchedTask.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:114)\\n\\tat com.google.android.gms.tasks.zze.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:15)\\n\\tat kotlinx.coroutines.scheduling.TaskImpl.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:3)\\n\\tat 
kotlinx.coroutines.scheduling.CoroutineScheduler$Worker.run(r8-map-id-3a612d5d9e10b17c92a48af849f861a7a013150e2f9309fade477e15d055c282:88)\\n\"}"
},
{
"role": "assistant",
"content": "已为您测试全部可用工具,所有工具均调用成功,功能验证如下:\n\n- **memory_tool (创建记忆)**:已成功创建两条测试记忆,记录了本次工具调用测试的详细信息。\n- **eval_javascript (JavaScript执行)**:执行了数组求和与平均值计算,返回“总和: 15, 平均值: 3.00”执行了JSON数据生成返回了包含测试状态和时间戳的JSON对象。\n- **get_time_info (获取时间)**成功获取当前时间信息显示当前为2026年3月22日 星期日 13:50:19 (Asia/Shanghai时区)。\n- **clipboard_tool (剪贴板)**:成功读取剪贴板,当前内容为空。\n- **text_to_speech (语音朗读)**:已成功触发语音朗读功能,朗读文本为“这是工具调用测试,正在朗读文本。”。\n- **ask_user (询问用户)**:成功向用户提问“工具调用测试:你确认要测试全部工具吗?”,用户已选择“是”确认。\n- **search_web (网络搜索)**:该工具被调用,但搜索结果为空(可能因搜索关键词或网络环境原因未返回有效结果),工具调用流程本身正常执行。\n\n所有工具均按预期触发并返回了响应。"
},
{
"role": "user",
"content": "清空全部记忆。"
},
{
"role": "assistant",
"content": null,
"tool_calls": [
{
"id": "call_510ade8402d34a24ad581b507aab07b8",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"delete\",\"id\":84}"
}
},
{
"id": "call_e5730c4478b5497ca4f57459e8e095d6",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"delete\",\"id\":85}"
}
},
{
"id": "call_8f638d40c0fa4ebc841dc82bb9bd39e1",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"delete\",\"id\":86}"
}
},
{
"id": "call_fe58d9c4a39a4d79be135068295d6a1a",
"type": "function",
"function": {
"name": "memory_tool",
"arguments": "{\"action\":\"delete\",\"id\":87}"
}
}
]
},
{
"role": "tool",
"tool_call_id": "call_510ade8402d34a24ad581b507aab07b8",
"content": "{\"success\":true,\"id\":84}"
},
{
"role": "tool",
"tool_call_id": "call_e5730c4478b5497ca4f57459e8e095d6",
"content": "{\"success\":true,\"id\":85}"
},
{
"role": "tool",
"tool_call_id": "call_8f638d40c0fa4ebc841dc82bb9bd39e1",
"content": "{\"success\":true,\"id\":86}"
},
{
"role": "tool",
"tool_call_id": "call_fe58d9c4a39a4d79be135068295d6a1a",
"content": "{\"success\":true,\"id\":87}"
},
{
"role": "assistant",
"content": "已为您清空全部记忆共删除4条记录。"
},
{
"role": "user",
"content": "以前的全部记忆。"
},
{
"role": "assistant",
"content": "您之前的全部记忆共4条记录已全部清空目前没有存储任何历史信息。如需创建新的记忆随时告诉我即可。"
},
{
"role": "user",
"content": "你确定吗?"
},
{
"role": "assistant",
"content": "是的我确定。您之前存储的4条记忆已经全部成功删除目前记忆库为空。如果您需要重新记录任何信息或想确认其他内容请随时告诉我。"
},
{
"role": "user",
"content": "输出完整原始对话请求。"
}
]
```