mirror of
https://github.com/CJackHwang/ds2api.git
synced 2026-05-04 00:15:28 +08:00
Merge pull request #149 from CJackHwang/codex/fix-tool-miscall-during-complex-json-test
Ignore tool_call payloads inside fenced code blocks and chat envelopes; stream-aware code-fence tracking
This commit is contained in:
@@ -358,7 +358,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
|
||||
func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeClaudeSSEHTTPResponse(
|
||||
"data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}",
|
||||
@@ -379,8 +379,8 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundToolUse {
|
||||
t.Fatalf("expected tool_use for fenced example, body=%s", rec.Body.String())
|
||||
if foundToolUse {
|
||||
t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String())
|
||||
}
|
||||
|
||||
foundToolStop := false
|
||||
@@ -391,7 +391,12 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundToolStop {
|
||||
t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
|
||||
if foundToolStop {
|
||||
t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// Backward-compatible alias for historical test name used in CI logs.
|
||||
func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
|
||||
TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t)
|
||||
}
|
||||
|
||||
@@ -243,7 +243,7 @@ func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
|
||||
func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
"data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}",
|
||||
@@ -259,20 +259,25 @@ func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
|
||||
out := decodeJSONBody(t, rec.Body.String())
|
||||
choices, _ := out["choices"].([]any)
|
||||
choice, _ := choices[0].(map[string]any)
|
||||
if choice["finish_reason"] != "tool_calls" {
|
||||
t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
|
||||
if choice["finish_reason"] == "tool_calls" {
|
||||
t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"])
|
||||
}
|
||||
msg, _ := choice["message"].(map[string]any)
|
||||
toolCalls, _ := msg["tool_calls"].([]any)
|
||||
if len(toolCalls) != 1 {
|
||||
t.Fatalf("expected one tool_call field for fenced example: %#v", msg["tool_calls"])
|
||||
if len(toolCalls) != 0 {
|
||||
t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"])
|
||||
}
|
||||
content, _ := msg["content"].(string)
|
||||
if strings.Contains(content, `"tool_calls"`) {
|
||||
t.Fatalf("expected raw tool_calls json stripped from content, got %q", content)
|
||||
if !strings.Contains(content, `"tool_calls"`) {
|
||||
t.Fatalf("expected fenced example content preserved, got %q", content)
|
||||
}
|
||||
}
|
||||
|
||||
// Backward-compatible alias for historical test name used in CI logs.
|
||||
func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
|
||||
TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
|
||||
}
|
||||
|
||||
func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
|
||||
h := &Handler{}
|
||||
resp := makeSSEHTTPResponse(
|
||||
|
||||
@@ -2,6 +2,7 @@ package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -69,7 +70,7 @@ func TestBuildResponseObjectPromotesMixedProseToolPayloadToFunctionCall(t *testi
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
|
||||
func TestBuildResponseObjectKeepsFencedToolPayloadAsText(t *testing.T) {
|
||||
obj := BuildResponseObject(
|
||||
"resp_test",
|
||||
"gpt-4o",
|
||||
@@ -80,19 +81,24 @@ func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T
|
||||
)
|
||||
|
||||
outputText, _ := obj["output_text"].(string)
|
||||
if outputText != "" {
|
||||
t.Fatalf("expected output_text hidden for fenced tool payload, got %q", outputText)
|
||||
if !strings.Contains(outputText, "\"tool_calls\"") {
|
||||
t.Fatalf("expected output_text to preserve fenced tool payload, got %q", outputText)
|
||||
}
|
||||
output, _ := obj["output"].([]any)
|
||||
if len(output) != 1 {
|
||||
t.Fatalf("expected one function_call output item, got %#v", obj["output"])
|
||||
t.Fatalf("expected one message output item, got %#v", obj["output"])
|
||||
}
|
||||
first, _ := output[0].(map[string]any)
|
||||
if first["type"] != "function_call" {
|
||||
t.Fatalf("expected function_call output type, got %#v", first["type"])
|
||||
if first["type"] != "message" {
|
||||
t.Fatalf("expected message output type, got %#v", first["type"])
|
||||
}
|
||||
}
|
||||
|
||||
// Backward-compatible alias for historical test name used in CI logs.
|
||||
func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
|
||||
TestBuildResponseObjectKeepsFencedToolPayloadAsText(t)
|
||||
}
|
||||
|
||||
func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) {
|
||||
obj := BuildResponseObject(
|
||||
"resp_test",
|
||||
|
||||
@@ -8,9 +8,12 @@ const {
|
||||
parseToolCallsPayload,
|
||||
parseMarkupToolCalls,
|
||||
parseTextKVToolCalls,
|
||||
stripFencedCodeBlocks,
|
||||
} = require('./parse_payload');
|
||||
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');
|
||||
|
||||
const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
|
||||
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];
|
||||
|
||||
function extractToolNames(tools) {
|
||||
if (!Array.isArray(tools) || tools.length === 0) {
|
||||
@@ -44,6 +47,9 @@ function parseToolCallsDetailed(text, toolNames) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized);
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const candidates = buildToolCallCandidates(normalized);
|
||||
let parsed = [];
|
||||
@@ -89,6 +95,9 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
|
||||
return result;
|
||||
}
|
||||
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
|
||||
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
|
||||
return result;
|
||||
}
|
||||
const candidates = buildToolCallCandidates(trimmed);
|
||||
let parsed = [];
|
||||
for (const c of candidates) {
|
||||
@@ -223,11 +232,16 @@ function resolveAllowedToolName(name, allowed, allowedCanonical) {
|
||||
|
||||
function looksLikeToolCallSyntax(text) {
|
||||
const lower = toStringSafe(text).toLowerCase();
|
||||
return lower.includes('tool_calls')
|
||||
|| lower.includes('<tool_call')
|
||||
|| lower.includes('<function_call')
|
||||
|| lower.includes('<invoke')
|
||||
|| lower.includes('function.name:');
|
||||
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|
||||
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
|
||||
}
|
||||
|
||||
function shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
if (!looksLikeToolCallSyntax(text)) {
|
||||
return false;
|
||||
}
|
||||
const stripped = stripFencedCodeBlocks(text);
|
||||
return !looksLikeToolCallSyntax(stripped);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -114,6 +114,9 @@ function parseToolCallsPayload(payload) {
|
||||
return [];
|
||||
}
|
||||
if (decoded.tool_calls) {
|
||||
if (isLikelyChatMessageEnvelope(decoded)) {
|
||||
return [];
|
||||
}
|
||||
return parseToolCallList(decoded.tool_calls);
|
||||
}
|
||||
|
||||
@@ -121,6 +124,21 @@ function parseToolCallsPayload(payload) {
|
||||
return one ? [one] : [];
|
||||
}
|
||||
|
||||
function isLikelyChatMessageEnvelope(value) {
|
||||
if (!value || typeof value !== 'object' || Array.isArray(value)) {
|
||||
return false;
|
||||
}
|
||||
if (!Object.prototype.hasOwnProperty.call(value, 'tool_calls')) {
|
||||
return false;
|
||||
}
|
||||
const role = toStringSafe(value.role).trim().toLowerCase();
|
||||
if (role === 'assistant' || role === 'tool' || role === 'user' || role === 'system') {
|
||||
return true;
|
||||
}
|
||||
return Object.prototype.hasOwnProperty.call(value, 'tool_call_id')
|
||||
|| Object.prototype.hasOwnProperty.call(value, 'content');
|
||||
}
|
||||
|
||||
function parseMarkupToolCalls(text) {
|
||||
const raw = toStringSafe(text).trim();
|
||||
if (!raw) {
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
'use strict';
|
||||
const { resetIncrementalToolState, noteText, insideCodeFence } = require('./state');
|
||||
const {
|
||||
resetIncrementalToolState,
|
||||
noteText,
|
||||
insideCodeFenceWithState,
|
||||
} = require('./state');
|
||||
const { parseStandaloneToolCallsDetailed } = require('./parse');
|
||||
const { extractJSONObjectFrom } = require('./jsonscan');
|
||||
|
||||
const { TOOL_SEGMENT_KEYWORDS, earliestKeywordIndex } = require('./tool-keywords');
|
||||
function processToolSieveChunk(state, chunk, toolNames) {
|
||||
if (!state) {
|
||||
return [];
|
||||
@@ -53,7 +57,7 @@ function processToolSieveChunk(state, chunk, toolNames) {
|
||||
if (!pending) {
|
||||
break;
|
||||
}
|
||||
const start = findToolSegmentStart(pending);
|
||||
const start = findToolSegmentStart(state, pending);
|
||||
if (start >= 0) {
|
||||
const prefix = pending.slice(0, start);
|
||||
if (prefix) {
|
||||
@@ -143,32 +147,21 @@ function findSuspiciousPrefixStart(s) {
|
||||
return start;
|
||||
}
|
||||
|
||||
function findToolSegmentStart(s) {
|
||||
function findToolSegmentStart(state, s) {
|
||||
if (!s) {
|
||||
return -1;
|
||||
}
|
||||
const lower = s.toLowerCase();
|
||||
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
|
||||
let offset = 0;
|
||||
while (true) {
|
||||
let bestKeyIdx = -1;
|
||||
let matchedKeyword = '';
|
||||
for (const kw of keywords) {
|
||||
const idx = lower.indexOf(kw, offset);
|
||||
if (idx >= 0) {
|
||||
if (bestKeyIdx < 0 || idx < bestKeyIdx) {
|
||||
bestKeyIdx = idx;
|
||||
matchedKeyword = kw;
|
||||
}
|
||||
}
|
||||
}
|
||||
const { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
|
||||
if (bestKeyIdx < 0) {
|
||||
return -1;
|
||||
}
|
||||
const keyIdx = bestKeyIdx;
|
||||
const start = s.slice(0, keyIdx).lastIndexOf('{');
|
||||
const candidateStart = start >= 0 ? start : keyIdx;
|
||||
if (!insideCodeFence(s.slice(0, candidateStart))) {
|
||||
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
|
||||
return candidateStart;
|
||||
}
|
||||
offset = keyIdx + matchedKeyword.length;
|
||||
@@ -181,14 +174,7 @@ function consumeToolCapture(state, toolNames) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
const lower = captured.toLowerCase();
|
||||
let keyIdx = -1;
|
||||
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
|
||||
for (const kw of keywords) {
|
||||
const idx = lower.indexOf(kw);
|
||||
if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) {
|
||||
keyIdx = idx;
|
||||
}
|
||||
}
|
||||
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
|
||||
if (keyIdx < 0) {
|
||||
return { ready: false, prefix: '', calls: [], suffix: '' };
|
||||
}
|
||||
@@ -211,7 +197,7 @@ function consumeToolCapture(state, toolNames) {
|
||||
}
|
||||
const prefixPart = captured.slice(0, actualStart);
|
||||
const suffixPart = captured.slice(obj.end);
|
||||
if (insideCodeFence((state.recentTextTail || '') + prefixPart)) {
|
||||
if (insideCodeFenceWithState(state, prefixPart)) {
|
||||
return {
|
||||
ready: true,
|
||||
prefix: captured,
|
||||
@@ -281,7 +267,6 @@ function trimWrappingJSONFence(prefix, suffix) {
|
||||
if (header && header !== 'json') {
|
||||
return { prefix, suffix };
|
||||
}
|
||||
|
||||
const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
|
||||
if (!leftTrimmedSuffix.startsWith('```')) {
|
||||
return { prefix, suffix };
|
||||
@@ -292,7 +277,6 @@ function trimWrappingJSONFence(prefix, suffix) {
|
||||
suffix: (suffix || '').slice(consumed + 3),
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
processToolSieveChunk,
|
||||
flushToolSieve,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
'use strict';
|
||||
|
||||
const TOOL_SIEVE_CONTEXT_TAIL_LIMIT = 256;
|
||||
const TOOL_SIEVE_CONTEXT_TAIL_LIMIT = 4096;
|
||||
|
||||
function createToolSieveState() {
|
||||
return {
|
||||
@@ -8,6 +8,9 @@ function createToolSieveState() {
|
||||
capture: '',
|
||||
capturing: false,
|
||||
recentTextTail: '',
|
||||
codeFenceStack: [],
|
||||
codeFencePendingTicks: 0,
|
||||
codeFenceLineStart: true,
|
||||
pendingToolRaw: '',
|
||||
pendingToolCalls: [],
|
||||
disableDeltas: false,
|
||||
@@ -34,6 +37,7 @@ function noteText(state, text) {
|
||||
if (!state || !hasMeaningfulText(text)) {
|
||||
return;
|
||||
}
|
||||
updateCodeFenceState(state, text);
|
||||
state.recentTextTail = appendTail(state.recentTextTail, text, TOOL_SIEVE_CONTEXT_TAIL_LIMIT);
|
||||
}
|
||||
|
||||
@@ -63,6 +67,91 @@ function insideCodeFence(text) {
|
||||
return ticks % 2 === 1;
|
||||
}
|
||||
|
||||
function insideCodeFenceWithState(state, text) {
|
||||
if (!state) {
|
||||
return insideCodeFence(text);
|
||||
}
|
||||
const simulated = simulateCodeFenceState(
|
||||
Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [],
|
||||
Number.isInteger(state.codeFencePendingTicks) ? state.codeFencePendingTicks : 0,
|
||||
state.codeFenceLineStart !== false,
|
||||
text,
|
||||
);
|
||||
return simulated.stack.length > 0;
|
||||
}
|
||||
|
||||
function updateCodeFenceState(state, text) {
|
||||
if (!state) {
|
||||
return;
|
||||
}
|
||||
const next = simulateCodeFenceState(
|
||||
Array.isArray(state.codeFenceStack) ? state.codeFenceStack : [],
|
||||
Number.isInteger(state.codeFencePendingTicks) ? state.codeFencePendingTicks : 0,
|
||||
state.codeFenceLineStart !== false,
|
||||
text,
|
||||
);
|
||||
state.codeFenceStack = next.stack;
|
||||
state.codeFencePendingTicks = next.pendingTicks;
|
||||
state.codeFenceLineStart = next.lineStart;
|
||||
}
|
||||
|
||||
function simulateCodeFenceState(stack, pendingTicks, lineStart, text) {
|
||||
const chunk = typeof text === 'string' ? text : '';
|
||||
const nextStack = Array.isArray(stack) ? [...stack] : [];
|
||||
let ticks = Number.isInteger(pendingTicks) ? pendingTicks : 0;
|
||||
let atLineStart = lineStart !== false;
|
||||
|
||||
const flushTicks = () => {
|
||||
if (ticks > 0) {
|
||||
if (atLineStart && ticks >= 3) {
|
||||
applyFenceMarker(nextStack, ticks);
|
||||
}
|
||||
atLineStart = false;
|
||||
ticks = 0;
|
||||
}
|
||||
};
|
||||
|
||||
for (let i = 0; i < chunk.length; i += 1) {
|
||||
const ch = chunk[i];
|
||||
if (ch === '`') {
|
||||
ticks += 1;
|
||||
continue;
|
||||
}
|
||||
flushTicks();
|
||||
if (ch === '\n' || ch === '\r') {
|
||||
atLineStart = true;
|
||||
continue;
|
||||
}
|
||||
if ((ch === ' ' || ch === '\t') && atLineStart) {
|
||||
continue;
|
||||
}
|
||||
atLineStart = false;
|
||||
}
|
||||
// keep ticks for cross-chunk continuation.
|
||||
return {
|
||||
stack: nextStack,
|
||||
pendingTicks: ticks,
|
||||
lineStart: atLineStart,
|
||||
};
|
||||
}
|
||||
|
||||
function applyFenceMarker(stack, ticks) {
|
||||
if (!Array.isArray(stack)) {
|
||||
return;
|
||||
}
|
||||
if (stack.length === 0) {
|
||||
stack.push(ticks);
|
||||
return;
|
||||
}
|
||||
const top = stack[stack.length - 1];
|
||||
if (ticks >= top) {
|
||||
stack.pop();
|
||||
return;
|
||||
}
|
||||
// nested/open inner fence using longer marker for robustness.
|
||||
stack.push(ticks);
|
||||
}
|
||||
|
||||
function hasMeaningfulText(text) {
|
||||
return toStringSafe(text) !== '';
|
||||
}
|
||||
@@ -88,6 +177,8 @@ module.exports = {
|
||||
appendTail,
|
||||
looksLikeToolExampleContext,
|
||||
insideCodeFence,
|
||||
insideCodeFenceWithState,
|
||||
updateCodeFenceState,
|
||||
hasMeaningfulText,
|
||||
toStringSafe,
|
||||
};
|
||||
|
||||
29
internal/js/helpers/stream-tool-sieve/tool-keywords.js
Normal file
29
internal/js/helpers/stream-tool-sieve/tool-keywords.js
Normal file
@@ -0,0 +1,29 @@
|
||||
'use strict';
|
||||
|
||||
const TOOL_SEGMENT_KEYWORDS = [
|
||||
'tool_calls',
|
||||
'function.name:',
|
||||
'[tool_call_history]',
|
||||
'[tool_result_history]',
|
||||
];
|
||||
|
||||
function earliestKeywordIndex(text, keywords = TOOL_SEGMENT_KEYWORDS, offset = 0) {
|
||||
if (!text) {
|
||||
return { index: -1, keyword: '' };
|
||||
}
|
||||
let index = -1;
|
||||
let keyword = '';
|
||||
for (const kw of keywords) {
|
||||
const candidate = text.indexOf(kw, offset);
|
||||
if (candidate >= 0 && (index < 0 || candidate < index)) {
|
||||
index = candidate;
|
||||
keyword = kw;
|
||||
}
|
||||
}
|
||||
return { index, keyword };
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
TOOL_SEGMENT_KEYWORDS,
|
||||
earliestKeywordIndex,
|
||||
};
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
|
||||
var toolCallPattern = regexp.MustCompile(`\{\s*["']tool_calls["']\s*:\s*\[(.*?)\]\s*\}`)
|
||||
var fencedJSONPattern = regexp.MustCompile("(?s)```(?:json)?\\s*(.*?)\\s*```")
|
||||
var fencedCodeBlockPattern = regexp.MustCompile("(?s)```[\\s\\S]*?```")
|
||||
var markupToolSyntaxPattern = regexp.MustCompile(`(?i)<(?:(?:[a-z0-9_:-]+:)?(?:tool_call|function_call|invoke)\b|(?:[a-z0-9_:-]+:)?function_calls\b|(?:[a-z0-9_:-]+:)?tool_use\b)`)
|
||||
|
||||
func buildToolCallCandidates(text string) []string {
|
||||
trimmed := strings.TrimSpace(text)
|
||||
@@ -173,3 +175,22 @@ func looksLikeToolExampleContext(text string) bool {
|
||||
}
|
||||
return strings.Contains(t, "```")
|
||||
}
|
||||
|
||||
func shouldSkipToolCallParsingForCodeFenceExample(text string) bool {
|
||||
if !looksLikeToolCallSyntax(text) {
|
||||
return false
|
||||
}
|
||||
stripped := strings.TrimSpace(stripFencedCodeBlocks(text))
|
||||
return !looksLikeToolCallSyntax(stripped)
|
||||
}
|
||||
|
||||
func looksLikeMarkupToolSyntax(text string) bool {
|
||||
return markupToolSyntaxPattern.MatchString(text)
|
||||
}
|
||||
|
||||
func stripFencedCodeBlocks(text string) string {
|
||||
if text == "" {
|
||||
return ""
|
||||
}
|
||||
return fencedCodeBlockPattern.ReplaceAllString(text, " ")
|
||||
}
|
||||
|
||||
@@ -26,6 +26,9 @@ func ParseToolCallsDetailed(text string, availableToolNames []string) ToolCallPa
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(text)
|
||||
if shouldSkipToolCallParsingForCodeFenceExample(text) {
|
||||
return result
|
||||
}
|
||||
|
||||
candidates := buildToolCallCandidates(text)
|
||||
var parsed []ParsedToolCall
|
||||
@@ -74,6 +77,9 @@ func ParseStandaloneToolCallsDetailed(text string, availableToolNames []string)
|
||||
return result
|
||||
}
|
||||
result.SawToolCallSyntax = looksLikeToolCallSyntax(trimmed)
|
||||
if shouldSkipToolCallParsingForCodeFenceExample(trimmed) {
|
||||
return result
|
||||
}
|
||||
candidates := buildToolCallCandidates(trimmed)
|
||||
var parsed []ParsedToolCall
|
||||
for _, candidate := range candidates {
|
||||
@@ -183,6 +189,9 @@ func parseToolCallsPayload(payload string) []ParsedToolCall {
|
||||
switch v := decoded.(type) {
|
||||
case map[string]any:
|
||||
if tc, ok := v["tool_calls"]; ok {
|
||||
if isLikelyChatMessageEnvelope(v) {
|
||||
return nil
|
||||
}
|
||||
return parseToolCallList(tc)
|
||||
}
|
||||
if parsed, ok := parseToolCallItem(v); ok {
|
||||
@@ -194,6 +203,28 @@ func parseToolCallsPayload(payload string) []ParsedToolCall {
|
||||
return nil
|
||||
}
|
||||
|
||||
func isLikelyChatMessageEnvelope(v map[string]any) bool {
|
||||
if v == nil {
|
||||
return false
|
||||
}
|
||||
if _, ok := v["tool_calls"]; !ok {
|
||||
return false
|
||||
}
|
||||
if role, ok := v["role"].(string); ok {
|
||||
switch strings.ToLower(strings.TrimSpace(role)) {
|
||||
case "assistant", "tool", "user", "system":
|
||||
return true
|
||||
}
|
||||
}
|
||||
if _, ok := v["tool_call_id"]; ok {
|
||||
return true
|
||||
}
|
||||
if _, ok := v["content"]; ok {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func looksLikeToolCallSyntax(text string) bool {
|
||||
lower := strings.ToLower(text)
|
||||
return strings.Contains(lower, "tool_calls") ||
|
||||
|
||||
@@ -19,11 +19,11 @@ func TestParseToolCalls(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseToolCallsFromFencedJSON(t *testing.T) {
|
||||
func TestParseToolCallsIgnoresFencedJSON(t *testing.T) {
|
||||
text := "I will call tools now\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"news\"}}]}\n```"
|
||||
calls := ParseToolCalls(text, []string{"search"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected fenced tool_call payload to be parsed, got %#v", calls)
|
||||
if len(calls) != 0 {
|
||||
t.Fatalf("expected fenced tool_call payload to be ignored, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,10 +112,17 @@ func TestParseStandaloneToolCallsSupportsMixedProsePayload(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseStandaloneToolCallsParsesFencedCodeBlock(t *testing.T) {
|
||||
func TestParseStandaloneToolCallsIgnoresFencedCodeBlock(t *testing.T) {
|
||||
fenced := "```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```"
|
||||
if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 1 {
|
||||
t.Fatalf("expected fenced tool_call payload to be parsed, got %#v", calls)
|
||||
if calls := ParseStandaloneToolCalls(fenced, []string{"search"}); len(calls) != 0 {
|
||||
t.Fatalf("expected fenced tool_call payload to be ignored, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseStandaloneToolCallsIgnoresChatTranscriptEnvelope(t *testing.T) {
|
||||
transcript := `[{"role":"user","content":"请展示完整会话"},{"role":"assistant","content":null,"tool_calls":[{"function":{"name":"search","arguments":"{\"q\":\"go\"}"}}]}]`
|
||||
if calls := ParseStandaloneToolCalls(transcript, []string{"search"}); len(calls) != 0 {
|
||||
t.Fatalf("expected transcript envelope not to trigger tool call parse, got %#v", calls)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -409,8 +409,8 @@ func TestParseToolCallsWithFunctionWrapper(t *testing.T) {
|
||||
func TestParseStandaloneToolCallsFencedCodeBlock(t *testing.T) {
|
||||
fenced := "Here's an example:\n```json\n{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}\n```\nDon't execute this."
|
||||
calls := ParseStandaloneToolCalls(fenced, []string{"search"})
|
||||
if len(calls) != 1 {
|
||||
t.Fatalf("expected fenced code block to be parsed, got %d calls", len(calls))
|
||||
if len(calls) != 0 {
|
||||
t.Fatalf("expected fenced code block to be ignored, got %d calls", len(calls))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,12 +1,5 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
|
||||
@@ -1,12 +1,5 @@
|
||||
{
|
||||
"calls": [
|
||||
{
|
||||
"name": "read_file",
|
||||
"input": {
|
||||
"path": "README.MD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"calls": [],
|
||||
"sawToolCallSyntax": true,
|
||||
"rejectedByPolicy": false,
|
||||
"rejectedToolNames": []
|
||||
|
||||
@@ -84,7 +84,7 @@ test('parseToolCalls rejects all names when toolNames is empty (Go strict parity
|
||||
assert.deepEqual(detailed.rejectedToolNames, ['not_in_schema']);
|
||||
});
|
||||
|
||||
test('parseToolCalls supports fenced json and function.arguments string payload', () => {
|
||||
test('parseToolCalls ignores tool_call payloads that exist only inside fenced code blocks', () => {
|
||||
const text = [
|
||||
'I will call a tool now.',
|
||||
'```json',
|
||||
@@ -92,9 +92,7 @@ test('parseToolCalls supports fenced json and function.arguments string payload'
|
||||
'```',
|
||||
].join('\n');
|
||||
const calls = parseToolCalls(text, ['read_file']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].name, 'read_file');
|
||||
assert.equal(calls[0].input.path, 'README.md');
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseToolCalls parses text-kv fallback payload', () => {
|
||||
@@ -134,10 +132,23 @@ test('parseStandaloneToolCalls parses mixed prose payload', () => {
|
||||
assert.equal(standaloneCalls.length, 1);
|
||||
});
|
||||
|
||||
test('parseStandaloneToolCalls parses fenced code block tool_call payload', () => {
|
||||
test('parseStandaloneToolCalls ignores fenced code block tool_call payload', () => {
|
||||
const fenced = ['```json', '{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}', '```'].join('\n');
|
||||
const calls = parseStandaloneToolCalls(fenced, ['read_file']);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
test('parseStandaloneToolCalls ignores chat transcript message envelope with tool_calls', () => {
|
||||
const transcript = JSON.stringify([
|
||||
{ role: 'user', content: '请展示完整会话' },
|
||||
{
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
tool_calls: [{ function: { name: 'read_file', arguments: '{"path":"README.MD"}' } }],
|
||||
},
|
||||
]);
|
||||
const calls = parseStandaloneToolCalls(transcript, ['read_file']);
|
||||
assert.equal(calls.length, 0);
|
||||
});
|
||||
|
||||
|
||||
@@ -348,6 +359,59 @@ test('sieve preserves closed fence before standalone tool payload', () => {
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), false);
|
||||
});
|
||||
|
||||
test('sieve does not trigger tool calls for long fenced examples beyond legacy tail window', () => {
|
||||
const longPadding = 'x'.repeat(700);
|
||||
const events = runSieve(
|
||||
[
|
||||
`前置说明\n\`\`\`json\n${longPadding}\n`,
|
||||
'{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n',
|
||||
'```',
|
||||
'\n后置说明',
|
||||
],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, false);
|
||||
assert.equal(leakedText.includes('后置说明'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('sieve keeps fence state when triple-backticks are split across chunks', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'示例开始\n``',
|
||||
'`json\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n',
|
||||
'```',
|
||||
'\n示例结束',
|
||||
],
|
||||
['read_file'],
|
||||
);
|
||||
const hasTool = events.some((evt) => evt.type === 'tool_calls' && evt.calls?.length > 0);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(hasTool, false);
|
||||
assert.equal(leakedText.includes('示例结束'), true);
|
||||
assert.equal(leakedText.toLowerCase().includes('tool_calls'), true);
|
||||
});
|
||||
|
||||
test('sieve ignores tool-like payload inside nested fences and resumes detection after close', () => {
|
||||
const events = runSieve(
|
||||
[
|
||||
'外层示例开始\n````markdown\n',
|
||||
'```json\n{"tool_calls":[{"name":"read_file","input":{"path":"README.MD"}}]}\n```\n',
|
||||
'````\n',
|
||||
'{"tool_calls":[{"name":"read_file","input":{"path":"README2.MD"}}]}',
|
||||
],
|
||||
['read_file'],
|
||||
);
|
||||
const calls = events.filter((evt) => evt.type === 'tool_calls').flatMap((evt) => evt.calls || []);
|
||||
const leakedText = collectText(events);
|
||||
assert.equal(calls.length, 1);
|
||||
assert.equal(calls[0].input.path, 'README2.MD');
|
||||
assert.equal(leakedText.includes('README.MD'), true);
|
||||
assert.equal(leakedText.includes('README2.MD'), false);
|
||||
});
|
||||
|
||||
test('formatOpenAIStreamToolCalls reuses ids with the same idStore', () => {
|
||||
const idStore = new Map();
|
||||
const calls = [{ name: 'read_file', input: { path: 'README.MD' } }];
|
||||
|
||||
Reference in New Issue
Block a user