Fix streaming whitespace trim and capture TOOL_RESULT_HISTORY

This commit is contained in:
CJACK.
2026-03-22 00:44:44 +08:00
parent 6758514c61
commit 648bb74587
5 changed files with 44 additions and 8 deletions

View File

@@ -2,15 +2,13 @@ package openai
import (
"regexp"
"strings"
)
// leakedToolHistoryPattern matches a complete [TOOL_CALL_HISTORY]...[/TOOL_CALL_HISTORY]
// or [TOOL_RESULT_HISTORY]...[/TOOL_RESULT_HISTORY] block. The (?is) flags make
// the match case-insensitive and let it span newlines; the lazy [\s\S]*? stops
// at the first closing tag.
var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s\S]*?\[/TOOL_CALL_HISTORY\]|\[TOOL_RESULT_HISTORY\][\s\S]*?\[/TOOL_RESULT_HISTORY\]`)
// sanitizeLeakedToolHistory removes every leakedToolHistoryPattern match from
// text and returns the remainder.
//
// NOTE(review): this hunk appears to show removed and added lines side by side
// without +/- markers (two guards, two returns), so it does not parse as-is.
// The commit title and TestSanitizeLeakedToolHistoryPreservesChunkWhitespace
// suggest the untrimmed variants (the `text == ""` guard and the direct
// ReplaceAllString return) are the post-change lines — confirm before editing.
func sanitizeLeakedToolHistory(text string) string {
if strings.TrimSpace(text) == "" {
if text == "" {
return text
}
cleaned := leakedToolHistoryPattern.ReplaceAllString(text, "")
return strings.TrimSpace(cleaned)
return leakedToolHistoryPattern.ReplaceAllString(text, "")
}

View File

@@ -10,6 +10,14 @@ func TestSanitizeLeakedToolHistoryRemovesMarkerBlocks(t *testing.T) {
}
}
// TestSanitizeLeakedToolHistoryPreservesChunkWhitespace checks that a chunk
// with no history markers comes back unchanged, trailing space included.
func TestSanitizeLeakedToolHistoryPreservesChunkWhitespace(t *testing.T) {
	const want = "Hello "
	if out := sanitizeLeakedToolHistory(want); out != want {
		t.Fatalf("expected trailing whitespace to be preserved, got %q", out)
	}
}
func TestFlushToolSieveDropsToolHistoryLeak(t *testing.T) {
var state toolStreamSieveState
chunk := "[TOOL_CALL_HISTORY]\nstatus: already_called\nfunction.name: exec\nfunction.arguments: {}\n[/TOOL_CALL_HISTORY]"
@@ -22,3 +30,16 @@ func TestFlushToolSieveDropsToolHistoryLeak(t *testing.T) {
t.Fatalf("expected history block to be swallowed, got %+v", flushed)
}
}
// TestFlushToolSieveDropsToolResultHistoryLeak feeds one complete
// [TOOL_RESULT_HISTORY] block through the sieve and expects no events, both
// from processing the chunk and from the final flush.
func TestFlushToolSieveDropsToolResultHistoryLeak(t *testing.T) {
	const leaked = "[TOOL_RESULT_HISTORY]\nstatus: already_called\nfunction.name: exec\nfunction.arguments: {}\n[/TOOL_RESULT_HISTORY]"

	var state toolStreamSieveState

	// Nothing may be emitted while the chunk is being processed.
	if immediate := processToolSieveChunk(&state, leaked, []string{"exec"}); len(immediate) != 0 {
		t.Fatalf("expected no immediate output before result history block is complete, got %+v", immediate)
	}

	// Flushing must not release the block as text either.
	if flushed := flushToolSieve(&state, []string{"exec"}); len(flushed) != 0 {
		t.Fatalf("expected result history block to be swallowed, got %+v", flushed)
	}
}

View File

@@ -167,7 +167,7 @@ func findToolSegmentStart(s string) int {
return -1
}
lower := strings.ToLower(s)
keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
keywords := []string{"tool_calls", "function.name:", "[tool_call_history]", "[tool_result_history]"}
bestKeyIdx := -1
for _, kw := range keywords {
idx := strings.Index(lower, kw)
@@ -196,7 +196,7 @@ func consumeToolCapture(state *toolStreamSieveState, toolNames []string) (prefix
lower := strings.ToLower(captured)
keyIdx := -1
keywords := []string{"tool_calls", "function.name:", "[tool_call_history]"}
keywords := []string{"tool_calls", "function.name:", "[tool_call_history]", "[tool_result_history]"}
for _, kw := range keywords {
idx := strings.Index(lower, kw)
if idx >= 0 && (keyIdx < 0 || idx < keyIdx) {

View File

@@ -168,7 +168,7 @@ function findToolSegmentStart(s) {
return -1;
}
const lower = s.toLowerCase();
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]'];
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
let offset = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
@@ -207,7 +207,7 @@ function consumeToolCapture(state, toolNames) {
const lower = captured.toLowerCase();
let keyIdx = -1;
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]'];
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
for (const kw of keywords) {
const idx = lower.indexOf(kw);
if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) {

View File

@@ -243,6 +243,23 @@ test('sieve swallows leaked TOOL_CALL_HISTORY marker blocks', () => {
assert.equal(leakedText.includes('[TOOL_CALL_HISTORY]'), false);
});
// A complete [TOOL_RESULT_HISTORY] block sent between two text chunks must be
// dropped: the surrounding text survives, the marker does not appear in the
// collected text, and no tool_calls event is produced.
test('sieve swallows leaked TOOL_RESULT_HISTORY marker blocks', () => {
  const chunks = [
    '前置文本。',
    '[TOOL_RESULT_HISTORY]\nstatus: already_called\nfunction.name: exec\nfunction.arguments: {}\n[/TOOL_RESULT_HISTORY]',
    '后置文本。',
  ];
  const events = runSieve(chunks, ['exec']);
  const text = collectText(events);
  const sawToolCall = events.some(({ type }) => type === 'tool_calls');

  assert.equal(sawToolCall, false);
  assert.equal(text.includes('前置文本。'), true);
  assert.equal(text.includes('后置文本。'), true);
  assert.equal(text.includes('[TOOL_RESULT_HISTORY]'), false);
});
test('sieve intercepts rejected unknown tool payload (no args) without raw leak', () => {
const events = runSieve(
['{"tool_calls":[{"name":"not_in_schema"}]}', '后置正文G。'],