feat: Implement tool call sieving and formatting for streaming responses in Node.js, add Go fallback for non-Vercel environments, and update Vercel configuration.

This commit is contained in:
CJACK
2026-02-17 00:26:34 +08:00
parent 770f5719d8
commit 555df63fbc
9 changed files with 855 additions and 31 deletions

View File

@@ -39,6 +39,17 @@ type streamLease struct {
ExpiresAt time.Time
}
type toolStreamSieveState struct {
pending strings.Builder
capture strings.Builder
capturing bool
}
type toolStreamEvent struct {
Content string
ToolCalls []util.ParsedToolCall
}
func RegisterRoutes(r chi.Router, h *Handler) {
r.Get("/v1/models", h.ListModels)
r.Post("/v1/chat/completions", h.ChatCompletions)
@@ -376,6 +387,8 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
created := time.Now().Unix()
firstChunkSent := false
bufferToolContent := len(toolNames) > 0
var toolSieve toolStreamSieveState
toolCallsEmitted := false
currentType := "text"
if thinkingEnabled {
currentType = "thinking"
@@ -408,7 +421,7 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
finalThinking := thinking.String()
finalText := text.String()
detected := util.ParseToolCalls(finalText, toolNames)
if len(detected) > 0 {
if len(detected) > 0 && !toolCallsEmitted {
finishReason = "tool_calls"
delta := map[string]any{
"tool_calls": util.FormatOpenAIStreamToolCalls(detected),
@@ -424,21 +437,29 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
"model": model,
"choices": []map[string]any{{"delta": delta, "index": 0}},
})
} else if bufferToolContent && strings.TrimSpace(finalText) != "" {
delta := map[string]any{
"content": finalText,
} else if bufferToolContent {
for _, evt := range flushToolSieve(&toolSieve, toolNames) {
if evt.Content == "" {
continue
}
delta := map[string]any{
"content": evt.Content,
}
if !firstChunkSent {
delta["role"] = "assistant"
firstChunkSent = true
}
sendChunk(map[string]any{
"id": completionID,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": []map[string]any{{"delta": delta, "index": 0}},
})
}
if !firstChunkSent {
delta["role"] = "assistant"
firstChunkSent = true
}
sendChunk(map[string]any{
"id": completionID,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": []map[string]any{{"delta": delta, "index": 0}},
})
}
if len(detected) > 0 || toolCallsEmitted {
finishReason = "tool_calls"
}
promptTokens := util.EstimateTokens(finalPrompt)
reasoningTokens := util.EstimateTokens(finalThinking)
@@ -532,6 +553,41 @@ func (h *Handler) handleStream(w http.ResponseWriter, r *http.Request, resp *htt
text.WriteString(p.Text)
if !bufferToolContent {
delta["content"] = p.Text
} else {
events := processToolSieveChunk(&toolSieve, p.Text, toolNames)
if len(events) == 0 {
// Keep thinking delta only frame.
}
for _, evt := range events {
if len(evt.ToolCalls) > 0 {
toolCallsEmitted = true
tcDelta := map[string]any{
"tool_calls": util.FormatOpenAIStreamToolCalls(evt.ToolCalls),
}
if !firstChunkSent {
tcDelta["role"] = "assistant"
firstChunkSent = true
}
newChoices = append(newChoices, map[string]any{
"delta": tcDelta,
"index": 0,
})
continue
}
if evt.Content != "" {
contentDelta := map[string]any{
"content": evt.Content,
}
if !firstChunkSent {
contentDelta["role"] = "assistant"
firstChunkSent = true
}
newChoices = append(newChoices, map[string]any{
"delta": contentDelta,
"index": 0,
})
}
}
}
}
if len(delta) > 0 {
@@ -669,6 +725,224 @@ func vercelInternalSecret() string {
return "admin"
}
func shouldEmitBufferedToolProbeContent(buffered string) bool {
trimmed := strings.TrimSpace(buffered)
if trimmed == "" {
return false
}
normalized := normalizeToolProbePrefix(trimmed)
if normalized == "" {
return false
}
first := normalized[0]
switch first {
case '{', '[', '`':
lower := strings.ToLower(normalized)
if strings.Contains(lower, "tool_calls") {
return false
}
// Keep a short hold window for JSON-ish starts to avoid leaking tool JSON.
if len([]rune(normalized)) < 20 {
return false
}
return true
default:
// Natural language starts can be streamed immediately.
return true
}
}
func normalizeToolProbePrefix(s string) string {
t := strings.TrimSpace(s)
if strings.HasPrefix(t, "```") {
t = strings.TrimPrefix(t, "```")
t = strings.TrimSpace(t)
t = strings.TrimPrefix(strings.ToLower(t), "json")
t = strings.TrimSpace(t)
}
return t
}
func processToolSieveChunk(state *toolStreamSieveState, chunk string, toolNames []string) []toolStreamEvent {
if state == nil || chunk == "" {
return nil
}
state.pending.WriteString(chunk)
events := make([]toolStreamEvent, 0, 2)
for {
if state.capturing {
if state.pending.Len() > 0 {
state.capture.WriteString(state.pending.String())
state.pending.Reset()
}
prefix, calls, suffix, ready := consumeToolCapture(state.capture.String(), toolNames)
if !ready {
break
}
state.capture.Reset()
state.capturing = false
if prefix != "" {
events = append(events, toolStreamEvent{Content: prefix})
}
if len(calls) > 0 {
events = append(events, toolStreamEvent{ToolCalls: calls})
}
if suffix != "" {
state.pending.WriteString(suffix)
}
continue
}
pending := state.pending.String()
if pending == "" {
break
}
start := findToolSegmentStart(pending)
if start >= 0 {
prefix := pending[:start]
if prefix != "" {
events = append(events, toolStreamEvent{Content: prefix})
}
state.pending.Reset()
state.capture.WriteString(pending[start:])
state.capturing = true
continue
}
safe, hold := splitSafeContent(pending, 64)
if safe == "" {
break
}
state.pending.Reset()
state.pending.WriteString(hold)
events = append(events, toolStreamEvent{Content: safe})
}
return events
}
func flushToolSieve(state *toolStreamSieveState, toolNames []string) []toolStreamEvent {
if state == nil {
return nil
}
events := processToolSieveChunk(state, "", toolNames)
if state.capturing {
raw := state.capture.String()
state.capture.Reset()
state.capturing = false
if raw != "" {
events = append(events, toolStreamEvent{Content: raw})
}
}
if state.pending.Len() > 0 {
events = append(events, toolStreamEvent{Content: state.pending.String()})
state.pending.Reset()
}
return events
}
func splitSafeContent(s string, holdRunes int) (safe, hold string) {
if s == "" || holdRunes <= 0 {
return s, ""
}
runes := []rune(s)
if len(runes) <= holdRunes {
return "", s
}
return string(runes[:len(runes)-holdRunes]), string(runes[len(runes)-holdRunes:])
}
func findToolSegmentStart(s string) int {
if s == "" {
return -1
}
lower := strings.ToLower(s)
keyIdx := strings.Index(lower, "tool_calls")
if keyIdx < 0 {
return -1
}
if start := strings.LastIndex(s[:keyIdx], "{"); start >= 0 {
return start
}
return keyIdx
}
func consumeToolCapture(captured string, toolNames []string) (prefix string, calls []util.ParsedToolCall, suffix string, ready bool) {
if captured == "" {
return "", nil, "", false
}
lower := strings.ToLower(captured)
keyIdx := strings.Index(lower, "tool_calls")
if keyIdx < 0 {
if len([]rune(captured)) >= 256 {
return captured, nil, "", true
}
return "", nil, "", false
}
start := strings.LastIndex(captured[:keyIdx], "{")
if start < 0 {
if len([]rune(captured)) >= 512 {
return captured, nil, "", true
}
return "", nil, "", false
}
obj, end, ok := extractJSONObjectFrom(captured, start)
if !ok {
if len([]rune(captured)) >= 4096 {
return captured, nil, "", true
}
return "", nil, "", false
}
parsed := util.ParseToolCalls(obj, toolNames)
if len(parsed) == 0 {
return captured[:end], nil, captured[end:], true
}
return captured[:start], parsed, captured[end:], true
}
func extractJSONObjectFrom(text string, start int) (string, int, bool) {
if start < 0 || start >= len(text) || text[start] != '{' {
return "", 0, false
}
depth := 0
quote := byte(0)
escaped := false
for i := start; i < len(text); i++ {
ch := text[i]
if quote != 0 {
if escaped {
escaped = false
continue
}
if ch == '\\' {
escaped = true
continue
}
if ch == quote {
quote = 0
}
continue
}
if ch == '"' || ch == '\'' {
quote = ch
continue
}
if ch == '{' {
depth++
continue
}
if ch == '}' {
depth--
if depth == 0 {
end := i + 1
return text[start:end], end, true
}
}
}
return "", 0, false
}
func (h *Handler) holdStreamLease(a *auth.RequestAuth) string {
if a == nil {
return ""

View File

@@ -335,3 +335,84 @@ func TestHandleStreamUnknownToolStillIntercepted(t *testing.T) {
t.Fatalf("raw tool_calls JSON leaked in content delta: %s", rec.Body.String())
}
}
func TestHandleStreamToolsPlainTextStreamsBeforeFinish(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":"你好,"}`,
`data: {"p":"response/content","v":"这是普通文本回复。"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
h.handleStream(rec, req, resp, "cid6", "deepseek-chat", "prompt", false, false, []string{"search"})
frames, done := parseSSEDataFrames(t, rec.Body.String())
if !done {
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
}
if streamHasToolCallsDelta(frames) {
t.Fatalf("did not expect tool_calls delta for plain text: %s", rec.Body.String())
}
content := strings.Builder{}
for _, frame := range frames {
choices, _ := frame["choices"].([]any)
for _, item := range choices {
choice, _ := item.(map[string]any)
delta, _ := choice["delta"].(map[string]any)
if c, ok := delta["content"].(string); ok {
content.WriteString(c)
}
}
}
if got := content.String(); got == "" {
t.Fatalf("expected streamed content in tool mode plain text, body=%s", rec.Body.String())
}
if streamFinishReason(frames) != "stop" {
t.Fatalf("expected finish_reason=stop, body=%s", rec.Body.String())
}
}
func TestHandleStreamToolCallMixedWithPlainTextSegments(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
`data: {"p":"response/content","v":"前置正文A。"}`,
`data: {"p":"response/content","v":"{\"tool_calls\":[{\"name\":\"search\",\"input\":{\"q\":\"go\"}}]}"}`,
`data: {"p":"response/content","v":"后置正文B。"}`,
`data: [DONE]`,
)
rec := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions", nil)
h.handleStream(rec, req, resp, "cid7", "deepseek-chat", "prompt", false, false, []string{"search"})
frames, done := parseSSEDataFrames(t, rec.Body.String())
if !done {
t.Fatalf("expected [DONE], body=%s", rec.Body.String())
}
if !streamHasToolCallsDelta(frames) {
t.Fatalf("expected tool_calls delta in mixed stream, body=%s", rec.Body.String())
}
if streamHasRawToolJSONContent(frames) {
t.Fatalf("raw tool_calls JSON leaked in mixed stream: %s", rec.Body.String())
}
content := strings.Builder{}
for _, frame := range frames {
choices, _ := frame["choices"].([]any)
for _, item := range choices {
choice, _ := item.(map[string]any)
delta, _ := choice["delta"].(map[string]any)
if c, ok := delta["content"].(string); ok {
content.WriteString(c)
}
}
}
got := content.String()
if !strings.Contains(got, "前置正文A。") || !strings.Contains(got, "后置正文B。") {
t.Fatalf("expected pre/post plain text to pass sieve, got=%q", got)
}
if streamFinishReason(frames) != "tool_calls" {
t.Fatalf("expected finish_reason=tool_calls, body=%s", rec.Body.String())
}
}