diff --git a/internal/js/chat-stream/sse_parse_impl.js b/internal/js/chat-stream/sse_parse_impl.js index 0ce2896..8db00ae 100644 --- a/internal/js/chat-stream/sse_parse_impl.js +++ b/internal/js/chat-stream/sse_parse_impl.js @@ -7,6 +7,53 @@ const { SKIP_EXACT_PATHS, } = require('../shared/deepseek-constants'); + + +function stripThinkTags(text) { + if (typeof text !== 'string' || !text) { + return text; + } + return text.replace(/<\/?\s*think\s*>/gi, ''); +} + +function splitThinkingParts(parts) { + const out = []; + let thinkingDone = false; + for (const p of parts) { + if (!p) continue; + if (thinkingDone && p.type === 'thinking') { + const cleaned = stripThinkTags(p.text); + if (cleaned) { + out.push({ text: cleaned, type: 'text' }); + } + continue; + } + if (p.type !== 'thinking') { + const cleaned = stripThinkTags(p.text); + if (cleaned) { + out.push({ text: cleaned, type: p.type }); + } + continue; + } + const match = /<\/\s*think\s*>/i.exec(p.text); + if (!match) { + out.push(p); + continue; + } + thinkingDone = true; + const before = p.text.substring(0, match.index); + let after = p.text.substring(match.index + match[0].length); + if (before) { + out.push({ text: before, type: 'thinking' }); + } + after = stripThinkTags(after); + if (after) { + out.push({ text: after, type: 'text' }); + } + } + return { parts: out, transitioned: thinkingDone }; +} + function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenceMarkers = true) { if (!chunk || typeof chunk !== 'object') { return { @@ -147,7 +194,11 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc let partType = 'text'; if (pathValue === 'response/thinking_content') { - partType = 'thinking'; + if (newType === 'text') { + partType = 'text'; + } else { + partType = 'thinking'; + } } else if (pathValue === 'response/content') { partType = 'text'; } else if (pathValue.includes('response/fragments') && pathValue.includes('/content')) { @@ -186,9 +237,16 @@ 
function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc if (content) { parts.push({ text: content, type: partType }); } + + let resolvedParts = filterLeakedContentFilterParts(parts); + const splitResult = splitThinkingParts(resolvedParts); + if (splitResult.transitioned) { + newType = 'text'; + } + return { parsed: true, - parts: filterLeakedContentFilterParts(parts), + parts: splitResult.parts, finished: false, contentFilter: false, errorMessage: '', @@ -213,9 +271,16 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc }; } parts.push(...extracted.parts); + + let resolvedParts = filterLeakedContentFilterParts(parts); + const splitResult = splitThinkingParts(resolvedParts); + if (splitResult.transitioned) { + newType = 'text'; + } + return { parsed: true, - parts: filterLeakedContentFilterParts(parts), + parts: splitResult.parts, finished: false, contentFilter: false, errorMessage: '', @@ -249,9 +314,16 @@ function parseChunkForContent(chunk, thinkingEnabled, currentType, stripReferenc } } } + + let resolvedParts = filterLeakedContentFilterParts(parts); + const splitResult = splitThinkingParts(resolvedParts); + if (splitResult.transitioned) { + newType = 'text'; + } + return { parsed: true, - parts: filterLeakedContentFilterParts(parts), + parts: splitResult.parts, finished: false, contentFilter: false, errorMessage: '', @@ -546,4 +618,5 @@ module.exports = { isFragmentStatusPath, isCitation, stripReferenceMarkers: stripReferenceMarkersText, + stripThinkTags, }; diff --git a/internal/sse/parser.go b/internal/sse/parser.go index f051225..34813be 100644 --- a/internal/sse/parser.go +++ b/internal/sse/parser.go @@ -273,7 +273,10 @@ func splitThinkingParts(parts []ContentPart) ([]ContentPart, bool) { continue } if p.Type != "thinking" { - out = append(out, p) + cleaned := stripThinkTags(p.Text) + if cleaned != "" { + out = append(out, ContentPart{Text: cleaned, Type: p.Type}) + } continue } loc := 
thinkClosePattern.FindStringIndex(p.Text) @@ -294,7 +297,8 @@ func splitThinkingParts(parts []ContentPart) ([]ContentPart, bool) { } } if !thinkingDone { - return parts, false + // Return 'out' instead of 'parts' because text parts might have been cleaned via stripThinkTags + return out, false } return out, true } diff --git a/internal/sse/parser_test.go b/internal/sse/parser_test.go index 1e494f5..6ea69fa 100644 --- a/internal/sse/parser_test.go +++ b/internal/sse/parser_test.go @@ -149,3 +149,17 @@ func TestParseSSEChunkForContentAutoTransitionsState(t *testing.T) { t.Fatalf("expected nextType2 to remain text, got %q", nextType2) } } + +func TestParseSSEChunkForContentStripsLeakedThinkTagsFromText(t *testing.T) { + chunk := map[string]any{ + "p": "response/content", // This makes the part type "text" + "v": "normal text <think>leaked</think> end", + } + parts, _, _ := ParseSSEChunkForContent(chunk, true, "text") + if len(parts) != 1 { + t.Fatalf("expected 1 part, got %d: %#v", len(parts), parts) + } + if parts[0].Type != "text" || parts[0].Text != "normal text leaked end" { + t.Fatalf("expected leaked think tag to be stripped, got %#v", parts[0]) + }
 +}