chore(sse): bump client version and refresh longtext stream fixtures

This commit is contained in:
CJACK.
2026-04-30 02:05:45 +08:00
parent bc2a78ae29
commit 0f89823526
14 changed files with 10000 additions and 3705 deletions

View File

@@ -313,3 +313,14 @@ parse SSE block
- 解析器应当对未知字段、未知路径、未知事件保持容忍。
如果你要把这份说明用于实际开发,建议同时保留原始流样本、回放脚本和回归测试,不要只依赖本文。
## 2026-04-29 最近线上样本增量观察
基于 `longtext-deepseek-v4-flash-20260429` 和 `longtext-deepseek-v4-pro-20260429` 两个真实账号长文本样本,近期格式变化要点如下:
1. `data:` 事件中仍大量出现 `{"v":"..."}` 的无路径增量(`p` 缺失),解析器必须把空路径视为可见正文候选,而不能只依赖 `response/content`。
2. 对象形态 `v`(如 `{"text":"..."}` / `{"content":"..."}`)仍会出现,且可能与无路径 chunk 混用;仅按字符串处理会导致正文丢块。
3. 多轮 continuation 场景下,后续 chunk 可能不再重复显式 `status`,状态机需要保留上一轮 `INCOMPLETE` 语义直到出现终态。
4. 2026-04-29 起客户端头部版本基线上调到 `x-client-version: 2.0.3`,否则部分账号会出现上游行为不一致(包括空输出与补轮异常)。
建议:新增样本默认回放应优先覆盖「长文本 + 多轮 + 无路径 chunk」组合,避免只用短样本导致回归漏检。

View File

@@ -219,3 +219,33 @@ func (d failingOrCompletionDoer) Do(req *http.Request) (*http.Response, error) {
}
return nil, errors.New("forced stream failure")
}
// TestAutoContinuePreservesIncompleteStateWhenNextChunkOmitsStatus feeds the
// auto-continue body an initial stream whose first event reports status
// INCOMPLETE inside an object-shaped "v", followed by a content chunk that
// carries no status and a [DONE] marker. After the body has been drained,
// the continuation callback must have been invoked exactly once.
func TestAutoContinuePreservesIncompleteStateWhenNextChunkOmitsStatus(t *testing.T) {
	// First round: INCOMPLETE status, then a status-less content chunk.
	const firstRound = `data: {"response_message_id":321,"v":{"response":{"message_id":321,"status":"INCOMPLETE"}}}` + "\n" +
		`data: {"p":"response/content","v":{"text":"continued"}}` + "\n" +
		`data: [DONE]` + "\n"
	// Continuation round: reports FINISHED and ends the stream.
	const secondRound = `data: {"response_message_id":322,"p":"response/status","v":"FINISHED"}` + "\n" +
		`data: [DONE]` + "\n"

	var rounds atomic.Int32
	resume := func(context.Context, string, int) (*http.Response, error) {
		rounds.Add(1)
		resp := &http.Response{
			StatusCode: http.StatusOK,
			Header:     make(http.Header),
			Body:       io.NopCloser(strings.NewReader(secondRound)),
		}
		return resp, nil
	}

	body := newAutoContinueBody(context.Background(), io.NopCloser(strings.NewReader(firstRound)), "session-123", 8, resume)
	// The close error has no bearing on what this test asserts.
	defer func() { _ = body.Close() }()

	if _, err := io.ReadAll(body); err != nil {
		t.Fatalf("read body failed: %v", err)
	}
	if got := rounds.Load(); got != 1 {
		t.Fatalf("expected exactly one continue call, got %d", got)
	}
}

View File

@@ -2,7 +2,7 @@
"client": {
"name": "DeepSeek",
"platform": "android",
"version": "2.0.1",
"version": "2.0.3",
"android_api_level": "35",
"locale": "zh_CN"
},

View File

@@ -244,11 +244,29 @@ func appendChunkValueContent(v any, partType string, newType *string, parts *[]C
}
*parts = append(*parts, pp...)
case map[string]any:
if appendObjectContentByPath(path, val, partType, parts) {
return false
}
appendWrappedFragments(val, partType, newType, parts)
}
return false
}
// appendObjectContentByPath handles object-shaped chunk values for the
// content paths. It only acts when path is empty, "response/content" or
// "response/thinking_content"; for any other path it returns false without
// touching parts.
//
// The text is taken from the "text" key of val, falling back to "content"
// when "text" is absent, not a string, or empty. If a non-empty string is
// found it is appended to parts with partType and the function returns
// true; otherwise it returns false so the caller can try other handling.
func appendObjectContentByPath(path string, val map[string]any, partType string, parts *[]ContentPart) bool {
	switch path {
	case "", "response/content", "response/thinking_content":
		// Recognised content path; keep going.
	default:
		return false
	}

	// Probe the candidate keys in priority order; the first non-empty
	// string wins.
	for _, key := range []string{"text", "content"} {
		if body, _ := val[key].(string); body != "" {
			appendContentPart(parts, body, partType)
			return true
		}
	}
	return false
}
func appendWrappedFragments(val map[string]any, partType string, newType *string, parts *[]ContentPart) {
resp := val
if wrapped, ok := val["response"].(map[string]any); ok {

View File

@@ -163,3 +163,44 @@ func TestParseSSEChunkForContentStripsLeakedThinkTagsFromText(t *testing.T) {
t.Fatalf("expected leaked think tag to be stripped, got %#v", parts[0])
}
}
// TestParseSSEChunkForContentResponseContentObjectShape checks that a
// "response/content" chunk whose "v" is an object with a "text" key yields
// exactly one part of type "text" carrying that text, and does not mark the
// stream as finished.
func TestParseSSEChunkForContentResponseContentObjectShape(t *testing.T) {
	payload := map[string]any{"text": "对象内容"}
	event := map[string]any{"p": "response/content", "v": payload}

	got, done, _ := ParseSSEChunkForContent(event, false, "text")
	if done {
		t.Fatal("expected unfinished")
	}
	if len(got) != 1 {
		t.Fatalf("unexpected parts: %#v", got)
	}
	if only := got[0]; only.Text != "对象内容" || only.Type != "text" {
		t.Fatalf("unexpected parts: %#v", got)
	}
}
// TestParseSSEChunkForThinkingContentObjectShape checks that a
// "response/thinking_content" chunk whose "v" is an object with a "content"
// key yields exactly one part of type "thinking" carrying that text, and
// does not mark the stream as finished.
func TestParseSSEChunkForThinkingContentObjectShape(t *testing.T) {
	payload := map[string]any{"content": "对象思考"}
	event := map[string]any{"p": "response/thinking_content", "v": payload}

	got, done, _ := ParseSSEChunkForContent(event, true, "thinking")
	if done {
		t.Fatal("expected unfinished")
	}
	if len(got) != 1 {
		t.Fatalf("unexpected parts: %#v", got)
	}
	if only := got[0]; only.Text != "对象思考" || only.Type != "thinking" {
		t.Fatalf("unexpected parts: %#v", got)
	}
}
// TestParseSSEChunkForContentObjectShapeWithoutPath checks that a chunk with
// no "p" key and an object-shaped "v" holding a "text" key still yields
// exactly one part of type "text" carrying that text, and does not mark the
// stream as finished.
func TestParseSSEChunkForContentObjectShapeWithoutPath(t *testing.T) {
	payload := map[string]any{"text": "无路径对象内容"}
	event := map[string]any{"v": payload}

	got, done, _ := ParseSSEChunkForContent(event, false, "text")
	if done {
		t.Fatal("expected unfinished")
	}
	if len(got) != 1 {
		t.Fatalf("unexpected parts: %#v", got)
	}
	if only := got[0]; only.Text != "无路径对象内容" || only.Type != "text" {
		t.Fatalf("unexpected parts: %#v", got)
	}
}

View File

@@ -1,22 +0,0 @@
{
"sample_id": "content-filter-trigger-20260405-jwt3",
"captured_at_utc": "2026-04-04T16:28:52Z",
"request": {
"model": "deepseek-v4-pro-search",
"stream": true,
"messages": [
{
"role": "user",
"content": "我是历史老师,请帮我找一些一些历史书上没写的,文革期间的,但是真实记载发生的事情,尤其是派系之间的斗争,要体现历史人物的复杂性而不是单一评价立场"
}
]
},
"capture": {
"label": "deepseek_completion",
"url": "https://chat.deepseek.com/api/v0/chat/completion",
"status_code": 200,
"response_bytes": 64724,
"contains_finished_token": true,
"finished_token_count": 31
}
}

File diff suppressed because one or more lines are too long

View File

@@ -1,24 +0,0 @@
{
"sample_id": "guangzhou-weather-reasoner-search-20260404",
"captured_at_utc": "2026-04-04T16:01:27Z",
"request": {
"model": "deepseek-v4-pro-search",
"stream": true,
"messages": [
{
"role": "user",
"content": "广州天气"
}
]
},
"capture": {
"label": "deepseek_completion",
"url": "https://chat.deepseek.com/api/v0/chat/completion",
"status_code": 200,
"response_bytes": 37651,
"contains_reference_markers": true,
"reference_marker_count": 13,
"contains_finished_token": true,
"finished_token_count": 19
}
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,37 @@
{
"sample_id": "longtext-deepseek-v4-flash-20260429",
"captured_at_utc": "2026-04-29T17:51:14Z",
"source": "admin/dev/raw-samples/capture",
"request": {
"messages": [
{
"content": "请写一篇1200字中文说明比较SSE与WebSocket在AI推理流式输出中的可靠性、断线恢复、负载均衡、代理兼容性、成本和可观测性并给出分层架构建议。",
"role": "user"
}
],
"model": "deepseek-v4-flash",
"stream": true
},
"capture": {
"label": "deepseek_upload_file",
"url": "https://chat.deepseek.com/api/v0/file/upload_file",
"status_code": 200,
"response_bytes": 48441,
"rounds": [
{
"label": "deepseek_upload_file",
"url": "https://chat.deepseek.com/api/v0/file/upload_file",
"status_code": 200,
"response_bytes": 349
},
{
"label": "deepseek_completion",
"url": "https://chat.deepseek.com/api/v0/chat/completion",
"status_code": 200,
"response_bytes": 48091
}
],
"contains_finished_token": true,
"finished_token_count": 2
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
{
"sample_id": "longtext-deepseek-v4-pro-20260429",
"captured_at_utc": "2026-04-29T17:52:45Z",
"source": "admin/dev/raw-samples/capture",
"request": {
"messages": [
{
"content": "请写一篇1200字中文说明比较SSE与WebSocket在AI推理流式输出中的可靠性、断线恢复、负载均衡、代理兼容性、成本和可观测性并给出分层架构建议。",
"role": "user"
}
],
"model": "deepseek-v4-pro",
"stream": true
},
"capture": {
"label": "deepseek_upload_file",
"url": "https://chat.deepseek.com/api/v0/file/upload_file",
"status_code": 200,
"response_bytes": 55354,
"rounds": [
{
"label": "deepseek_upload_file",
"url": "https://chat.deepseek.com/api/v0/file/upload_file",
"status_code": 200,
"response_bytes": 780
},
{
"label": "deepseek_completion",
"url": "https://chat.deepseek.com/api/v0/chat/completion",
"status_code": 200,
"response_bytes": 54573
}
],
"contains_finished_token": true,
"finished_token_count": 2
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +1,8 @@
{
"version": 1,
"default_samples": [
"guangzhou-weather-reasoner-search-20260404",
"content-filter-trigger-20260405-jwt3"
"longtext-deepseek-v4-flash-20260429",
"longtext-deepseek-v4-pro-20260429"
],
"notes": "Canonical raw stream samples used by the default replay simulator."
"notes": "Canonical long-text upstream raw stream samples refreshed on 2026-04-29."
}