本次修复了搜索场景下 citation 标签未完全映射的问题。根因是 citation 顺序收集阶段对 URL 做了去重,导致当上游返回重复来源且 cite_index 缺失或不稳定时,位置索引被压缩,部分 [citation:x] 无法找到对应链接。修复后改为保留上游结果的原始顺序(包括重复 URL),从而保证按位置回填 citation 时不会丢号。

This commit is contained in:
songguoliang
2026-04-21 10:52:17 +08:00
parent 31e64ff31d
commit 8f01aa224c
2 changed files with 19 additions and 6 deletions

View File

@@ -7,14 +7,12 @@ import (
// citationLinkCollector holds the state gathered while citation links are
// collected: a positional list of URLs, a string set, an index-to-URL map,
// and a flag.
type citationLinkCollector struct {
	// ordered lists URLs in the order addOrdered appended them.
	ordered []string

	// seen is a string set (empty-struct values) allocated by
	// newCitationLinkCollector.
	seen map[string]struct{}

	// explicitRaw maps an integer index to a URL string.
	// NOTE(review): presumably keyed by the upstream cite_index — confirm
	// against captureURLAndIndex.
	explicitRaw map[int]string

	// hasZeroIdx is a boolean flag.
	// NOTE(review): presumably records that a zero index was observed
	// (zero-based vs one-based numbering) — confirm against its writers.
	hasZeroIdx bool
}
// newCitationLinkCollector returns a collector whose seen and explicitRaw
// maps are allocated and empty, so they can be written to immediately.
// ordered is left nil and hasZeroIdx false.
func newCitationLinkCollector() *citationLinkCollector {
	collector := new(citationLinkCollector)
	collector.seen = make(map[string]struct{})
	collector.explicitRaw = make(map[int]string)
	return collector
}
@@ -129,10 +127,6 @@ func (c *citationLinkCollector) captureURLAndIndex(m map[string]any) {
}
// addOrdered appends url to c.ordered, keeping the order in which results
// arrive.
//
// Repeated URLs are kept on purpose. Entries in c.ordered are looked up by
// position when [citation:N] tags are back-filled, so dropping a duplicate
// would shift every later entry down by one and leave the highest-numbered
// citations without a link (for example, results a, a, b must fill slots
// 1, 2 and 3, not 1 and 2).
func (c *citationLinkCollector) addOrdered(url string) {
	c.ordered = append(c.ordered, url)
}

View File

@@ -166,6 +166,25 @@ func TestCollectStreamExtractsCitationLinksForOneBasedIndices(t *testing.T) {
}
}
// TestCollectStreamExtractsCitationLinksWithRepeatedURLsAndNilIndices feeds
// CollectStream three results whose cite_index is null and whose first two
// URLs are identical, followed by content that references citations 1-3.
// It expects CitationLinks[1] and [2] to be the repeated URL and
// CitationLinks[3] to be the third result's URL.
func TestCollectStreamExtractsCitationLinksWithRepeatedURLsAndNilIndices(t *testing.T) {
	stream := makeHTTPResponse(
		"data: {\"p\":\"response/fragments/-1/results\",\"v\":[{\"url\":\"https://example.com/a\",\"cite_index\":null},{\"url\":\"https://example.com/a\",\"cite_index\":null},{\"url\":\"https://example.com/b\",\"cite_index\":null}]}\n" +
			"data: {\"p\":\"response/content\",\"v\":\"结论[citation:1][citation:2][citation:3]\"}\n" +
			"data: [DONE]\n",
	)

	links := CollectStream(stream, false, false).CitationLinks

	// Checks run in citation order and stop at the first mismatch.
	link := links[1]
	if link != "https://example.com/a" {
		t.Fatalf("expected citation 1 link, got %q", link)
	}

	link = links[2]
	if link != "https://example.com/a" {
		t.Fatalf("expected citation 2 link, got %q", link)
	}

	link = links[3]
	if link != "https://example.com/b" {
		t.Fatalf("expected citation 3 link, got %q", link)
	}
}
func TestCollectStreamMultipleThinkingChunks(t *testing.T) {
resp := makeHTTPResponse(
"data: {\"p\":\"response/thinking_content\",\"v\":\"part1\"}\n" +