From e0ed4ba238ed7fd93b9f54d0747742f5cd16dd7a Mon Sep 17 00:00:00 2001 From: "CJACK." <155826701+CJackHwang@users.noreply.github.com> Date: Mon, 20 Apr 2026 18:29:58 +0800 Subject: [PATCH] Handle one-based and zero-based citation indices safely --- internal/sse/citation_links.go | 86 +++++++++++++++++++++++------- internal/sse/consumer_edge_test.go | 16 ++++++ 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/internal/sse/citation_links.go b/internal/sse/citation_links.go index e20314c..17a6dbb 100644 --- a/internal/sse/citation_links.go +++ b/internal/sse/citation_links.go @@ -6,15 +6,16 @@ import ( ) type citationLinkCollector struct { - ordered []string - seen map[string]struct{} - explicit map[int]string + ordered []string + seen map[string]struct{} + explicitRaw map[int]string + hasZeroIdx bool } func newCitationLinkCollector() *citationLinkCollector { return &citationLinkCollector{ - seen: map[string]struct{}{}, - explicit: map[int]string{}, + seen: map[string]struct{}{}, + explicitRaw: map[int]string{}, } } @@ -26,11 +27,9 @@ func (c *citationLinkCollector) ingestChunk(chunk map[string]any) { } func (c *citationLinkCollector) build() map[int]string { - out := make(map[int]string, len(c.explicit)+len(c.ordered)) - for idx, u := range c.explicit { - if idx > 0 && strings.TrimSpace(u) != "" { - out[idx] = u - } + out := make(map[int]string, len(c.explicitRaw)+len(c.ordered)) + for idx, u := range c.buildNormalizedExplicit() { + out[idx] = u } for i, u := range c.ordered { idx := i + 1 @@ -41,6 +40,57 @@ func (c *citationLinkCollector) build() map[int]string { return out } +func (c *citationLinkCollector) buildNormalizedExplicit() map[int]string { + out := make(map[int]string, len(c.explicitRaw)) + + // Default behavior keeps positive indices as-is (one-based payloads). + for idx, u := range c.explicitRaw { + if idx <= 0 || strings.TrimSpace(u) == "" { + continue + } + out[idx] = u + } + + if !c.hasZeroIdx { + return out + } + + // If zero index appears, upstream may be using zero-based indices. + // Add shifted candidates and resolve conflicts using ordered appearance, + // which matches visible citation marker order in response text. + for rawIdx, u := range c.explicitRaw { + if rawIdx < 0 || strings.TrimSpace(u) == "" { + continue + } + normalized := rawIdx + 1 + existing, exists := out[normalized] + if !exists { + out[normalized] = u + continue + } + if c.preferURLForIndex(normalized, existing, u) == u { + out[normalized] = u + } + } + + return out +} + +func (c *citationLinkCollector) preferURLForIndex(idx int, current, candidate string) string { + if idx <= 0 || idx > len(c.ordered) { + return current + } + expected := c.ordered[idx-1] + switch { + case strings.TrimSpace(expected) == "": + return current + case candidate == expected && current != expected: + return candidate + default: + return current + } +} + func (c *citationLinkCollector) walkValue(v any) { switch x := v.(type) { case []any: @@ -66,20 +116,16 @@ func (c *citationLinkCollector) captureURLAndIndex(m map[string]any) { if !hasIdx { return } - // DeepSeek citation indices in search results are zero-based (0,1,2,...), - // while visible markers are one-based ([citation:1], [citation:2], ...). - // Normalize all non-negative explicit indices to one-based to avoid - // misalignment when 3+ citations are present. - if idx >= 0 { - idx = idx + 1 - } - if idx <= 0 { + if idx < 0 { return } - if existing, ok := c.explicit[idx]; ok && strings.TrimSpace(existing) != "" { + if idx == 0 { + c.hasZeroIdx = true + } + if existing, ok := c.explicitRaw[idx]; ok && strings.TrimSpace(existing) != "" { return } - c.explicit[idx] = url + c.explicitRaw[idx] = url } func (c *citationLinkCollector) addOrdered(url string) { diff --git a/internal/sse/consumer_edge_test.go b/internal/sse/consumer_edge_test.go index 700e193..9b49788 100644 --- a/internal/sse/consumer_edge_test.go +++ b/internal/sse/consumer_edge_test.go @@ -150,6 +150,22 @@ func TestCollectStreamExtractsCitationLinksForSequentialZeroBasedIndices(t *test } } +func TestCollectStreamExtractsCitationLinksForOneBasedIndices(t *testing.T) { + resp := makeHTTPResponse( + "data: {\"p\":\"response/fragments/-1/results\",\"v\":[{\"url\":\"https://example.com/a\",\"cite_index\":1},{\"url\":\"https://example.com/b\",\"cite_index\":2}]}\n" + + "data: {\"p\":\"response/content\",\"v\":\"结论[citation:1][citation:2]\"}\n" + + "data: [DONE]\n", + ) + result := CollectStream(resp, false, false) + + if got := result.CitationLinks[1]; got != "https://example.com/a" { + t.Fatalf("expected citation 1 link, got %q", got) + } + if got := result.CitationLinks[2]; got != "https://example.com/b" { + t.Fatalf("expected citation 2 link, got %q", got) + } +} + func TestCollectStreamMultipleThinkingChunks(t *testing.T) { resp := makeHTTPResponse( "data: {\"p\":\"response/thinking_content\",\"v\":\"part1\"}\n" +