Handle one-based and zero-based citation indices safely

This commit is contained in:
CJACK.
2026-04-20 18:29:58 +08:00
parent ae37654893
commit e0ed4ba238
2 changed files with 82 additions and 20 deletions

View File

@@ -6,15 +6,16 @@ import (
)
type citationLinkCollector struct {
ordered []string
seen map[string]struct{}
explicit map[int]string
ordered []string
seen map[string]struct{}
explicitRaw map[int]string
hasZeroIdx bool
}
func newCitationLinkCollector() *citationLinkCollector {
return &citationLinkCollector{
seen: map[string]struct{}{},
explicit: map[int]string{},
seen: map[string]struct{}{},
explicitRaw: map[int]string{},
}
}
@@ -26,11 +27,9 @@ func (c *citationLinkCollector) ingestChunk(chunk map[string]any) {
}
func (c *citationLinkCollector) build() map[int]string {
out := make(map[int]string, len(c.explicit)+len(c.ordered))
for idx, u := range c.explicit {
if idx > 0 && strings.TrimSpace(u) != "" {
out[idx] = u
}
out := make(map[int]string, len(c.explicitRaw)+len(c.ordered))
for idx, u := range c.buildNormalizedExplicit() {
out[idx] = u
}
for i, u := range c.ordered {
idx := i + 1
@@ -41,6 +40,57 @@ func (c *citationLinkCollector) build() map[int]string {
return out
}
// buildNormalizedExplicit turns the raw explicit index→URL pairs held in
// explicitRaw into a freshly allocated map keyed by citation number.
//
// Raw entries with a positive index and a non-blank URL are always kept under
// their own index. When hasZeroIdx is set (a raw index of 0 was recorded, so
// the payload may be zero-based), every non-negative raw index is additionally
// offered one slot higher: it fills that slot when it is empty, and displaces
// the current occupant only when preferURLForIndex picks the shifted URL.
func (c *citationLinkCollector) buildNormalizedExplicit() map[int]string {
	normalized := make(map[int]string, len(c.explicitRaw))

	// Pass 1: take positive indices verbatim (one-based interpretation).
	for rawIdx, link := range c.explicitRaw {
		if rawIdx > 0 && strings.TrimSpace(link) != "" {
			normalized[rawIdx] = link
		}
	}
	if !c.hasZeroIdx {
		return normalized
	}

	// Pass 2: offer each entry at rawIdx+1 (zero-based interpretation).
	// Distinct raw indices map to distinct slots, so each slot is written at
	// most once here and the outcome does not depend on map iteration order.
	for rawIdx, link := range c.explicitRaw {
		if rawIdx < 0 || strings.TrimSpace(link) == "" {
			continue
		}
		slot := rawIdx + 1
		if occupant, taken := normalized[slot]; taken && c.preferURLForIndex(slot, occupant, link) != link {
			// The slot is already filled and the tie-break kept the occupant.
			continue
		}
		normalized[slot] = link
	}
	return normalized
}
// preferURLForIndex decides which of two URLs competing for the one-based
// citation number idx should be kept.
//
// The URL recorded at position idx-1 of c.ordered acts as the tie-breaker:
// candidate is returned only when it equals that URL and current does not.
// In every other case (idx outside c.ordered, a blank ordered entry, or no
// match for candidate) current is returned unchanged.
func (c *citationLinkCollector) preferURLForIndex(idx int, current, candidate string) string {
	pos := idx - 1
	if pos < 0 || pos >= len(c.ordered) {
		return current
	}

	want := c.ordered[pos]
	if strings.TrimSpace(want) == "" {
		// No usable ordering information for this position.
		return current
	}
	if candidate == want && current != want {
		return candidate
	}
	return current
}
func (c *citationLinkCollector) walkValue(v any) {
switch x := v.(type) {
case []any:
@@ -66,20 +116,16 @@ func (c *citationLinkCollector) captureURLAndIndex(m map[string]any) {
if !hasIdx {
return
}
// DeepSeek citation indices in search results are zero-based (0,1,2,...),
// while visible markers are one-based ([citation:1], [citation:2], ...).
// Normalize all non-negative explicit indices to one-based to avoid
// misalignment when 3+ citations are present.
if idx >= 0 {
idx = idx + 1
}
if idx <= 0 {
if idx < 0 {
return
}
if existing, ok := c.explicit[idx]; ok && strings.TrimSpace(existing) != "" {
if idx == 0 {
c.hasZeroIdx = true
}
if existing, ok := c.explicitRaw[idx]; ok && strings.TrimSpace(existing) != "" {
return
}
c.explicit[idx] = url
c.explicitRaw[idx] = url
}
func (c *citationLinkCollector) addOrdered(url string) {

View File

@@ -150,6 +150,22 @@ func TestCollectStreamExtractsCitationLinksForSequentialZeroBasedIndices(t *test
}
}
func TestCollectStreamExtractsCitationLinksForOneBasedIndices(t *testing.T) {
resp := makeHTTPResponse(
"data: {\"p\":\"response/fragments/-1/results\",\"v\":[{\"url\":\"https://example.com/a\",\"cite_index\":1},{\"url\":\"https://example.com/b\",\"cite_index\":2}]}\n" +
"data: {\"p\":\"response/content\",\"v\":\"结论[citation:1][citation:2]\"}\n" +
"data: [DONE]\n",
)
result := CollectStream(resp, false, false)
if got := result.CitationLinks[1]; got != "https://example.com/a" {
t.Fatalf("expected citation 1 link, got %q", got)
}
if got := result.CitationLinks[2]; got != "https://example.com/b" {
t.Fatalf("expected citation 2 link, got %q", got)
}
}
func TestCollectStreamMultipleThinkingChunks(t *testing.T) {
resp := makeHTTPResponse(
"data: {\"p\":\"response/thinking_content\",\"v\":\"part1\"}\n" +