// Package rawsample persists captured upstream response bodies to disk,
// each in its own sample directory alongside a JSON metadata file.
package rawsample

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"

	"github.com/google/uuid"
)

// referenceMarkerRe matches markers of the form "[reference: 12]",
// case-insensitively, with optional whitespace after the colon.
var referenceMarkerRe = regexp.MustCompile(`(?i)\[reference:\s*\d+\]`)

// CaptureRound holds the label, URL, status code and response size recorded
// for a single entry of CaptureSummary.Rounds.
type CaptureRound struct {
	Label         string `json:"label,omitempty"`
	URL           string `json:"url,omitempty"`
	StatusCode    int    `json:"status_code"`
	ResponseBytes int    `json:"response_bytes"`
}

// CaptureSummary describes a capture as stored in the "capture" field of Meta.
//
// Persist overwrites ResponseBytes, ContainsReferenceMarkers,
// ReferenceMarkerCount, ContainsFinishedToken and FinishedTokenCount with
// values derived from the upstream body; the remaining fields are stored as
// supplied by the caller.
type CaptureSummary struct {
	Label                    string         `json:"label,omitempty"`
	URL                      string         `json:"url,omitempty"`
	StatusCode               int            `json:"status_code"`
	ResponseBytes            int            `json:"response_bytes"`
	Rounds                   []CaptureRound `json:"rounds,omitempty"`
	ContainsReferenceMarkers bool           `json:"contains_reference_markers,omitempty"`
	ReferenceMarkerCount     int            `json:"reference_marker_count,omitempty"`
	ContainsFinishedToken    bool           `json:"contains_finished_token,omitempty"`
	FinishedTokenCount       int            `json:"finished_token_count,omitempty"`
}

// Meta is the document written to meta.json inside a sample directory.
type Meta struct {
	SampleID      string         `json:"sample_id"`
	CapturedAtUTC string         `json:"captured_at_utc"` // RFC 3339, UTC
	Source        string         `json:"source,omitempty"`
	Request       any            `json:"request"`
	Capture       CaptureSummary `json:"capture"`
}

// PersistOptions is the input to Persist.
type PersistOptions struct {
	// RootDir is the directory under which the sample directory is created.
	// It is required and is created if it does not exist.
	RootDir string
	// SampleID is the requested sample name. It is passed through
	// NormalizeSampleID; when the result is empty a default ID is generated.
	SampleID string
	// Source is stored in Meta.Source after trimming surrounding whitespace.
	Source string
	// Request is stored verbatim in Meta.Request and must be JSON-encodable.
	Request any
	// Capture is the caller-supplied summary; see CaptureSummary for the
	// fields Persist recomputes.
	Capture CaptureSummary
	// UpstreamBody is the raw upstream payload. It is required and must be
	// non-empty.
	UpstreamBody []byte
}

// SavedSample reports where Persist stored a sample and the metadata it wrote.
type SavedSample struct {
	SampleID     string
	Dir          string
	MetaPath     string
	UpstreamPath string
	Meta         Meta
}

// Persist writes opts.UpstreamBody and a meta.json describing it into a new
// directory under opts.RootDir and returns the resulting locations.
//
// The sample ID is derived from opts.SampleID (or generated when that
// normalizes to empty) and suffixed with "-2", "-3", ... if a directory of
// that name already exists. Both files are first written into a dot-prefixed
// temporary directory, which is only then renamed to its final name; on any
// failure after the temporary directory has been created it is removed.
//
// An error is returned when RootDir or UpstreamBody is empty, or when any
// filesystem or JSON encoding step fails.
func Persist(opts PersistOptions) (SavedSample, error) {
	const (
		upstreamFileName = "upstream.stream.sse"
		metaFileName     = "meta.json"
	)

	root := strings.TrimSpace(opts.RootDir)
	if root == "" {
		return SavedSample{}, errors.New("root dir is required")
	}
	if len(opts.UpstreamBody) == 0 {
		return SavedSample{}, errors.New("upstream body is required")
	}
	if err := os.MkdirAll(root, 0o755); err != nil {
		return SavedSample{}, fmt.Errorf("create root dir: %w", err)
	}

	baseID := NormalizeSampleID(opts.SampleID)
	if baseID == "" {
		baseID = DefaultSampleID("capture")
	}
	sampleID, err := uniqueSampleID(root, baseID)
	if err != nil {
		return SavedSample{}, err
	}

	// The random suffix keeps concurrent writers that picked the same sample
	// ID from sharing a temporary directory.
	tempID := ".tmp-" + sampleID + "-" + strings.ToLower(strings.ReplaceAll(uuid.NewString(), "-", ""))
	tempDir := filepath.Join(root, tempID)
	finalDir := filepath.Join(root, sampleID)
	if err := os.MkdirAll(tempDir, 0o755); err != nil {
		return SavedSample{}, fmt.Errorf("create temp dir: %w", err)
	}

	promoted := false
	defer func() {
		if promoted {
			return
		}
		// Best-effort cleanup: the caller already receives the primary
		// error, and a leftover temp dir cannot collide with a sample ID.
		_ = os.RemoveAll(tempDir)
	}()

	if err := os.WriteFile(filepath.Join(tempDir, upstreamFileName), opts.UpstreamBody, 0o644); err != nil {
		return SavedSample{}, fmt.Errorf("write upstream stream: %w", err)
	}

	now := time.Now().UTC()
	capture := opts.Capture
	capture.ResponseBytes = len(opts.UpstreamBody)
	capture.ContainsReferenceMarkers,
		capture.ReferenceMarkerCount,
		capture.ContainsFinishedToken,
		capture.FinishedTokenCount = analyzeBytes(opts.UpstreamBody)

	meta := Meta{
		SampleID:      sampleID,
		CapturedAtUTC: now.Format(time.RFC3339),
		Source:        strings.TrimSpace(opts.Source),
		Request:       opts.Request,
		Capture:       capture,
	}
	metaBytes, err := json.MarshalIndent(meta, "", " ")
	if err != nil {
		return SavedSample{}, fmt.Errorf("marshal meta: %w", err)
	}
	if err := os.WriteFile(filepath.Join(tempDir, metaFileName), append(metaBytes, '\n'), 0o644); err != nil {
		return SavedSample{}, fmt.Errorf("write meta: %w", err)
	}

	if err := os.Rename(tempDir, finalDir); err != nil {
		return SavedSample{}, fmt.Errorf("promote sample dir: %w", err)
	}
	promoted = true

	return SavedSample{
		SampleID:     sampleID,
		Dir:          finalDir,
		MetaPath:     filepath.Join(finalDir, metaFileName),
		UpstreamPath: filepath.Join(finalDir, upstreamFileName),
		Meta:         meta,
	}, nil
}

// NormalizeSampleID lower-cases and trims raw, keeps the characters a-z, 0-9,
// '-', '_' and '.', and replaces every run of other characters with a single
// '-'. Leading and trailing '-', '_' and '.' are then stripped.
//
// Literal '-' characters in the input are kept as-is, so a replacement dash
// next to a literal one is not collapsed. The result is empty when nothing
// usable remains.
func NormalizeSampleID(raw string) string {
	raw = strings.TrimSpace(strings.ToLower(raw))
	if raw == "" {
		return ""
	}

	var b strings.Builder
	b.Grow(len(raw))
	prevDash := false // true while inside a run of replaced characters
	for _, r := range raw {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '-', r == '_', r == '.':
			b.WriteRune(r)
			prevDash = false
		default:
			if !prevDash {
				b.WriteByte('-')
				prevDash = true
			}
		}
	}
	return strings.Trim(b.String(), "-_.")
}

// DefaultSampleID returns "<prefix>-<timestamp>", where prefix is normalized
// with NormalizeSampleID (falling back to "capture" when that is empty) and
// timestamp is the current UTC time formatted as 20060102T150405Z.
func DefaultSampleID(prefix string) string {
	prefix = NormalizeSampleID(prefix)
	if prefix == "" {
		prefix = "capture"
	}
	return fmt.Sprintf("%s-%s", prefix, time.Now().UTC().Format("20060102T150405Z"))
}

// uniqueSampleID returns the first of base, base-2, base-3, ... for which no
// entry exists under root. An empty base is replaced by a default ID. Any
// stat failure other than "does not exist" is returned as an error.
func uniqueSampleID(root, base string) (string, error) {
	if base == "" {
		base = DefaultSampleID("capture")
	}
	candidate := base
	for i := 2; ; i++ {
		_, err := os.Stat(filepath.Join(root, candidate))
		switch {
		case err == nil:
			// Name is taken; try the next numeric suffix.
			candidate = fmt.Sprintf("%s-%d", base, i)
		case errors.Is(err, os.ErrNotExist):
			return candidate, nil
		default:
			return "", fmt.Errorf("stat sample dir: %w", err)
		}
	}
}

// analyzeBytes counts the reference markers (see referenceMarkerRe) and the
// case-insensitive occurrences of "FINISHED" in raw, and reports for each
// whether the count is non-zero. It works on the byte slice directly, without
// first converting the body to a string.
func analyzeBytes(raw []byte) (containsReferenceMarkers bool, referenceMarkerCount int, containsFinishedToken bool, finishedTokenCount int) {
	if len(raw) == 0 {
		return false, 0, false, 0
	}
	referenceMarkerCount = len(referenceMarkerRe.FindAllIndex(raw, -1))
	finishedTokenCount = bytes.Count(bytes.ToUpper(raw), []byte("FINISHED"))
	return referenceMarkerCount > 0, referenceMarkerCount, finishedTokenCount > 0, finishedTokenCount
}