From aff7b907c7afed38675ab0ef19facf1687b70836 Mon Sep 17 00:00:00 2001
From: perf3ct
Date: Tue, 29 Jul 2025 21:27:54 +0000
Subject: [PATCH] fix(db): backfill data for sources given missing counts

---
 ...0729000001_backfill_document_source_id.sql | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 migrations/20250729000001_backfill_document_source_id.sql

diff --git a/migrations/20250729000001_backfill_document_source_id.sql b/migrations/20250729000001_backfill_document_source_id.sql
new file mode 100644
--- /dev/null
+++ b/migrations/20250729000001_backfill_document_source_id.sql
@@ -0,0 +1,47 @@
+-- Backfill source_id and source_type for existing documents.
+-- This migration fixes documents that were ingested before proper source
+-- tracking was implemented.
+
+-- Step 1: link unlinked documents to one of their owner's WebDAV sources.
+-- A document qualifies when source_id IS NULL and its source_metadata JSON
+-- carries a 'source_path' key. Matching is by user ownership only.
+--
+-- A user may own several WebDAV sources. UPDATE ... FROM would then join one
+-- document to several source rows and PostgreSQL would pick one of them
+-- unpredictably, so the CTE first reduces the candidates to exactly one source
+-- per user (the lowest id), making the backfill deterministic.
+WITH webdav_source_per_user AS (
+    SELECT DISTINCT ON (s.user_id)
+        s.user_id,
+        s.id AS source_id
+    FROM sources AS s
+    WHERE s.source_type = 'webdav'
+    ORDER BY s.user_id, s.id
+)
+UPDATE documents AS d
+SET
+    source_id = w.source_id,
+    source_type = 'webdav'
+FROM webdav_source_per_user AS w
+WHERE
+    d.user_id = w.user_id
+    AND d.source_metadata->>'source_path' IS NOT NULL
+    AND d.source_id IS NULL;
+
+-- Step 2: fill in a missing source_type for documents that already point at a
+-- WebDAV source and have a source_path, likely rows written by older ingestion
+-- code that populated source_path and source_id but not source_type.
+UPDATE documents AS d
+SET source_type = 'webdav'
+WHERE
+    d.source_path IS NOT NULL
+    AND d.source_type IS NULL
+    AND EXISTS (
+        SELECT 1
+        FROM sources AS s
+        WHERE s.id = d.source_id
+          AND s.source_type = 'webdav'
+    );
+
+-- Record the backfill on the column itself.
+COMMENT ON COLUMN documents.source_id IS 'References the source that this document was ingested from. Backfilled for existing documents on 2025-07-29.';
\ No newline at end of file