-- Generic Source Scan Failures Tracking System
-- This migration creates a comprehensive failure tracking system for all source types (WebDAV, S3, Local Filesystem)

-- Create enum for generic source types
-- Use DO block to handle existing type gracefully
DO $$ BEGIN
    CREATE TYPE source_error_source_type AS ENUM (
        'webdav',    -- WebDAV/CalDAV servers
        's3',        -- S3-compatible object storage
        'local',     -- Local filesystem folders
        'dropbox',   -- Future: Dropbox integration
        'gdrive',    -- Future: Google Drive integration
        'onedrive'   -- Future: OneDrive integration
    );
EXCEPTION
    WHEN duplicate_object THEN null;
END $$;

-- Create enum for generic error types
DO $$ BEGIN
    CREATE TYPE source_error_type AS ENUM (
        'timeout',                -- Request or operation took too long
        'permission_denied',      -- Access denied or authentication failure
        'network_error',          -- Network connectivity issues
        'server_error',           -- Server returned error (404, 500, etc.)
        'path_too_long',          -- Path exceeds filesystem or protocol limits
        'invalid_characters',     -- Invalid characters in path/filename
        'too_many_items',         -- Directory has too many items
        'depth_limit',            -- Directory depth exceeds limit
        'size_limit',             -- File or directory size exceeds limit
        'xml_parse_error',        -- Malformed XML response (WebDAV specific)
        'json_parse_error',       -- Malformed JSON response (S3/API specific)
        'quota_exceeded',         -- Storage quota exceeded
        'rate_limited',           -- API rate limit exceeded
        'not_found',              -- Resource not found
        'conflict',               -- Conflict with existing resource
        'unsupported_operation',  -- Operation not supported by source
        'unknown'                 -- Unknown error type
    );
EXCEPTION
    WHEN duplicate_object THEN null;
END $$;

-- Create enum for error severity levels
DO $$ BEGIN
    CREATE TYPE source_error_severity AS ENUM (
        'low',      -- Can be retried, likely temporary (network issues)
        'medium',   -- May succeed with adjustments (timeouts, server errors)
        'high',     -- Unlikely to succeed without intervention (permissions, too many items)
        'critical'  -- Will never succeed, permanent issue (path too long, invalid characters)
    );
EXCEPTION
    WHEN duplicate_object THEN null;
END $$;

-- Main table for tracking scan failures across all source types
CREATE TABLE IF NOT EXISTS source_scan_failures (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    source_type source_error_source_type NOT NULL,
    source_id UUID REFERENCES sources(id) ON DELETE CASCADE, -- Links to specific source configuration
    resource_path TEXT NOT NULL, -- Path/key/identifier within the source

    -- Failure classification
    error_type source_error_type NOT NULL DEFAULT 'unknown',
    error_severity source_error_severity NOT NULL DEFAULT 'medium',
    failure_count INTEGER NOT NULL DEFAULT 1,
    consecutive_failures INTEGER NOT NULL DEFAULT 1,

    -- Timestamps
    first_failure_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    last_failure_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    last_retry_at TIMESTAMP WITH TIME ZONE,
    next_retry_at TIMESTAMP WITH TIME ZONE,

    -- Error details
    error_message TEXT,
    error_code TEXT,           -- System/API specific error codes
    http_status_code INTEGER,  -- HTTP status codes where applicable

    -- Performance metrics
    response_time_ms INTEGER,    -- How long the request took
    response_size_bytes BIGINT,  -- Size of response (for timeout diagnosis)

    -- Resource characteristics
    resource_size_bytes BIGINT,    -- Size of the resource that failed
    resource_depth INTEGER,        -- Depth in hierarchy (for nested resources)
    estimated_item_count INTEGER,  -- Estimated number of items in directory

    -- Source-specific diagnostic data (flexible JSON field)
    diagnostic_data JSONB DEFAULT '{}',

    -- User actions
    user_excluded BOOLEAN DEFAULT FALSE,  -- User marked as permanently excluded
    user_notes TEXT,                      -- User-provided notes about the issue

    -- Retry strategy configuration
    retry_strategy TEXT DEFAULT 'exponential',  -- Strategy: exponential, linear, fixed
    max_retries INTEGER DEFAULT 5,              -- Maximum number of retries
    retry_delay_seconds INTEGER DEFAULT 300,    -- Base delay between retries

    -- Resolution tracking
    resolved BOOLEAN DEFAULT FALSE,
    resolved_at TIMESTAMP WITH TIME ZONE,
    resolution_method TEXT,  -- How it was resolved (automatic, manual, etc.)
    resolution_notes TEXT,   -- Additional notes about resolution

    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- Unique constraint to prevent duplicates per source
    CONSTRAINT unique_source_resource_failure UNIQUE (user_id, source_type, source_id, resource_path)
);

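-- Example of the kind of ad-hoc reporting this table supports (a sketch only; not executed by this migration):
--   SELECT source_type, error_severity, COUNT(*) AS failures
--   FROM source_scan_failures
--   WHERE NOT resolved AND NOT user_excluded
--   GROUP BY source_type, error_severity
--   ORDER BY source_type, error_severity;
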
-- Create indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_user_id ON source_scan_failures(user_id);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_source_type ON source_scan_failures(source_type);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_source_id ON source_scan_failures(source_id);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_error_type ON source_scan_failures(error_type);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_error_severity ON source_scan_failures(error_severity);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_resolved ON source_scan_failures(resolved);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_next_retry ON source_scan_failures(next_retry_at) WHERE NOT resolved AND NOT user_excluded;
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_resource_path ON source_scan_failures(resource_path);
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_composite_active ON source_scan_failures(user_id, source_type, resolved, user_excluded) WHERE NOT resolved;

-- GIN index for flexible JSON diagnostic data queries
CREATE INDEX IF NOT EXISTS idx_source_scan_failures_diagnostic_data ON source_scan_failures USING GIN (diagnostic_data);

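-- Example of a JSONB containment lookup the GIN index can serve (the key below is purely illustrative;
-- each source decides what it stores in diagnostic_data):
--   SELECT id, resource_path
--   FROM source_scan_failures
--   WHERE diagnostic_data @> '{"dns_failure": true}';
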
-- Function to calculate the next retry time with configurable backoff strategies
CREATE OR REPLACE FUNCTION calculate_source_retry_time(
    failure_count INTEGER,
    retry_strategy TEXT,
    base_delay_seconds INTEGER,
    max_delay_seconds INTEGER DEFAULT 86400 -- 24 hours max
) RETURNS TIMESTAMP WITH TIME ZONE AS $$
DECLARE
    delay_seconds INTEGER;
BEGIN
    CASE retry_strategy
        WHEN 'exponential' THEN
            -- Exponential backoff: delay = base * 2^(failure_count - 1), exponent capped at 10
            delay_seconds := LEAST(
                base_delay_seconds * POWER(2, LEAST(failure_count - 1, 10)),
                max_delay_seconds
            );
        WHEN 'linear' THEN
            -- Linear backoff: delay = base * failure_count
            delay_seconds := LEAST(
                base_delay_seconds * failure_count,
                max_delay_seconds
            );
        WHEN 'fixed' THEN
            -- Fixed delay
            delay_seconds := base_delay_seconds;
        ELSE
            -- Default to exponential backoff
            delay_seconds := LEAST(
                base_delay_seconds * POWER(2, LEAST(failure_count - 1, 10)),
                max_delay_seconds
            );
    END CASE;

    RETURN NOW() + (delay_seconds || ' seconds')::INTERVAL;
END;
-- STABLE (not IMMUTABLE): the result depends on NOW()
$$ LANGUAGE plpgsql STABLE;

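-- Example usage (illustrative values; not executed by this migration):
--   SELECT calculate_source_retry_time(1, 'exponential', 300);  -- ~NOW() + 5 minutes
--   SELECT calculate_source_retry_time(3, 'exponential', 300);  -- ~NOW() + 20 minutes (300 * 2^2 seconds)
--   SELECT calculate_source_retry_time(4, 'linear', 600);       -- ~NOW() + 40 minutes (600 * 4 seconds, capped at max_delay_seconds)
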
-- Function to automatically determine error severity based on error type and context
CREATE OR REPLACE FUNCTION classify_error_severity(
    p_error_type source_error_type,
    p_http_status_code INTEGER DEFAULT NULL,
    p_failure_count INTEGER DEFAULT 1,
    p_error_message TEXT DEFAULT NULL
) RETURNS source_error_severity AS $$
BEGIN
    CASE p_error_type
        -- Critical errors that won't resolve automatically
        WHEN 'path_too_long', 'invalid_characters' THEN
            RETURN 'critical'::source_error_severity;

        -- High severity errors requiring intervention
        WHEN 'permission_denied', 'quota_exceeded', 'too_many_items', 'depth_limit', 'size_limit' THEN
            RETURN 'high'::source_error_severity;

        -- Context-dependent severity
        WHEN 'server_error' THEN
            IF p_http_status_code = 404 THEN
                RETURN 'critical'::source_error_severity; -- Resource doesn't exist
            ELSIF p_http_status_code >= 500 THEN
                RETURN 'medium'::source_error_severity; -- Server issues, may recover
            ELSE
                RETURN 'medium'::source_error_severity;
            END IF;

        WHEN 'not_found' THEN
            RETURN 'critical'::source_error_severity;

        WHEN 'timeout' THEN
            -- Repeated timeouts indicate systemic issues
            IF p_failure_count > 5 THEN
                RETURN 'high'::source_error_severity;
            ELSE
                RETURN 'medium'::source_error_severity;
            END IF;

        -- Low severity, likely temporary
        WHEN 'network_error', 'rate_limited' THEN
            RETURN 'low'::source_error_severity;

        -- Medium severity by default
        ELSE
            RETURN 'medium'::source_error_severity;
    END CASE;
END;
$$ LANGUAGE plpgsql IMMUTABLE;

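-- Example usage (illustrative values only):
--   SELECT classify_error_severity('server_error', 404);  -- 'critical': the resource does not exist
--   SELECT classify_error_severity('timeout', NULL, 6);   -- 'high': more than five consecutive timeouts
--   SELECT classify_error_severity('rate_limited');       -- 'low': expected to clear on its own
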
-- Function to record or update a source scan failure
CREATE OR REPLACE FUNCTION record_source_scan_failure(
    p_user_id UUID,
    p_source_type source_error_source_type,
    p_source_id UUID,
    p_resource_path TEXT,
    p_error_type source_error_type,
    p_error_message TEXT,
    p_error_code TEXT DEFAULT NULL,
    p_http_status_code INTEGER DEFAULT NULL,
    p_response_time_ms INTEGER DEFAULT NULL,
    p_response_size_bytes BIGINT DEFAULT NULL,
    p_resource_size_bytes BIGINT DEFAULT NULL,
    p_diagnostic_data JSONB DEFAULT NULL
) RETURNS UUID AS $$
DECLARE
    v_failure_id UUID;
    v_severity source_error_severity;
    v_retry_strategy TEXT DEFAULT 'exponential';
    v_base_delay INTEGER DEFAULT 300;
BEGIN
    -- Determine severity based on error type and context
    v_severity := classify_error_severity(p_error_type, p_http_status_code, 1, p_error_message);

    -- Adjust retry strategy based on error type
    CASE p_error_type
        WHEN 'rate_limited' THEN
            v_retry_strategy := 'linear';
            v_base_delay := 600; -- 10 minutes for rate limiting
        WHEN 'network_error' THEN
            v_retry_strategy := 'exponential';
            v_base_delay := 60; -- 1 minute for network issues
        WHEN 'timeout' THEN
            v_retry_strategy := 'exponential';
            v_base_delay := 900; -- 15 minutes for timeouts
        ELSE
            v_retry_strategy := 'exponential';
            v_base_delay := 300; -- 5 minutes default
    END CASE;

    -- Insert or update the failure record
    INSERT INTO source_scan_failures (
        user_id,
        source_type,
        source_id,
        resource_path,
        error_type,
        error_severity,
        failure_count,
        consecutive_failures,
        error_message,
        error_code,
        http_status_code,
        response_time_ms,
        response_size_bytes,
        resource_size_bytes,
        resource_depth,
        estimated_item_count,
        diagnostic_data,
        retry_strategy,
        retry_delay_seconds,
        next_retry_at
    ) VALUES (
        p_user_id,
        p_source_type,
        p_source_id,
        p_resource_path,
        p_error_type,
        v_severity,
        1,
        1,
        p_error_message,
        p_error_code,
        p_http_status_code,
        p_response_time_ms,
        p_response_size_bytes,
        p_resource_size_bytes,
        array_length(string_to_array(p_resource_path, '/'), 1) - 1, -- resource_depth derived from the path
        NULL, -- estimated_item_count: will be filled in by source-specific logic
        COALESCE(p_diagnostic_data, '{}'::jsonb),
        v_retry_strategy,
        v_base_delay,
        calculate_source_retry_time(1, v_retry_strategy, v_base_delay, 86400)
    )
    ON CONFLICT (user_id, source_type, source_id, resource_path) DO UPDATE SET
        error_type = EXCLUDED.error_type,
        error_severity = classify_error_severity(EXCLUDED.error_type, EXCLUDED.http_status_code, source_scan_failures.failure_count + 1, EXCLUDED.error_message),
        failure_count = source_scan_failures.failure_count + 1,
        consecutive_failures = source_scan_failures.consecutive_failures + 1,
        last_failure_at = NOW(),
        error_message = EXCLUDED.error_message,
        error_code = EXCLUDED.error_code,
        http_status_code = EXCLUDED.http_status_code,
        response_time_ms = EXCLUDED.response_time_ms,
        response_size_bytes = EXCLUDED.response_size_bytes,
        resource_size_bytes = EXCLUDED.resource_size_bytes,
        diagnostic_data = COALESCE(EXCLUDED.diagnostic_data, source_scan_failures.diagnostic_data),
        next_retry_at = calculate_source_retry_time(
            source_scan_failures.failure_count + 1,
            source_scan_failures.retry_strategy,
            source_scan_failures.retry_delay_seconds,
            86400
        ),
        resolved = FALSE,
        updated_at = NOW()
    RETURNING id INTO v_failure_id;

    RETURN v_failure_id;
END;
$$ LANGUAGE plpgsql;

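-- Example usage (the UUIDs, path, and message below are hypothetical):
--   SELECT record_source_scan_failure(
--       '11111111-1111-1111-1111-111111111111'::uuid,  -- user_id
--       'webdav',                                       -- source_type
--       '22222222-2222-2222-2222-222222222222'::uuid,  -- source_id
--       '/remote.php/dav/files/demo/Photos',            -- resource_path
--       'timeout',                                      -- error_type
--       'PROPFIND timed out after 30 seconds',          -- error_message
--       p_response_time_ms => 30000
--   );
-- Calling it again for the same (user, source type, source, path) increments failure_count
-- and pushes next_retry_at further out according to the stored retry strategy.
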
-- Function to reset a failure for retry
CREATE OR REPLACE FUNCTION reset_source_scan_failure(
    p_user_id UUID,
    p_source_type source_error_source_type,
    p_source_id UUID,
    p_resource_path TEXT
) RETURNS BOOLEAN AS $$
DECLARE
    v_updated INTEGER;
BEGIN
    UPDATE source_scan_failures
    SET
        consecutive_failures = 0,
        last_retry_at = NOW(),
        next_retry_at = NOW(), -- Retry immediately
        resolved = FALSE,
        user_excluded = FALSE,
        updated_at = NOW()
    WHERE user_id = p_user_id
      AND source_type = p_source_type
      AND (source_id = p_source_id OR (source_id IS NULL AND p_source_id IS NULL))
      AND resource_path = p_resource_path
      AND NOT resolved;

    GET DIAGNOSTICS v_updated = ROW_COUNT;
    RETURN v_updated > 0;
END;
$$ LANGUAGE plpgsql;

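-- Example usage (hypothetical identifiers); returns TRUE if an unresolved failure was queued for immediate retry:
--   SELECT reset_source_scan_failure(
--       '11111111-1111-1111-1111-111111111111'::uuid,
--       'webdav',
--       '22222222-2222-2222-2222-222222222222'::uuid,
--       '/remote.php/dav/files/demo/Photos'
--   );
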
-- Function to mark a failure as resolved
CREATE OR REPLACE FUNCTION resolve_source_scan_failure(
    p_user_id UUID,
    p_source_type source_error_source_type,
    p_source_id UUID,
    p_resource_path TEXT,
    p_resolution_method TEXT DEFAULT 'automatic'
) RETURNS BOOLEAN AS $$
DECLARE
    v_updated INTEGER;
BEGIN
    UPDATE source_scan_failures
    SET
        resolved = TRUE,
        resolved_at = NOW(),
        resolution_method = p_resolution_method,
        consecutive_failures = 0,
        updated_at = NOW()
    WHERE user_id = p_user_id
      AND source_type = p_source_type
      AND (source_id = p_source_id OR (source_id IS NULL AND p_source_id IS NULL))
      AND resource_path = p_resource_path
      AND NOT resolved;

    GET DIAGNOSTICS v_updated = ROW_COUNT;
    RETURN v_updated > 0;
END;
$$ LANGUAGE plpgsql;

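-- Example usage (hypothetical identifiers); typically called after the path scans successfully again:
--   SELECT resolve_source_scan_failure(
--       '11111111-1111-1111-1111-111111111111'::uuid,
--       'webdav',
--       '22222222-2222-2222-2222-222222222222'::uuid,
--       '/remote.php/dav/files/demo/Photos',
--       'manual'
--   );
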
-- View for active failures that need attention across all source types
CREATE OR REPLACE VIEW active_source_scan_failures AS
SELECT
    ssf.*,
    u.username,
    u.email,
    s.name as source_name,
    s.source_type as configured_source_type,
    CASE
        WHEN ssf.failure_count > 20 THEN 'chronic'
        WHEN ssf.failure_count > 10 THEN 'persistent'
        WHEN ssf.failure_count > 3 THEN 'recurring'
        ELSE 'recent'
    END as failure_status,
    CASE
        WHEN ssf.next_retry_at < NOW() AND NOT ssf.user_excluded AND NOT ssf.resolved THEN 'ready_for_retry'
        WHEN ssf.user_excluded THEN 'excluded'
        WHEN ssf.error_severity = 'critical' THEN 'needs_intervention'
        WHEN ssf.resolved THEN 'resolved'
        ELSE 'scheduled'
    END as action_status
FROM source_scan_failures ssf
JOIN users u ON ssf.user_id = u.id
LEFT JOIN sources s ON ssf.source_id = s.id
WHERE NOT ssf.resolved;

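-- Example: list failures that are ready to be retried for one user (the user_id is hypothetical):
--   SELECT source_type, source_name, resource_path, failure_count, next_retry_at
--   FROM active_source_scan_failures
--   WHERE user_id = '11111111-1111-1111-1111-111111111111'::uuid
--     AND action_status = 'ready_for_retry'
--   ORDER BY next_retry_at;
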
-- Trigger to update the updated_at timestamp
CREATE OR REPLACE FUNCTION update_source_scan_failures_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS update_source_scan_failures_updated_at ON source_scan_failures;
CREATE TRIGGER update_source_scan_failures_updated_at
    BEFORE UPDATE ON source_scan_failures
    FOR EACH ROW
    EXECUTE FUNCTION update_source_scan_failures_updated_at();

-- Comments for documentation
COMMENT ON TABLE source_scan_failures IS 'Generic failure tracking for all source types (WebDAV, S3, Local, etc.) with detailed diagnostics and configurable retry strategies';
COMMENT ON COLUMN source_scan_failures.source_type IS 'Type of source (webdav, s3, local, etc.)';
COMMENT ON COLUMN source_scan_failures.source_id IS 'Reference to the specific source configuration (nullable for backward compatibility)';
COMMENT ON COLUMN source_scan_failures.resource_path IS 'Path/key/identifier of the resource that failed (directory, file, or object key)';
COMMENT ON COLUMN source_scan_failures.error_type IS 'Categorized type of error for analysis and handling across all source types';
COMMENT ON COLUMN source_scan_failures.error_severity IS 'Severity level determining retry strategy and user notification priority';
COMMENT ON COLUMN source_scan_failures.diagnostic_data IS 'Flexible JSONB field for storing source-specific diagnostic information';
COMMENT ON COLUMN source_scan_failures.retry_strategy IS 'Retry strategy: exponential, linear, or fixed delay';
COMMENT ON COLUMN source_scan_failures.user_excluded IS 'User has marked this resource to be permanently excluded from scanning';
COMMENT ON COLUMN source_scan_failures.consecutive_failures IS 'Number of consecutive failures without a successful scan';