fix(tests): resolve numerous frontend and backend test issues

perf3ct 2025-06-28 22:50:40 +00:00
parent edd0c7514f
commit fbf89c213d
5 changed files with 615 additions and 55 deletions

View File

@ -7,6 +7,9 @@ const mockList = vi.fn();
const mockUpload = vi.fn();
const mockDownload = vi.fn();
const mockDeleteLowConfidence = vi.fn();
const mockGetFailedOcrDocuments = vi.fn();
const mockGetFailedDocuments = vi.fn();
const mockRetryOcr = vi.fn();
// Mock the entire api module
vi.mock('../api', async () => {
@ -19,6 +22,9 @@ vi.mock('../api', async () => {
upload: mockUpload,
download: mockDownload,
deleteLowConfidence: mockDeleteLowConfidence,
getFailedOcrDocuments: mockGetFailedOcrDocuments,
getFailedDocuments: mockGetFailedDocuments,
retryOcr: mockRetryOcr,
},
};
});
@ -490,4 +496,274 @@ describe('documentService.deleteLowConfidence', () => {
expect(mockDeleteLowConfidence).toHaveBeenCalledWith(confidence, true);
}
});
});
describe('documentService.getFailedOcrDocuments', () => {
const mockFailedOcrResponse = {
documents: [
{
id: 'doc-1',
filename: 'failed_doc1.pdf',
failure_reason: 'low_ocr_confidence',
failure_stage: 'ocr',
created_at: '2024-01-01T10:00:00Z',
retry_count: 1
},
{
id: 'doc-2',
filename: 'failed_doc2.pdf',
failure_reason: 'pdf_parsing_error',
failure_stage: 'ocr',
created_at: '2024-01-01T11:00:00Z',
retry_count: 0
}
],
pagination: {
total: 2,
limit: 50,
offset: 0,
has_more: false
},
statistics: {
total_failed: 2,
failure_categories: [
{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
{ reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 }
]
}
};
it('should fetch failed OCR documents successfully', async () => {
const mockResponse = {
data: mockFailedOcrResponse,
status: 200,
statusText: 'OK',
headers: {},
config: {},
};
mockGetFailedOcrDocuments.mockResolvedValue(mockResponse);
const result = await documentService.getFailedOcrDocuments(50, 0);
expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(50, 0);
expect(result.data).toEqual(mockFailedOcrResponse);
expect(result.data.documents).toHaveLength(2);
expect(result.data.documents[0].failure_stage).toBe('ocr');
expect(result.data.pagination.total).toBe(2);
});
it('should handle pagination parameters correctly', async () => {
mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });
await documentService.getFailedOcrDocuments(25, 10);
expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(25, 10);
});
it('should use default pagination when not specified', async () => {
mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });
await documentService.getFailedOcrDocuments();
expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith();
});
it('should handle empty results', async () => {
const emptyResponse = {
documents: [],
pagination: { total: 0, limit: 50, offset: 0, has_more: false },
statistics: { total_failed: 0, failure_categories: [] }
};
mockGetFailedOcrDocuments.mockResolvedValue({ data: emptyResponse });
const result = await documentService.getFailedOcrDocuments();
expect(result.data.documents).toHaveLength(0);
expect(result.data.pagination.total).toBe(0);
expect(result.data.statistics.total_failed).toBe(0);
});
it('should handle API errors', async () => {
const mockError = new Error('Network error');
mockGetFailedOcrDocuments.mockRejectedValue(mockError);
await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error');
});
});
describe('documentService.getFailedDocuments', () => {
const mockFailedDocumentsResponse = {
documents: [
{
id: 'doc-1',
filename: 'failed_doc1.pdf',
failure_reason: 'low_ocr_confidence',
failure_stage: 'ocr',
created_at: '2024-01-01T10:00:00Z',
retry_count: 1
},
{
id: 'doc-2',
filename: 'duplicate_doc.pdf',
failure_reason: 'duplicate_content',
failure_stage: 'ingestion',
created_at: '2024-01-01T11:00:00Z',
retry_count: 0
},
{
id: 'doc-3',
filename: 'large_file.pdf',
failure_reason: 'file_too_large',
failure_stage: 'validation',
created_at: '2024-01-01T12:00:00Z',
retry_count: 2
}
],
pagination: {
total: 3,
limit: 25,
offset: 0,
has_more: false
},
statistics: {
total_failed: 3,
failure_categories: [
{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
{ reason: 'file_too_large', display_name: 'File Too Large', count: 1 }
]
}
};
it('should fetch failed documents with default parameters', async () => {
mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });
const result = await documentService.getFailedDocuments();
expect(mockGetFailedDocuments).toHaveBeenCalledWith();
expect(result.data).toEqual(mockFailedDocumentsResponse);
expect(result.data.documents).toHaveLength(3);
});
it('should filter by stage parameter', async () => {
const ocrOnlyResponse = {
...mockFailedDocumentsResponse,
documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure
pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
};
mockGetFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse });
const result = await documentService.getFailedDocuments(25, 0, 'ocr');
expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr');
expect(result.data.documents).toHaveLength(1);
expect(result.data.documents[0].failure_stage).toBe('ocr');
});
it('should filter by reason parameter', async () => {
const duplicateOnlyResponse = {
...mockFailedDocumentsResponse,
documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure
pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
statistics: { total_failed: 1, failure_categories: [{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }] }
};
mockGetFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse });
const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content');
expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, undefined, 'duplicate_content');
expect(result.data.documents).toHaveLength(1);
expect(result.data.documents[0].failure_reason).toBe('duplicate_content');
});
it('should filter by both stage and reason', async () => {
const filteredResponse = {
...mockFailedDocumentsResponse,
documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence
pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
};
mockGetFailedDocuments.mockResolvedValue({ data: filteredResponse });
const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence');
expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence');
expect(result.data.documents).toHaveLength(1);
expect(result.data.documents[0].failure_stage).toBe('ocr');
expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence');
});
it('should handle custom pagination', async () => {
mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });
await documentService.getFailedDocuments(10, 20);
expect(mockGetFailedDocuments).toHaveBeenCalledWith(10, 20);
});
it('should handle empty results', async () => {
const emptyResponse = {
documents: [],
pagination: { total: 0, limit: 25, offset: 0, has_more: false },
statistics: { total_failed: 0, failure_categories: [] }
};
mockGetFailedDocuments.mockResolvedValue({ data: emptyResponse });
const result = await documentService.getFailedDocuments();
expect(result.data.documents).toHaveLength(0);
expect(result.data.statistics.total_failed).toBe(0);
});
});
describe('documentService.retryOcr', () => {
it('should retry OCR for a document successfully', async () => {
const mockRetryResponse = {
data: {
success: true,
message: 'OCR retry queued successfully',
document_id: 'doc-123'
},
status: 200,
statusText: 'OK',
headers: {},
config: {},
};
mockRetryOcr.mockResolvedValue(mockRetryResponse);
const result = await documentService.retryOcr('doc-123');
expect(mockRetryOcr).toHaveBeenCalledWith('doc-123');
expect(result.data.success).toBe(true);
expect(result.data.document_id).toBe('doc-123');
});
it('should handle retry errors', async () => {
const mockError = {
response: {
status: 404,
data: { error: 'Document not found' }
}
};
mockRetryOcr.mockRejectedValue(mockError);
await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({
response: { status: 404 }
});
});
it('should handle network errors', async () => {
mockRetryOcr.mockRejectedValue(new Error('Network error'));
await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error');
});
});
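
The assertions above pin down the wrapper signatures fairly tightly. What follows is a minimal sketch, assuming documentService is a thin passthrough over the api module and that api exposes a documents client with these methods (inferred from the mocks; the shipped implementation may add defaults or validation). Rest parameters forward exactly the arguments the caller passed, which the bare toHaveBeenCalledWith() assertions in the default-pagination cases rely on.

// Hedged sketch, not the shipped service: a thin passthrough whose
// argument order (limit, offset, stage, reason) is inferred from the
// assertions above.
import { api } from '../api';

export const documentService = {
  getFailedOcrDocuments: (...args: [limit?: number, offset?: number]) =>
    api.documents.getFailedOcrDocuments(...args),
  getFailedDocuments: (
    ...args: [limit?: number, offset?: number, stage?: string, reason?: string]
  ) => api.documents.getFailedDocuments(...args),
  retryOcr: (documentId: string) => api.documents.retryOcr(documentId),
};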

View File

@ -0,0 +1,283 @@
use crate::db::constraint_validation::ConstraintValidator;
/// Simple unit tests for the failed_documents functionality.
/// These tests focus on business logic and constraint validation
/// without requiring a live database connection at compile time.
#[cfg(test)]
mod failed_documents_unit_tests {
use super::*;
#[test]
fn test_constraint_validator_failure_reasons() {
// Test all valid failure reasons
let valid_reasons = [
"duplicate_content", "duplicate_filename", "unsupported_format",
"file_too_large", "file_corrupted", "access_denied",
"low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
"pdf_parsing_error", "storage_quota_exceeded", "network_error",
"permission_denied", "virus_detected", "invalid_structure",
"policy_violation", "other"
];
for reason in valid_reasons {
assert!(
ConstraintValidator::validate_failure_reason(reason).is_ok(),
"Expected '{}' to be valid",
reason
);
}
// Test invalid failure reasons
let invalid_reasons = [
"invalid_reason", "unknown", "timeout", "migration_completed",
"", "random_text", "failure", "error"
];
for reason in invalid_reasons {
assert!(
ConstraintValidator::validate_failure_reason(reason).is_err(),
"Expected '{}' to be invalid",
reason
);
}
}
#[test]
fn test_constraint_validator_failure_stages() {
// Test all valid failure stages
let valid_stages = [
"ingestion", "validation", "ocr", "storage", "processing", "sync"
];
for stage in valid_stages {
assert!(
ConstraintValidator::validate_failure_stage(stage).is_ok(),
"Expected '{}' to be valid",
stage
);
}
// Test invalid failure stages
let invalid_stages = [
"invalid_stage", "unknown", "failed", "error", "", "random_text"
];
for stage in invalid_stages {
assert!(
ConstraintValidator::validate_failure_stage(stage).is_err(),
"Expected '{}' to be invalid",
stage
);
}
}
#[test]
fn test_legacy_ocr_failure_mapping() {
let test_cases = [
(Some("low_ocr_confidence"), "low_ocr_confidence"),
(Some("timeout"), "ocr_timeout"),
(Some("memory_limit"), "ocr_memory_limit"),
(Some("pdf_parsing_error"), "pdf_parsing_error"),
(Some("corrupted"), "file_corrupted"),
(Some("file_corrupted"), "file_corrupted"),
(Some("unsupported_format"), "unsupported_format"),
(Some("access_denied"), "access_denied"),
(Some("unknown"), "other"),
(None, "other"),
(Some("unmapped_value"), "other"),
(Some(""), "other"),
];
for (input, expected) in test_cases {
let result = ConstraintValidator::map_legacy_ocr_failure_reason(input);
assert_eq!(
result, expected,
"Failed for input: {:?}. Expected '{}', got '{}'",
input, expected, result
);
}
}
#[test]
fn test_mapped_legacy_values_are_valid() {
// Ensure all mapped legacy values are actually valid according to our constraints
let legacy_values = [
Some("low_ocr_confidence"),
Some("timeout"),
Some("memory_limit"),
Some("pdf_parsing_error"),
Some("corrupted"),
Some("file_corrupted"),
Some("unsupported_format"),
Some("access_denied"),
Some("unknown"),
None,
Some("random_unmapped_value"),
];
for legacy_value in legacy_values {
let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(legacy_value);
assert!(
ConstraintValidator::validate_failure_reason(mapped).is_ok(),
"Mapped value '{}' from legacy '{:?}' should be valid",
mapped, legacy_value
);
}
}
#[test]
fn test_batch_validation() {
// Test valid batch
let valid_batch = ["other", "low_ocr_confidence", "pdf_parsing_error", "duplicate_content"];
assert!(ConstraintValidator::validate_failure_reasons_batch(&valid_batch).is_ok());
// Test invalid batch
let invalid_batch = ["other", "invalid_reason", "timeout", "low_ocr_confidence"];
let result = ConstraintValidator::validate_failure_reasons_batch(&invalid_batch);
assert!(result.is_err());
let errors = result.unwrap_err();
assert_eq!(errors.len(), 2); // Should have 2 invalid reasons
assert!(errors.iter().any(|e| e.contains("invalid_reason")));
assert!(errors.iter().any(|e| e.contains("timeout")));
// Test empty batch
let empty_batch: &[&str] = &[];
assert!(ConstraintValidator::validate_failure_reasons_batch(empty_batch).is_ok());
}
#[test]
fn test_constraint_error_messages() {
let result = ConstraintValidator::validate_failure_reason("invalid_reason");
assert!(result.is_err());
let error_msg = result.unwrap_err();
assert!(error_msg.contains("Invalid failure_reason 'invalid_reason'"));
assert!(error_msg.contains("Valid values are:"));
assert!(error_msg.contains("low_ocr_confidence"));
assert!(error_msg.contains("other"));
let stage_result = ConstraintValidator::validate_failure_stage("invalid_stage");
assert!(stage_result.is_err());
let stage_error = stage_result.unwrap_err();
assert!(stage_error.contains("Invalid failure_stage 'invalid_stage'"));
assert!(stage_error.contains("Valid values are:"));
assert!(stage_error.contains("ingestion"));
assert!(stage_error.contains("ocr"));
}
#[test]
fn test_constraint_validation_comprehensive() {
// Test that our enum values comprehensively cover expected failure scenarios
// OCR-related failures
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
assert!(ConstraintValidator::validate_failure_reason("ocr_timeout").is_ok());
assert!(ConstraintValidator::validate_failure_reason("ocr_memory_limit").is_ok());
assert!(ConstraintValidator::validate_failure_reason("pdf_parsing_error").is_ok());
// File-related failures
assert!(ConstraintValidator::validate_failure_reason("file_too_large").is_ok());
assert!(ConstraintValidator::validate_failure_reason("file_corrupted").is_ok());
assert!(ConstraintValidator::validate_failure_reason("unsupported_format").is_ok());
assert!(ConstraintValidator::validate_failure_reason("access_denied").is_ok());
// Duplicate detection
assert!(ConstraintValidator::validate_failure_reason("duplicate_content").is_ok());
assert!(ConstraintValidator::validate_failure_reason("duplicate_filename").is_ok());
// System-related failures
assert!(ConstraintValidator::validate_failure_reason("storage_quota_exceeded").is_ok());
assert!(ConstraintValidator::validate_failure_reason("network_error").is_ok());
assert!(ConstraintValidator::validate_failure_reason("permission_denied").is_ok());
// Security-related failures
assert!(ConstraintValidator::validate_failure_reason("virus_detected").is_ok());
assert!(ConstraintValidator::validate_failure_reason("policy_violation").is_ok());
assert!(ConstraintValidator::validate_failure_reason("invalid_structure").is_ok());
// Fallback
assert!(ConstraintValidator::validate_failure_reason("other").is_ok());
}
#[test]
fn test_failure_stages_comprehensive() {
// Test that our stage enum covers the document processing pipeline
// Initial processing stages
assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
assert!(ConstraintValidator::validate_failure_stage("validation").is_ok());
// Core processing stages
assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
assert!(ConstraintValidator::validate_failure_stage("processing").is_ok());
// Storage and sync stages
assert!(ConstraintValidator::validate_failure_stage("storage").is_ok());
assert!(ConstraintValidator::validate_failure_stage("sync").is_ok());
}
#[test]
fn test_legacy_mapping_completeness() {
// Ensure we handle all possible legacy OCR failure reasons that could exist
let legacy_ocr_reasons = [
"low_ocr_confidence",
"timeout",
"memory_limit",
"pdf_parsing_error",
"corrupted",
"file_corrupted",
"unsupported_format",
"access_denied",
"unknown",
"some_new_unmapped_reason"
];
for legacy_reason in legacy_ocr_reasons {
let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(Some(legacy_reason));
// All mapped values should be valid
assert!(
ConstraintValidator::validate_failure_reason(mapped).is_ok(),
"Legacy reason '{}' maps to '{}' which should be valid",
legacy_reason, mapped
);
// Unmapped values should fall back to "other"
if !["low_ocr_confidence", "timeout", "memory_limit", "pdf_parsing_error",
"corrupted", "file_corrupted", "unsupported_format", "access_denied", "unknown"]
.contains(&legacy_reason) {
assert_eq!(mapped, "other", "Unmapped legacy reason should fall back to 'other'");
}
}
}
#[test]
fn test_case_sensitivity() {
// Our validation should be case-sensitive
assert!(ConstraintValidator::validate_failure_reason("Low_OCR_Confidence").is_err());
assert!(ConstraintValidator::validate_failure_reason("LOW_OCR_CONFIDENCE").is_err());
assert!(ConstraintValidator::validate_failure_reason("OCR").is_err());
assert!(ConstraintValidator::validate_failure_reason("INGESTION").is_err());
// Only exact lowercase matches should work
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
}
#[test]
fn test_whitespace_handling() {
// Validation should not accept values with extra whitespace
assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence").is_err());
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence ").is_err());
assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence ").is_err());
assert!(ConstraintValidator::validate_failure_stage(" ocr").is_err());
assert!(ConstraintValidator::validate_failure_stage("ocr ").is_err());
// Only exact matches should work
assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
}
}
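
Taken together, these tests fully specify the validator's observable contract: exact-match validation with descriptive error messages, batch validation, and a legacy mapping with an "other" fallback. The following is a hedged sketch reconstructed from the assertions alone, not the shipped crate::db::constraint_validation implementation, included only to make that contract concrete:

pub struct ConstraintValidator;

impl ConstraintValidator {
    // Allowed vocabularies, copied from the test expectations above.
    const VALID_REASONS: &'static [&'static str] = &[
        "duplicate_content", "duplicate_filename", "unsupported_format",
        "file_too_large", "file_corrupted", "access_denied",
        "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
        "pdf_parsing_error", "storage_quota_exceeded", "network_error",
        "permission_denied", "virus_detected", "invalid_structure",
        "policy_violation", "other",
    ];
    const VALID_STAGES: &'static [&'static str] =
        &["ingestion", "validation", "ocr", "storage", "processing", "sync"];

    /// Exact, case- and whitespace-sensitive membership check.
    pub fn validate_failure_reason(reason: &str) -> Result<(), String> {
        if Self::VALID_REASONS.iter().any(|&v| v == reason) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_reason '{}'. Valid values are: {}",
                reason,
                Self::VALID_REASONS.join(", ")
            ))
        }
    }

    pub fn validate_failure_stage(stage: &str) -> Result<(), String> {
        if Self::VALID_STAGES.iter().any(|&v| v == stage) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_stage '{}'. Valid values are: {}",
                stage,
                Self::VALID_STAGES.join(", ")
            ))
        }
    }

    /// One error message per invalid entry; an empty slice is Ok.
    pub fn validate_failure_reasons_batch(reasons: &[&str]) -> Result<(), Vec<String>> {
        let errors: Vec<String> = reasons
            .iter()
            .filter_map(|&r| Self::validate_failure_reason(r).err())
            .collect();
        if errors.is_empty() { Ok(()) } else { Err(errors) }
    }

    /// Legacy OCR reasons map onto the constrained vocabulary; anything
    /// unrecognized (including None and "") falls back to "other".
    pub fn map_legacy_ocr_failure_reason(legacy: Option<&str>) -> &'static str {
        match legacy {
            Some("low_ocr_confidence") => "low_ocr_confidence",
            Some("timeout") => "ocr_timeout",
            Some("memory_limit") => "ocr_memory_limit",
            Some("pdf_parsing_error") => "pdf_parsing_error",
            Some("corrupted") | Some("file_corrupted") => "file_corrupted",
            Some("unsupported_format") => "unsupported_format",
            Some("access_denied") => "access_denied",
            _ => "other",
        }
    }
}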

View File

@ -1,5 +1,4 @@
use sqlx::PgPool;
use crate::tests::helpers::setup_test_db;
#[cfg(test)]
mod migration_constraint_tests {
@ -18,17 +17,17 @@ mod migration_constraint_tests {
];
for reason in valid_reasons {
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, $2, 'validation', 'test'
)
"#,
format!("test_file_{}.txt", reason),
reason
"#
)
.bind(format!("test_file_{}.txt", reason))
.bind(reason)
.execute(&pool)
.await;
@ -45,17 +44,17 @@ mod migration_constraint_tests {
];
for reason in invalid_reasons {
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, $2, 'validation', 'test'
)
"#,
format!("test_file_{}.txt", reason),
reason
"#
)
.bind(format!("test_file_{}.txt", reason))
.bind(reason)
.execute(&pool)
.await;
@ -71,17 +70,17 @@ mod migration_constraint_tests {
];
for stage in valid_stages {
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, 'other', $2, 'test'
)
"#,
format!("test_file_{}.txt", stage),
stage
"#
)
.bind(format!("test_file_{}.txt", stage))
.bind(stage)
.execute(&pool)
.await;
@ -123,17 +122,17 @@ mod migration_constraint_tests {
input_reason, expected_output);
// Test that the mapped value works in the database
let result = sqlx::query!(
let result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
) VALUES (
gen_random_uuid(), $1, $2, 'ocr', 'migration'
)
"#,
format!("migration_test_{}.txt", input_reason.replace("/", "_")),
mapped_reason
"#
)
.bind(format!("migration_test_{}.txt", input_reason.replace("/", "_")))
.bind(mapped_reason)
.execute(&pool)
.await;
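
The recurring edit in this file swaps sqlx's query! macro for the plain sqlx::query function with explicit .bind() calls. query! type-checks the SQL against a live database (or sqlx offline metadata) at compile time, so dropping it lets the test crate build without a reachable database; validity is instead checked when the statement runs. A minimal sketch of the pattern, reusing the INSERT above (the helper name insert_failed is illustrative):

use sqlx::PgPool;

// Hedged miniature of the macro-to-function swap: sqlx::query!() would
// verify this statement against the schema during compilation, while
// sqlx::query() defers all checking to runtime.
async fn insert_failed(pool: &PgPool, filename: &str, reason: &str) -> Result<(), sqlx::Error> {
    sqlx::query(
        r#"
        INSERT INTO failed_documents (
            user_id, filename, failure_reason, failure_stage, ingestion_source
        ) VALUES (gen_random_uuid(), $1, $2, 'validation', 'test')
        "#,
    )
    .bind(filename) // $1
    .bind(reason)   // $2
    .execute(pool)
    .await?;
    Ok(())
}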

View File

@ -1,4 +1,4 @@
use sqlx::PgPool;
use sqlx::{PgPool, Row};
use uuid::Uuid;
#[cfg(test)]
@ -22,7 +22,7 @@ mod migration_integration_tests {
// Insert test documents
for (filename, failure_reason, error_msg) in &test_documents {
sqlx::query!(
sqlx::query(
r#"
INSERT INTO documents (
user_id, filename, original_filename, file_path, file_size,
@ -31,30 +31,29 @@ mod migration_integration_tests {
$1, $2, $2, '/fake/path', 1000, 'application/pdf',
'failed', $3, $4
)
"#,
user_id,
filename,
*failure_reason,
error_msg
"#
)
.bind(user_id)
.bind(filename)
.bind(*failure_reason)
.bind(error_msg)
.execute(&pool)
.await
.expect("Failed to insert test document");
}
// Count documents before migration
let before_count = sqlx::query_scalar!(
let before_count: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
)
.fetch_one(&pool)
.await
.expect("Failed to count documents")
.unwrap_or(0);
.expect("Failed to count documents");
assert_eq!(before_count, test_documents.len() as i64);
// Simulate the migration logic
let migration_result = sqlx::query!(
let migration_result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, original_filename, file_path, file_size,
@ -87,13 +86,12 @@ mod migration_integration_tests {
assert!(migration_result.is_ok(), "Migration should succeed");
// Verify all documents were migrated
let migrated_count = sqlx::query_scalar!(
let migrated_count: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration'"
)
.fetch_one(&pool)
.await
.expect("Failed to count migrated documents")
.unwrap_or(0);
.expect("Failed to count migrated documents");
assert_eq!(migrated_count, test_documents.len() as i64);
@ -108,24 +106,24 @@ mod migration_integration_tests {
];
for (filename, expected_reason) in mapping_tests {
let actual_reason = sqlx::query_scalar!(
"SELECT failure_reason FROM failed_documents WHERE filename = $1",
filename
let actual_reason: String = sqlx::query_scalar(
"SELECT failure_reason FROM failed_documents WHERE filename = $1"
)
.bind(filename)
.fetch_one(&pool)
.await
.expect("Failed to fetch failure reason");
assert_eq!(
actual_reason.as_deref(),
Some(expected_reason),
actual_reason,
expected_reason,
"Incorrect mapping for {}",
filename
);
}
// Test deletion of original failed documents
let delete_result = sqlx::query!(
let delete_result = sqlx::query(
"DELETE FROM documents WHERE ocr_status = 'failed'"
)
.execute(&pool)
@ -134,18 +132,17 @@ mod migration_integration_tests {
assert!(delete_result.is_ok(), "Delete should succeed");
// Verify cleanup
let remaining_failed = sqlx::query_scalar!(
let remaining_failed: i64 = sqlx::query_scalar(
"SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
)
.fetch_one(&pool)
.await
.expect("Failed to count remaining documents")
.unwrap_or(0);
.expect("Failed to count remaining documents");
assert_eq!(remaining_failed, 0);
// Verify failed_documents table integrity
let failed_docs = sqlx::query!(
let failed_docs = sqlx::query(
"SELECT filename, failure_reason, failure_stage FROM failed_documents ORDER BY filename"
)
.fetch_all(&pool)
@ -156,11 +153,13 @@ mod migration_integration_tests {
for doc in &failed_docs {
// All should have proper stage
assert_eq!(doc.failure_stage, "ocr");
let stage: String = doc.get("failure_stage");
assert_eq!(stage, "ocr");
// All should have valid failure_reason
let reason: String = doc.get("failure_reason");
assert!(matches!(
doc.failure_reason.as_str(),
reason.as_str(),
"low_ocr_confidence" | "ocr_timeout" | "ocr_memory_limit" |
"file_corrupted" | "other"
));
@ -181,7 +180,7 @@ mod migration_integration_tests {
];
for (filename, failure_reason, error_msg) in &edge_cases {
sqlx::query!(
sqlx::query(
r#"
INSERT INTO documents (
user_id, filename, original_filename, file_path, file_size,
@ -190,19 +189,19 @@ mod migration_integration_tests {
$1, $2, $2, '/fake/path', 1000, 'application/pdf',
'failed', $3, $4
)
"#,
user_id,
filename,
*failure_reason,
error_msg
"#
)
.bind(user_id)
.bind(filename)
.bind(*failure_reason)
.bind(error_msg)
.execute(&pool)
.await
.expect("Failed to insert edge case document");
}
// Run migration on edge cases
let migration_result = sqlx::query!(
let migration_result = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
@ -231,7 +230,7 @@ mod migration_integration_tests {
assert!(migration_result.is_ok(), "Migration should handle edge cases");
// Verify all edge cases mapped to 'other' (since they're not in our mapping)
let edge_case_mappings = sqlx::query!(
let edge_case_mappings = sqlx::query(
"SELECT filename, failure_reason FROM failed_documents WHERE ingestion_source = 'migration_edge_test'"
)
.fetch_all(&pool)
@ -239,8 +238,10 @@ mod migration_integration_tests {
.expect("Failed to fetch edge case mappings");
for mapping in edge_case_mappings {
assert_eq!(mapping.failure_reason, "other",
"Edge case '{}' should map to 'other'", mapping.filename);
let filename: String = mapping.get("filename");
let failure_reason: String = mapping.get("failure_reason");
assert_eq!(failure_reason, "other",
"Edge case '{}' should map to 'other'", filename);
}
}
@ -250,7 +251,7 @@ mod migration_integration_tests {
// during migration, the constraints will catch it
// Try to insert data that violates constraints
let invalid_insert = sqlx::query!(
let invalid_insert = sqlx::query(
r#"
INSERT INTO failed_documents (
user_id, filename, failure_reason, failure_stage, ingestion_source
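
The other consequence of dropping the macros, visible throughout this file: query!() returns anonymous record structs with typed fields, while sqlx::query() and sqlx::query_scalar() return untyped results, so scalar bindings gain explicit type annotations and column access moves to sqlx::Row::get (hence the added Row import). A hedged miniature of the pattern (the helper name inspect_migration is illustrative):

use sqlx::{PgPool, Row};

// Scalars get an explicit Rust type on the binding; row columns are read
// by name via Row::get, mirroring the edits above.
async fn inspect_migration(pool: &PgPool) -> Result<(), sqlx::Error> {
    let migrated: i64 = sqlx::query_scalar(
        "SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration'",
    )
    .fetch_one(pool)
    .await?;
    println!("{migrated} rows migrated");

    let rows = sqlx::query(
        "SELECT filename, failure_reason FROM failed_documents ORDER BY filename",
    )
    .fetch_all(pool)
    .await?;
    for row in &rows {
        let filename: String = row.get("filename");
        let reason: String = row.get("failure_reason");
        println!("{filename}: {reason}");
    }
    Ok(())
}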

View File

@ -18,4 +18,5 @@ mod sql_type_safety_tests;
mod users_tests;
mod generic_migration_tests;
mod migration_constraint_tests;
mod migration_integration_tests;
mod failed_documents_unit_tests;