fix(tests): resolve a whole lot of test issues
parent edd0c7514f
commit fbf89c213d
@@ -7,6 +7,9 @@ const mockList = vi.fn();
 const mockUpload = vi.fn();
 const mockDownload = vi.fn();
 const mockDeleteLowConfidence = vi.fn();
+const mockGetFailedOcrDocuments = vi.fn();
+const mockGetFailedDocuments = vi.fn();
+const mockRetryOcr = vi.fn();

 // Mock the entire api module
 vi.mock('../api', async () => {

@@ -19,6 +22,9 @@ vi.mock('../api', async () => {
       upload: mockUpload,
       download: mockDownload,
       deleteLowConfidence: mockDeleteLowConfidence,
+      getFailedOcrDocuments: mockGetFailedOcrDocuments,
+      getFailedDocuments: mockGetFailedDocuments,
+      retryOcr: mockRetryOcr,
     },
   };
 });

@@ -491,3 +497,273 @@ describe('documentService.deleteLowConfidence', () => {
     }
   });
 });
+
+describe('documentService.getFailedOcrDocuments', () => {
+  const mockFailedOcrResponse = {
+    documents: [
+      {
+        id: 'doc-1',
+        filename: 'failed_doc1.pdf',
+        failure_reason: 'low_ocr_confidence',
+        failure_stage: 'ocr',
+        created_at: '2024-01-01T10:00:00Z',
+        retry_count: 1
+      },
+      {
+        id: 'doc-2',
+        filename: 'failed_doc2.pdf',
+        failure_reason: 'pdf_parsing_error',
+        failure_stage: 'ocr',
+        created_at: '2024-01-01T11:00:00Z',
+        retry_count: 0
+      }
+    ],
+    pagination: {
+      total: 2,
+      limit: 50,
+      offset: 0,
+      has_more: false
+    },
+    statistics: {
+      total_failed: 2,
+      failure_categories: [
+        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
+        { reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 }
+      ]
+    }
+  };
+
+  it('should fetch failed OCR documents successfully', async () => {
+    const mockResponse = {
+      data: mockFailedOcrResponse,
+      status: 200,
+      statusText: 'OK',
+      headers: {},
+      config: {},
+    };
+
+    mockGetFailedOcrDocuments.mockResolvedValue(mockResponse);
+
+    const result = await documentService.getFailedOcrDocuments(50, 0);
+
+    expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(50, 0);
+    expect(result.data).toEqual(mockFailedOcrResponse);
+    expect(result.data.documents).toHaveLength(2);
+    expect(result.data.documents[0].failure_stage).toBe('ocr');
+    expect(result.data.pagination.total).toBe(2);
+  });
+
+  it('should handle pagination parameters correctly', async () => {
+    mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });
+
+    await documentService.getFailedOcrDocuments(25, 10);
+
+    expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith(25, 10);
+  });
+
+  it('should use default pagination when not specified', async () => {
+    mockGetFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });
+
+    await documentService.getFailedOcrDocuments();
+
+    expect(mockGetFailedOcrDocuments).toHaveBeenCalledWith();
+  });
+
+  it('should handle empty results', async () => {
+    const emptyResponse = {
+      documents: [],
+      pagination: { total: 0, limit: 50, offset: 0, has_more: false },
+      statistics: { total_failed: 0, failure_categories: [] }
+    };
+
+    mockGetFailedOcrDocuments.mockResolvedValue({ data: emptyResponse });
+
+    const result = await documentService.getFailedOcrDocuments();
+
+    expect(result.data.documents).toHaveLength(0);
+    expect(result.data.pagination.total).toBe(0);
+    expect(result.data.statistics.total_failed).toBe(0);
+  });
+
+  it('should handle API errors', async () => {
+    const mockError = new Error('Network error');
+    mockGetFailedOcrDocuments.mockRejectedValue(mockError);
+
+    await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error');
+  });
+});
+
+describe('documentService.getFailedDocuments', () => {
+  const mockFailedDocumentsResponse = {
+    documents: [
+      {
+        id: 'doc-1',
+        filename: 'failed_doc1.pdf',
+        failure_reason: 'low_ocr_confidence',
+        failure_stage: 'ocr',
+        created_at: '2024-01-01T10:00:00Z',
+        retry_count: 1
+      },
+      {
+        id: 'doc-2',
+        filename: 'duplicate_doc.pdf',
+        failure_reason: 'duplicate_content',
+        failure_stage: 'ingestion',
+        created_at: '2024-01-01T11:00:00Z',
+        retry_count: 0
+      },
+      {
+        id: 'doc-3',
+        filename: 'large_file.pdf',
+        failure_reason: 'file_too_large',
+        failure_stage: 'validation',
+        created_at: '2024-01-01T12:00:00Z',
+        retry_count: 2
+      }
+    ],
+    pagination: {
+      total: 3,
+      limit: 25,
+      offset: 0,
+      has_more: false
+    },
+    statistics: {
+      total_failed: 3,
+      failure_categories: [
+        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
+        { reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
+        { reason: 'file_too_large', display_name: 'File Too Large', count: 1 }
+      ]
+    }
+  };
+
+  it('should fetch failed documents with default parameters', async () => {
+    mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });
+
+    const result = await documentService.getFailedDocuments();
+
+    expect(mockGetFailedDocuments).toHaveBeenCalledWith();
+    expect(result.data).toEqual(mockFailedDocumentsResponse);
+    expect(result.data.documents).toHaveLength(3);
+  });
+
+  it('should filter by stage parameter', async () => {
+    const ocrOnlyResponse = {
+      ...mockFailedDocumentsResponse,
+      documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure
+      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
+      statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
+    };
+
+    mockGetFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse });
+
+    const result = await documentService.getFailedDocuments(25, 0, 'ocr');
+
+    expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr');
+    expect(result.data.documents).toHaveLength(1);
+    expect(result.data.documents[0].failure_stage).toBe('ocr');
+  });
+
+  it('should filter by reason parameter', async () => {
+    const duplicateOnlyResponse = {
+      ...mockFailedDocumentsResponse,
+      documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure
+      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
+      statistics: { total_failed: 1, failure_categories: [{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }] }
+    };
+
+    mockGetFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse });
+
+    const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content');
+
+    expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, undefined, 'duplicate_content');
+    expect(result.data.documents).toHaveLength(1);
+    expect(result.data.documents[0].failure_reason).toBe('duplicate_content');
+  });
+
+  it('should filter by both stage and reason', async () => {
+    const filteredResponse = {
+      ...mockFailedDocumentsResponse,
+      documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence
+      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
+      statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
+    };
+
+    mockGetFailedDocuments.mockResolvedValue({ data: filteredResponse });
+
+    const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence');
+
+    expect(mockGetFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence');
+    expect(result.data.documents).toHaveLength(1);
+    expect(result.data.documents[0].failure_stage).toBe('ocr');
+    expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence');
+  });
+
+  it('should handle custom pagination', async () => {
+    mockGetFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });
+
+    await documentService.getFailedDocuments(10, 20);
+
+    expect(mockGetFailedDocuments).toHaveBeenCalledWith(10, 20);
+  });
+
+  it('should handle empty results', async () => {
+    const emptyResponse = {
+      documents: [],
+      pagination: { total: 0, limit: 25, offset: 0, has_more: false },
+      statistics: { total_failed: 0, failure_categories: [] }
+    };
+
+    mockGetFailedDocuments.mockResolvedValue({ data: emptyResponse });
+
+    const result = await documentService.getFailedDocuments();
+
+    expect(result.data.documents).toHaveLength(0);
+    expect(result.data.statistics.total_failed).toBe(0);
+  });
+});
+
+describe('documentService.retryOcr', () => {
+  it('should retry OCR for a document successfully', async () => {
+    const mockRetryResponse = {
+      data: {
+        success: true,
+        message: 'OCR retry queued successfully',
+        document_id: 'doc-123'
+      },
+      status: 200,
+      statusText: 'OK',
+      headers: {},
+      config: {},
+    };
+
+    mockRetryOcr.mockResolvedValue(mockRetryResponse);
+
+    const result = await documentService.retryOcr('doc-123');
+
+    expect(mockRetryOcr).toHaveBeenCalledWith('doc-123');
+    expect(result.data.success).toBe(true);
+    expect(result.data.document_id).toBe('doc-123');
+  });
+
+  it('should handle retry errors', async () => {
+    const mockError = {
+      response: {
+        status: 404,
+        data: { error: 'Document not found' }
+      }
+    };
+
+    mockRetryOcr.mockRejectedValue(mockError);
+
+    await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({
+      response: { status: 404 }
+    });
+  });
+
+  it('should handle network errors', async () => {
+    mockRetryOcr.mockRejectedValue(new Error('Network error'));
+
+    await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error');
+  });
+});

@@ -0,0 +1,283 @@
+use crate::db::constraint_validation::ConstraintValidator;
+
+/// Simple unit tests for failed_documents functionality
+/// These tests focus on business logic and constraint validation
+/// without requiring live database connections during compilation
+#[cfg(test)]
+mod failed_documents_unit_tests {
+    use super::*;
+
+    #[test]
+    fn test_constraint_validator_failure_reasons() {
+        // Test all valid failure reasons
+        let valid_reasons = [
+            "duplicate_content", "duplicate_filename", "unsupported_format",
+            "file_too_large", "file_corrupted", "access_denied",
+            "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
+            "pdf_parsing_error", "storage_quota_exceeded", "network_error",
+            "permission_denied", "virus_detected", "invalid_structure",
+            "policy_violation", "other"
+        ];
+
+        for reason in valid_reasons {
+            assert!(
+                ConstraintValidator::validate_failure_reason(reason).is_ok(),
+                "Expected '{}' to be valid",
+                reason
+            );
+        }
+
+        // Test invalid failure reasons
+        let invalid_reasons = [
+            "invalid_reason", "unknown", "timeout", "migration_completed",
+            "", "random_text", "failure", "error"
+        ];
+
+        for reason in invalid_reasons {
+            assert!(
+                ConstraintValidator::validate_failure_reason(reason).is_err(),
+                "Expected '{}' to be invalid",
+                reason
+            );
+        }
+    }
+
+    #[test]
+    fn test_constraint_validator_failure_stages() {
+        // Test all valid failure stages
+        let valid_stages = [
+            "ingestion", "validation", "ocr", "storage", "processing", "sync"
+        ];
+
+        for stage in valid_stages {
+            assert!(
+                ConstraintValidator::validate_failure_stage(stage).is_ok(),
+                "Expected '{}' to be valid",
+                stage
+            );
+        }
+
+        // Test invalid failure stages
+        let invalid_stages = [
+            "invalid_stage", "unknown", "failed", "error", "", "random_text"
+        ];
+
+        for stage in invalid_stages {
+            assert!(
+                ConstraintValidator::validate_failure_stage(stage).is_err(),
+                "Expected '{}' to be invalid",
+                stage
+            );
+        }
+    }
+
+    #[test]
+    fn test_legacy_ocr_failure_mapping() {
+        let test_cases = [
+            (Some("low_ocr_confidence"), "low_ocr_confidence"),
+            (Some("timeout"), "ocr_timeout"),
+            (Some("memory_limit"), "ocr_memory_limit"),
+            (Some("pdf_parsing_error"), "pdf_parsing_error"),
+            (Some("corrupted"), "file_corrupted"),
+            (Some("file_corrupted"), "file_corrupted"),
+            (Some("unsupported_format"), "unsupported_format"),
+            (Some("access_denied"), "access_denied"),
+            (Some("unknown"), "other"),
+            (None, "other"),
+            (Some("unmapped_value"), "other"),
+            (Some(""), "other"),
+        ];
+
+        for (input, expected) in test_cases {
+            let result = ConstraintValidator::map_legacy_ocr_failure_reason(input);
+            assert_eq!(
+                result, expected,
+                "Failed for input: {:?}. Expected '{}', got '{}'",
+                input, expected, result
+            );
+        }
+    }
+
+    #[test]
+    fn test_mapped_legacy_values_are_valid() {
+        // Ensure all mapped legacy values are actually valid according to our constraints
+        let legacy_values = [
+            Some("low_ocr_confidence"),
+            Some("timeout"),
+            Some("memory_limit"),
+            Some("pdf_parsing_error"),
+            Some("corrupted"),
+            Some("file_corrupted"),
+            Some("unsupported_format"),
+            Some("access_denied"),
+            Some("unknown"),
+            None,
+            Some("random_unmapped_value"),
+        ];
+
+        for legacy_value in legacy_values {
+            let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(legacy_value);
+            assert!(
+                ConstraintValidator::validate_failure_reason(mapped).is_ok(),
+                "Mapped value '{}' from legacy '{:?}' should be valid",
+                mapped, legacy_value
+            );
+        }
+    }
+
+    #[test]
+    fn test_batch_validation() {
+        // Test valid batch
+        let valid_batch = ["other", "low_ocr_confidence", "pdf_parsing_error", "duplicate_content"];
+        assert!(ConstraintValidator::validate_failure_reasons_batch(&valid_batch).is_ok());
+
+        // Test invalid batch
+        let invalid_batch = ["other", "invalid_reason", "timeout", "low_ocr_confidence"];
+        let result = ConstraintValidator::validate_failure_reasons_batch(&invalid_batch);
+        assert!(result.is_err());
+
+        let errors = result.unwrap_err();
+        assert_eq!(errors.len(), 2); // Should have 2 invalid reasons
+        assert!(errors.iter().any(|e| e.contains("invalid_reason")));
+        assert!(errors.iter().any(|e| e.contains("timeout")));
+
+        // Test empty batch
+        let empty_batch: &[&str] = &[];
+        assert!(ConstraintValidator::validate_failure_reasons_batch(empty_batch).is_ok());
+    }
+
+    #[test]
+    fn test_constraint_error_messages() {
+        let result = ConstraintValidator::validate_failure_reason("invalid_reason");
+        assert!(result.is_err());
+
+        let error_msg = result.unwrap_err();
+        assert!(error_msg.contains("Invalid failure_reason 'invalid_reason'"));
+        assert!(error_msg.contains("Valid values are:"));
+        assert!(error_msg.contains("low_ocr_confidence"));
+        assert!(error_msg.contains("other"));
+
+        let stage_result = ConstraintValidator::validate_failure_stage("invalid_stage");
+        assert!(stage_result.is_err());
+
+        let stage_error = stage_result.unwrap_err();
+        assert!(stage_error.contains("Invalid failure_stage 'invalid_stage'"));
+        assert!(stage_error.contains("Valid values are:"));
+        assert!(stage_error.contains("ingestion"));
+        assert!(stage_error.contains("ocr"));
+    }
+
+    #[test]
+    fn test_constraint_validation_comprehensive() {
+        // Test that our enum values comprehensively cover expected failure scenarios
+
+        // OCR-related failures
+        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("ocr_timeout").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("ocr_memory_limit").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("pdf_parsing_error").is_ok());
+
+        // File-related failures
+        assert!(ConstraintValidator::validate_failure_reason("file_too_large").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("file_corrupted").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("unsupported_format").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("access_denied").is_ok());
+
+        // Duplicate detection
+        assert!(ConstraintValidator::validate_failure_reason("duplicate_content").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("duplicate_filename").is_ok());
+
+        // System-related failures
+        assert!(ConstraintValidator::validate_failure_reason("storage_quota_exceeded").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("network_error").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("permission_denied").is_ok());
+
+        // Security-related failures
+        assert!(ConstraintValidator::validate_failure_reason("virus_detected").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("policy_violation").is_ok());
+        assert!(ConstraintValidator::validate_failure_reason("invalid_structure").is_ok());
+
+        // Fallback
+        assert!(ConstraintValidator::validate_failure_reason("other").is_ok());
+    }
+
+    #[test]
+    fn test_failure_stages_comprehensive() {
+        // Test that our stage enum covers the document processing pipeline
+
+        // Initial processing stages
+        assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
+        assert!(ConstraintValidator::validate_failure_stage("validation").is_ok());
+
+        // Core processing stages
+        assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
+        assert!(ConstraintValidator::validate_failure_stage("processing").is_ok());
+
+        // Storage and sync stages
+        assert!(ConstraintValidator::validate_failure_stage("storage").is_ok());
+        assert!(ConstraintValidator::validate_failure_stage("sync").is_ok());
+    }
+
+    #[test]
+    fn test_legacy_mapping_completeness() {
+        // Ensure we handle all possible legacy OCR failure reasons that could exist
+        let legacy_ocr_reasons = [
+            "low_ocr_confidence",
+            "timeout",
+            "memory_limit",
+            "pdf_parsing_error",
+            "corrupted",
+            "file_corrupted",
+            "unsupported_format",
+            "access_denied",
+            "unknown",
+            "some_new_unmapped_reason"
+        ];
+
+        for legacy_reason in legacy_ocr_reasons {
+            let mapped = ConstraintValidator::map_legacy_ocr_failure_reason(Some(legacy_reason));
+
+            // All mapped values should be valid
+            assert!(
+                ConstraintValidator::validate_failure_reason(mapped).is_ok(),
+                "Legacy reason '{}' maps to '{}' which should be valid",
+                legacy_reason, mapped
+            );
+
+            // Unmapped values should fall back to "other"
+            if !["low_ocr_confidence", "timeout", "memory_limit", "pdf_parsing_error",
+                 "corrupted", "file_corrupted", "unsupported_format", "access_denied", "unknown"]
+                .contains(&legacy_reason) {
+                assert_eq!(mapped, "other", "Unmapped legacy reason should fall back to 'other'");
+            }
+        }
+    }
+
+    #[test]
+    fn test_case_sensitivity() {
+        // Our validation should be case-sensitive
+        assert!(ConstraintValidator::validate_failure_reason("Low_OCR_Confidence").is_err());
+        assert!(ConstraintValidator::validate_failure_reason("LOW_OCR_CONFIDENCE").is_err());
+        assert!(ConstraintValidator::validate_failure_reason("OCR").is_err());
+        assert!(ConstraintValidator::validate_failure_reason("INGESTION").is_err());
+
+        // Only exact lowercase matches should work
+        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
+        assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
+        assert!(ConstraintValidator::validate_failure_stage("ingestion").is_ok());
+    }
+
+    #[test]
+    fn test_whitespace_handling() {
+        // Validation should not accept values with extra whitespace
+        assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence").is_err());
+        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence ").is_err());
+        assert!(ConstraintValidator::validate_failure_reason(" low_ocr_confidence ").is_err());
+        assert!(ConstraintValidator::validate_failure_stage(" ocr").is_err());
+        assert!(ConstraintValidator::validate_failure_stage("ocr ").is_err());
+
+        // Only exact matches should work
+        assert!(ConstraintValidator::validate_failure_reason("low_ocr_confidence").is_ok());
+        assert!(ConstraintValidator::validate_failure_stage("ocr").is_ok());
+    }
+}

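The ConstraintValidator exercised above lives in crate::db::constraint_validation and its implementation is not part of this commit. For orientation, a minimal sketch of an implementation that would satisfy these tests; the const-slice lookup and anything beyond the fragments the assertions pin down are assumptions, not the project's actual code:

// Hypothetical sketch of crate::db::constraint_validation, not the actual code.
pub struct ConstraintValidator;

impl ConstraintValidator {
    const VALID_REASONS: &'static [&'static str] = &[
        "duplicate_content", "duplicate_filename", "unsupported_format",
        "file_too_large", "file_corrupted", "access_denied",
        "low_ocr_confidence", "ocr_timeout", "ocr_memory_limit",
        "pdf_parsing_error", "storage_quota_exceeded", "network_error",
        "permission_denied", "virus_detected", "invalid_structure",
        "policy_violation", "other",
    ];

    const VALID_STAGES: &'static [&'static str] =
        &["ingestion", "validation", "ocr", "storage", "processing", "sync"];

    // Exact, case-sensitive match with no trimming, as the
    // test_case_sensitivity and test_whitespace_handling tests require.
    pub fn validate_failure_reason(reason: &str) -> Result<(), String> {
        if Self::VALID_REASONS.contains(&reason) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_reason '{}'. Valid values are: {}",
                reason,
                Self::VALID_REASONS.join(", ")
            ))
        }
    }

    pub fn validate_failure_stage(stage: &str) -> Result<(), String> {
        if Self::VALID_STAGES.contains(&stage) {
            Ok(())
        } else {
            Err(format!(
                "Invalid failure_stage '{}'. Valid values are: {}",
                stage,
                Self::VALID_STAGES.join(", ")
            ))
        }
    }

    // Legacy OCR failure strings collapse onto the new enum; anything
    // unrecognized (including None and "") falls back to "other".
    pub fn map_legacy_ocr_failure_reason(legacy: Option<&str>) -> &'static str {
        match legacy {
            Some("low_ocr_confidence") => "low_ocr_confidence",
            Some("timeout") => "ocr_timeout",
            Some("memory_limit") => "ocr_memory_limit",
            Some("pdf_parsing_error") => "pdf_parsing_error",
            Some("corrupted") | Some("file_corrupted") => "file_corrupted",
            Some("unsupported_format") => "unsupported_format",
            Some("access_denied") => "access_denied",
            _ => "other",
        }
    }

    // Collects one message per invalid entry rather than failing fast,
    // which is what test_batch_validation's length assertion relies on.
    pub fn validate_failure_reasons_batch(reasons: &[&str]) -> Result<(), Vec<String>> {
        let errors: Vec<String> = reasons
            .iter()
            .filter_map(|r| Self::validate_failure_reason(r).err())
            .collect();
        if errors.is_empty() { Ok(()) } else { Err(errors) }
    }
}

The error strings only need to contain the fragments the tests assert on ("Invalid failure_reason", "Valid values are:", and the listed values), which the format! calls above satisfy.
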
@@ -1,5 +1,4 @@
-use sqlx::PgPool;
 use crate::tests::helpers::setup_test_db;

 #[cfg(test)]
 mod migration_constraint_tests {

@@ -18,17 +17,17 @@ mod migration_constraint_tests {
         ];

         for reason in valid_reasons {
-            let result = sqlx::query!(
+            let result = sqlx::query(
                 r#"
                 INSERT INTO failed_documents (
                     user_id, filename, failure_reason, failure_stage, ingestion_source
                 ) VALUES (
                     gen_random_uuid(), $1, $2, 'validation', 'test'
                 )
-                "#,
-                format!("test_file_{}.txt", reason),
-                reason
+                "#
             )
+            .bind(format!("test_file_{}.txt", reason))
+            .bind(reason)
             .execute(&pool)
             .await;

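The recurring change in this hunk and the ones that follow is the same swap: sqlx's query! macro type-checks SQL against a database at compile time (via DATABASE_URL or offline metadata), which is presumably what broke these test builds, as the new unit-test file's doc comment ("without requiring live database connections during compilation") suggests. The runtime sqlx::query API compiles unconditionally and takes parameters through .bind in positional order. A minimal sketch of the new pattern, reusing the table and columns from the test above:

use sqlx::PgPool;

// Runtime-checked variant: compiles without a database; SQL errors
// surface when the query executes rather than at build time.
async fn insert_failed(pool: &PgPool, filename: &str) -> Result<(), sqlx::Error> {
    sqlx::query(
        "INSERT INTO failed_documents (user_id, filename, failure_reason, failure_stage, ingestion_source) \
         VALUES (gen_random_uuid(), $1, 'other', 'validation', 'test')",
    )
    .bind(filename) // binds to $1
    .execute(pool)
    .await?;
    Ok(())
}

The trade-off is losing compile-time verification of the SQL and its types, which is acceptable here since the tests run against a live database anyway.
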
@@ -45,17 +44,17 @@ mod migration_constraint_tests {
         ];

         for reason in invalid_reasons {
-            let result = sqlx::query!(
+            let result = sqlx::query(
                 r#"
                 INSERT INTO failed_documents (
                     user_id, filename, failure_reason, failure_stage, ingestion_source
                 ) VALUES (
                     gen_random_uuid(), $1, $2, 'validation', 'test'
                 )
-                "#,
-                format!("test_file_{}.txt", reason),
-                reason
+                "#
             )
+            .bind(format!("test_file_{}.txt", reason))
+            .bind(reason)
             .execute(&pool)
             .await;

@@ -71,17 +70,17 @@ mod migration_constraint_tests {
         ];

         for stage in valid_stages {
-            let result = sqlx::query!(
+            let result = sqlx::query(
                 r#"
                 INSERT INTO failed_documents (
                     user_id, filename, failure_reason, failure_stage, ingestion_source
                 ) VALUES (
                     gen_random_uuid(), $1, 'other', $2, 'test'
                 )
-                "#,
-                format!("test_file_{}.txt", stage),
-                stage
+                "#
             )
+            .bind(format!("test_file_{}.txt", stage))
+            .bind(stage)
             .execute(&pool)
             .await;

@@ -123,17 +122,17 @@ mod migration_constraint_tests {
                 input_reason, expected_output);

             // Test that the mapped value works in the database
-            let result = sqlx::query!(
+            let result = sqlx::query(
                 r#"
                 INSERT INTO failed_documents (
                     user_id, filename, failure_reason, failure_stage, ingestion_source
                 ) VALUES (
                     gen_random_uuid(), $1, $2, 'ocr', 'migration'
                 )
-                "#,
-                format!("migration_test_{}.txt", input_reason.replace("/", "_")),
-                mapped_reason
+                "#
             )
+            .bind(format!("migration_test_{}.txt", input_reason.replace("/", "_")))
+            .bind(mapped_reason)
             .execute(&pool)
             .await;

@@ -1,4 +1,4 @@
-use sqlx::PgPool;
+use sqlx::{PgPool, Row};
 use uuid::Uuid;

 #[cfg(test)]

@@ -22,7 +22,7 @@ mod migration_integration_tests {

         // Insert test documents
         for (filename, failure_reason, error_msg) in &test_documents {
-            sqlx::query!(
+            sqlx::query(
                 r#"
                 INSERT INTO documents (
                     user_id, filename, original_filename, file_path, file_size,

@@ -31,30 +31,29 @@ mod migration_integration_tests {
                     $1, $2, $2, '/fake/path', 1000, 'application/pdf',
                     'failed', $3, $4
                 )
-                "#,
-                user_id,
-                filename,
-                *failure_reason,
-                error_msg
+                "#
             )
+            .bind(user_id)
+            .bind(filename)
+            .bind(*failure_reason)
+            .bind(error_msg)
             .execute(&pool)
             .await
             .expect("Failed to insert test document");
         }

         // Count documents before migration
-        let before_count = sqlx::query_scalar!(
+        let before_count: i64 = sqlx::query_scalar(
             "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
         )
         .fetch_one(&pool)
         .await
-        .expect("Failed to count documents")
-        .unwrap_or(0);
+        .expect("Failed to count documents");

         assert_eq!(before_count, test_documents.len() as i64);

         // Simulate the migration logic
-        let migration_result = sqlx::query!(
+        let migration_result = sqlx::query(
             r#"
             INSERT INTO failed_documents (
                 user_id, filename, original_filename, file_path, file_size,

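A detail worth noting in this hunk: query_scalar! types COUNT(*) as Option<i64> because the macro treats aggregate expressions as nullable, hence the old .unwrap_or(0); the runtime query_scalar decodes into whatever type the caller annotates, and Postgres COUNT never returns NULL, so i64 works directly. A small sketch, assuming the same documents table:

use sqlx::PgPool;

async fn count_failed(pool: &PgPool) -> Result<i64, sqlx::Error> {
    // The scalar's Rust type comes from the annotation
    // (or a turbofish, e.g. sqlx::query_scalar::<_, i64>(...)).
    let count: i64 =
        sqlx::query_scalar("SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'")
            .fetch_one(pool)
            .await?;
    Ok(count)
}
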
@@ -87,13 +86,12 @@ mod migration_integration_tests {
         assert!(migration_result.is_ok(), "Migration should succeed");

         // Verify all documents were migrated
-        let migrated_count = sqlx::query_scalar!(
+        let migrated_count: i64 = sqlx::query_scalar(
             "SELECT COUNT(*) FROM failed_documents WHERE ingestion_source = 'migration'"
         )
         .fetch_one(&pool)
         .await
-        .expect("Failed to count migrated documents")
-        .unwrap_or(0);
+        .expect("Failed to count migrated documents");

         assert_eq!(migrated_count, test_documents.len() as i64);

@@ -108,24 +106,24 @@ mod migration_integration_tests {
         ];

         for (filename, expected_reason) in mapping_tests {
-            let actual_reason = sqlx::query_scalar!(
-                "SELECT failure_reason FROM failed_documents WHERE filename = $1",
-                filename
+            let actual_reason: String = sqlx::query_scalar(
+                "SELECT failure_reason FROM failed_documents WHERE filename = $1"
             )
+            .bind(filename)
             .fetch_one(&pool)
             .await
             .expect("Failed to fetch failure reason");

             assert_eq!(
-                actual_reason.as_deref(),
-                Some(expected_reason),
+                actual_reason,
+                expected_reason,
                 "Incorrect mapping for {}",
                 filename
             );
         }

         // Test deletion of original failed documents
-        let delete_result = sqlx::query!(
+        let delete_result = sqlx::query(
             "DELETE FROM documents WHERE ocr_status = 'failed'"
         )
         .execute(&pool)

@@ -134,18 +132,17 @@ mod migration_integration_tests {
         assert!(delete_result.is_ok(), "Delete should succeed");

         // Verify cleanup
-        let remaining_failed = sqlx::query_scalar!(
+        let remaining_failed: i64 = sqlx::query_scalar(
             "SELECT COUNT(*) FROM documents WHERE ocr_status = 'failed'"
         )
         .fetch_one(&pool)
         .await
-        .expect("Failed to count remaining documents")
-        .unwrap_or(0);
+        .expect("Failed to count remaining documents");

         assert_eq!(remaining_failed, 0);

         // Verify failed_documents table integrity
-        let failed_docs = sqlx::query!(
+        let failed_docs = sqlx::query(
             "SELECT filename, failure_reason, failure_stage FROM failed_documents ORDER BY filename"
         )
         .fetch_all(&pool)

@@ -156,11 +153,13 @@ mod migration_integration_tests {

         for doc in &failed_docs {
             // All should have proper stage
-            assert_eq!(doc.failure_stage, "ocr");
+            let stage: String = doc.get("failure_stage");
+            assert_eq!(stage, "ocr");

             // All should have valid failure_reason
+            let reason: String = doc.get("failure_reason");
             assert!(matches!(
-                doc.failure_reason.as_str(),
+                reason.as_str(),
                 "low_ocr_confidence" | "ocr_timeout" | "ocr_memory_limit" |
                 "file_corrupted" | "other"
             ));

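Rows returned by the runtime sqlx::query are plain PgRows rather than the per-query structs the macro generates, so columns are now read by name through the sqlx::Row trait, which is why Row joined the imports in this file's first hunk. Row::get panics on a missing or mistyped column; a brief sketch using the fallible try_get instead:

use sqlx::{PgPool, Row};

async fn stages(pool: &PgPool) -> Result<Vec<String>, sqlx::Error> {
    let rows = sqlx::query("SELECT failure_stage FROM failed_documents")
        .fetch_all(pool)
        .await?;
    rows.iter()
        .map(|row| row.try_get::<String, _>("failure_stage")) // fallible variant of get()
        .collect()
}
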
@@ -181,7 +180,7 @@ mod migration_integration_tests {
         ];

         for (filename, failure_reason, error_msg) in &edge_cases {
-            sqlx::query!(
+            sqlx::query(
                 r#"
                 INSERT INTO documents (
                     user_id, filename, original_filename, file_path, file_size,

@@ -190,19 +189,19 @@ mod migration_integration_tests {
                     $1, $2, $2, '/fake/path', 1000, 'application/pdf',
                     'failed', $3, $4
                 )
-                "#,
-                user_id,
-                filename,
-                *failure_reason,
-                error_msg
+                "#
             )
+            .bind(user_id)
+            .bind(filename)
+            .bind(*failure_reason)
+            .bind(error_msg)
             .execute(&pool)
             .await
             .expect("Failed to insert edge case document");
         }

         // Run migration on edge cases
-        let migration_result = sqlx::query!(
+        let migration_result = sqlx::query(
             r#"
             INSERT INTO failed_documents (
                 user_id, filename, failure_reason, failure_stage, ingestion_source

@@ -231,7 +230,7 @@ mod migration_integration_tests {
         assert!(migration_result.is_ok(), "Migration should handle edge cases");

         // Verify all edge cases mapped to 'other' (since they're not in our mapping)
-        let edge_case_mappings = sqlx::query!(
+        let edge_case_mappings = sqlx::query(
             "SELECT filename, failure_reason FROM failed_documents WHERE ingestion_source = 'migration_edge_test'"
         )
         .fetch_all(&pool)

@@ -239,8 +238,10 @@ mod migration_integration_tests {
         .expect("Failed to fetch edge case mappings");

         for mapping in edge_case_mappings {
-            assert_eq!(mapping.failure_reason, "other",
-                "Edge case '{}' should map to 'other'", mapping.filename);
+            let filename: String = mapping.get("filename");
+            let failure_reason: String = mapping.get("failure_reason");
+            assert_eq!(failure_reason, "other",
+                "Edge case '{}' should map to 'other'", filename);
         }
     }

@@ -250,7 +251,7 @@ mod migration_integration_tests {
         // during migration, the constraints will catch it

         // Try to insert data that violates constraints
-        let invalid_insert = sqlx::query!(
+        let invalid_insert = sqlx::query(
             r#"
             INSERT INTO failed_documents (
                 user_id, filename, failure_reason, failure_stage, ingestion_source

@@ -19,3 +19,4 @@ mod users_tests;
 mod generic_migration_tests;
 mod migration_constraint_tests;
 mod migration_integration_tests;
+mod failed_documents_unit_tests;