755 lines
24 KiB
TypeScript
755 lines
24 KiB
TypeScript
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
import { type OcrResponse, type Document } from '../api';
|
|
import { createMockApiServices } from '../../test/test-utils';
|
|
|
|
// Centralized API mocks shared by every suite in this file.
const mockServices = createMockApiServices();
const mockDocumentService = mockServices.documentService;

// Swap only `documentService` inside the api module for the shared mock; all
// other exports are spread through from the actual module unchanged.
vi.mock('../api', async () => ({
  ...(await vi.importActual('../api')),
  documentService: mockDocumentService,
}));

// Import after mocking. The dynamic import runs once the mock above has been
// registered, so `documentService` here is the mocked service.
const { documentService } = await import('../api');
|
|
|
|
describe('documentService', () => {
  /**
   * Wraps a payload in the status-200 'OK' response envelope that the mocked
   * service methods are made to resolve with in these tests.
   */
  function okResponse<T>(data: T) {
    return { data, status: 200, statusText: 'OK', headers: {}, config: {} };
  }

  // Clear all mocks before each test so call assertions only see the calls
  // made by the test that is currently running.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  describe('getOcrText', () => {
    // Fixture: OCR completed and produced text.
    const completedOcr: OcrResponse = {
      document_id: 'doc-123',
      filename: 'test_document.pdf',
      has_ocr_text: true,
      ocr_text: 'This is extracted OCR text content.',
      ocr_confidence: 95.5,
      ocr_word_count: 150,
      ocr_processing_time_ms: 1200,
      ocr_status: 'completed',
      ocr_error: null,
      ocr_completed_at: '2024-01-01T00:05:00Z',
    };

    it('should fetch OCR text for a document', async () => {
      mockDocumentService.getOcrText.mockResolvedValue(okResponse(completedOcr));

      const { data } = await documentService.getOcrText('doc-123');

      expect(data).toEqual(completedOcr);
      expect(data.document_id).toBe('doc-123');
      expect(data.has_ocr_text).toBe(true);
      expect(data.ocr_text).toBe('This is extracted OCR text content.');
      expect(data.ocr_confidence).toBe(95.5);
      expect(data.ocr_word_count).toBe(150);
    });

    it('should handle OCR response without text', async () => {
      // Fixture: OCR still pending, so every OCR result field is null.
      const pendingOcr: OcrResponse = {
        document_id: 'doc-456',
        filename: 'text_file.txt',
        has_ocr_text: false,
        ocr_text: null,
        ocr_confidence: null,
        ocr_word_count: null,
        ocr_processing_time_ms: null,
        ocr_status: 'pending',
        ocr_error: null,
        ocr_completed_at: null,
      };
      mockDocumentService.getOcrText.mockResolvedValue(okResponse(pendingOcr));

      const { data } = await documentService.getOcrText('doc-456');

      expect(data).toEqual(pendingOcr);
      expect(data.has_ocr_text).toBe(false);
      expect(data.ocr_text).toBeNull();
      expect(data.ocr_confidence).toBeNull();
    });

    it('should handle OCR error response', async () => {
      // Fixture: the request itself succeeds (200) but the payload reports a
      // failed OCR run.
      const failedOcr: OcrResponse = {
        document_id: 'doc-789',
        filename: 'corrupted_file.pdf',
        has_ocr_text: false,
        ocr_text: null,
        ocr_confidence: null,
        ocr_word_count: null,
        ocr_processing_time_ms: 5000,
        ocr_status: 'failed',
        ocr_error: 'Failed to process document: corrupted file format',
        ocr_completed_at: '2024-01-01T00:05:00Z',
      };
      mockDocumentService.getOcrText.mockResolvedValue(okResponse(failedOcr));

      const { data } = await documentService.getOcrText('doc-789');

      expect(data).toEqual(failedOcr);
      expect(data.ocr_status).toBe('failed');
      expect(data.ocr_error).toBe('Failed to process document: corrupted file format');
      expect(data.has_ocr_text).toBe(false);
    });

    it('should make correct API call', async () => {
      mockDocumentService.getOcrText.mockResolvedValue({ data: completedOcr });

      await documentService.getOcrText('doc-123');

      expect(mockDocumentService.getOcrText).toHaveBeenCalledWith('doc-123');
    });

    it('should handle network errors', async () => {
      const networkFailure = new Error('Network Error');
      mockDocumentService.getOcrText.mockRejectedValue(networkFailure);

      const pendingCall = documentService.getOcrText('doc-123');

      await expect(pendingCall).rejects.toThrow('Network Error');
    });

    it('should handle 404 errors for non-existent documents', async () => {
      const notFound = {
        response: {
          status: 404,
          data: { error: 'Document not found' },
        },
      };
      mockDocumentService.getOcrText.mockRejectedValue(notFound);

      const pendingCall = documentService.getOcrText('non-existent-doc');

      await expect(pendingCall).rejects.toMatchObject({
        response: { status: 404 },
      });
    });

    it('should handle 401 unauthorized errors', async () => {
      const unauthorized = {
        response: {
          status: 401,
          data: { error: 'Unauthorized' },
        },
      };
      mockDocumentService.getOcrText.mockRejectedValue(unauthorized);

      const pendingCall = documentService.getOcrText('doc-123');

      await expect(pendingCall).rejects.toMatchObject({
        response: { status: 401 },
      });
    });
  });

  describe('list', () => {
    // Fixtures: one document with OCR metadata and one without.
    const documentsFixture: Document[] = [
      {
        id: 'doc-1',
        filename: 'document1.pdf',
        original_filename: 'document1.pdf',
        file_size: 1024000,
        mime_type: 'application/pdf',
        tags: ['pdf', 'document'],
        created_at: '2024-01-01T00:00:00Z',
        has_ocr_text: true,
        ocr_confidence: 95.5,
        ocr_word_count: 150,
        ocr_processing_time_ms: 1200,
        ocr_status: 'completed',
      },
      {
        id: 'doc-2',
        filename: 'image.png',
        original_filename: 'image.png',
        file_size: 512000,
        mime_type: 'image/png',
        tags: ['image'],
        created_at: '2024-01-02T00:00:00Z',
        has_ocr_text: false,
        ocr_confidence: undefined,
        ocr_word_count: undefined,
        ocr_processing_time_ms: undefined,
        ocr_status: 'pending',
      },
    ];

    it('should fetch document list with OCR metadata', async () => {
      mockDocumentService.list.mockResolvedValue(okResponse(documentsFixture));

      const listing = await documentService.list(50, 0);

      expect(listing.data).toEqual(documentsFixture);
      expect(listing.data[0].has_ocr_text).toBe(true);
      expect(listing.data[0].ocr_confidence).toBe(95.5);
      expect(listing.data[1].has_ocr_text).toBe(false);
      expect(listing.data[1].ocr_confidence).toBeUndefined();
    });
  });

  describe('upload', () => {
    it('should upload file with multipart form data', async () => {
      const pdfFile = new File(['content'], 'test.pdf', { type: 'application/pdf' });
      const createdDocument: Document = {
        id: 'doc-new',
        filename: 'test.pdf',
        original_filename: 'test.pdf',
        file_size: 7,
        mime_type: 'application/pdf',
        tags: [],
        created_at: '2024-01-01T00:00:00Z',
        has_ocr_text: false,
        ocr_status: 'pending',
      };
      mockDocumentService.upload.mockResolvedValue({ data: createdDocument });

      const uploaded = await documentService.upload(pdfFile);

      expect(uploaded.data).toEqual(createdDocument);
      expect(mockDocumentService.upload).toHaveBeenCalledWith(pdfFile);
    });
  });

  describe('download', () => {
    it('should download file as blob', async () => {
      const pdfBlob = new Blob(['file content'], { type: 'application/pdf' });
      mockDocumentService.download.mockResolvedValue({ data: pdfBlob });

      const downloaded = await documentService.download('doc-123');

      expect(downloaded.data).toEqual(pdfBlob);
      expect(mockDocumentService.download).toHaveBeenCalledWith('doc-123');
    });
  });
});
|
|
|
|
describe('OcrResponse interface', () => {
  it('should have correct type structure', () => {
    // A fully populated value; the annotation makes the compiler check it
    // against the OcrResponse interface.
    const populated: OcrResponse = {
      document_id: 'doc-123',
      filename: 'test.pdf',
      has_ocr_text: true,
      ocr_text: 'Sample text',
      ocr_confidence: 95.5,
      ocr_word_count: 100,
      ocr_processing_time_ms: 1000,
      ocr_status: 'completed',
      ocr_error: null,
      ocr_completed_at: '2024-01-01T00:00:00Z',
    };

    // Field name paired with the runtime `typeof` result expected for it.
    const expectedRuntimeTypes = [
      ['document_id', 'string'],
      ['filename', 'string'],
      ['has_ocr_text', 'boolean'],
      ['ocr_text', 'string'],
      ['ocr_confidence', 'number'],
      ['ocr_word_count', 'number'],
      ['ocr_processing_time_ms', 'number'],
      ['ocr_status', 'string'],
      ['ocr_completed_at', 'string'],
    ] as const;

    for (const [field, runtimeType] of expectedRuntimeTypes) {
      expect(typeof populated[field]).toBe(runtimeType);
    }
    expect(populated.ocr_error).toBeNull();
  });

  it('should allow optional/null fields', () => {
    // Only the identifying fields and status carry values; the rest are
    // null or undefined.
    const minimal: OcrResponse = {
      document_id: 'doc-456',
      filename: 'text.txt',
      has_ocr_text: false,
      ocr_text: null,
      ocr_confidence: undefined,
      ocr_word_count: undefined,
      ocr_processing_time_ms: undefined,
      ocr_status: 'pending',
      ocr_error: undefined,
      ocr_completed_at: undefined,
    };

    expect(minimal.has_ocr_text).toBe(false);
    expect(minimal.ocr_text).toBeNull();
    expect(minimal.ocr_confidence).toBeUndefined();
  });
});
|
|
|
|
// Tests for `documentService.deleteLowConfidence(maxConfidence, previewOnly)`
// against the shared service mock.
describe('documentService.deleteLowConfidence', () => {
  // This suite is outside the `documentService` suite, so it needs its own
  // reset. Without it, calls recorded by one test (e.g. `(30.0, false)`)
  // would satisfy `toHaveBeenCalledWith` in a later test that never made them.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should delete low confidence documents successfully', async () => {
    const mockDeleteResponse = {
      data: {
        success: true,
        message: 'Successfully deleted 3 documents with OCR confidence below 30%',
        deleted_count: 3,
        matched_count: 3,
        successful_file_deletions: 3,
        failed_file_deletions: 0,
        ignored_file_creation_failures: 0,
        deleted_document_ids: ['doc-1', 'doc-2', 'doc-3'],
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockDeleteResponse);

    const result = await documentService.deleteLowConfidence(30.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(30.0, false);
    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(3);
    expect(result.data.matched_count).toBe(3);
    expect(result.data.deleted_document_ids).toHaveLength(3);
  });

  it('should preview low confidence documents without deleting', async () => {
    // A preview payload lists matches and carries no `deleted_count`.
    const mockPreviewResponse = {
      data: {
        success: true,
        message: 'Found 5 documents with OCR confidence below 50%',
        matched_count: 5,
        preview: true,
        document_ids: ['doc-1', 'doc-2', 'doc-3', 'doc-4', 'doc-5'],
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockPreviewResponse);

    const result = await documentService.deleteLowConfidence(50.0, true);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(50.0, true);
    expect(result.data.success).toBe(true);
    expect(result.data.preview).toBe(true);
    expect(result.data.matched_count).toBe(5);
    expect(result.data.document_ids).toHaveLength(5);
    expect(result.data).not.toHaveProperty('deleted_count');
  });

  it('should handle no matching documents', async () => {
    const mockEmptyResponse = {
      data: {
        success: true,
        message: 'No documents found with OCR confidence below 10%',
        deleted_count: 0,
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockEmptyResponse);

    const result = await documentService.deleteLowConfidence(10.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(10.0, false);
    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(0);
  });

  it('should handle validation errors for invalid confidence threshold', async () => {
    // The validation failure is modelled as a resolved response with
    // `success: false`, not as a rejected promise.
    const mockErrorResponse = {
      data: {
        success: false,
        message: 'max_confidence must be between 0.0 and 100.0',
        matched_count: 0,
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockErrorResponse);

    const result = await documentService.deleteLowConfidence(-10.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(-10.0, false);
    expect(result.data.success).toBe(false);
    expect(result.data.message).toContain('must be between 0.0 and 100.0');
  });

  it('should handle API errors gracefully', async () => {
    const mockError = new Error('Network error');
    mockDocumentService.deleteLowConfidence.mockRejectedValue(mockError);

    await expect(documentService.deleteLowConfidence(30.0, false))
      .rejects.toThrow('Network error');

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(30.0, false);
  });

  it('should use correct default values', async () => {
    const mockResponse = {
      data: { success: true, matched_count: 0 },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockResponse);

    // Test with explicit false value (the default).
    // NOTE(review): the flag is passed explicitly, so this does not exercise
    // the service's own default; confirm whether a one-argument call is
    // supported and should be covered instead.
    await documentService.deleteLowConfidence(40.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(40.0, false);
  });

  it('should handle partial deletion failures', async () => {
    // Three documents matched but only two were deleted, with one file
    // deletion failure reported.
    const mockPartialFailureResponse = {
      data: {
        success: true,
        message: 'Successfully deleted 2 documents with OCR confidence below 25%',
        deleted_count: 2,
        matched_count: 3,
        successful_file_deletions: 1,
        failed_file_deletions: 1,
        ignored_file_creation_failures: 1,
        deleted_document_ids: ['doc-1', 'doc-2'],
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockPartialFailureResponse);

    const result = await documentService.deleteLowConfidence(25.0, false);

    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(2);
    expect(result.data.matched_count).toBe(3);
    expect(result.data.failed_file_deletions).toBe(1);
    expect(result.data.ignored_file_creation_failures).toBe(1);
  });

  it('should properly encode confidence threshold values', async () => {
    const mockResponse = {
      data: { success: true, matched_count: 0 },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockResponse);

    // Test various confidence values, including both boundaries.
    const testValues = [0.0, 0.1, 30.5, 50.0, 99.9, 100.0];

    for (const confidence of testValues) {
      // Reset the call history so each iteration only sees its own call.
      mockDocumentService.deleteLowConfidence.mockClear();
      await documentService.deleteLowConfidence(confidence, true);
      expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(confidence, true);
    }
  });
});
|
|
|
|
// Tests for `documentService.getFailedOcrDocuments(limit?, offset?)` against
// the shared service mock.
describe('documentService.getFailedOcrDocuments', () => {
  // Fixture: two OCR-stage failures plus pagination and statistics.
  const mockFailedOcrResponse = {
    documents: [
      {
        id: 'doc-1',
        filename: 'failed_doc1.pdf',
        failure_reason: 'low_ocr_confidence',
        failure_stage: 'ocr',
        created_at: '2024-01-01T10:00:00Z',
        retry_count: 1,
      },
      {
        id: 'doc-2',
        filename: 'failed_doc2.pdf',
        failure_reason: 'pdf_parsing_error',
        failure_stage: 'ocr',
        created_at: '2024-01-01T11:00:00Z',
        retry_count: 0,
      },
    ],
    pagination: {
      total: 2,
      limit: 50,
      offset: 0,
      has_more: false,
    },
    statistics: {
      total_failed: 2,
      failure_categories: [
        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        { reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 },
      ],
    },
  };

  // This suite is outside the `documentService` suite, so it needs its own
  // reset. Otherwise `toHaveBeenCalledWith(...)` could be satisfied by a call
  // recorded in a different test.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch failed OCR documents successfully', async () => {
    const mockResponse = {
      data: mockFailedOcrResponse,
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.getFailedOcrDocuments.mockResolvedValue(mockResponse);

    const result = await documentService.getFailedOcrDocuments(50, 0);

    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith(50, 0);
    expect(result.data).toEqual(mockFailedOcrResponse);
    expect(result.data.documents).toHaveLength(2);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
    expect(result.data.pagination.total).toBe(2);
  });

  it('should handle pagination parameters correctly', async () => {
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });

    await documentService.getFailedOcrDocuments(25, 10);

    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith(25, 10);
  });

  it('should use default pagination when not specified', async () => {
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });

    await documentService.getFailedOcrDocuments();

    // The mock must have been invoked with no arguments at all.
    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith();
  });

  it('should handle empty results', async () => {
    const emptyResponse = {
      documents: [],
      pagination: { total: 0, limit: 50, offset: 0, has_more: false },
      statistics: { total_failed: 0, failure_categories: [] },
    };

    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: emptyResponse });

    const result = await documentService.getFailedOcrDocuments();

    expect(result.data.documents).toHaveLength(0);
    expect(result.data.pagination.total).toBe(0);
    expect(result.data.statistics.total_failed).toBe(0);
  });

  it('should handle API errors', async () => {
    const mockError = new Error('Network error');
    mockDocumentService.getFailedOcrDocuments.mockRejectedValue(mockError);

    await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error');
  });
});
|
|
|
|
// Tests for `documentService.getFailedDocuments(limit?, offset?, stage?, reason?)`
// against the shared service mock.
describe('documentService.getFailedDocuments', () => {
  // Fixture: three failures, one per stage (ocr, ingestion, validation).
  const mockFailedDocumentsResponse = {
    documents: [
      {
        id: 'doc-1',
        filename: 'failed_doc1.pdf',
        failure_reason: 'low_ocr_confidence',
        failure_stage: 'ocr',
        created_at: '2024-01-01T10:00:00Z',
        retry_count: 1,
      },
      {
        id: 'doc-2',
        filename: 'duplicate_doc.pdf',
        failure_reason: 'duplicate_content',
        failure_stage: 'ingestion',
        created_at: '2024-01-01T11:00:00Z',
        retry_count: 0,
      },
      {
        id: 'doc-3',
        filename: 'large_file.pdf',
        failure_reason: 'file_too_large',
        failure_stage: 'validation',
        created_at: '2024-01-01T12:00:00Z',
        retry_count: 2,
      },
    ],
    pagination: {
      total: 3,
      limit: 25,
      offset: 0,
      has_more: false,
    },
    statistics: {
      total_failed: 3,
      failure_categories: [
        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        { reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
        { reason: 'file_too_large', display_name: 'File Too Large', count: 1 },
      ],
    },
  };

  // This suite is outside the `documentService` suite, so it needs its own
  // reset. Otherwise a call recorded by one test could satisfy another
  // test's `toHaveBeenCalledWith(...)` assertion.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch failed documents with default parameters', async () => {
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });

    const result = await documentService.getFailedDocuments();

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith();
    expect(result.data).toEqual(mockFailedDocumentsResponse);
    expect(result.data.documents).toHaveLength(3);
  });

  it('should filter by stage parameter', async () => {
    const ocrOnlyResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: {
        total_failed: 1,
        failure_categories: [
          { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        ],
      },
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse });

    const result = await documentService.getFailedDocuments(25, 0, 'ocr');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
  });

  it('should filter by reason parameter', async () => {
    const duplicateOnlyResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: {
        total_failed: 1,
        failure_categories: [
          { reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
        ],
      },
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse });

    // `undefined` skips the stage argument so only the reason filter is set.
    const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, undefined, 'duplicate_content');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_reason).toBe('duplicate_content');
  });

  it('should filter by both stage and reason', async () => {
    const filteredResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: {
        total_failed: 1,
        failure_categories: [
          { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        ],
      },
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: filteredResponse });

    const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
    expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence');
  });

  it('should handle custom pagination', async () => {
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });

    await documentService.getFailedDocuments(10, 20);

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(10, 20);
  });

  it('should handle empty results', async () => {
    const emptyResponse = {
      documents: [],
      pagination: { total: 0, limit: 25, offset: 0, has_more: false },
      statistics: { total_failed: 0, failure_categories: [] },
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: emptyResponse });

    const result = await documentService.getFailedDocuments();

    expect(result.data.documents).toHaveLength(0);
    expect(result.data.statistics.total_failed).toBe(0);
  });
});
|
|
|
|
// Tests for `documentService.retryOcr(documentId)` against the shared service
// mock.
describe('documentService.retryOcr', () => {
  // This suite is outside the `documentService` suite, so it needs its own
  // reset to keep call assertions independent of other tests.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should retry OCR for a document successfully', async () => {
    const mockRetryResponse = {
      data: {
        success: true,
        message: 'OCR retry queued successfully',
        document_id: 'doc-123',
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.retryOcr.mockResolvedValue(mockRetryResponse);

    const result = await documentService.retryOcr('doc-123');

    expect(mockDocumentService.retryOcr).toHaveBeenCalledWith('doc-123');
    expect(result.data.success).toBe(true);
    expect(result.data.document_id).toBe('doc-123');
  });

  it('should handle retry errors', async () => {
    // The rejection is a plain object carrying a `response`, not an `Error`
    // instance, so it is matched structurally.
    const mockError = {
      response: {
        status: 404,
        data: { error: 'Document not found' },
      },
    };

    mockDocumentService.retryOcr.mockRejectedValue(mockError);

    await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({
      response: { status: 404 },
    });
  });

  it('should handle network errors', async () => {
    mockDocumentService.retryOcr.mockRejectedValue(new Error('Network error'));

    await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error');
  });
});