Readur/frontend/src/services/__tests__/api.test.ts

755 lines
24 KiB
TypeScript

import { describe, it, expect, vi, beforeEach } from 'vitest';
import { type OcrResponse, type Document } from '../api';
import { createMockApiServices } from '../../test/test-utils';
// Build the shared service mocks through the project's test helper so this
// suite mocks the API layer the same way as the other suites that use it.
const mockServices = createMockApiServices();
const mockDocumentService = mockServices.documentService;
// Replace only `documentService` in '../api'; every other export is passed
// through unchanged from the real module via vi.importActual.
// NOTE(review): Vitest documents that vi.mock() calls are hoisted above imports.
// The factory closes over `mockDocumentService`, so it must not be invoked
// before the two declarations above have run — confirm nothing imports '../api'
// eagerly (e.g. via test-utils) before this point.
// NOTE(review): because `documentService` is swapped wholesale for the mock,
// the suites below verify the mock's configured behaviour and call arguments,
// not the real HTTP client.
vi.mock('../api', async () => {
const actual = await vi.importActual('../api');
return {
...actual,
documentService: mockDocumentService,
};
});
// Load the module only after the mock has been registered so that the
// `documentService` binding used by the tests is the mocked one.
const { documentService } = await import('../api');
/**
 * Suites for the mocked `documentService`: OCR text retrieval, listing,
 * upload and download. Each test programs the mock, calls the service and
 * checks the resolved/rejected value and the recorded call arguments.
 */
describe('documentService', () => {
  /** Wraps a payload in the 200/OK response envelope the mocks resolve with. */
  function okEnvelope<T>(payload: T) {
    return {
      data: payload,
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
  }

  // Start every test with empty call history on all mocks.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  describe('getOcrText', () => {
    // OCR result for a document whose processing finished successfully.
    const completedOcr: OcrResponse = {
      document_id: 'doc-123',
      filename: 'test_document.pdf',
      has_ocr_text: true,
      ocr_text: 'This is extracted OCR text content.',
      ocr_confidence: 95.5,
      ocr_word_count: 150,
      ocr_processing_time_ms: 1200,
      ocr_status: 'completed',
      ocr_error: null,
      ocr_completed_at: '2024-01-01T00:05:00Z',
    };

    it('should fetch OCR text for a document', async () => {
      mockDocumentService.getOcrText.mockResolvedValue(okEnvelope(completedOcr));

      const { data } = await documentService.getOcrText('doc-123');

      expect(data).toEqual(completedOcr);
      expect(data.document_id).toBe('doc-123');
      expect(data.has_ocr_text).toBe(true);
      expect(data.ocr_text).toBe('This is extracted OCR text content.');
      expect(data.ocr_confidence).toBe(95.5);
      expect(data.ocr_word_count).toBe(150);
    });

    it('should handle OCR response without text', async () => {
      // OCR has not produced anything yet: all result fields are null.
      const pendingOcr: OcrResponse = {
        document_id: 'doc-456',
        filename: 'text_file.txt',
        has_ocr_text: false,
        ocr_text: null,
        ocr_confidence: null,
        ocr_word_count: null,
        ocr_processing_time_ms: null,
        ocr_status: 'pending',
        ocr_error: null,
        ocr_completed_at: null,
      };
      mockDocumentService.getOcrText.mockResolvedValue(okEnvelope(pendingOcr));

      const { data } = await documentService.getOcrText('doc-456');

      expect(data).toEqual(pendingOcr);
      expect(data.has_ocr_text).toBe(false);
      expect(data.ocr_text).toBeNull();
      expect(data.ocr_confidence).toBeNull();
    });

    it('should handle OCR error response', async () => {
      // A failed OCR run is still delivered as a resolved response; the
      // failure is carried in ocr_status / ocr_error.
      const failedOcr: OcrResponse = {
        document_id: 'doc-789',
        filename: 'corrupted_file.pdf',
        has_ocr_text: false,
        ocr_text: null,
        ocr_confidence: null,
        ocr_word_count: null,
        ocr_processing_time_ms: 5000,
        ocr_status: 'failed',
        ocr_error: 'Failed to process document: corrupted file format',
        ocr_completed_at: '2024-01-01T00:05:00Z',
      };
      mockDocumentService.getOcrText.mockResolvedValue(okEnvelope(failedOcr));

      const { data } = await documentService.getOcrText('doc-789');

      expect(data).toEqual(failedOcr);
      expect(data.ocr_status).toBe('failed');
      expect(data.ocr_error).toBe('Failed to process document: corrupted file format');
      expect(data.has_ocr_text).toBe(false);
    });

    it('should make correct API call', async () => {
      mockDocumentService.getOcrText.mockResolvedValue({ data: completedOcr });

      await documentService.getOcrText('doc-123');

      expect(mockDocumentService.getOcrText).toHaveBeenCalledWith('doc-123');
    });

    it('should handle network errors', async () => {
      const networkFailure = new Error('Network Error');
      mockDocumentService.getOcrText.mockRejectedValue(networkFailure);

      const pending = documentService.getOcrText('doc-123');

      await expect(pending).rejects.toThrow('Network Error');
    });

    it('should handle 404 errors for non-existent documents', async () => {
      const notFound = {
        response: {
          status: 404,
          data: { error: 'Document not found' },
        },
      };
      mockDocumentService.getOcrText.mockRejectedValue(notFound);

      const pending = documentService.getOcrText('non-existent-doc');

      await expect(pending).rejects.toMatchObject({
        response: { status: 404 },
      });
    });

    it('should handle 401 unauthorized errors', async () => {
      const unauthorized = {
        response: {
          status: 401,
          data: { error: 'Unauthorized' },
        },
      };
      mockDocumentService.getOcrText.mockRejectedValue(unauthorized);

      const pending = documentService.getOcrText('doc-123');

      await expect(pending).rejects.toMatchObject({
        response: { status: 401 },
      });
    });
  });

  describe('list', () => {
    // One document with OCR metadata and one still waiting for OCR.
    const listedDocuments: Document[] = [
      {
        id: 'doc-1',
        filename: 'document1.pdf',
        original_filename: 'document1.pdf',
        file_size: 1024000,
        mime_type: 'application/pdf',
        tags: ['pdf', 'document'],
        created_at: '2024-01-01T00:00:00Z',
        has_ocr_text: true,
        ocr_confidence: 95.5,
        ocr_word_count: 150,
        ocr_processing_time_ms: 1200,
        ocr_status: 'completed',
      },
      {
        id: 'doc-2',
        filename: 'image.png',
        original_filename: 'image.png',
        file_size: 512000,
        mime_type: 'image/png',
        tags: ['image'],
        created_at: '2024-01-02T00:00:00Z',
        has_ocr_text: false,
        ocr_confidence: undefined,
        ocr_word_count: undefined,
        ocr_processing_time_ms: undefined,
        ocr_status: 'pending',
      },
    ];

    it('should fetch document list with OCR metadata', async () => {
      mockDocumentService.list.mockResolvedValue(okEnvelope(listedDocuments));

      const { data: docs } = await documentService.list(50, 0);

      expect(docs).toEqual(listedDocuments);
      expect(docs[0].has_ocr_text).toBe(true);
      expect(docs[0].ocr_confidence).toBe(95.5);
      expect(docs[1].has_ocr_text).toBe(false);
      expect(docs[1].ocr_confidence).toBeUndefined();
    });
  });

  describe('upload', () => {
    it('should upload file with multipart form data', async () => {
      const pdfFile = new File(['content'], 'test.pdf', { type: 'application/pdf' });
      const createdDocument: Document = {
        id: 'doc-new',
        filename: 'test.pdf',
        original_filename: 'test.pdf',
        file_size: 7,
        mime_type: 'application/pdf',
        tags: [],
        created_at: '2024-01-01T00:00:00Z',
        has_ocr_text: false,
        ocr_status: 'pending',
      };
      mockDocumentService.upload.mockResolvedValue({ data: createdDocument });

      const { data } = await documentService.upload(pdfFile);

      expect(data).toEqual(createdDocument);
      expect(mockDocumentService.upload).toHaveBeenCalledWith(pdfFile);
    });
  });

  describe('download', () => {
    it('should download file as blob', async () => {
      const pdfBlob = new Blob(['file content'], { type: 'application/pdf' });
      mockDocumentService.download.mockResolvedValue({ data: pdfBlob });

      const { data } = await documentService.download('doc-123');

      expect(data).toEqual(pdfBlob);
      expect(mockDocumentService.download).toHaveBeenCalledWith('doc-123');
    });
  });
});
/**
 * Shape checks for the `OcrResponse` type: a fully populated value and a
 * minimal value with the nullable/optional fields left empty.
 */
describe('OcrResponse interface', () => {
  it('should have correct type structure', () => {
    const populated: OcrResponse = {
      document_id: 'doc-123',
      filename: 'test.pdf',
      has_ocr_text: true,
      ocr_text: 'Sample text',
      ocr_confidence: 95.5,
      ocr_word_count: 100,
      ocr_processing_time_ms: 1000,
      ocr_status: 'completed',
      ocr_error: null,
      ocr_completed_at: '2024-01-01T00:00:00Z',
    };

    // Field name paired with the runtime `typeof` result expected for it.
    const expectedRuntimeTypes = [
      ['document_id', 'string'],
      ['filename', 'string'],
      ['has_ocr_text', 'boolean'],
      ['ocr_text', 'string'],
      ['ocr_confidence', 'number'],
      ['ocr_word_count', 'number'],
      ['ocr_processing_time_ms', 'number'],
      ['ocr_status', 'string'],
    ] as const;

    for (const [field, runtimeType] of expectedRuntimeTypes) {
      expect(typeof populated[field]).toBe(runtimeType);
    }
    expect(populated.ocr_error).toBeNull();
    expect(typeof populated.ocr_completed_at).toBe('string');
  });

  it('should allow optional/null fields', () => {
    // Compiles only if these fields accept null/undefined on OcrResponse.
    const minimal: OcrResponse = {
      document_id: 'doc-456',
      filename: 'text.txt',
      has_ocr_text: false,
      ocr_text: null,
      ocr_confidence: undefined,
      ocr_word_count: undefined,
      ocr_processing_time_ms: undefined,
      ocr_status: 'pending',
      ocr_error: undefined,
      ocr_completed_at: undefined,
    };

    const { has_ocr_text, ocr_text, ocr_confidence } = minimal;
    expect(has_ocr_text).toBe(false);
    expect(ocr_text).toBeNull();
    expect(ocr_confidence).toBeUndefined();
  });
});
/**
 * Suite for `documentService.deleteLowConfidence(maxConfidence, previewOnly)`
 * against the mocked service: real deletion, preview mode, empty matches,
 * validation failures, rejected calls and partial file-deletion failures.
 */
describe('documentService.deleteLowConfidence', () => {
  // Reset recorded calls before each test. Without this, call history on the
  // shared mock accumulates across tests and a `toHaveBeenCalledWith`
  // assertion can be satisfied by a call made in an earlier test.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should delete low confidence documents successfully', async () => {
    const mockDeleteResponse = {
      data: {
        success: true,
        message: 'Successfully deleted 3 documents with OCR confidence below 30%',
        deleted_count: 3,
        matched_count: 3,
        successful_file_deletions: 3,
        failed_file_deletions: 0,
        ignored_file_creation_failures: 0,
        deleted_document_ids: ['doc-1', 'doc-2', 'doc-3'],
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockDeleteResponse);

    const result = await documentService.deleteLowConfidence(30.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(30.0, false);
    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(3);
    expect(result.data.matched_count).toBe(3);
    expect(result.data.deleted_document_ids).toHaveLength(3);
  });

  it('should preview low confidence documents without deleting', async () => {
    const mockPreviewResponse = {
      data: {
        success: true,
        message: 'Found 5 documents with OCR confidence below 50%',
        matched_count: 5,
        preview: true,
        document_ids: ['doc-1', 'doc-2', 'doc-3', 'doc-4', 'doc-5'],
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockPreviewResponse);

    const result = await documentService.deleteLowConfidence(50.0, true);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(50.0, true);
    expect(result.data.success).toBe(true);
    expect(result.data.preview).toBe(true);
    expect(result.data.matched_count).toBe(5);
    expect(result.data.document_ids).toHaveLength(5);
    // A preview payload carries no deletion counter.
    expect(result.data).not.toHaveProperty('deleted_count');
  });

  it('should handle no matching documents', async () => {
    const mockEmptyResponse = {
      data: {
        success: true,
        message: 'No documents found with OCR confidence below 10%',
        deleted_count: 0,
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockEmptyResponse);

    const result = await documentService.deleteLowConfidence(10.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(10.0, false);
    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(0);
  });

  it('should handle validation errors for invalid confidence threshold', async () => {
    // The validation failure is modelled as a resolved response with
    // success: false rather than as a rejected promise.
    const mockErrorResponse = {
      data: {
        success: false,
        message: 'max_confidence must be between 0.0 and 100.0',
        matched_count: 0,
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockErrorResponse);

    const result = await documentService.deleteLowConfidence(-10.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(-10.0, false);
    expect(result.data.success).toBe(false);
    expect(result.data.message).toContain('must be between 0.0 and 100.0');
  });

  it('should handle API errors gracefully', async () => {
    const mockError = new Error('Network error');
    mockDocumentService.deleteLowConfidence.mockRejectedValue(mockError);

    await expect(documentService.deleteLowConfidence(30.0, false))
      .rejects.toThrow('Network error');

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(30.0, false);
  });

  it('should use correct default values', async () => {
    const mockResponse = {
      data: { success: true, matched_count: 0 },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockResponse);

    // The preview flag is passed explicitly as `false` here. Because the
    // service is mocked, any default parameter value of the real
    // implementation is not exercised by this test.
    await documentService.deleteLowConfidence(40.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(40.0, false);
  });

  it('should handle partial deletion failures', async () => {
    // Fewer documents deleted than matched, with one failed file deletion.
    const mockPartialFailureResponse = {
      data: {
        success: true,
        message: 'Successfully deleted 2 documents with OCR confidence below 25%',
        deleted_count: 2,
        matched_count: 3,
        successful_file_deletions: 1,
        failed_file_deletions: 1,
        ignored_file_creation_failures: 1,
        deleted_document_ids: ['doc-1', 'doc-2'],
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockPartialFailureResponse);

    const result = await documentService.deleteLowConfidence(25.0, false);

    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(2);
    expect(result.data.matched_count).toBe(3);
    expect(result.data.failed_file_deletions).toBe(1);
    expect(result.data.ignored_file_creation_failures).toBe(1);
  });

  it('should properly encode confidence threshold values', async () => {
    const mockResponse = {
      data: { success: true, matched_count: 0 },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockResponse);

    // Boundary and fractional thresholds.
    const testValues = [0.0, 0.1, 30.5, 50.0, 99.9, 100.0];
    for (const confidence of testValues) {
      // Clear per iteration so each assertion only sees this iteration's call.
      mockDocumentService.deleteLowConfidence.mockClear();
      await documentService.deleteLowConfidence(confidence, true);
      expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(confidence, true);
    }
  });
});
/**
 * Suite for `documentService.getFailedOcrDocuments(limit?, offset?)` against
 * the mocked service: successful fetch, pagination arguments, argument-less
 * call, empty result set and rejected calls.
 */
describe('documentService.getFailedOcrDocuments', () => {
  // Two OCR-stage failures plus matching pagination and statistics blocks.
  const mockFailedOcrResponse = {
    documents: [
      {
        id: 'doc-1',
        filename: 'failed_doc1.pdf',
        failure_reason: 'low_ocr_confidence',
        failure_stage: 'ocr',
        created_at: '2024-01-01T10:00:00Z',
        retry_count: 1,
      },
      {
        id: 'doc-2',
        filename: 'failed_doc2.pdf',
        failure_reason: 'pdf_parsing_error',
        failure_stage: 'ocr',
        created_at: '2024-01-01T11:00:00Z',
        retry_count: 0,
      },
    ],
    pagination: {
      total: 2,
      limit: 50,
      offset: 0,
      has_more: false,
    },
    statistics: {
      total_failed: 2,
      failure_categories: [
        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        { reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 },
      ],
    },
  };

  // Reset recorded calls before each test so that call-argument assertions
  // only see calls made by the test that is currently running.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch failed OCR documents successfully', async () => {
    const mockResponse = {
      data: mockFailedOcrResponse,
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue(mockResponse);

    const result = await documentService.getFailedOcrDocuments(50, 0);

    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith(50, 0);
    expect(result.data).toEqual(mockFailedOcrResponse);
    expect(result.data.documents).toHaveLength(2);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
    expect(result.data.pagination.total).toBe(2);
  });

  it('should handle pagination parameters correctly', async () => {
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });

    await documentService.getFailedOcrDocuments(25, 10);

    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith(25, 10);
  });

  it('should use default pagination when not specified', async () => {
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });

    await documentService.getFailedOcrDocuments();

    // Asserts the call was made with no arguments at all.
    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith();
  });

  it('should handle empty results', async () => {
    const emptyResponse = {
      documents: [],
      pagination: { total: 0, limit: 50, offset: 0, has_more: false },
      statistics: { total_failed: 0, failure_categories: [] },
    };
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: emptyResponse });

    const result = await documentService.getFailedOcrDocuments();

    expect(result.data.documents).toHaveLength(0);
    expect(result.data.pagination.total).toBe(0);
    expect(result.data.statistics.total_failed).toBe(0);
  });

  it('should handle API errors', async () => {
    const mockError = new Error('Network error');
    mockDocumentService.getFailedOcrDocuments.mockRejectedValue(mockError);

    await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error');
  });
});
/**
 * Suite for `documentService.getFailedDocuments(limit?, offset?, stage?, reason?)`
 * against the mocked service: argument-less call, filtering by stage and/or
 * reason, custom pagination and an empty result set.
 */
describe('documentService.getFailedDocuments', () => {
  // Three failures from different stages (ocr, ingestion, validation).
  const mockFailedDocumentsResponse = {
    documents: [
      {
        id: 'doc-1',
        filename: 'failed_doc1.pdf',
        failure_reason: 'low_ocr_confidence',
        failure_stage: 'ocr',
        created_at: '2024-01-01T10:00:00Z',
        retry_count: 1,
      },
      {
        id: 'doc-2',
        filename: 'duplicate_doc.pdf',
        failure_reason: 'duplicate_content',
        failure_stage: 'ingestion',
        created_at: '2024-01-01T11:00:00Z',
        retry_count: 0,
      },
      {
        id: 'doc-3',
        filename: 'large_file.pdf',
        failure_reason: 'file_too_large',
        failure_stage: 'validation',
        created_at: '2024-01-01T12:00:00Z',
        retry_count: 2,
      },
    ],
    pagination: {
      total: 3,
      limit: 25,
      offset: 0,
      has_more: false,
    },
    statistics: {
      total_failed: 3,
      failure_categories: [
        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        { reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
        { reason: 'file_too_large', display_name: 'File Too Large', count: 1 },
      ],
    },
  };

  // Reset recorded calls before each test so that call-argument assertions
  // only see calls made by the test that is currently running.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch failed documents with default parameters', async () => {
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });

    const result = await documentService.getFailedDocuments();

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith();
    expect(result.data).toEqual(mockFailedDocumentsResponse);
    expect(result.data.documents).toHaveLength(3);
  });

  it('should filter by stage parameter', async () => {
    const ocrOnlyResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] },
    };
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse });

    const result = await documentService.getFailedDocuments(25, 0, 'ocr');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
  });

  it('should filter by reason parameter', async () => {
    const duplicateOnlyResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: { total_failed: 1, failure_categories: [{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }] },
    };
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse });

    // `undefined` skips the stage filter so only the reason is supplied.
    const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, undefined, 'duplicate_content');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_reason).toBe('duplicate_content');
  });

  it('should filter by both stage and reason', async () => {
    const filteredResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] },
    };
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: filteredResponse });

    const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
    expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence');
  });

  it('should handle custom pagination', async () => {
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });

    await documentService.getFailedDocuments(10, 20);

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(10, 20);
  });

  it('should handle empty results', async () => {
    const emptyResponse = {
      documents: [],
      pagination: { total: 0, limit: 25, offset: 0, has_more: false },
      statistics: { total_failed: 0, failure_categories: [] },
    };
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: emptyResponse });

    const result = await documentService.getFailedDocuments();

    expect(result.data.documents).toHaveLength(0);
    expect(result.data.statistics.total_failed).toBe(0);
  });
});
/**
 * Suite for `documentService.retryOcr(documentId)` against the mocked
 * service: successful retry, a 404-style rejection and a network failure.
 */
describe('documentService.retryOcr', () => {
  // Reset recorded calls before each test so that `toHaveBeenCalledWith`
  // cannot be satisfied by a call made in an earlier test.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should retry OCR for a document successfully', async () => {
    const mockRetryResponse = {
      data: {
        success: true,
        message: 'OCR retry queued successfully',
        document_id: 'doc-123',
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };
    mockDocumentService.retryOcr.mockResolvedValue(mockRetryResponse);

    const result = await documentService.retryOcr('doc-123');

    expect(mockDocumentService.retryOcr).toHaveBeenCalledWith('doc-123');
    expect(result.data.success).toBe(true);
    expect(result.data.document_id).toBe('doc-123');
  });

  it('should handle retry errors', async () => {
    // Rejection value is a plain object carrying an HTTP-style `response`.
    const mockError = {
      response: {
        status: 404,
        data: { error: 'Document not found' },
      },
    };
    mockDocumentService.retryOcr.mockRejectedValue(mockError);

    await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({
      response: { status: 404 },
    });
  });

  it('should handle network errors', async () => {
    mockDocumentService.retryOcr.mockRejectedValue(new Error('Network error'));

    await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error');
  });
});