// Readur/frontend/src/services/__tests__/api.test.ts

import { describe, it, expect, vi, beforeEach } from 'vitest';
import { type OcrResponse, type Document } from '../api';
import { createMockApiServices } from '../../test/test-utils';

// Build the shared mock service bundle from the project's test utilities and
// keep a direct handle on the document-service mock so each test can program
// its return values and inspect its recorded calls.
const mockServices = createMockApiServices();
const mockDocumentService = mockServices.documentService;

// Replace the `documentService` export of '../api' with the mock above while
// passing every other export of the real module through unchanged.
vi.mock('../api', async () => {
  const actual = await vi.importActual('../api');
  return {
    ...actual,
    documentService: mockDocumentService,
  };
});

// Load the module dynamically, after the mock factory has been registered, so
// the `documentService` binding used by the tests below is the mocked one.
const { documentService } = await import('../api');

describe('documentService', () => {
  // Clear recorded calls on every mock so each test starts from a clean slate.
  beforeEach(() => void vi.clearAllMocks());

  describe('getOcrText', () => {
    // Wraps a payload in the response envelope the mock is resolved with.
    const okEnvelope = <T>(data: T) => ({
      data,
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    });

    // Builds the plain rejection object used to imitate an HTTP error status.
    const httpFailure = (status: number, error: string) => ({
      response: {
        status,
        data: { error },
      },
    });

    // Fully populated payload for a document whose OCR run completed.
    const completedOcr: OcrResponse = {
      document_id: 'doc-123',
      filename: 'test_document.pdf',
      has_ocr_text: true,
      ocr_text: 'This is extracted OCR text content.',
      ocr_confidence: 95.5,
      ocr_word_count: 150,
      ocr_processing_time_ms: 1200,
      ocr_status: 'completed',
      ocr_error: null,
      ocr_completed_at: '2024-01-01T00:05:00Z',
    };

    it('should fetch OCR text for a document', async () => {
      mockDocumentService.getOcrText.mockResolvedValue(okEnvelope(completedOcr));

      const { data } = await documentService.getOcrText('doc-123');

      expect(data).toEqual(completedOcr);
      expect(data.document_id).toBe('doc-123');
      expect(data.has_ocr_text).toBe(true);
      expect(data.ocr_text).toBe('This is extracted OCR text content.');
      expect(data.ocr_confidence).toBe(95.5);
      expect(data.ocr_word_count).toBe(150);
    });

    it('should handle OCR response without text', async () => {
      // Payload for a document that has not been OCR-processed yet.
      const pendingOcr: OcrResponse = {
        document_id: 'doc-456',
        filename: 'text_file.txt',
        has_ocr_text: false,
        ocr_text: null,
        ocr_confidence: null,
        ocr_word_count: null,
        ocr_processing_time_ms: null,
        ocr_status: 'pending',
        ocr_error: null,
        ocr_completed_at: null,
      };
      mockDocumentService.getOcrText.mockResolvedValue(okEnvelope(pendingOcr));

      const { data } = await documentService.getOcrText('doc-456');

      expect(data).toEqual(pendingOcr);
      expect(data.has_ocr_text).toBe(false);
      expect(data.ocr_text).toBeNull();
      expect(data.ocr_confidence).toBeNull();
    });

    it('should handle OCR error response', async () => {
      // Payload for a document whose OCR run ended in failure.
      const failedOcr: OcrResponse = {
        document_id: 'doc-789',
        filename: 'corrupted_file.pdf',
        has_ocr_text: false,
        ocr_text: null,
        ocr_confidence: null,
        ocr_word_count: null,
        ocr_processing_time_ms: 5000,
        ocr_status: 'failed',
        ocr_error: 'Failed to process document: corrupted file format',
        ocr_completed_at: '2024-01-01T00:05:00Z',
      };
      mockDocumentService.getOcrText.mockResolvedValue(okEnvelope(failedOcr));

      const { data } = await documentService.getOcrText('doc-789');

      expect(data).toEqual(failedOcr);
      expect(data.ocr_status).toBe('failed');
      expect(data.ocr_error).toBe('Failed to process document: corrupted file format');
      expect(data.has_ocr_text).toBe(false);
    });

    it('should make correct API call', async () => {
      mockDocumentService.getOcrText.mockResolvedValue({ data: completedOcr });

      await documentService.getOcrText('doc-123');

      expect(mockDocumentService.getOcrText).toHaveBeenCalledWith('doc-123');
    });

    it('should handle network errors', async () => {
      const networkFailure = new Error('Network Error');
      mockDocumentService.getOcrText.mockRejectedValue(networkFailure);

      const pending = documentService.getOcrText('doc-123');

      await expect(pending).rejects.toThrow('Network Error');
    });

    it('should handle 404 errors for non-existent documents', async () => {
      mockDocumentService.getOcrText.mockRejectedValue(httpFailure(404, 'Document not found'));

      const pending = documentService.getOcrText('non-existent-doc');

      await expect(pending).rejects.toMatchObject({ response: { status: 404 } });
    });

    it('should handle 401 unauthorized errors', async () => {
      mockDocumentService.getOcrText.mockRejectedValue(httpFailure(401, 'Unauthorized'));

      const pending = documentService.getOcrText('doc-123');

      await expect(pending).rejects.toMatchObject({ response: { status: 401 } });
    });
  });

  describe('list', () => {
    // A document whose OCR has completed, carrying the full set of OCR metrics.
    const processedPdf: Document = {
      id: 'doc-1',
      filename: 'document1.pdf',
      original_filename: 'document1.pdf',
      file_size: 1024000,
      mime_type: 'application/pdf',
      tags: ['pdf', 'document'],
      created_at: '2024-01-01T00:00:00Z',
      has_ocr_text: true,
      ocr_confidence: 95.5,
      ocr_word_count: 150,
      ocr_processing_time_ms: 1200,
      ocr_status: 'completed',
    };

    // A document still waiting for OCR; its metric fields are explicitly undefined.
    const pendingImage: Document = {
      id: 'doc-2',
      filename: 'image.png',
      original_filename: 'image.png',
      file_size: 512000,
      mime_type: 'image/png',
      tags: ['image'],
      created_at: '2024-01-02T00:00:00Z',
      has_ocr_text: false,
      ocr_confidence: undefined,
      ocr_word_count: undefined,
      ocr_processing_time_ms: undefined,
      ocr_status: 'pending',
    };

    const mockDocuments: Document[] = [processedPdf, pendingImage];

    it('should fetch document list with OCR metadata', async () => {
      mockDocumentService.list.mockResolvedValue({
        data: mockDocuments,
        status: 200,
        statusText: 'OK',
        headers: {},
        config: {},
      });

      const { data } = await documentService.list(50, 0);

      expect(data).toEqual(mockDocuments);

      const [first, second] = [data[0], data[1]];
      expect(first.has_ocr_text).toBe(true);
      expect(first.ocr_confidence).toBe(95.5);
      expect(second.has_ocr_text).toBe(false);
      expect(second.ocr_confidence).toBeUndefined();
    });
  });

  describe('upload', () => {
    it('should upload file with multipart form data', async () => {
      // Arrange: a small in-memory PDF and the record the mock reports for it.
      const pdfFile = new File(['content'], 'test.pdf', { type: 'application/pdf' });
      const createdDocument: Document = {
        id: 'doc-new',
        filename: 'test.pdf',
        original_filename: 'test.pdf',
        file_size: 7,
        mime_type: 'application/pdf',
        tags: [],
        created_at: '2024-01-01T00:00:00Z',
        has_ocr_text: false,
        ocr_status: 'pending',
      };
      mockDocumentService.upload.mockResolvedValue({ data: createdDocument });

      // Act
      const { data } = await documentService.upload(pdfFile);

      // Assert: the payload is passed back and the file was forwarded as given.
      expect(data).toEqual(createdDocument);
      expect(mockDocumentService.upload).toHaveBeenCalledWith(pdfFile);
    });
  });

  describe('download', () => {
    it('should download file as blob', async () => {
      // Arrange: the mock resolves with a PDF-typed blob.
      const pdfBlob = new Blob(['file content'], { type: 'application/pdf' });
      mockDocumentService.download.mockResolvedValue({ data: pdfBlob });

      // Act
      const { data } = await documentService.download('doc-123');

      // Assert: the blob comes back and the document id was forwarded.
      expect(data).toEqual(pdfBlob);
      expect(mockDocumentService.download).toHaveBeenCalledWith('doc-123');
    });
  });
});

describe('OcrResponse interface', () => {
  it('should have correct type structure', () => {
    // A fully populated value; the annotation makes the compiler check its shape.
    const populated: OcrResponse = {
      document_id: 'doc-123',
      filename: 'test.pdf',
      has_ocr_text: true,
      ocr_text: 'Sample text',
      ocr_confidence: 95.5,
      ocr_word_count: 100,
      ocr_processing_time_ms: 1000,
      ocr_status: 'completed',
      ocr_error: null,
      ocr_completed_at: '2024-01-01T00:00:00Z',
    };

    // Field name paired with the runtime `typeof` result expected for it.
    const expectedTypes = [
      ['document_id', 'string'],
      ['filename', 'string'],
      ['has_ocr_text', 'boolean'],
      ['ocr_text', 'string'],
      ['ocr_confidence', 'number'],
      ['ocr_word_count', 'number'],
      ['ocr_processing_time_ms', 'number'],
      ['ocr_status', 'string'],
    ] as const;

    for (const [field, typeName] of expectedTypes) {
      expect(typeof populated[field]).toBe(typeName);
    }
    expect(populated.ocr_error).toBeNull();
    expect(typeof populated.ocr_completed_at).toBe('string');
  });

  it('should allow optional/null fields', () => {
    // Only the identifying fields and the status carry real values here.
    const sparse: OcrResponse = {
      document_id: 'doc-456',
      filename: 'text.txt',
      has_ocr_text: false,
      ocr_text: null,
      ocr_confidence: undefined,
      ocr_word_count: undefined,
      ocr_processing_time_ms: undefined,
      ocr_status: 'pending',
      ocr_error: undefined,
      ocr_completed_at: undefined,
    };

    const { has_ocr_text, ocr_text, ocr_confidence } = sparse;
    expect(has_ocr_text).toBe(false);
    expect(ocr_text).toBeNull();
    expect(ocr_confidence).toBeUndefined();
  });
});

// Tests for the mocked `documentService.deleteLowConfidence(maxConfidence, preview)`.
describe('documentService.deleteLowConfidence', () => {
  // This suite sits outside the `documentService` describe, so it needs its own
  // reset: without it, calls recorded by earlier tests would satisfy later
  // `toHaveBeenCalledWith` assertions (e.g. the repeated `(30.0, false)` call).
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should delete low confidence documents successfully', async () => {
    const mockDeleteResponse = {
      data: {
        success: true,
        message: 'Successfully deleted 3 documents with OCR confidence below 30%',
        deleted_count: 3,
        matched_count: 3,
        successful_file_deletions: 3,
        failed_file_deletions: 0,
        ignored_file_creation_failures: 0,
        deleted_document_ids: ['doc-1', 'doc-2', 'doc-3']
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockDeleteResponse);

    const result = await documentService.deleteLowConfidence(30.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(30.0, false);
    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(3);
    expect(result.data.matched_count).toBe(3);
    expect(result.data.deleted_document_ids).toHaveLength(3);
  });

  it('should preview low confidence documents without deleting', async () => {
    // Preview payload: lists matches via `document_ids` and has no `deleted_count`.
    const mockPreviewResponse = {
      data: {
        success: true,
        message: 'Found 5 documents with OCR confidence below 50%',
        matched_count: 5,
        preview: true,
        document_ids: ['doc-1', 'doc-2', 'doc-3', 'doc-4', 'doc-5']
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockPreviewResponse);

    const result = await documentService.deleteLowConfidence(50.0, true);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(50.0, true);
    expect(result.data.success).toBe(true);
    expect(result.data.preview).toBe(true);
    expect(result.data.matched_count).toBe(5);
    expect(result.data.document_ids).toHaveLength(5);
    expect(result.data).not.toHaveProperty('deleted_count');
  });

  it('should handle no matching documents', async () => {
    const mockEmptyResponse = {
      data: {
        success: true,
        message: 'No documents found with OCR confidence below 10%',
        deleted_count: 0
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockEmptyResponse);

    const result = await documentService.deleteLowConfidence(10.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(10.0, false);
    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(0);
  });

  it('should handle validation errors for invalid confidence threshold', async () => {
    // The rejection is modelled as a resolved response with `success: false`.
    const mockErrorResponse = {
      data: {
        success: false,
        message: 'max_confidence must be between 0.0 and 100.0',
        matched_count: 0
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockErrorResponse);

    const result = await documentService.deleteLowConfidence(-10.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(-10.0, false);
    expect(result.data.success).toBe(false);
    expect(result.data.message).toContain('must be between 0.0 and 100.0');
  });

  it('should handle API errors gracefully', async () => {
    const mockError = new Error('Network error');
    mockDocumentService.deleteLowConfidence.mockRejectedValue(mockError);

    await expect(documentService.deleteLowConfidence(30.0, false))
      .rejects.toThrow('Network error');

    // Meaningful only because the call history is cleared before each test.
    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(30.0, false);
  });

  it('should use correct default values', async () => {
    const mockResponse = {
      data: { success: true, matched_count: 0 },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockResponse);

    // Test with explicit false value (the default)
    await documentService.deleteLowConfidence(40.0, false);

    expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(40.0, false);
  });

  it('should handle partial deletion failures', async () => {
    // Three documents matched but only two were deleted, with one file-level failure.
    const mockPartialFailureResponse = {
      data: {
        success: true,
        message: 'Successfully deleted 2 documents with OCR confidence below 25%',
        deleted_count: 2,
        matched_count: 3,
        successful_file_deletions: 1,
        failed_file_deletions: 1,
        ignored_file_creation_failures: 1,
        deleted_document_ids: ['doc-1', 'doc-2']
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockPartialFailureResponse);

    const result = await documentService.deleteLowConfidence(25.0, false);

    expect(result.data.success).toBe(true);
    expect(result.data.deleted_count).toBe(2);
    expect(result.data.matched_count).toBe(3);
    expect(result.data.failed_file_deletions).toBe(1);
    expect(result.data.ignored_file_creation_failures).toBe(1);
  });

  it('should properly encode confidence threshold values', async () => {
    const mockResponse = {
      data: { success: true, matched_count: 0 },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.deleteLowConfidence.mockResolvedValue(mockResponse);

    // Test various confidence values
    const testValues = [0.0, 0.1, 30.5, 50.0, 99.9, 100.0];

    for (const confidence of testValues) {
      // Drop the previous iteration's call so each assertion sees only its own.
      mockDocumentService.deleteLowConfidence.mockClear();
      await documentService.deleteLowConfidence(confidence, true);
      expect(mockDocumentService.deleteLowConfidence).toHaveBeenCalledWith(confidence, true);
    }
  });
});

// Tests for the mocked `documentService.getFailedOcrDocuments(limit?, offset?)`.
describe('documentService.getFailedOcrDocuments', () => {
  // Shared payload: two OCR-stage failures plus pagination and statistics.
  const mockFailedOcrResponse = {
    documents: [
      {
        id: 'doc-1',
        filename: 'failed_doc1.pdf',
        failure_reason: 'low_ocr_confidence',
        failure_stage: 'ocr',
        created_at: '2024-01-01T10:00:00Z',
        retry_count: 1
      },
      {
        id: 'doc-2',
        filename: 'failed_doc2.pdf',
        failure_reason: 'pdf_parsing_error',
        failure_stage: 'ocr',
        created_at: '2024-01-01T11:00:00Z',
        retry_count: 0
      }
    ],
    pagination: {
      total: 2,
      limit: 50,
      offset: 0,
      has_more: false
    },
    statistics: {
      total_failed: 2,
      failure_categories: [
        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        { reason: 'pdf_parsing_error', display_name: 'PDF Parsing Error', count: 1 }
      ]
    }
  };

  // This suite sits outside the `documentService` describe, so it needs its own
  // reset: without it, calls recorded by earlier tests would still be in the
  // history that `toHaveBeenCalledWith` is checked against.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch failed OCR documents successfully', async () => {
    const mockResponse = {
      data: mockFailedOcrResponse,
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.getFailedOcrDocuments.mockResolvedValue(mockResponse);

    const result = await documentService.getFailedOcrDocuments(50, 0);

    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith(50, 0);
    expect(result.data).toEqual(mockFailedOcrResponse);
    expect(result.data.documents).toHaveLength(2);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
    expect(result.data.pagination.total).toBe(2);
  });

  it('should handle pagination parameters correctly', async () => {
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });

    await documentService.getFailedOcrDocuments(25, 10);

    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith(25, 10);
  });

  it('should use default pagination when not specified', async () => {
    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: mockFailedOcrResponse });

    await documentService.getFailedOcrDocuments();

    // With the history cleared, this matches only the argument-less call above.
    expect(mockDocumentService.getFailedOcrDocuments).toHaveBeenCalledWith();
  });

  it('should handle empty results', async () => {
    const emptyResponse = {
      documents: [],
      pagination: { total: 0, limit: 50, offset: 0, has_more: false },
      statistics: { total_failed: 0, failure_categories: [] }
    };

    mockDocumentService.getFailedOcrDocuments.mockResolvedValue({ data: emptyResponse });

    const result = await documentService.getFailedOcrDocuments();

    expect(result.data.documents).toHaveLength(0);
    expect(result.data.pagination.total).toBe(0);
    expect(result.data.statistics.total_failed).toBe(0);
  });

  it('should handle API errors', async () => {
    const mockError = new Error('Network error');
    mockDocumentService.getFailedOcrDocuments.mockRejectedValue(mockError);

    await expect(documentService.getFailedOcrDocuments()).rejects.toThrow('Network error');
  });
});

// Tests for the mocked `documentService.getFailedDocuments(limit?, offset?, stage?, reason?)`.
describe('documentService.getFailedDocuments', () => {
  // Shared payload: one failure each from the ocr, ingestion and validation stages.
  const mockFailedDocumentsResponse = {
    documents: [
      {
        id: 'doc-1',
        filename: 'failed_doc1.pdf',
        failure_reason: 'low_ocr_confidence',
        failure_stage: 'ocr',
        created_at: '2024-01-01T10:00:00Z',
        retry_count: 1
      },
      {
        id: 'doc-2',
        filename: 'duplicate_doc.pdf',
        failure_reason: 'duplicate_content',
        failure_stage: 'ingestion',
        created_at: '2024-01-01T11:00:00Z',
        retry_count: 0
      },
      {
        id: 'doc-3',
        filename: 'large_file.pdf',
        failure_reason: 'file_too_large',
        failure_stage: 'validation',
        created_at: '2024-01-01T12:00:00Z',
        retry_count: 2
      }
    ],
    pagination: {
      total: 3,
      limit: 25,
      offset: 0,
      has_more: false
    },
    statistics: {
      total_failed: 3,
      failure_categories: [
        { reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 },
        { reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 },
        { reason: 'file_too_large', display_name: 'File Too Large', count: 1 }
      ]
    }
  };

  // This suite sits outside the `documentService` describe, so it needs its own
  // reset: without it, calls recorded by earlier tests would still be in the
  // history that `toHaveBeenCalledWith` is checked against.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should fetch failed documents with default parameters', async () => {
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });

    const result = await documentService.getFailedDocuments();

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith();
    expect(result.data).toEqual(mockFailedDocumentsResponse);
    expect(result.data.documents).toHaveLength(3);
  });

  it('should filter by stage parameter', async () => {
    const ocrOnlyResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[0]], // Only OCR failure
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: ocrOnlyResponse });

    const result = await documentService.getFailedDocuments(25, 0, 'ocr');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
  });

  it('should filter by reason parameter', async () => {
    const duplicateOnlyResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[1]], // Only duplicate failure
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: { total_failed: 1, failure_categories: [{ reason: 'duplicate_content', display_name: 'Duplicate Content', count: 1 }] }
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: duplicateOnlyResponse });

    // The stage argument is explicitly undefined so that only the reason filter is set.
    const result = await documentService.getFailedDocuments(25, 0, undefined, 'duplicate_content');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, undefined, 'duplicate_content');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_reason).toBe('duplicate_content');
  });

  it('should filter by both stage and reason', async () => {
    const filteredResponse = {
      ...mockFailedDocumentsResponse,
      documents: [mockFailedDocumentsResponse.documents[0]], // OCR + low_ocr_confidence
      pagination: { ...mockFailedDocumentsResponse.pagination, total: 1 },
      statistics: { total_failed: 1, failure_categories: [{ reason: 'low_ocr_confidence', display_name: 'Low OCR Confidence', count: 1 }] }
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: filteredResponse });

    const result = await documentService.getFailedDocuments(25, 0, 'ocr', 'low_ocr_confidence');

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(25, 0, 'ocr', 'low_ocr_confidence');
    expect(result.data.documents).toHaveLength(1);
    expect(result.data.documents[0].failure_stage).toBe('ocr');
    expect(result.data.documents[0].failure_reason).toBe('low_ocr_confidence');
  });

  it('should handle custom pagination', async () => {
    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: mockFailedDocumentsResponse });

    await documentService.getFailedDocuments(10, 20);

    expect(mockDocumentService.getFailedDocuments).toHaveBeenCalledWith(10, 20);
  });

  it('should handle empty results', async () => {
    const emptyResponse = {
      documents: [],
      pagination: { total: 0, limit: 25, offset: 0, has_more: false },
      statistics: { total_failed: 0, failure_categories: [] }
    };

    mockDocumentService.getFailedDocuments.mockResolvedValue({ data: emptyResponse });

    const result = await documentService.getFailedDocuments();

    expect(result.data.documents).toHaveLength(0);
    expect(result.data.statistics.total_failed).toBe(0);
  });
});

// Tests for the mocked `documentService.retryOcr(documentId)`.
describe('documentService.retryOcr', () => {
  // This suite sits outside the `documentService` describe, so it clears the
  // mock call history itself to keep each test independent of the others.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should retry OCR for a document successfully', async () => {
    const mockRetryResponse = {
      data: {
        success: true,
        message: 'OCR retry queued successfully',
        document_id: 'doc-123'
      },
      status: 200,
      statusText: 'OK',
      headers: {},
      config: {},
    };

    mockDocumentService.retryOcr.mockResolvedValue(mockRetryResponse);

    const result = await documentService.retryOcr('doc-123');

    expect(mockDocumentService.retryOcr).toHaveBeenCalledWith('doc-123');
    expect(result.data.success).toBe(true);
    expect(result.data.document_id).toBe('doc-123');
  });

  it('should handle retry errors', async () => {
    // Plain object (not an Error) carrying an HTTP-style `response.status`.
    const mockError = {
      response: {
        status: 404,
        data: { error: 'Document not found' }
      }
    };

    mockDocumentService.retryOcr.mockRejectedValue(mockError);

    await expect(documentService.retryOcr('non-existent-doc')).rejects.toMatchObject({
      response: { status: 404 }
    });
  });

  it('should handle network errors', async () => {
    mockDocumentService.retryOcr.mockRejectedValue(new Error('Network error'));

    await expect(documentService.retryOcr('doc-123')).rejects.toThrow('Network error');
  });
});