Merge pull request #66 from readur/feat/delete-low-confidence-documents
feat(server/client): implement button deleting low confidence documents
This commit is contained in:
commit
dd7bc618d7
|
|
@ -28,6 +28,7 @@ import {
|
|||
Snackbar,
|
||||
Tabs,
|
||||
Tab,
|
||||
TextField,
|
||||
useTheme,
|
||||
} from '@mui/material';
|
||||
import Grid from '@mui/material/GridLegacy';
|
||||
|
|
@ -147,6 +148,12 @@ const FailedOcrPage: React.FC = () => {
|
|||
message: '',
|
||||
severity: 'success'
|
||||
});
|
||||
|
||||
// Low confidence documents state
|
||||
const [confidenceThreshold, setConfidenceThreshold] = useState<number>(30);
|
||||
const [lowConfidenceLoading, setLowConfidenceLoading] = useState(false);
|
||||
const [previewData, setPreviewData] = useState<any>(null);
|
||||
const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false);
|
||||
|
||||
const fetchFailedDocuments = async () => {
|
||||
try {
|
||||
|
|
@ -297,8 +304,68 @@ const FailedOcrPage: React.FC = () => {
|
|||
/**
 * Reloads the data backing whichever tab is currently selected.
 *
 * Tab 0 reloads the failed-OCR list, tab 1 reloads the duplicates list and
 * tab 2 re-runs the low-confidence preview. Any other tab index is a no-op.
 */
const refreshCurrentTab = () => {
  switch (currentTab) {
    case 0:
      fetchFailedDocuments();
      break;
    case 1:
      fetchDuplicates();
      break;
    case 2:
      handlePreviewLowConfidence();
      break;
    default:
      // Unknown tab index: nothing to refresh.
      break;
  }
};
|
||||
|
||||
// Low confidence document handlers
|
||||
/**
 * Dry run: asks the server which documents fall below the current confidence
 * threshold without deleting anything, and stores the result for display.
 *
 * The threshold that was actually sent is stored with the response as
 * `previewed_threshold`, so the delete handler can verify that the preview the
 * user confirmed still matches the threshold in the input field.
 */
const handlePreviewLowConfidence = async () => {
  // Snapshot the threshold so the stored preview is tied to the value it was computed for.
  const threshold = confidenceThreshold;

  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(threshold, true);
    const result = response.data;

    // A response can resolve successfully yet carry `success: false`;
    // surface that as an error instead of a valid preview.
    if (result.success === false) {
      setPreviewData(null);
      setSnackbar({
        open: true,
        message: result.message ?? 'Failed to preview low confidence documents',
        severity: 'error'
      });
      return;
    }

    setPreviewData({ ...result, previewed_threshold: threshold });
    setSnackbar({
      open: true,
      message: result.message,
      severity: 'info'
    });
  } catch (error) {
    setSnackbar({
      open: true,
      message: 'Failed to preview low confidence documents',
      severity: 'error'
    });
  } finally {
    setLowConfidenceLoading(false);
  }
};

/**
 * Permanently deletes the documents below the current confidence threshold.
 *
 * Refuses to run when there is no preview, when the preview matched nothing,
 * or when the threshold has been edited since the preview was taken. In the
 * last case the confirmed count would no longer describe what gets deleted.
 */
const handleDeleteLowConfidence = async () => {
  if (!previewData || previewData.matched_count === 0) {
    setSnackbar({
      open: true,
      message: 'No documents to delete',
      severity: 'warning'
    });
    return;
  }

  // Guard against deleting with a threshold the user never previewed.
  if (
    previewData.previewed_threshold !== undefined &&
    previewData.previewed_threshold !== confidenceThreshold
  ) {
    setPreviewData(null);
    setConfirmDeleteOpen(false);
    setSnackbar({
      open: true,
      message: 'Confidence threshold changed since the last preview. Please preview again before deleting.',
      severity: 'warning'
    });
    return;
  }

  try {
    setLowConfidenceLoading(true);
    const response = await documentService.deleteLowConfidence(confidenceThreshold, false);
    const result = response.data;

    // A resolved response may still report failure via `success: false`.
    setSnackbar({
      open: true,
      message: result.message,
      severity: result.success === false ? 'error' : 'success'
    });
    setPreviewData(null);
    setConfirmDeleteOpen(false);

    // Refresh other tabs if they have data affected
    // NOTE(review): the delete button only renders on tab 2, so this branch
    // looks unreachable in practice — confirm whether the failed-OCR list
    // should be refreshed unconditionally instead.
    if (currentTab === 0) {
      fetchFailedDocuments();
    }
  } catch (error) {
    setSnackbar({
      open: true,
      message: 'Failed to delete low confidence documents',
      severity: 'error'
    });
  } finally {
    setLowConfidenceLoading(false);
  }
};
|
||||
|
||||
|
|
@ -314,7 +381,7 @@ const FailedOcrPage: React.FC = () => {
|
|||
<Box sx={{ p: 3 }}>
|
||||
<Box display="flex" justifyContent="space-between" alignItems="center" mb={3}>
|
||||
<Typography variant="h4" component="h1">
|
||||
Failed OCR & Duplicates
|
||||
Document Management
|
||||
</Typography>
|
||||
<Button
|
||||
variant="outlined"
|
||||
|
|
@ -327,7 +394,7 @@ const FailedOcrPage: React.FC = () => {
|
|||
</Box>
|
||||
|
||||
<Paper sx={{ mb: 3 }}>
|
||||
<Tabs value={currentTab} onChange={handleTabChange} aria-label="failed ocr and duplicates tabs">
|
||||
<Tabs value={currentTab} onChange={handleTabChange} aria-label="document management tabs">
|
||||
<Tab
|
||||
icon={<ErrorIcon />}
|
||||
label={`Failed OCR${statistics ? ` (${statistics.total_failed})` : ''}`}
|
||||
|
|
@ -338,6 +405,11 @@ const FailedOcrPage: React.FC = () => {
|
|||
label={`Duplicates${duplicateStatistics ? ` (${duplicateStatistics.total_duplicate_groups})` : ''}`}
|
||||
iconPosition="start"
|
||||
/>
|
||||
<Tab
|
||||
icon={<FindInPageIcon />}
|
||||
label={`Low Confidence${previewData ? ` (${previewData.matched_count})` : ''}`}
|
||||
iconPosition="start"
|
||||
/>
|
||||
</Tabs>
|
||||
</Paper>
|
||||
|
||||
|
|
@ -830,6 +902,128 @@ const FailedOcrPage: React.FC = () => {
|
|||
</>
|
||||
)}
|
||||
|
||||
{/* Low Confidence Documents Tab Content */}
|
||||
{currentTab === 2 && (
|
||||
<>
|
||||
<Alert severity="info" sx={{ mb: 3 }}>
|
||||
<AlertTitle>Low Confidence Document Deletion</AlertTitle>
|
||||
<Typography>
|
||||
This tool allows you to delete documents with OCR confidence below a specified threshold.
|
||||
Use the preview feature first to see what documents would be affected before deleting.
|
||||
</Typography>
|
||||
</Alert>
|
||||
|
||||
<Card sx={{ mb: 3 }}>
|
||||
<CardContent>
|
||||
<Grid container spacing={3} alignItems="center">
|
||||
<Grid item xs={12} md={4}>
|
||||
<TextField
|
||||
label="Maximum Confidence Threshold (%)"
|
||||
type="number"
|
||||
value={confidenceThreshold}
|
||||
onChange={(e) => setConfidenceThreshold(Math.max(0, Math.min(100, Number(e.target.value))))}
|
||||
fullWidth
|
||||
inputProps={{ min: 0, max: 100, step: 1 }}
|
||||
helperText="Documents with confidence below this value will be deleted"
|
||||
/>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={4}>
|
||||
<Button
|
||||
variant="outlined"
|
||||
onClick={handlePreviewLowConfidence}
|
||||
disabled={lowConfidenceLoading}
|
||||
startIcon={lowConfidenceLoading ? <CircularProgress size={20} /> : <FindInPageIcon />}
|
||||
fullWidth
|
||||
>
|
||||
Preview Documents
|
||||
</Button>
|
||||
</Grid>
|
||||
<Grid item xs={12} md={4}>
|
||||
<Button
|
||||
variant="contained"
|
||||
color="warning"
|
||||
onClick={() => setConfirmDeleteOpen(true)}
|
||||
disabled={!previewData || previewData.matched_count === 0 || lowConfidenceLoading}
|
||||
startIcon={<DeleteIcon />}
|
||||
fullWidth
|
||||
>
|
||||
Delete Low Confidence Documents
|
||||
</Button>
|
||||
</Grid>
|
||||
</Grid>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
{/* Preview Results */}
|
||||
{previewData && (
|
||||
<Card sx={{ mb: 3 }}>
|
||||
<CardContent>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
Preview Results
|
||||
</Typography>
|
||||
<Typography color={previewData.matched_count > 0 ? 'warning.main' : 'success.main'}>
|
||||
{previewData.message}
|
||||
</Typography>
|
||||
{previewData.matched_count > 0 && (
|
||||
<Box sx={{ mt: 2 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Document IDs that would be deleted:
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ fontFamily: 'monospace', wordBreak: 'break-all' }}>
|
||||
{previewData.document_ids.slice(0, 10).join(', ')}
|
||||
{previewData.document_ids.length > 10 && ` ... and ${previewData.document_ids.length - 10} more`}
|
||||
</Typography>
|
||||
</Box>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Loading State */}
|
||||
{lowConfidenceLoading && !previewData && (
|
||||
<Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
|
||||
<CircularProgress />
|
||||
<Typography sx={{ ml: 2 }}>Processing request...</Typography>
|
||||
</Box>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Confirmation Dialog */}
|
||||
<Dialog
|
||||
open={confirmDeleteOpen}
|
||||
onClose={() => setConfirmDeleteOpen(false)}
|
||||
maxWidth="sm"
|
||||
fullWidth
|
||||
>
|
||||
<DialogTitle color="warning.main">
|
||||
<DeleteIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
||||
Confirm Low Confidence Document Deletion
|
||||
</DialogTitle>
|
||||
<DialogContent>
|
||||
<Typography>
|
||||
Are you sure you want to delete {previewData?.matched_count || 0} documents with OCR confidence below {confidenceThreshold}%?
|
||||
</Typography>
|
||||
<Alert severity="warning" sx={{ mt: 2 }}>
|
||||
This action cannot be undone. The documents and their files will be permanently deleted.
|
||||
</Alert>
|
||||
</DialogContent>
|
||||
<DialogActions>
|
||||
<Button onClick={() => setConfirmDeleteOpen(false)}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button
|
||||
onClick={handleDeleteLowConfidence}
|
||||
color="warning"
|
||||
variant="contained"
|
||||
disabled={lowConfidenceLoading}
|
||||
startIcon={lowConfidenceLoading ? <CircularProgress size={20} /> : <DeleteIcon />}
|
||||
>
|
||||
{lowConfidenceLoading ? 'Deleting...' : 'Delete Documents'}
|
||||
</Button>
|
||||
</DialogActions>
|
||||
</Dialog>
|
||||
|
||||
{/* Document Details Dialog */}
|
||||
<Dialog
|
||||
open={detailsOpen}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,15 @@ vi.mock('../../services/api', () => ({
|
|||
retryOcr: () => Promise.resolve({
|
||||
data: { success: true, message: 'OCR retry queued successfully' }
|
||||
}),
|
||||
deleteLowConfidence: vi.fn(() => Promise.resolve({
|
||||
data: {
|
||||
success: true,
|
||||
message: 'Found 0 documents with OCR confidence below 30%',
|
||||
matched_count: 0,
|
||||
preview: true,
|
||||
document_ids: []
|
||||
}
|
||||
})),
|
||||
},
|
||||
}));
|
||||
|
||||
|
|
@ -55,7 +64,7 @@ describe('FailedOcrPage', () => {
|
|||
|
||||
// Wait for the page to load and show the title
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText('Failed OCR & Duplicates')).toBeInTheDocument();
|
||||
expect(screen.getByText('Document Management')).toBeInTheDocument();
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -92,4 +101,224 @@ describe('FailedOcrPage', () => {
|
|||
// test('handles retry OCR functionality', async () => { ... });
|
||||
// test('handles API errors gracefully', async () => { ... });
|
||||
// test('refreshes data when refresh button is clicked', async () => { ... });
|
||||
});
|
||||
|
||||
describe('FailedOcrPage - Low Confidence Deletion', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
test('renders low confidence deletion tab', async () => {
  render(
    <FailedOcrPageWrapper>
      <FailedOcrPage />
    </FailedOcrPageWrapper>
  );

  // The tab strip shows up once the page has rendered
  expect(await screen.findByRole('tablist')).toBeInTheDocument();

  // ...and it includes the Low Confidence tab
  expect(await screen.findByText(/Low Confidence/i)).toBeInTheDocument();
});
|
||||
|
||||
test('displays confidence threshold input when low confidence tab is active', async () => {
  render(
    <FailedOcrPageWrapper>
      <FailedOcrPage />
    </FailedOcrPageWrapper>
  );

  // Page is ready once the tab strip is present
  expect(await screen.findByRole('tablist')).toBeInTheDocument();

  // Switch to the Low Confidence tab (third tab, index 2)
  screen.getByText(/Low Confidence/i).click();

  // The threshold field is part of that tab's content
  expect(await screen.findByLabelText(/Maximum Confidence Threshold/i)).toBeInTheDocument();
});
|
||||
|
||||
test('displays preview and delete buttons in low confidence tab', async () => {
  render(
    <FailedOcrPageWrapper>
      <FailedOcrPage />
    </FailedOcrPageWrapper>
  );

  // Find the tab with a retrying query, then click exactly once. The click is
  // deliberately kept out of waitFor: its callback may be invoked repeatedly,
  // so side effects inside it can fire more than once.
  const lowConfidenceTab = await screen.findByText(/Low Confidence/i);
  lowConfidenceTab.click();

  // Both action buttons render once the tab content is mounted
  expect(await screen.findByText(/Preview Documents/i)).toBeInTheDocument();
  expect(await screen.findByText(/Delete Low Confidence Documents/i)).toBeInTheDocument();
});
|
||||
|
||||
test('shows informational alert about low confidence deletion', async () => {
  render(
    <FailedOcrPageWrapper>
      <FailedOcrPage />
    </FailedOcrPageWrapper>
  );

  // Find the tab with a retrying query, then click exactly once. The click is
  // deliberately kept out of waitFor: its callback may be invoked repeatedly,
  // so side effects inside it can fire more than once.
  const lowConfidenceTab = await screen.findByText(/Low Confidence/i);
  lowConfidenceTab.click();

  // The explanatory alert (title and body) is shown at the top of the tab
  expect(await screen.findByText(/Low Confidence Document Deletion/i)).toBeInTheDocument();
  expect(await screen.findByText(/This tool allows you to delete documents/i)).toBeInTheDocument();
});
|
||||
|
||||
// DISABLED - Interactive tests that would require complex user event simulation
|
||||
// These tests would need fireEvent.change, fireEvent.click, and proper async handling
|
||||
|
||||
// test('calls deleteLowConfidence API when preview button is clicked', async () => {
|
||||
// const mockDeleteLowConfidence = vi.mocked(documentService.deleteLowConfidence);
|
||||
//
|
||||
// render(<FailedOcrPageWrapper><FailedOcrPage /></FailedOcrPageWrapper>);
|
||||
//
|
||||
// // Navigate to tab and click preview
|
||||
// const lowConfidenceTab = screen.getByText(/Low Confidence/i);
|
||||
// fireEvent.click(lowConfidenceTab);
|
||||
//
|
||||
// const previewButton = screen.getByText(/Preview Documents/i);
|
||||
// fireEvent.click(previewButton);
|
||||
//
|
||||
// await waitFor(() => {
|
||||
// expect(mockDeleteLowConfidence).toHaveBeenCalledWith(30, true);
|
||||
// });
|
||||
// });
|
||||
|
||||
// test('validates confidence threshold input values', async () => {
|
||||
// render(<FailedOcrPageWrapper><FailedOcrPage /></FailedOcrPageWrapper>);
|
||||
//
|
||||
// const lowConfidenceTab = screen.getByText(/Low Confidence/i);
|
||||
// fireEvent.click(lowConfidenceTab);
|
||||
//
|
||||
// const thresholdInput = screen.getByLabelText(/Maximum Confidence Threshold/i);
|
||||
//
|
||||
// // Test invalid values
|
||||
// fireEvent.change(thresholdInput, { target: { value: '150' } });
|
||||
// expect(thresholdInput.value).toBe('100'); // Should be clamped
|
||||
//
|
||||
// fireEvent.change(thresholdInput, { target: { value: '-10' } });
|
||||
// expect(thresholdInput.value).toBe('0'); // Should be clamped
|
||||
// });
|
||||
|
||||
// test('shows confirmation dialog before deletion', async () => {
|
||||
// const mockDeleteLowConfidence = vi.mocked(documentService.deleteLowConfidence);
|
||||
// mockDeleteLowConfidence.mockResolvedValueOnce({
|
||||
// data: {
|
||||
// success: true,
|
||||
// matched_count: 5,
|
||||
// preview: true,
|
||||
// document_ids: ['doc1', 'doc2', 'doc3', 'doc4', 'doc5']
|
||||
// }
|
||||
// });
|
||||
//
|
||||
// render(<FailedOcrPageWrapper><FailedOcrPage /></FailedOcrPageWrapper>);
|
||||
//
|
||||
// // Navigate to tab, preview, then try to delete
|
||||
// const lowConfidenceTab = screen.getByText(/Low Confidence/i);
|
||||
// fireEvent.click(lowConfidenceTab);
|
||||
//
|
||||
// const previewButton = screen.getByText(/Preview Documents/i);
|
||||
// fireEvent.click(previewButton);
|
||||
//
|
||||
// await waitFor(() => {
|
||||
// const deleteButton = screen.getByText(/Delete Low Confidence Documents/i);
|
||||
// fireEvent.click(deleteButton);
|
||||
// });
|
||||
//
|
||||
// // Should show confirmation dialog
|
||||
// await waitFor(() => {
|
||||
// const confirmDialog = screen.getByText(/Confirm Low Confidence Document Deletion/i);
|
||||
// expect(confirmDialog).toBeInTheDocument();
|
||||
// });
|
||||
// });
|
||||
|
||||
// test('disables delete button when no preview data available', async () => {
|
||||
// render(<FailedOcrPageWrapper><FailedOcrPage /></FailedOcrPageWrapper>);
|
||||
//
|
||||
// const lowConfidenceTab = screen.getByText(/Low Confidence/i);
|
||||
// fireEvent.click(lowConfidenceTab);
|
||||
//
|
||||
// await waitFor(() => {
|
||||
// const deleteButton = screen.getByText(/Delete Low Confidence Documents/i);
|
||||
// expect(deleteButton).toBeDisabled();
|
||||
// });
|
||||
// });
|
||||
|
||||
// test('displays preview results after API call', async () => {
|
||||
// const mockDeleteLowConfidence = vi.mocked(documentService.deleteLowConfidence);
|
||||
// mockDeleteLowConfidence.mockResolvedValueOnce({
|
||||
// data: {
|
||||
// success: true,
|
||||
// message: 'Found 3 documents with OCR confidence below 30%',
|
||||
// matched_count: 3,
|
||||
// preview: true,
|
||||
// document_ids: ['doc1', 'doc2', 'doc3']
|
||||
// }
|
||||
// });
|
||||
//
|
||||
// render(<FailedOcrPageWrapper><FailedOcrPage /></FailedOcrPageWrapper>);
|
||||
//
|
||||
// const lowConfidenceTab = screen.getByText(/Low Confidence/i);
|
||||
// fireEvent.click(lowConfidenceTab);
|
||||
//
|
||||
// const previewButton = screen.getByText(/Preview Documents/i);
|
||||
// fireEvent.click(previewButton);
|
||||
//
|
||||
// await waitFor(() => {
|
||||
// expect(screen.getByText(/Preview Results/i)).toBeInTheDocument();
|
||||
// expect(screen.getByText(/Found 3 documents/i)).toBeInTheDocument();
|
||||
// });
|
||||
// });
|
||||
|
||||
// test('handles API errors gracefully', async () => {
|
||||
// const mockDeleteLowConfidence = vi.mocked(documentService.deleteLowConfidence);
|
||||
// mockDeleteLowConfidence.mockRejectedValueOnce(new Error('Network error'));
|
||||
//
|
||||
// render(<FailedOcrPageWrapper><FailedOcrPage /></FailedOcrPageWrapper>);
|
||||
//
|
||||
// const lowConfidenceTab = screen.getByText(/Low Confidence/i);
|
||||
// fireEvent.click(lowConfidenceTab);
|
||||
//
|
||||
// const previewButton = screen.getByText(/Preview Documents/i);
|
||||
// fireEvent.click(previewButton);
|
||||
//
|
||||
// await waitFor(() => {
|
||||
// // Should show error message via snackbar or similar
|
||||
// expect(screen.getByText(/Failed to preview low confidence documents/i)).toBeInTheDocument();
|
||||
// });
|
||||
// });
|
||||
});
|
||||
|
|
@ -22,6 +22,7 @@ export const documentService = {
|
|||
getFailedOcrDocuments: vi.fn(),
|
||||
getDuplicates: vi.fn(),
|
||||
retryOcr: vi.fn(),
|
||||
deleteLowConfidence: vi.fn(),
|
||||
}
|
||||
|
||||
// Re-export types that components might need
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ const mockGetOcrText = vi.fn();
|
|||
const mockList = vi.fn();
|
||||
const mockUpload = vi.fn();
|
||||
const mockDownload = vi.fn();
|
||||
const mockDeleteLowConfidence = vi.fn();
|
||||
|
||||
// Mock the entire api module
|
||||
vi.mock('../api', async () => {
|
||||
|
|
@ -17,6 +18,7 @@ vi.mock('../api', async () => {
|
|||
list: mockList,
|
||||
upload: mockUpload,
|
||||
download: mockDownload,
|
||||
deleteLowConfidence: mockDeleteLowConfidence,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
|
@ -309,4 +311,183 @@ describe('OcrResponse interface', () => {
|
|||
expect(ocrResponseMinimal.ocr_text).toBeNull();
|
||||
expect(ocrResponseMinimal.ocr_confidence).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('documentService.deleteLowConfidence', () => {
  /** Wraps a payload in the axios-style envelope the mocked service resolves with. */
  function asOkResponse<T>(data: T) {
    return { data, status: 200, statusText: 'OK', headers: {}, config: {} };
  }

  it('should delete low confidence documents successfully', async () => {
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({
        success: true,
        message: 'Successfully deleted 3 documents with OCR confidence below 30%',
        deleted_count: 3,
        matched_count: 3,
        successful_file_deletions: 3,
        failed_file_deletions: 0,
        ignored_file_creation_failures: 0,
        deleted_document_ids: ['doc-1', 'doc-2', 'doc-3'],
      })
    );

    const { data } = await documentService.deleteLowConfidence(30.0, false);

    expect(mockDeleteLowConfidence).toHaveBeenCalledWith(30.0, false);
    expect(data.success).toBe(true);
    expect(data.deleted_count).toBe(3);
    expect(data.matched_count).toBe(3);
    expect(data.deleted_document_ids).toHaveLength(3);
  });

  it('should preview low confidence documents without deleting', async () => {
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({
        success: true,
        message: 'Found 5 documents with OCR confidence below 50%',
        matched_count: 5,
        preview: true,
        document_ids: ['doc-1', 'doc-2', 'doc-3', 'doc-4', 'doc-5'],
      })
    );

    const { data } = await documentService.deleteLowConfidence(50.0, true);

    expect(mockDeleteLowConfidence).toHaveBeenCalledWith(50.0, true);
    expect(data.success).toBe(true);
    expect(data.preview).toBe(true);
    expect(data.matched_count).toBe(5);
    expect(data.document_ids).toHaveLength(5);
    // A preview response carries no deletion counters
    expect(data).not.toHaveProperty('deleted_count');
  });

  it('should handle no matching documents', async () => {
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({
        success: true,
        message: 'No documents found with OCR confidence below 10%',
        deleted_count: 0,
      })
    );

    const { data } = await documentService.deleteLowConfidence(10.0, false);

    expect(mockDeleteLowConfidence).toHaveBeenCalledWith(10.0, false);
    expect(data.success).toBe(true);
    expect(data.deleted_count).toBe(0);
  });

  it('should handle validation errors for invalid confidence threshold', async () => {
    // Validation failures arrive as a 200 response flagged with success: false
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({
        success: false,
        message: 'max_confidence must be between 0.0 and 100.0',
        matched_count: 0,
      })
    );

    const { data } = await documentService.deleteLowConfidence(-10.0, false);

    expect(mockDeleteLowConfidence).toHaveBeenCalledWith(-10.0, false);
    expect(data.success).toBe(false);
    expect(data.message).toContain('must be between 0.0 and 100.0');
  });

  it('should handle API errors gracefully', async () => {
    mockDeleteLowConfidence.mockRejectedValue(new Error('Network error'));

    await expect(documentService.deleteLowConfidence(30.0, false))
      .rejects.toThrow('Network error');

    expect(mockDeleteLowConfidence).toHaveBeenCalledWith(30.0, false);
  });

  it('should use correct default values', async () => {
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({ success: true, matched_count: 0 })
    );

    // Test with explicit false value (the default)
    await documentService.deleteLowConfidence(40.0, false);

    expect(mockDeleteLowConfidence).toHaveBeenCalledWith(40.0, false);
  });

  it('should handle partial deletion failures', async () => {
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({
        success: true,
        message: 'Successfully deleted 2 documents with OCR confidence below 25%',
        deleted_count: 2,
        matched_count: 3,
        successful_file_deletions: 1,
        failed_file_deletions: 1,
        ignored_file_creation_failures: 1,
        deleted_document_ids: ['doc-1', 'doc-2'],
      })
    );

    const { data } = await documentService.deleteLowConfidence(25.0, false);

    expect(data.success).toBe(true);
    expect(data.deleted_count).toBe(2);
    expect(data.matched_count).toBe(3);
    expect(data.failed_file_deletions).toBe(1);
    expect(data.ignored_file_creation_failures).toBe(1);
  });

  it('should properly encode confidence threshold values', async () => {
    mockDeleteLowConfidence.mockResolvedValue(
      asOkResponse({ success: true, matched_count: 0 })
    );

    // Boundary and fractional thresholds are passed through unchanged
    const thresholds = [0.0, 0.1, 30.5, 50.0, 99.9, 100.0];

    for (const threshold of thresholds) {
      mockDeleteLowConfidence.mockClear();
      await documentService.deleteLowConfidence(threshold, true);
      expect(mockDeleteLowConfidence).toHaveBeenCalledWith(threshold, true);
    }
  });
});
|
||||
|
|
@ -241,6 +241,13 @@ export const documentService = {
|
|||
data: { document_ids: documentIds }
|
||||
})
|
||||
},
|
||||
|
||||
/**
 * POSTs to `/documents/delete-low-confidence` with the given confidence
 * threshold.
 *
 * @param maxConfidence - Sent as `max_confidence` in the request body.
 * @param previewOnly - Sent as `preview_only`; defaults to `false`.
 * @returns The pending `api.post` request.
 */
deleteLowConfidence: (maxConfidence: number, previewOnly = false) =>
  api.post('/documents/delete-low-confidence', {
    max_confidence: maxConfidence,
    preview_only: previewOnly,
  }),
|
||||
}
|
||||
|
||||
export interface OcrStatusResponse {
|
||||
|
|
|
|||
|
|
@ -1510,6 +1510,82 @@ impl Database {
|
|||
Ok(deleted_documents)
|
||||
}
|
||||
|
||||
|
||||
pub async fn find_documents_by_confidence_threshold(&self, max_confidence: f32, user_id: uuid::Uuid, user_role: crate::models::UserRole) -> Result<Vec<Document>> {
|
||||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
"#,
|
||||
)
|
||||
.bind(max_confidence)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
rows.into_iter().map(|r| Document {
|
||||
id: r.get("id"),
|
||||
filename: r.get("filename"),
|
||||
original_filename: r.get("original_filename"),
|
||||
file_path: r.get("file_path"),
|
||||
file_size: r.get("file_size"),
|
||||
mime_type: r.get("mime_type"),
|
||||
content: r.get("content"),
|
||||
ocr_text: r.get("ocr_text"),
|
||||
ocr_confidence: r.get("ocr_confidence"),
|
||||
ocr_word_count: r.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: r.get("ocr_processing_time_ms"),
|
||||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
}).collect()
|
||||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
"#,
|
||||
)
|
||||
.bind(max_confidence)
|
||||
.bind(user_id)
|
||||
.fetch_all(&self.pool)
|
||||
.await?;
|
||||
|
||||
rows.into_iter().map(|r| Document {
|
||||
id: r.get("id"),
|
||||
filename: r.get("filename"),
|
||||
original_filename: r.get("original_filename"),
|
||||
file_path: r.get("file_path"),
|
||||
file_size: r.get("file_size"),
|
||||
mime_type: r.get("mime_type"),
|
||||
content: r.get("content"),
|
||||
ocr_text: r.get("ocr_text"),
|
||||
ocr_confidence: r.get("ocr_confidence"),
|
||||
ocr_word_count: r.get("ocr_word_count"),
|
||||
ocr_processing_time_ms: r.get("ocr_processing_time_ms"),
|
||||
ocr_status: r.get("ocr_status"),
|
||||
ocr_error: r.get("ocr_error"),
|
||||
ocr_completed_at: r.get("ocr_completed_at"),
|
||||
tags: r.get("tags"),
|
||||
created_at: r.get("created_at"),
|
||||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
}).collect()
|
||||
};
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
pub async fn count_documents_for_source(&self, source_id: Uuid) -> Result<(i64, i64)> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
|
|
@ -1563,5 +1639,6 @@ impl Database {
|
|||
.collect();
|
||||
|
||||
Ok(results)
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -31,6 +31,12 @@ pub struct BulkDeleteRequest {
|
|||
pub document_ids: Vec<uuid::Uuid>,
|
||||
}
|
||||
|
||||
// Request body for POST /documents/delete-low-confidence.
#[derive(Deserialize, Serialize, ToSchema)]
|
||||
pub struct DeleteLowConfidenceRequest {
|
||||
// Upper bound on OCR confidence: documents strictly below this value are
// matched. The handler rejects values outside 0.0..=100.0.
pub max_confidence: f32,
|
||||
// When Some(true) the handler only reports the matching documents and
// deletes nothing; None is treated as false.
pub preview_only: Option<bool>,
|
||||
}
|
||||
|
||||
pub fn router() -> Router<Arc<AppState>> {
|
||||
Router::new()
|
||||
.route("/", post(upload_document))
|
||||
|
|
@ -46,6 +52,7 @@ pub fn router() -> Router<Arc<AppState>> {
|
|||
.route("/{id}/retry-ocr", post(retry_ocr))
|
||||
.route("/failed-ocr", get(get_failed_ocr_documents))
|
||||
.route("/duplicates", get(get_user_duplicates))
|
||||
.route("/delete-low-confidence", post(delete_low_confidence_documents))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
|
|
@ -1017,4 +1024,116 @@ pub async fn bulk_delete_documents(
|
|||
"ignored_file_creation_failures": ignored_file_creation_failures,
|
||||
"deleted_document_ids": deleted_documents.iter().map(|d| d.id).collect::<Vec<_>>()
|
||||
})))
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/api/documents/delete-low-confidence",
|
||||
request_body = DeleteLowConfidenceRequest,
|
||||
responses(
|
||||
(status = 200, description = "Low confidence documents operation result"),
|
||||
(status = 401, description = "Unauthorized"),
|
||||
(status = 500, description = "Internal server error")
|
||||
),
|
||||
security(
|
||||
("bearer_auth" = [])
|
||||
),
|
||||
tag = "documents"
|
||||
)]
|
||||
pub async fn delete_low_confidence_documents(
|
||||
State(state): State<Arc<AppState>>,
|
||||
auth_user: AuthUser,
|
||||
Json(request): Json<DeleteLowConfidenceRequest>,
|
||||
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
if request.max_confidence < 0.0 || request.max_confidence > 100.0 {
|
||||
return Ok(Json(serde_json::json!({
|
||||
"success": false,
|
||||
"message": "max_confidence must be between 0.0 and 100.0",
|
||||
"matched_count": 0
|
||||
})));
|
||||
}
|
||||
|
||||
let is_preview = request.preview_only.unwrap_or(false);
|
||||
|
||||
// Find documents with confidence below threshold
|
||||
let matched_documents = state
|
||||
.db
|
||||
.find_documents_by_confidence_threshold(request.max_confidence, auth_user.user.id, auth_user.user.role)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
let matched_count = matched_documents.len();
|
||||
|
||||
if is_preview {
|
||||
return Ok(Json(serde_json::json!({
|
||||
"success": true,
|
||||
"message": format!("Found {} documents with OCR confidence below {}%", matched_count, request.max_confidence),
|
||||
"matched_count": matched_count,
|
||||
"preview": true,
|
||||
"document_ids": matched_documents.iter().map(|d| d.id).collect::<Vec<_>>()
|
||||
})));
|
||||
}
|
||||
|
||||
if matched_documents.is_empty() {
|
||||
return Ok(Json(serde_json::json!({
|
||||
"success": true,
|
||||
"message": format!("No documents found with OCR confidence below {}%", request.max_confidence),
|
||||
"deleted_count": 0
|
||||
})));
|
||||
}
|
||||
|
||||
// Extract document IDs for bulk deletion
|
||||
let document_ids: Vec<uuid::Uuid> = matched_documents.iter().map(|d| d.id).collect();
|
||||
|
||||
// Use existing bulk delete logic
|
||||
let deleted_documents = state
|
||||
.db
|
||||
.bulk_delete_documents(&document_ids, auth_user.user.id, auth_user.user.role)
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
|
||||
// Create ignored file records for all successfully deleted documents
|
||||
let mut ignored_file_creation_failures = 0;
|
||||
for document in &deleted_documents {
|
||||
if let Err(e) = crate::db::ignored_files::create_ignored_file_from_document(
|
||||
state.db.get_pool(),
|
||||
document.id,
|
||||
auth_user.user.id,
|
||||
Some(format!("deleted due to low OCR confidence ({}%)",
|
||||
document.ocr_confidence.unwrap_or(0.0))),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
).await {
|
||||
ignored_file_creation_failures += 1;
|
||||
tracing::warn!("Failed to create ignored file record for document {}: {}", document.id, e);
|
||||
}
|
||||
}
|
||||
|
||||
let file_service = FileService::new(state.config.upload_path.clone());
|
||||
let mut successful_file_deletions = 0;
|
||||
let mut failed_file_deletions = 0;
|
||||
|
||||
for document in &deleted_documents {
|
||||
match file_service.delete_document_files(document).await {
|
||||
Ok(_) => successful_file_deletions += 1,
|
||||
Err(e) => {
|
||||
failed_file_deletions += 1;
|
||||
tracing::warn!("Failed to delete files for document {}: {}", document.id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let deleted_count = deleted_documents.len();
|
||||
|
||||
Ok(Json(serde_json::json!({
|
||||
"success": true,
|
||||
"message": format!("Successfully deleted {} documents with OCR confidence below {}%", deleted_count, request.max_confidence),
|
||||
"deleted_count": deleted_count,
|
||||
"matched_count": matched_count,
|
||||
"successful_file_deletions": successful_file_deletions,
|
||||
"failed_file_deletions": failed_file_deletions,
|
||||
"ignored_file_creation_failures": ignored_file_creation_failures,
|
||||
"deleted_document_ids": deleted_documents.iter().map(|d| d.id).collect::<Vec<_>>()
|
||||
})))
|
||||
}
|
||||
|
|
@ -367,4 +367,270 @@ mod document_routes_deletion_tests {
|
|||
assert!(!unauthorized_error.contains("403"));
|
||||
assert!(!validation_error.contains("serde"));
|
||||
}
|
||||
|
||||
// Low confidence deletion tests
|
||||
mod low_confidence_deletion_tests {
|
||||
use super::*;
|
||||
use crate::routes::documents::DeleteLowConfidenceRequest;
|
||||
|
||||
fn create_low_confidence_document(user_id: Uuid, confidence: f32) -> Document {
|
||||
Document {
|
||||
id: Uuid::new_v4(),
|
||||
filename: format!("low_conf_{}.pdf", confidence),
|
||||
original_filename: format!("low_conf_{}.pdf", confidence),
|
||||
file_path: format!("/uploads/low_conf_{}.pdf", confidence),
|
||||
file_size: 1024,
|
||||
mime_type: "application/pdf".to_string(),
|
||||
content: Some("Test document content".to_string()),
|
||||
ocr_text: Some("Low quality OCR text".to_string()),
|
||||
ocr_confidence: Some(confidence),
|
||||
ocr_word_count: Some(10),
|
||||
ocr_processing_time_ms: Some(500),
|
||||
ocr_status: Some("completed".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: Some(Utc::now()),
|
||||
tags: vec!["low-confidence".to_string()],
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
user_id,
|
||||
file_hash: Some("abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_low_confidence_request_serialization() {
|
||||
// Test valid request
|
||||
let valid_request = json!({
|
||||
"max_confidence": 50.0,
|
||||
"preview_only": true
|
||||
});
|
||||
|
||||
let result: Result<DeleteLowConfidenceRequest, _> = serde_json::from_value(valid_request);
|
||||
assert!(result.is_ok());
|
||||
let request = result.unwrap();
|
||||
assert_eq!(request.max_confidence, 50.0);
|
||||
assert_eq!(request.preview_only, Some(true));
|
||||
|
||||
// Test request with only max_confidence
|
||||
let minimal_request = json!({
|
||||
"max_confidence": 30.0
|
||||
});
|
||||
|
||||
let result: Result<DeleteLowConfidenceRequest, _> = serde_json::from_value(minimal_request);
|
||||
assert!(result.is_ok());
|
||||
let request = result.unwrap();
|
||||
assert_eq!(request.max_confidence, 30.0);
|
||||
assert_eq!(request.preview_only, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_low_confidence_request_validation() {
|
||||
// Test invalid confidence values
|
||||
let invalid_negative = json!({
|
||||
"max_confidence": -10.0,
|
||||
"preview_only": false
|
||||
});
|
||||
|
||||
let result: Result<DeleteLowConfidenceRequest, _> = serde_json::from_value(invalid_negative);
|
||||
assert!(result.is_ok()); // Serialization succeeds, validation happens in handler
|
||||
|
||||
let invalid_too_high = json!({
|
||||
"max_confidence": 150.0,
|
||||
"preview_only": false
|
||||
});
|
||||
|
||||
let result: Result<DeleteLowConfidenceRequest, _> = serde_json::from_value(invalid_too_high);
|
||||
assert!(result.is_ok()); // Serialization succeeds, validation happens in handler
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_confidence_threshold_logic() {
|
||||
let user = create_test_user(UserRole::User);
|
||||
|
||||
// Create documents with various confidence levels
|
||||
let high_confidence_doc = create_low_confidence_document(user.id, 95.0);
|
||||
let medium_confidence_doc = create_low_confidence_document(user.id, 60.0);
|
||||
let low_confidence_doc = create_low_confidence_document(user.id, 25.0);
|
||||
let very_low_confidence_doc = create_low_confidence_document(user.id, 5.0);
|
||||
|
||||
let documents = vec![
|
||||
&high_confidence_doc,
|
||||
&medium_confidence_doc,
|
||||
&low_confidence_doc,
|
||||
&very_low_confidence_doc
|
||||
];
|
||||
|
||||
// Test threshold logic for different confidence values
|
||||
let threshold_50 = 50.0;
|
||||
let threshold_30 = 30.0;
|
||||
let threshold_10 = 10.0;
|
||||
|
||||
// Documents below 50% threshold
|
||||
let below_50: Vec<_> = documents.iter()
|
||||
.filter(|doc| doc.ocr_confidence.unwrap_or(0.0) < threshold_50)
|
||||
.collect();
|
||||
assert_eq!(below_50.len(), 2); // 25.0 and 5.0
|
||||
|
||||
// Documents below 30% threshold
|
||||
let below_30: Vec<_> = documents.iter()
|
||||
.filter(|doc| doc.ocr_confidence.unwrap_or(0.0) < threshold_30)
|
||||
.collect();
|
||||
assert_eq!(below_30.len(), 2); // 25.0 and 5.0
|
||||
|
||||
// Documents below 10% threshold
|
||||
let below_10: Vec<_> = documents.iter()
|
||||
.filter(|doc| doc.ocr_confidence.unwrap_or(0.0) < threshold_10)
|
||||
.collect();
|
||||
assert_eq!(below_10.len(), 1); // 5.0
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_user_role_authorization_for_low_confidence_deletion() {
|
||||
let user1 = create_test_user(UserRole::User);
|
||||
let user2 = create_test_user(UserRole::User);
|
||||
let admin = create_test_user(UserRole::Admin);
|
||||
|
||||
let user1_doc = create_low_confidence_document(user1.id, 25.0);
|
||||
let user2_doc = create_low_confidence_document(user2.id, 15.0);
|
||||
|
||||
// User1 should only be able to delete their own low confidence documents
|
||||
assert_eq!(user1_doc.user_id, user1.id);
|
||||
assert_ne!(user1_doc.user_id, user2.id);
|
||||
|
||||
// User2 should only be able to delete their own low confidence documents
|
||||
assert_eq!(user2_doc.user_id, user2.id);
|
||||
assert_ne!(user2_doc.user_id, user1.id);
|
||||
|
||||
// Admin should be able to delete any low confidence documents
|
||||
let admin_can_delete_user1 = user1_doc.user_id == admin.id || admin.role == UserRole::Admin;
|
||||
let admin_can_delete_user2 = user2_doc.user_id == admin.id || admin.role == UserRole::Admin;
|
||||
assert!(admin_can_delete_user1);
|
||||
assert!(admin_can_delete_user2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_edge_cases_for_confidence_values() {
|
||||
let user = create_test_user(UserRole::User);
|
||||
|
||||
// Test document with None confidence (should not be included)
|
||||
let mut no_confidence_doc = create_low_confidence_document(user.id, 0.0);
|
||||
no_confidence_doc.ocr_confidence = None;
|
||||
|
||||
// Test document with exactly threshold confidence (should not be included)
|
||||
let exact_threshold_doc = create_low_confidence_document(user.id, 30.0);
|
||||
|
||||
// Test document just below threshold (should be included)
|
||||
let just_below_doc = create_low_confidence_document(user.id, 29.9);
|
||||
|
||||
let threshold = 30.0;
|
||||
|
||||
// None confidence should be excluded (no OCR confidence available)
|
||||
assert!(no_confidence_doc.ocr_confidence.is_none());
|
||||
|
||||
// Exact threshold should be excluded (not less than threshold)
|
||||
assert_eq!(exact_threshold_doc.ocr_confidence.unwrap(), threshold);
|
||||
assert!(!(exact_threshold_doc.ocr_confidence.unwrap() < threshold));
|
||||
|
||||
// Just below threshold should be included
|
||||
assert!(just_below_doc.ocr_confidence.unwrap() < threshold);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_preview_mode_behavior() {
|
||||
let user = create_test_user(UserRole::User);
|
||||
let doc1 = create_low_confidence_document(user.id, 20.0);
|
||||
let doc2 = create_low_confidence_document(user.id, 10.0);
|
||||
|
||||
let preview_request = DeleteLowConfidenceRequest {
|
||||
max_confidence: 30.0,
|
||||
preview_only: Some(true),
|
||||
};
|
||||
|
||||
let delete_request = DeleteLowConfidenceRequest {
|
||||
max_confidence: 30.0,
|
||||
preview_only: Some(false),
|
||||
};
|
||||
|
||||
let no_preview_request = DeleteLowConfidenceRequest {
|
||||
max_confidence: 30.0,
|
||||
preview_only: None,
|
||||
};
|
||||
|
||||
// Preview mode should be true when explicitly set
|
||||
assert_eq!(preview_request.preview_only.unwrap_or(false), true);
|
||||
|
||||
// Delete mode should be false when explicitly set
|
||||
assert_eq!(delete_request.preview_only.unwrap_or(false), false);
|
||||
|
||||
// Default should be false when not specified
|
||||
assert_eq!(no_preview_request.preview_only.unwrap_or(false), false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_response_format_expectations() {
|
||||
// Test expected response structure for preview mode
|
||||
let expected_preview_response = json!({
|
||||
"success": true,
|
||||
"message": "Found 5 documents with OCR confidence below 30%",
|
||||
"matched_count": 5,
|
||||
"preview": true,
|
||||
"document_ids": ["uuid1", "uuid2", "uuid3", "uuid4", "uuid5"]
|
||||
});
|
||||
|
||||
// Test expected response structure for delete mode
|
||||
let expected_delete_response = json!({
|
||||
"success": true,
|
||||
"message": "Successfully deleted 5 documents with OCR confidence below 30%",
|
||||
"deleted_count": 5,
|
||||
"matched_count": 5,
|
||||
"successful_file_deletions": 5,
|
||||
"failed_file_deletions": 0,
|
||||
"ignored_file_creation_failures": 0,
|
||||
"deleted_document_ids": ["uuid1", "uuid2", "uuid3", "uuid4", "uuid5"]
|
||||
});
|
||||
|
||||
// Verify JSON structure is valid
|
||||
assert!(expected_preview_response.is_object());
|
||||
assert!(expected_delete_response.is_object());
|
||||
|
||||
// Verify required fields exist
|
||||
assert!(expected_preview_response["success"].is_boolean());
|
||||
assert!(expected_preview_response["matched_count"].is_number());
|
||||
assert!(expected_preview_response["document_ids"].is_array());
|
||||
|
||||
assert!(expected_delete_response["success"].is_boolean());
|
||||
assert!(expected_delete_response["deleted_count"].is_number());
|
||||
assert!(expected_delete_response["deleted_document_ids"].is_array());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_scenarios() {
|
||||
// Test validation error for invalid confidence range
|
||||
let invalid_confidence_cases = vec![
|
||||
(-1.0, "negative confidence"),
|
||||
(101.0, "confidence over 100"),
|
||||
(150.5, "way over 100"),
|
||||
];
|
||||
|
||||
for (confidence, description) in invalid_confidence_cases {
|
||||
let request = DeleteLowConfidenceRequest {
|
||||
max_confidence: confidence,
|
||||
preview_only: Some(false),
|
||||
};
|
||||
|
||||
// Validation logic should catch these in the handler
|
||||
assert!(confidence < 0.0 || confidence > 100.0,
|
||||
"Should be invalid: {}", description);
|
||||
}
|
||||
|
||||
// Test empty result scenario
|
||||
let request = DeleteLowConfidenceRequest {
|
||||
max_confidence: 0.0, // Very low threshold, should match nothing
|
||||
preview_only: Some(true),
|
||||
};
|
||||
|
||||
assert_eq!(request.max_confidence, 0.0);
|
||||
// This should result in zero matched documents
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1525,4 +1525,275 @@ mod deletion_error_handling_tests {
|
|||
// If transaction were to be rolled back, document would exist again
|
||||
// This test verifies the transaction was committed properly
|
||||
}
|
||||
|
||||
mod low_confidence_deletion_db_tests {
|
||||
use super::*;
|
||||
use crate::models::UserRole;
|
||||
|
||||
#[cfg(test)]
|
||||
fn create_test_document_with_confidence(user_id: Uuid, confidence: f32) -> Document {
|
||||
Document {
|
||||
id: Uuid::new_v4(),
|
||||
filename: format!("test_conf_{}.pdf", confidence),
|
||||
original_filename: format!("test_conf_{}.pdf", confidence),
|
||||
file_path: format!("/uploads/test_conf_{}.pdf", confidence),
|
||||
file_size: 1024,
|
||||
mime_type: "application/pdf".to_string(),
|
||||
content: Some("Test document content".to_string()),
|
||||
ocr_text: Some("Test OCR text".to_string()),
|
||||
ocr_confidence: Some(confidence),
|
||||
ocr_word_count: Some(50),
|
||||
ocr_processing_time_ms: Some(1000),
|
||||
ocr_status: Some("completed".to_string()),
|
||||
ocr_error: None,
|
||||
ocr_completed_at: Some(Utc::now()),
|
||||
tags: vec!["test".to_string()],
|
||||
created_at: Utc::now(),
|
||||
updated_at: Utc::now(),
|
||||
user_id,
|
||||
file_hash: Some("test_hash_123456789abcdef123456789abcdef123456789abcdef123456789abcdef".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_confidence_filtering_logic() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
let documents = vec![
|
||||
create_test_document_with_confidence(user_id, 95.0), // Should not be deleted
|
||||
create_test_document_with_confidence(user_id, 75.0), // Should not be deleted
|
||||
create_test_document_with_confidence(user_id, 45.0), // Should not be deleted
|
||||
create_test_document_with_confidence(user_id, 25.0), // Should be deleted (< 30)
|
||||
create_test_document_with_confidence(user_id, 15.0), // Should be deleted (< 30)
|
||||
create_test_document_with_confidence(user_id, 5.0), // Should be deleted (< 30)
|
||||
];
|
||||
|
||||
let threshold = 30.0;
|
||||
let low_confidence_docs: Vec<_> = documents.iter()
|
||||
.filter(|doc| {
|
||||
doc.ocr_confidence.is_some() &&
|
||||
doc.ocr_confidence.unwrap() < threshold
|
||||
})
|
||||
.collect();
|
||||
|
||||
assert_eq!(low_confidence_docs.len(), 3);
|
||||
assert_eq!(low_confidence_docs[0].ocr_confidence.unwrap(), 25.0);
|
||||
assert_eq!(low_confidence_docs[1].ocr_confidence.unwrap(), 15.0);
|
||||
assert_eq!(low_confidence_docs[2].ocr_confidence.unwrap(), 5.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_documents_without_ocr_confidence_excluded() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
let mut doc_no_confidence = create_test_document_with_confidence(user_id, 20.0);
|
||||
doc_no_confidence.ocr_confidence = None;
|
||||
|
||||
let doc_with_confidence = create_test_document_with_confidence(user_id, 20.0);
|
||||
|
||||
let documents = vec![doc_no_confidence, doc_with_confidence];
|
||||
let threshold = 30.0;
|
||||
|
||||
let low_confidence_docs: Vec<_> = documents.iter()
|
||||
.filter(|doc| {
|
||||
doc.ocr_confidence.is_some() &&
|
||||
doc.ocr_confidence.unwrap() < threshold
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Only the document with confidence should be included
|
||||
assert_eq!(low_confidence_docs.len(), 1);
|
||||
assert!(low_confidence_docs[0].ocr_confidence.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_user_role_authorization_in_filtering() {
|
||||
let user1_id = Uuid::new_v4();
|
||||
let user2_id = Uuid::new_v4();
|
||||
|
||||
let user1_doc = create_test_document_with_confidence(user1_id, 20.0);
|
||||
let user2_doc = create_test_document_with_confidence(user2_id, 15.0);
|
||||
|
||||
// Regular user should only see their own documents
|
||||
let user_role = UserRole::User;
|
||||
let admin_role = UserRole::Admin;
|
||||
|
||||
// User1 should only access their own document
|
||||
let user1_can_access_own = user1_doc.user_id == user1_id || user_role == UserRole::Admin;
|
||||
let user1_can_access_other = user2_doc.user_id == user1_id || user_role == UserRole::Admin;
|
||||
|
||||
assert!(user1_can_access_own);
|
||||
assert!(!user1_can_access_other);
|
||||
|
||||
// Admin should access all documents
|
||||
let admin_can_access_user1 = user1_doc.user_id == user1_id || admin_role == UserRole::Admin;
|
||||
let admin_can_access_user2 = user2_doc.user_id == user1_id || admin_role == UserRole::Admin;
|
||||
|
||||
assert!(admin_can_access_user1);
|
||||
assert!(admin_can_access_user2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundary_conditions_for_confidence_thresholds() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
let test_cases = vec![
|
||||
(0.0, 10.0, true), // 0% < 10% threshold
|
||||
(10.0, 10.0, false), // 10% = 10% threshold (not less than)
|
||||
(10.1, 10.0, false), // 10.1% > 10% threshold
|
||||
(29.9, 30.0, true), // 29.9% < 30% threshold
|
||||
(30.0, 30.0, false), // 30% = 30% threshold (not less than)
|
||||
(30.1, 30.0, false), // 30.1% > 30% threshold
|
||||
(99.9, 100.0, true), // 99.9% < 100% threshold
|
||||
(100.0, 100.0, false), // 100% = 100% threshold (not less than)
|
||||
];
|
||||
|
||||
for (doc_confidence, threshold, should_be_included) in test_cases {
|
||||
let doc = create_test_document_with_confidence(user_id, doc_confidence);
|
||||
let is_included = doc.ocr_confidence.is_some() &&
|
||||
doc.ocr_confidence.unwrap() < threshold;
|
||||
|
||||
assert_eq!(is_included, should_be_included,
|
||||
"Document with {}% confidence vs {}% threshold",
|
||||
doc_confidence, threshold);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_performance_considerations_for_large_datasets() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Create a large number of test documents
|
||||
let mut documents = Vec::new();
|
||||
for i in 0..1000 {
|
||||
let confidence = (i as f32) / 10.0; // 0.0 to 99.9
|
||||
documents.push(create_test_document_with_confidence(user_id, confidence));
|
||||
}
|
||||
|
||||
let threshold = 50.0;
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
let low_confidence_docs: Vec<_> = documents.iter()
|
||||
.filter(|doc| {
|
||||
doc.ocr_confidence.is_some() &&
|
||||
doc.ocr_confidence.unwrap() < threshold
|
||||
})
|
||||
.collect();
|
||||
|
||||
let elapsed = start_time.elapsed();
|
||||
|
||||
// Verify the filtering works correctly for large datasets
|
||||
assert_eq!(low_confidence_docs.len(), 500); // 0.0 to 49.9
|
||||
|
||||
// Performance should be reasonable (under 10ms for 1000 documents in memory)
|
||||
assert!(elapsed.as_millis() < 10,
|
||||
"Filtering 1000 documents took too long: {:?}", elapsed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sql_query_structure_expectations() {
|
||||
// Test that our expected SQL query structure would work
|
||||
let user_id = Uuid::new_v4();
|
||||
let confidence_threshold = 30.0;
|
||||
|
||||
// This tests the logical structure we expect in the actual SQL query
|
||||
let expected_where_conditions = vec![
|
||||
"ocr_confidence IS NOT NULL",
|
||||
"ocr_confidence < $1", // $1 = confidence_threshold
|
||||
"user_id = $2", // $2 = user_id (for non-admin users)
|
||||
];
|
||||
|
||||
// Verify our test documents would match the expected query logic
|
||||
let test_doc = create_test_document_with_confidence(user_id, 25.0);
|
||||
|
||||
// Simulate the SQL conditions
|
||||
let confidence_not_null = test_doc.ocr_confidence.is_some();
|
||||
let confidence_below_threshold = test_doc.ocr_confidence.unwrap() < confidence_threshold;
|
||||
let user_matches = test_doc.user_id == user_id;
|
||||
|
||||
assert!(confidence_not_null);
|
||||
assert!(confidence_below_threshold);
|
||||
assert!(user_matches);
|
||||
|
||||
// This document should be included in results
|
||||
let would_be_selected = confidence_not_null && confidence_below_threshold && user_matches;
|
||||
assert!(would_be_selected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deletion_ordering_expectations() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
let mut documents = vec![
|
||||
create_test_document_with_confidence(user_id, 25.0),
|
||||
create_test_document_with_confidence(user_id, 5.0),
|
||||
create_test_document_with_confidence(user_id, 15.0),
|
||||
create_test_document_with_confidence(user_id, 35.0), // Above threshold
|
||||
];
|
||||
|
||||
let threshold = 30.0;
|
||||
let mut low_confidence_docs: Vec<_> = documents.iter()
|
||||
.filter(|doc| {
|
||||
doc.ocr_confidence.is_some() &&
|
||||
doc.ocr_confidence.unwrap() < threshold
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by confidence ascending (lowest first) then by creation date descending (newest first)
|
||||
low_confidence_docs.sort_by(|a, b| {
|
||||
let conf_a = a.ocr_confidence.unwrap();
|
||||
let conf_b = b.ocr_confidence.unwrap();
|
||||
conf_a.partial_cmp(&conf_b).unwrap()
|
||||
.then_with(|| b.created_at.cmp(&a.created_at))
|
||||
});
|
||||
|
||||
assert_eq!(low_confidence_docs.len(), 3);
|
||||
assert_eq!(low_confidence_docs[0].ocr_confidence.unwrap(), 5.0); // Lowest confidence first
|
||||
assert_eq!(low_confidence_docs[1].ocr_confidence.unwrap(), 15.0);
|
||||
assert_eq!(low_confidence_docs[2].ocr_confidence.unwrap(), 25.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_handling_scenarios() {
|
||||
let user_id = Uuid::new_v4();
|
||||
|
||||
// Test invalid threshold values (these would be caught by the API handler)
|
||||
let invalid_thresholds = vec![-1.0, 101.0, f32::NAN, f32::INFINITY];
|
||||
|
||||
for threshold in invalid_thresholds {
|
||||
// The database query itself should handle these gracefully
|
||||
// Invalid thresholds should either match no documents or be rejected
|
||||
let test_doc = create_test_document_with_confidence(user_id, 50.0);
|
||||
|
||||
if threshold.is_finite() {
|
||||
let would_match = test_doc.ocr_confidence.is_some() &&
|
||||
test_doc.ocr_confidence.unwrap() < threshold;
|
||||
|
||||
if threshold < 0.0 {
|
||||
assert!(!would_match, "Negative threshold should match no documents");
|
||||
}
|
||||
if threshold > 100.0 {
|
||||
// Documents with confidence > 100 shouldn't exist, but if they did,
|
||||
// they should still be considered for deletion if threshold > 100
|
||||
assert!(would_match, "Threshold > 100 should match normal documents");
|
||||
}
|
||||
} else {
|
||||
// NaN and infinity comparisons
|
||||
let would_match = test_doc.ocr_confidence.is_some() &&
|
||||
test_doc.ocr_confidence.unwrap() < threshold;
|
||||
|
||||
if threshold.is_nan() {
|
||||
// NaN comparisons should always be false
|
||||
assert!(!would_match, "NaN threshold should match no documents");
|
||||
} else if threshold == f32::INFINITY {
|
||||
// Positive infinity should match all finite numbers
|
||||
assert!(would_match, "Positive infinity threshold should match finite documents");
|
||||
} else {
|
||||
// Other invalid values like negative infinity
|
||||
assert!(!would_match, "Invalid threshold should match no documents");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue