Merge pull request #82 from readur/feat/merge-document-management-tabs

feat(client): combine the low confidence ocr tab with the bulk cleanup…
This commit is contained in:
Jon Fuller 2025-07-01 13:55:10 -07:00 committed by GitHub
commit 36dcbaed98
5 changed files with 497 additions and 50 deletions

View File

@ -137,6 +137,22 @@ GET /api/auth/me
Authorization: Bearer <jwt_token> Authorization: Bearer <jwt_token>
``` ```
#### OIDC Login (Redirect)
```bash
GET /api/auth/oidc/login
```
Redirects to the configured OIDC provider for authentication.
#### OIDC Callback
```bash
GET /api/auth/oidc/callback?code=<auth_code>&state=<state>
```
Handles the callback from the OIDC provider and issues a JWT token.
#### Logout #### Logout
```bash ```bash
@ -217,6 +233,128 @@ Content-Type: application/json
} }
``` ```
#### Get Document Debug Information
```bash
GET /api/documents/{id}/debug
Authorization: Bearer <jwt_token>
```
Response:
```json
{
"document_id": "550e8400-e29b-41d4-a716-446655440000",
"processing_pipeline": {
"upload": "completed",
"ocr_queue": "completed",
"ocr_processing": "completed",
"validation": "completed"
},
"ocr_details": {
"confidence": 89.5,
"word_count": 342,
"processing_time": 4.2
},
"file_info": {
"mime_type": "application/pdf",
"size": 1048576,
"pages": 3
}
}
```
#### Get Document Thumbnail
```bash
GET /api/documents/{id}/thumbnail
Authorization: Bearer <jwt_token>
```
#### Get Document OCR Text
```bash
GET /api/documents/{id}/ocr
Authorization: Bearer <jwt_token>
```
#### Get Document Processed Image
```bash
GET /api/documents/{id}/processed-image
Authorization: Bearer <jwt_token>
```
#### View Document in Browser
```bash
GET /api/documents/{id}/view
Authorization: Bearer <jwt_token>
```
#### Get Failed Documents
```bash
GET /api/documents/failed?limit=50&offset=0
Authorization: Bearer <jwt_token>
```
Query parameters:
- `limit` - Number of results (default: 50)
- `offset` - Pagination offset
- `stage` - Filter by failure stage
- `reason` - Filter by failure reason
#### View Failed Document
```bash
GET /api/documents/failed/{id}/view
Authorization: Bearer <jwt_token>
```
#### Get Duplicate Documents
```bash
GET /api/documents/duplicates?limit=50&offset=0
Authorization: Bearer <jwt_token>
```
#### Delete Low Confidence Documents
```bash
POST /api/documents/delete-low-confidence
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"confidence_threshold": 70.0,
"preview_only": false
}
```
#### Delete Failed OCR Documents
```bash
POST /api/documents/delete-failed-ocr
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"preview_only": false
}
```
#### Bulk Delete Documents
```bash
DELETE /api/documents
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"document_ids": ["550e8400-e29b-41d4-a716-446655440000", "..."]
}
```
### Search Endpoints ### Search Endpoints
#### Search Documents #### Search Documents
@ -301,10 +439,10 @@ Response:
} }
``` ```
#### Reprocess Document #### Retry OCR Processing
```bash ```bash
POST /api/documents/{id}/reprocess POST /api/documents/{id}/retry-ocr
Authorization: Bearer <jwt_token> Authorization: Bearer <jwt_token>
``` ```
@ -315,6 +453,53 @@ GET /api/queue/failed
Authorization: Bearer <jwt_token> Authorization: Bearer <jwt_token>
``` ```
#### Get Queue Statistics
```bash
GET /api/queue/stats
Authorization: Bearer <jwt_token>
```
Response:
```json
{
"pending_count": 15,
"processing_count": 3,
"failed_count": 2,
"completed_today": 127,
"average_processing_time_seconds": 4.5,
"queue_health": "healthy"
}
```
#### Requeue Failed Items
```bash
POST /api/queue/requeue-failed
Authorization: Bearer <jwt_token>
```
#### Enqueue Pending Documents
```bash
POST /api/queue/enqueue-pending
Authorization: Bearer <jwt_token>
```
#### Pause OCR Processing
```bash
POST /api/queue/pause
Authorization: Bearer <jwt_token>
```
#### Resume OCR Processing
```bash
POST /api/queue/resume
Authorization: Bearer <jwt_token>
```
### Settings Endpoints ### Settings Endpoints
#### Get User Settings #### Get User Settings
@ -391,6 +576,119 @@ POST /api/sources/{id}/sync
Authorization: Bearer <jwt_token> Authorization: Bearer <jwt_token>
``` ```
#### Stop Source Sync
```bash
POST /api/sources/{id}/sync/stop
Authorization: Bearer <jwt_token>
```
#### Test Source Connection
```bash
POST /api/sources/{id}/test
Authorization: Bearer <jwt_token>
```
#### Estimate Source Crawl
```bash
POST /api/sources/{id}/estimate
Authorization: Bearer <jwt_token>
```
#### Estimate Crawl with Configuration
```bash
POST /api/sources/estimate
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"source_type": "webdav",
"config": {
"url": "https://example.com/webdav",
"username": "user",
"password": "pass"
}
}
```
#### Test Connection with Configuration
```bash
POST /api/sources/test-connection
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"source_type": "webdav",
"config": {
"url": "https://example.com/webdav",
"username": "user",
"password": "pass"
}
}
```
### WebDAV Endpoints
#### Test WebDAV Connection
```bash
POST /api/webdav/test-connection
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"url": "https://example.com/webdav",
"username": "user",
"password": "pass"
}
```
#### Estimate WebDAV Crawl
```bash
POST /api/webdav/estimate-crawl
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"url": "https://example.com/webdav",
"username": "user",
"password": "pass"
}
```
#### Get WebDAV Sync Status
```bash
GET /api/webdav/sync-status
Authorization: Bearer <jwt_token>
```
#### Start WebDAV Sync
```bash
POST /api/webdav/start-sync
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"url": "https://example.com/webdav",
"username": "user",
"password": "pass"
}
```
#### Cancel WebDAV Sync
```bash
POST /api/webdav/cancel-sync
Authorization: Bearer <jwt_token>
```
### Labels Endpoints ### Labels Endpoints
#### List Labels #### List Labels
@ -469,30 +767,147 @@ DELETE /api/users/{id}
Authorization: Bearer <jwt_token> Authorization: Bearer <jwt_token>
``` ```
## WebSocket API ### Notifications Endpoints
Connect to receive real-time updates: #### List Notifications
```javascript ```bash
const ws = new WebSocket('ws://localhost:8000/ws'); GET /api/notifications?limit=50&offset=0
Authorization: Bearer <jwt_token>
ws.onmessage = (event) => {
const data = JSON.parse(event.data);
console.log('Event:', data);
};
// Authenticate
ws.send(JSON.stringify({
type: 'auth',
token: 'your_jwt_token'
}));
``` ```
Event types: #### Get Notification Summary
- `document.uploaded` - New document uploaded
- `ocr.completed` - OCR processing completed ```bash
- `ocr.failed` - OCR processing failed GET /api/notifications/summary
- `source.sync.completed` - Source sync finished Authorization: Bearer <jwt_token>
```
Response:
```json
{
"unread_count": 5,
"total_count": 23,
"latest_notification": {
"id": 1,
"type": "ocr_completed",
"message": "OCR processing completed for document.pdf",
"created_at": "2024-01-01T12:00:00Z"
}
}
```
#### Mark Notification as Read
```bash
POST /api/notifications/{id}/read
Authorization: Bearer <jwt_token>
```
#### Mark All Notifications as Read
```bash
POST /api/notifications/read-all
Authorization: Bearer <jwt_token>
```
#### Delete Notification
```bash
DELETE /api/notifications/{id}
Authorization: Bearer <jwt_token>
```
### Ignored Files Endpoints
#### List Ignored Files
```bash
GET /api/ignored-files?limit=50&offset=0
Authorization: Bearer <jwt_token>
```
Query parameters:
- `limit` - Number of results (default: 50)
- `offset` - Pagination offset
- `filename` - Filter by filename
- `source_type` - Filter by source type
#### Get Ignored Files Statistics
```bash
GET /api/ignored-files/stats
Authorization: Bearer <jwt_token>
```
Response:
```json
{
"total_ignored_files": 42,
"total_size_bytes": 104857600,
"most_recent_ignored_at": "2024-01-01T12:00:00Z"
}
```
#### Get Ignored File Details
```bash
GET /api/ignored-files/{id}
Authorization: Bearer <jwt_token>
```
#### Remove File from Ignored List
```bash
DELETE /api/ignored-files/{id}
Authorization: Bearer <jwt_token>
```
#### Bulk Remove Files from Ignored List
```bash
DELETE /api/ignored-files/bulk-delete
Authorization: Bearer <jwt_token>
Content-Type: application/json
{
"ignored_file_ids": [1, 2, 3, 4]
}
```
### Metrics Endpoints
#### Get System Metrics
```bash
GET /api/metrics
Authorization: Bearer <jwt_token>
```
#### Get Prometheus Metrics
```bash
GET /metrics
```
Returns Prometheus-formatted metrics (no authentication required).
### Health Check
#### Health Check
```bash
GET /api/health
```
Response:
```json
{
"status": "healthy",
"timestamp": "2024-01-01T12:00:00Z",
"version": "1.0.0"
}
```
## Examples ## Examples
@ -602,7 +1017,12 @@ curl -X GET http://localhost:8000/api/documents/550e8400-e29b-41d4-a716-44665544
The complete OpenAPI specification is available at: The complete OpenAPI specification is available at:
``` ```
GET /api/openapi.json GET /api-docs/openapi.json
```
Interactive Swagger UI documentation is available at:
```
GET /swagger-ui
``` ```
You can load the OpenAPI specification into tools like Swagger UI or use it to generate client libraries. You can load the OpenAPI specification into tools like Swagger UI or use it to generate client libraries.

View File

@ -73,7 +73,6 @@ const navigationItems: NavigationItem[] = [
{ text: 'Watch Folder', icon: FolderIcon, path: '/watch' }, { text: 'Watch Folder', icon: FolderIcon, path: '/watch' },
{ text: 'Document Management', icon: ManageIcon, path: '/documents/management' }, { text: 'Document Management', icon: ManageIcon, path: '/documents/management' },
{ text: 'Ignored Files', icon: BlockIcon, path: '/ignored-files' }, { text: 'Ignored Files', icon: BlockIcon, path: '/ignored-files' },
{ text: 'Debug', icon: BugReportIcon, path: '/debug' },
]; ];
const AppLayout: React.FC<AppLayoutProps> = ({ children }) => { const AppLayout: React.FC<AppLayoutProps> = ({ children }) => {
@ -533,6 +532,9 @@ const AppLayout: React.FC<AppLayoutProps> = ({ children }) => {
<MenuItem onClick={() => navigate('/settings')}> <MenuItem onClick={() => navigate('/settings')}>
<SettingsIcon sx={{ mr: 2 }} /> Settings <SettingsIcon sx={{ mr: 2 }} /> Settings
</MenuItem> </MenuItem>
<MenuItem onClick={() => navigate('/debug')}>
<BugReportIcon sx={{ mr: 2 }} /> Debug
</MenuItem>
<Divider /> <Divider />
<MenuItem onClick={() => window.open('/swagger-ui', '_blank')}> <MenuItem onClick={() => window.open('/swagger-ui', '_blank')}>
<ApiIcon sx={{ mr: 2 }} /> API Documentation <ApiIcon sx={{ mr: 2 }} /> API Documentation

View File

@ -582,12 +582,12 @@ const DocumentManagementPage: React.FC = () => {
if (currentTab === 0) { if (currentTab === 0) {
fetchFailedDocuments(); fetchFailedDocuments();
} else if (currentTab === 1) { } else if (currentTab === 1) {
// Refresh both low confidence and failed documents for the merged cleanup tab
handlePreviewLowConfidence(); handlePreviewLowConfidence();
handlePreviewFailedDocuments();
} else if (currentTab === 2) { } else if (currentTab === 2) {
fetchDuplicates(); fetchDuplicates();
} else if (currentTab === 3) { } else if (currentTab === 3) {
handlePreviewFailedDocuments();
} else if (currentTab === 4) {
fetchIgnoredFiles(); fetchIgnoredFiles();
fetchIgnoredFilesStats(); fetchIgnoredFilesStats();
} }
@ -750,10 +750,10 @@ const DocumentManagementPage: React.FC = () => {
iconPosition="start" iconPosition="start"
/> />
</Tooltip> </Tooltip>
<Tooltip title="Manage documents with low OCR confidence scores - preview and delete documents below a confidence threshold"> <Tooltip title="Manage and clean up documents with quality issues - low OCR confidence or failed processing">
<Tab <Tab
icon={<FindInPageIcon />} icon={<DeleteIcon />}
label={`Low Quality Manager${previewData ? ` (${previewData.matched_count})` : ''}`} label={`Document Cleanup${(previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0) > 0 ? ` (${(previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0)})` : ''}`}
iconPosition="start" iconPosition="start"
/> />
</Tooltip> </Tooltip>
@ -764,13 +764,6 @@ const DocumentManagementPage: React.FC = () => {
iconPosition="start" iconPosition="start"
/> />
</Tooltip> </Tooltip>
<Tooltip title="Bulk operations for document cleanup and maintenance">
<Tab
icon={<DeleteIcon />}
label="Bulk Cleanup"
iconPosition="start"
/>
</Tooltip>
<Tooltip title="Manage files that have been ignored during sync operations"> <Tooltip title="Manage files that have been ignored during sync operations">
<Tab <Tab
icon={<BlockIcon />} icon={<BlockIcon />}
@ -1373,17 +1366,28 @@ const DocumentManagementPage: React.FC = () => {
</> </>
)} )}
{/* Low Quality Manager Tab Content */} {/* Document Cleanup Tab Content - Merged Low Quality Manager and Bulk Cleanup */}
{currentTab === 1 && ( {currentTab === 1 && (
<> <>
<Alert severity="info" sx={{ mb: 3 }}> <Alert severity="info" sx={{ mb: 3 }}>
<AlertTitle>Low Confidence Document Deletion</AlertTitle> <AlertTitle>Document Cleanup Center</AlertTitle>
<Typography> <Typography>
This tool allows you to delete documents with OCR confidence below a specified threshold. Clean up your document library by removing problematic documents. You can delete:
Use the preview feature first to see what documents would be affected before deleting. </Typography>
<Box component="ul" sx={{ mt: 1, mb: 0, pl: 2 }}>
<li>Documents with low OCR confidence scores (below a threshold you set)</li>
<li>Documents where OCR processing failed completely</li>
</Box>
<Typography sx={{ mt: 1 }}>
Always use the preview feature before deleting to see which documents will be affected.
</Typography> </Typography>
</Alert> </Alert>
{/* Low Confidence Documents Section */}
<Typography variant="h5" gutterBottom sx={{ mb: 2 }}>
Low Confidence Documents
</Typography>
<Card sx={{ mb: 3 }}> <Card sx={{ mb: 3 }}>
<CardContent> <CardContent>
<Grid container spacing={3} alignItems="center"> <Grid container spacing={3} alignItems="center">
@ -1504,18 +1508,20 @@ const DocumentManagementPage: React.FC = () => {
<Typography sx={{ ml: 2 }}>Processing request...</Typography> <Typography sx={{ ml: 2 }}>Processing request...</Typography>
</Box> </Box>
)} )}
</>
)}
{/* Delete Failed Documents Tab Content */} {/* Divider between sections */}
{currentTab === 3 && ( <Divider sx={{ my: 4 }} />
<>
{/* Failed Documents Section */}
<Typography variant="h5" gutterBottom sx={{ mt: 4, mb: 2 }}>
Failed OCR Documents
</Typography>
<Alert severity="warning" sx={{ mb: 3 }}> <Alert severity="warning" sx={{ mb: 3 }}>
<AlertTitle>Delete Failed OCR Documents</AlertTitle> <AlertTitle>Delete Failed OCR Documents</AlertTitle>
<Typography> <Typography>
This tool allows you to delete all documents where OCR processing failed completely. This section allows you to delete all documents where OCR processing failed completely.
This includes documents with NULL confidence values or explicit failure status. This includes documents with NULL confidence values or explicit failure status.
Use the preview feature first to see what documents would be affected before deleting.
</Typography> </Typography>
</Alert> </Alert>
@ -1549,7 +1555,7 @@ const DocumentManagementPage: React.FC = () => {
</CardContent> </CardContent>
</Card> </Card>
{/* Preview Results */} {/* Preview Results for Failed Documents */}
{failedPreviewData && ( {failedPreviewData && (
<Card sx={{ mb: 3 }}> <Card sx={{ mb: 3 }}>
<CardContent> <CardContent>
@ -1574,7 +1580,7 @@ const DocumentManagementPage: React.FC = () => {
</Card> </Card>
)} )}
{/* Loading State */} {/* Loading State for Failed Documents */}
{failedDocsLoading && !failedPreviewData && ( {failedDocsLoading && !failedPreviewData && (
<Box display="flex" justifyContent="center" alignItems="center" minHeight="200px"> <Box display="flex" justifyContent="center" alignItems="center" minHeight="200px">
<CircularProgress /> <CircularProgress />
@ -1585,7 +1591,7 @@ const DocumentManagementPage: React.FC = () => {
)} )}
{/* Ignored Files Tab Content */} {/* Ignored Files Tab Content */}
{currentTab === 4 && ( {currentTab === 3 && (
<> <>
<Alert severity="info" sx={{ mb: 3 }}> <Alert severity="info" sx={{ mb: 3 }}>
<AlertTitle>Ignored Files Management</AlertTitle> <AlertTitle>Ignored Files Management</AlertTitle>

View File

@ -1992,6 +1992,20 @@ pub async fn delete_low_confidence_documents(
} }
/// Delete all documents with failed OCR processing /// Delete all documents with failed OCR processing
#[utoipa::path(
post,
path = "/api/documents/delete-failed-ocr",
tag = "documents",
security(
("bearer_auth" = [])
),
request_body = serde_json::Value,
responses(
(status = 200, description = "Failed OCR documents deleted successfully", body = serde_json::Value),
(status = 401, description = "Unauthorized"),
(status = 500, description = "Internal server error")
)
)]
pub async fn delete_failed_ocr_documents( pub async fn delete_failed_ocr_documents(
State(state): State<Arc<AppState>>, State(state): State<Arc<AppState>>,
auth_user: AuthUser, auth_user: AuthUser,

View File

@ -50,7 +50,11 @@ use crate::{
crate::routes::documents::get_document_ocr, crate::routes::documents::get_document_ocr,
crate::routes::documents::get_processed_image, crate::routes::documents::get_processed_image,
crate::routes::documents::retry_ocr, crate::routes::documents::retry_ocr,
crate::routes::documents::get_document_debug_info,
crate::routes::documents::get_failed_ocr_documents, crate::routes::documents::get_failed_ocr_documents,
crate::routes::documents::view_failed_document,
crate::routes::documents::delete_low_confidence_documents,
crate::routes::documents::delete_failed_ocr_documents,
crate::routes::documents::get_user_duplicates, crate::routes::documents::get_user_duplicates,
// Labels endpoints // Labels endpoints
crate::routes::labels::get_labels, crate::routes::labels::get_labels,
@ -79,6 +83,7 @@ use crate::{
// Queue endpoints // Queue endpoints
crate::routes::queue::get_queue_stats, crate::routes::queue::get_queue_stats,
crate::routes::queue::requeue_failed, crate::routes::queue::requeue_failed,
crate::routes::queue::enqueue_pending_documents,
crate::routes::queue::get_ocr_status, crate::routes::queue::get_ocr_status,
crate::routes::queue::pause_ocr_processing, crate::routes::queue::pause_ocr_processing,
crate::routes::queue::resume_ocr_processing, crate::routes::queue::resume_ocr_processing,