diff --git a/docs/api-reference.md b/docs/api-reference.md index 8cdd193..f158671 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -137,6 +137,22 @@ GET /api/auth/me Authorization: Bearer ``` +#### OIDC Login (Redirect) + +```bash +GET /api/auth/oidc/login +``` + +Redirects to the configured OIDC provider for authentication. + +#### OIDC Callback + +```bash +GET /api/auth/oidc/callback?code=&state= +``` + +Handles the callback from the OIDC provider and issues a JWT token. + #### Logout ```bash @@ -217,6 +233,128 @@ Content-Type: application/json } ``` +#### Get Document Debug Information + +```bash +GET /api/documents/{id}/debug +Authorization: Bearer +``` + +Response: +```json +{ + "document_id": "550e8400-e29b-41d4-a716-446655440000", + "processing_pipeline": { + "upload": "completed", + "ocr_queue": "completed", + "ocr_processing": "completed", + "validation": "completed" + }, + "ocr_details": { + "confidence": 89.5, + "word_count": 342, + "processing_time": 4.2 + }, + "file_info": { + "mime_type": "application/pdf", + "size": 1048576, + "pages": 3 + } +} +``` + +#### Get Document Thumbnail + +```bash +GET /api/documents/{id}/thumbnail +Authorization: Bearer +``` + +#### Get Document OCR Text + +```bash +GET /api/documents/{id}/ocr +Authorization: Bearer +``` + +#### Get Document Processed Image + +```bash +GET /api/documents/{id}/processed-image +Authorization: Bearer +``` + +#### View Document in Browser + +```bash +GET /api/documents/{id}/view +Authorization: Bearer +``` + +#### Get Failed Documents + +```bash +GET /api/documents/failed?limit=50&offset=0 +Authorization: Bearer +``` + +Query parameters: +- `limit` - Number of results (default: 50) +- `offset` - Pagination offset +- `stage` - Filter by failure stage +- `reason` - Filter by failure reason + +#### View Failed Document + +```bash +GET /api/documents/failed/{id}/view +Authorization: Bearer +``` + +#### Get Duplicate Documents + +```bash +GET /api/documents/duplicates?limit=50&offset=0 +Authorization: Bearer +``` + +#### Delete Low Confidence Documents + +```bash +POST /api/documents/delete-low-confidence +Authorization: Bearer +Content-Type: application/json + +{ + "confidence_threshold": 70.0, + "preview_only": false +} +``` + +#### Delete Failed OCR Documents + +```bash +POST /api/documents/delete-failed-ocr +Authorization: Bearer +Content-Type: application/json + +{ + "preview_only": false +} +``` + +#### Bulk Delete Documents + +```bash +DELETE /api/documents +Authorization: Bearer +Content-Type: application/json + +{ + "document_ids": ["550e8400-e29b-41d4-a716-446655440000", "..."] +} +``` + ### Search Endpoints #### Search Documents @@ -301,10 +439,10 @@ Response: } ``` -#### Reprocess Document +#### Retry OCR Processing ```bash -POST /api/documents/{id}/reprocess +POST /api/documents/{id}/retry-ocr Authorization: Bearer ``` @@ -315,6 +453,53 @@ GET /api/queue/failed Authorization: Bearer ``` +#### Get Queue Statistics + +```bash +GET /api/queue/stats +Authorization: Bearer +``` + +Response: +```json +{ + "pending_count": 15, + "processing_count": 3, + "failed_count": 2, + "completed_today": 127, + "average_processing_time_seconds": 4.5, + "queue_health": "healthy" +} +``` + +#### Requeue Failed Items + +```bash +POST /api/queue/requeue-failed +Authorization: Bearer +``` + +#### Enqueue Pending Documents + +```bash +POST /api/queue/enqueue-pending +Authorization: Bearer +``` + +#### Pause OCR Processing + +```bash +POST /api/queue/pause +Authorization: Bearer +``` + +#### Resume OCR Processing + +```bash +POST /api/queue/resume +Authorization: Bearer +``` + ### Settings Endpoints #### Get User Settings @@ -391,6 +576,119 @@ POST /api/sources/{id}/sync Authorization: Bearer ``` +#### Stop Source Sync + +```bash +POST /api/sources/{id}/sync/stop +Authorization: Bearer +``` + +#### Test Source Connection + +```bash +POST /api/sources/{id}/test +Authorization: Bearer +``` + +#### Estimate Source Crawl + +```bash +POST /api/sources/{id}/estimate +Authorization: Bearer +``` + +#### Estimate Crawl with Configuration + +```bash +POST /api/sources/estimate +Authorization: Bearer +Content-Type: application/json + +{ + "source_type": "webdav", + "config": { + "url": "https://example.com/webdav", + "username": "user", + "password": "pass" + } +} +``` + +#### Test Connection with Configuration + +```bash +POST /api/sources/test-connection +Authorization: Bearer +Content-Type: application/json + +{ + "source_type": "webdav", + "config": { + "url": "https://example.com/webdav", + "username": "user", + "password": "pass" + } +} +``` + +### WebDAV Endpoints + +#### Test WebDAV Connection + +```bash +POST /api/webdav/test-connection +Authorization: Bearer +Content-Type: application/json + +{ + "url": "https://example.com/webdav", + "username": "user", + "password": "pass" +} +``` + +#### Estimate WebDAV Crawl + +```bash +POST /api/webdav/estimate-crawl +Authorization: Bearer +Content-Type: application/json + +{ + "url": "https://example.com/webdav", + "username": "user", + "password": "pass" +} +``` + +#### Get WebDAV Sync Status + +```bash +GET /api/webdav/sync-status +Authorization: Bearer +``` + +#### Start WebDAV Sync + +```bash +POST /api/webdav/start-sync +Authorization: Bearer +Content-Type: application/json + +{ + "url": "https://example.com/webdav", + "username": "user", + "password": "pass" +} +``` + +#### Cancel WebDAV Sync + +```bash +POST /api/webdav/cancel-sync +Authorization: Bearer +``` + ### Labels Endpoints #### List Labels @@ -469,30 +767,147 @@ DELETE /api/users/{id} Authorization: Bearer ``` -## WebSocket API +### Notifications Endpoints -Connect to receive real-time updates: +#### List Notifications -```javascript -const ws = new WebSocket('ws://localhost:8000/ws'); - -ws.onmessage = (event) => { - const data = JSON.parse(event.data); - console.log('Event:', data); -}; - -// Authenticate -ws.send(JSON.stringify({ - type: 'auth', - token: 'your_jwt_token' -})); +```bash +GET /api/notifications?limit=50&offset=0 +Authorization: Bearer ``` -Event types: -- `document.uploaded` - New document uploaded -- `ocr.completed` - OCR processing completed -- `ocr.failed` - OCR processing failed -- `source.sync.completed` - Source sync finished +#### Get Notification Summary + +```bash +GET /api/notifications/summary +Authorization: Bearer +``` + +Response: +```json +{ + "unread_count": 5, + "total_count": 23, + "latest_notification": { + "id": 1, + "type": "ocr_completed", + "message": "OCR processing completed for document.pdf", + "created_at": "2024-01-01T12:00:00Z" + } +} +``` + +#### Mark Notification as Read + +```bash +POST /api/notifications/{id}/read +Authorization: Bearer +``` + +#### Mark All Notifications as Read + +```bash +POST /api/notifications/read-all +Authorization: Bearer +``` + +#### Delete Notification + +```bash +DELETE /api/notifications/{id} +Authorization: Bearer +``` + +### Ignored Files Endpoints + +#### List Ignored Files + +```bash +GET /api/ignored-files?limit=50&offset=0 +Authorization: Bearer +``` + +Query parameters: +- `limit` - Number of results (default: 50) +- `offset` - Pagination offset +- `filename` - Filter by filename +- `source_type` - Filter by source type + +#### Get Ignored Files Statistics + +```bash +GET /api/ignored-files/stats +Authorization: Bearer +``` + +Response: +```json +{ + "total_ignored_files": 42, + "total_size_bytes": 104857600, + "most_recent_ignored_at": "2024-01-01T12:00:00Z" +} +``` + +#### Get Ignored File Details + +```bash +GET /api/ignored-files/{id} +Authorization: Bearer +``` + +#### Remove File from Ignored List + +```bash +DELETE /api/ignored-files/{id} +Authorization: Bearer +``` + +#### Bulk Remove Files from Ignored List + +```bash +DELETE /api/ignored-files/bulk-delete +Authorization: Bearer +Content-Type: application/json + +{ + "ignored_file_ids": [1, 2, 3, 4] +} +``` + +### Metrics Endpoints + +#### Get System Metrics + +```bash +GET /api/metrics +Authorization: Bearer +``` + +#### Get Prometheus Metrics + +```bash +GET /metrics +``` + +Returns Prometheus-formatted metrics (no authentication required). + +### Health Check + +#### Health Check + +```bash +GET /api/health +``` + +Response: +```json +{ + "status": "healthy", + "timestamp": "2024-01-01T12:00:00Z", + "version": "1.0.0" +} +``` ## Examples @@ -602,7 +1017,12 @@ curl -X GET http://localhost:8000/api/documents/550e8400-e29b-41d4-a716-44665544 The complete OpenAPI specification is available at: ``` -GET /api/openapi.json +GET /api-docs/openapi.json +``` + +Interactive Swagger UI documentation is available at: +``` +GET /swagger-ui ``` You can use this with tools like Swagger UI or to generate client libraries. diff --git a/frontend/src/components/Layout/AppLayout.tsx b/frontend/src/components/Layout/AppLayout.tsx index 5855e11..454bb01 100644 --- a/frontend/src/components/Layout/AppLayout.tsx +++ b/frontend/src/components/Layout/AppLayout.tsx @@ -73,7 +73,6 @@ const navigationItems: NavigationItem[] = [ { text: 'Watch Folder', icon: FolderIcon, path: '/watch' }, { text: 'Document Management', icon: ManageIcon, path: '/documents/management' }, { text: 'Ignored Files', icon: BlockIcon, path: '/ignored-files' }, - { text: 'Debug', icon: BugReportIcon, path: '/debug' }, ]; const AppLayout: React.FC = ({ children }) => { @@ -533,6 +532,9 @@ const AppLayout: React.FC = ({ children }) => { navigate('/settings')}> Settings + navigate('/debug')}> + Debug + window.open('/swagger-ui', '_blank')}> API Documentation diff --git a/frontend/src/pages/DocumentManagementPage.tsx b/frontend/src/pages/DocumentManagementPage.tsx index 3dc4322..c528a16 100644 --- a/frontend/src/pages/DocumentManagementPage.tsx +++ b/frontend/src/pages/DocumentManagementPage.tsx @@ -582,12 +582,12 @@ const DocumentManagementPage: React.FC = () => { if (currentTab === 0) { fetchFailedDocuments(); } else if (currentTab === 1) { + // Refresh both low confidence and failed documents for the merged cleanup tab handlePreviewLowConfidence(); + handlePreviewFailedDocuments(); } else if (currentTab === 2) { fetchDuplicates(); } else if (currentTab === 3) { - handlePreviewFailedDocuments(); - } else if (currentTab === 4) { fetchIgnoredFiles(); fetchIgnoredFilesStats(); } @@ -750,10 +750,10 @@ const DocumentManagementPage: React.FC = () => { iconPosition="start" /> - + } - label={`Low Quality Manager${previewData ? ` (${previewData.matched_count})` : ''}`} + icon={} + label={`Document Cleanup${(previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0) > 0 ? ` (${(previewData?.matched_count || 0) + (failedPreviewData?.matched_count || 0)})` : ''}`} iconPosition="start" /> @@ -764,13 +764,6 @@ const DocumentManagementPage: React.FC = () => { iconPosition="start" /> - - } - label="Bulk Cleanup" - iconPosition="start" - /> - } @@ -1373,17 +1366,28 @@ const DocumentManagementPage: React.FC = () => { )} - {/* Low Quality Manager Tab Content */} + {/* Document Cleanup Tab Content - Merged Low Quality Manager and Bulk Cleanup */} {currentTab === 1 && ( <> - Low Confidence Document Deletion + Document Cleanup Center - This tool allows you to delete documents with OCR confidence below a specified threshold. - Use the preview feature first to see what documents would be affected before deleting. + Clean up your document library by removing problematic documents. You can delete: + + +
  • Documents with low OCR confidence scores (below a threshold you set)
  • +
  • Documents where OCR processing failed completely
  • +
    + + Always use the preview feature before deleting to see which documents will be affected.
    + {/* Low Confidence Documents Section */} + + Low Confidence Documents + + @@ -1504,18 +1508,20 @@ const DocumentManagementPage: React.FC = () => { Processing request... )} - - )} - {/* Delete Failed Documents Tab Content */} - {currentTab === 3 && ( - <> + {/* Divider between sections */} + + + {/* Failed Documents Section */} + + Failed OCR Documents + + Delete Failed OCR Documents - This tool allows you to delete all documents where OCR processing failed completely. + This section allows you to delete all documents where OCR processing failed completely. This includes documents with NULL confidence values or explicit failure status. - Use the preview feature first to see what documents would be affected before deleting. @@ -1549,7 +1555,7 @@ const DocumentManagementPage: React.FC = () => { - {/* Preview Results */} + {/* Preview Results for Failed Documents */} {failedPreviewData && ( @@ -1574,7 +1580,7 @@ const DocumentManagementPage: React.FC = () => { )} - {/* Loading State */} + {/* Loading State for Failed Documents */} {failedDocsLoading && !failedPreviewData && ( @@ -1585,7 +1591,7 @@ const DocumentManagementPage: React.FC = () => { )} {/* Ignored Files Tab Content */} - {currentTab === 4 && ( + {currentTab === 3 && ( <> Ignored Files Management diff --git a/src/routes/documents.rs b/src/routes/documents.rs index cec3549..7cbb8bb 100644 --- a/src/routes/documents.rs +++ b/src/routes/documents.rs @@ -1992,6 +1992,20 @@ pub async fn delete_low_confidence_documents( } /// Delete all documents with failed OCR processing +#[utoipa::path( + post, + path = "/api/documents/delete-failed-ocr", + tag = "documents", + security( + ("bearer_auth" = []) + ), + request_body = serde_json::Value, + responses( + (status = 200, description = "Failed OCR documents deleted successfully", body = serde_json::Value), + (status = 401, description = "Unauthorized"), + (status = 500, description = "Internal server error") + ) +)] pub async fn delete_failed_ocr_documents( State(state): State>, auth_user: AuthUser, diff --git a/src/swagger.rs b/src/swagger.rs index 673a06a..85bb6fa 100644 --- a/src/swagger.rs +++ b/src/swagger.rs @@ -50,7 +50,11 @@ use crate::{ crate::routes::documents::get_document_ocr, crate::routes::documents::get_processed_image, crate::routes::documents::retry_ocr, + crate::routes::documents::get_document_debug_info, crate::routes::documents::get_failed_ocr_documents, + crate::routes::documents::view_failed_document, + crate::routes::documents::delete_low_confidence_documents, + crate::routes::documents::delete_failed_ocr_documents, crate::routes::documents::get_user_duplicates, // Labels endpoints crate::routes::labels::get_labels, @@ -79,6 +83,7 @@ use crate::{ // Queue endpoints crate::routes::queue::get_queue_stats, crate::routes::queue::requeue_failed, + crate::routes::queue::enqueue_pending_documents, crate::routes::queue::get_ocr_status, crate::routes::queue::pause_ocr_processing, crate::routes::queue::resume_ocr_processing,