fix(frontend): label writing and fetching logic

This commit is contained in:
aaldebs99 2025-06-20 01:32:32 +00:00
parent aeb98acea8
commit 4dd9162415
5 changed files with 413 additions and 86 deletions

View File

@ -180,17 +180,6 @@ const UploadZone: React.FC<UploadZoneProps> = ({ onUploadComplete }) => {
: f : f
)); ));
// Assign labels to the uploaded document if any are selected
if (selectedLabels.length > 0) {
try {
const labelIds = selectedLabels.map(label => label.id);
await api.put(`/labels/documents/${response.data.id}`, { label_ids: labelIds });
} catch (error) {
console.warn('Failed to assign labels to document:', error);
// Don't fail the upload if label assignment fails
}
}
if (onUploadComplete) { if (onUploadComplete) {
onUploadComplete(response.data); onUploadComplete(response.data);
} }

View File

@ -30,6 +30,7 @@ import {
CalendarToday as DateIcon, CalendarToday as DateIcon,
Storage as SizeIcon, Storage as SizeIcon,
Tag as TagIcon, Tag as TagIcon,
Label as LabelIcon,
Visibility as ViewIcon, Visibility as ViewIcon,
Search as SearchIcon, Search as SearchIcon,
Edit as EditIcon, Edit as EditIcon,
@ -37,6 +38,9 @@ import {
} from '@mui/icons-material'; } from '@mui/icons-material';
import { documentService, OcrResponse } from '../services/api'; import { documentService, OcrResponse } from '../services/api';
import DocumentViewer from '../components/DocumentViewer'; import DocumentViewer from '../components/DocumentViewer';
import LabelSelector from '../components/Labels/LabelSelector';
import { type LabelData } from '../components/Labels/Label';
import api from '../services/api';
interface Document { interface Document {
id: string; id: string;
@ -64,6 +68,10 @@ const DocumentDetailsPage: React.FC = () => {
const [processedImageUrl, setProcessedImageUrl] = useState<string | null>(null); const [processedImageUrl, setProcessedImageUrl] = useState<string | null>(null);
const [processedImageLoading, setProcessedImageLoading] = useState<boolean>(false); const [processedImageLoading, setProcessedImageLoading] = useState<boolean>(false);
const [thumbnailUrl, setThumbnailUrl] = useState<string | null>(null); const [thumbnailUrl, setThumbnailUrl] = useState<string | null>(null);
const [documentLabels, setDocumentLabels] = useState<LabelData[]>([]);
const [availableLabels, setAvailableLabels] = useState<LabelData[]>([]);
const [showLabelDialog, setShowLabelDialog] = useState<boolean>(false);
const [labelsLoading, setLabelsLoading] = useState<boolean>(false);
useEffect(() => { useEffect(() => {
if (id) { if (id) {
@ -80,9 +88,14 @@ const DocumentDetailsPage: React.FC = () => {
useEffect(() => { useEffect(() => {
if (document) { if (document) {
loadThumbnail(); loadThumbnail();
fetchDocumentLabels();
} }
}, [document]); }, [document]);
useEffect(() => {
fetchAvailableLabels();
}, []);
const fetchDocumentDetails = async (): Promise<void> => { const fetchDocumentDetails = async (): Promise<void> => {
if (!id) { if (!id) {
setError('No document ID provided'); setError('No document ID provided');
@ -204,6 +217,58 @@ const DocumentDetailsPage: React.FC = () => {
}); });
}; };
/**
 * Loads the labels attached to the current document (the `id` in scope) and
 * stores them in `documentLabels`.
 *
 * Returns immediately when `id` is falsy. A response that is not HTTP 200, or
 * whose payload is not an array, is ignored and the existing state is left
 * untouched. Request failures are logged to the console and not rethrown.
 */
const fetchDocumentLabels = async (): Promise<void> => {
  if (!id) {
    return;
  }

  try {
    const res = await api.get(`/labels/documents/${id}`);
    // Only accept a successful response that actually carries a list.
    if (res.status !== 200 || !Array.isArray(res.data)) {
      return;
    }
    setDocumentLabels(res.data);
  } catch (error) {
    console.error('Failed to fetch document labels:', error);
  }
};
/**
 * Loads the labels that can be offered for selection and stores them in
 * `availableLabels`.
 *
 * `labelsLoading` is set to true before the request and always reset to false
 * afterwards, whether the request succeeds or fails. A response that is not
 * HTTP 200, or whose payload is not an array, leaves the current list
 * untouched. Request failures are logged to the console and not rethrown.
 */
const fetchAvailableLabels = async (): Promise<void> => {
  try {
    setLabelsLoading(true);
    const res = await api.get('/labels?include_counts=false');
    // The `finally` clause below still runs on this early exit.
    if (res.status !== 200 || !Array.isArray(res.data)) {
      return;
    }
    setAvailableLabels(res.data);
  } catch (error) {
    console.error('Failed to fetch available labels:', error);
  } finally {
    setLabelsLoading(false);
  }
};
/**
 * Creates a new label by POSTing `labelData` to `/labels`, appends the
 * response payload to `availableLabels`, and returns that payload.
 *
 * @param labelData - Label fields supplied by the caller; the fields excluded
 *   by the `Omit` are not part of the request body.
 * @returns The response payload for the newly created label.
 * @throws Rethrows any error raised by the request after logging it, so the
 *   caller can react to the failure.
 */
const handleCreateLabel = async (labelData: Omit<LabelData, 'id' | 'is_system' | 'created_at' | 'updated_at' | 'document_count' | 'source_count'>) => {
  try {
    const created = (await api.post('/labels', labelData)).data;
    // Functional update so the append is based on the latest list.
    setAvailableLabels(existing => existing.concat([created]));
    return created;
  } catch (error) {
    console.error('Failed to create label:', error);
    throw error;
  }
};
/**
 * Replaces the current document's label assignment with `selectedLabels`.
 *
 * Sends the selected label ids to `/labels/documents/{id}` with PUT. On
 * success, `documentLabels` is updated to `selectedLabels` and the label
 * dialog is closed. On failure the error is logged to the console, nothing is
 * rethrown, and the dialog state is not changed.
 *
 * @param selectedLabels - The labels that should be assigned to the document.
 */
const handleSaveLabels = async (selectedLabels: LabelData[]): Promise<void> => {
  if (!id) {
    return;
  }

  try {
    const payload = { label_ids: selectedLabels.map(entry => entry.id) };
    await api.put(`/labels/documents/${id}`, payload);

    setDocumentLabels(selectedLabels);
    setShowLabelDialog(false);
  } catch (error) {
    console.error('Failed to save labels:', error);
  }
};
if (loading) { if (loading) {
return ( return (
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px"> <Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
@ -435,6 +500,48 @@ const DocumentDetailsPage: React.FC = () => {
</Paper> </Paper>
</Grid> </Grid>
)} )}
{/* Labels Section */}
<Grid item xs={12}>
<Paper sx={{ p: 2 }}>
<Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', mb: 2 }}>
<Box sx={{ display: 'flex', alignItems: 'center' }}>
<LabelIcon color="primary" sx={{ mr: 1 }} />
<Typography variant="subtitle2" color="text.secondary">
Labels
</Typography>
</Box>
<Button
size="small"
startIcon={<EditIcon />}
onClick={() => setShowLabelDialog(true)}
sx={{ borderRadius: 2 }}
>
Edit Labels
</Button>
</Box>
{documentLabels.length > 0 ? (
<Stack direction="row" spacing={1} flexWrap="wrap" gap={1}>
{documentLabels.map((label) => (
<Chip
key={label.id}
label={label.name}
sx={{
backgroundColor: label.background_color || label.color + '20',
color: label.color,
borderColor: label.color,
border: '1px solid',
}}
/>
))}
</Stack>
) : (
<Typography variant="body2" color="text.secondary" sx={{ fontStyle: 'italic' }}>
No labels assigned to this document
</Typography>
)}
</Paper>
</Grid>
</Grid> </Grid>
<Divider sx={{ my: 3 }} /> <Divider sx={{ my: 3 }} />
@ -754,6 +861,46 @@ const DocumentDetailsPage: React.FC = () => {
</Button> </Button>
</DialogActions> </DialogActions>
</Dialog> </Dialog>
{/* Label Edit Dialog */}
<Dialog
open={showLabelDialog}
onClose={() => setShowLabelDialog(false)}
maxWidth="md"
fullWidth
>
<DialogTitle>
Edit Document Labels
</DialogTitle>
<DialogContent>
<Box sx={{ mt: 2 }}>
<Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
Select labels to assign to this document
</Typography>
<LabelSelector
selectedLabels={documentLabels}
availableLabels={availableLabels}
onLabelsChange={setDocumentLabels}
onCreateLabel={handleCreateLabel}
placeholder="Choose labels for this document..."
size="medium"
disabled={labelsLoading}
/>
</Box>
</DialogContent>
<DialogActions>
<Button onClick={() => setShowLabelDialog(false)}>
Cancel
</Button>
<Button
variant="contained"
onClick={() => handleSaveLabels(documentLabels)}
sx={{ borderRadius: 2 }}
>
Save Labels
</Button>
</DialogActions>
</Dialog>
</Box> </Box>
); );
}; };

View File

@ -3,6 +3,7 @@ use sqlx::{Row, QueryBuilder};
use uuid::Uuid; use uuid::Uuid;
use crate::models::{Document, SearchRequest, SearchMode, SearchSnippet, HighlightRange, EnhancedDocumentResponse}; use crate::models::{Document, SearchRequest, SearchMode, SearchSnippet, HighlightRange, EnhancedDocumentResponse};
use crate::routes::labels::Label;
use super::Database; use super::Database;
impl Database { impl Database {
@ -1292,4 +1293,70 @@ impl Database {
Ok((duplicates, total)) Ok((duplicates, total))
} }
/// Returns the labels attached to `document_id`, ordered by label name.
///
/// Labels are found by joining `labels` with `document_labels`. The
/// `document_count` and `source_count` columns are selected as constant zeros
/// rather than computed.
///
/// # Errors
///
/// Propagates any error returned while executing the query.
pub async fn get_document_labels(&self, document_id: Uuid) -> Result<Vec<Label>> {
    let statement = sqlx::query_as::<_, Label>(
        r#"
SELECT
l.id, l.user_id, l.name, l.description, l.color,
l.background_color, l.icon, l.is_system, l.created_at, l.updated_at,
0::bigint as document_count, 0::bigint as source_count
FROM labels l
INNER JOIN document_labels dl ON l.id = dl.label_id
WHERE dl.document_id = $1
ORDER BY l.name
"#
    )
    .bind(document_id);

    let found = statement.fetch_all(&self.pool).await?;
    Ok(found)
}
/// Fetches the labels for several documents with a single query and groups
/// them by document id.
///
/// Returns an empty map without querying when `document_ids` is empty.
/// Documents that have no labels get no entry in the map. Within each entry
/// the labels follow the query order (by label name). `document_count` and
/// `source_count` are set to 0 on every returned label.
///
/// # Errors
///
/// Propagates any error returned while executing the query.
pub async fn get_labels_for_documents(&self, document_ids: &[Uuid]) -> Result<std::collections::HashMap<Uuid, Vec<Label>>> {
    use std::collections::HashMap;

    let mut grouped: HashMap<Uuid, Vec<Label>> = HashMap::new();
    if document_ids.is_empty() {
        return Ok(grouped);
    }

    let fetched = sqlx::query(
        r#"
SELECT
dl.document_id,
l.id, l.user_id, l.name, l.description, l.color,
l.background_color, l.icon, l.is_system, l.created_at, l.updated_at
FROM labels l
INNER JOIN document_labels dl ON l.id = dl.label_id
WHERE dl.document_id = ANY($1)
ORDER BY dl.document_id, l.name
"#
    )
    .bind(document_ids)
    .fetch_all(&self.pool)
    .await?;

    for record in fetched {
        let owner: Uuid = record.get("document_id");
        // Count columns are not part of this query, so they are filled with 0.
        grouped.entry(owner).or_default().push(Label {
            id: record.get("id"),
            user_id: record.get("user_id"),
            name: record.get("name"),
            description: record.get("description"),
            color: record.get("color"),
            background_color: record.get("background_color"),
            icon: record.get("icon"),
            is_system: record.get("is_system"),
            created_at: record.get("created_at"),
            updated_at: record.get("updated_at"),
            document_count: 0,
            source_count: 0,
        });
    }

    Ok(grouped)
}
} }

View File

@ -115,6 +115,9 @@ pub struct DocumentResponse {
pub mime_type: String, pub mime_type: String,
/// Tags associated with the document /// Tags associated with the document
pub tags: Vec<String>, pub tags: Vec<String>,
/// Labels associated with the document
#[serde(default)]
pub labels: Vec<crate::routes::labels::Label>,
/// When the document was created /// When the document was created
pub created_at: DateTime<Utc>, pub created_at: DateTime<Utc>,
/// Whether OCR text has been extracted /// Whether OCR text has been extracted
@ -260,6 +263,7 @@ impl From<Document> for DocumentResponse {
file_size: doc.file_size, file_size: doc.file_size,
mime_type: doc.mime_type, mime_type: doc.mime_type,
tags: doc.tags, tags: doc.tags,
labels: Vec::new(), // Labels will be populated separately where needed
created_at: doc.created_at, created_at: doc.created_at,
has_ocr_text: doc.ocr_text.is_some(), has_ocr_text: doc.ocr_text.is_some(),
ocr_confidence: doc.ocr_confidence, ocr_confidence: doc.ocr_confidence,

View File

@ -7,9 +7,11 @@ use axum::{
}; };
use serde::Deserialize; use serde::Deserialize;
use std::sync::Arc; use std::sync::Arc;
use std::collections::HashMap;
use utoipa::ToSchema; use utoipa::ToSchema;
use sha2::{Sha256, Digest}; use sha2::{Sha256, Digest};
use sqlx::Row; use sqlx::Row;
use axum::body::Bytes;
use crate::{ use crate::{
auth::AuthUser, auth::AuthUser,
@ -69,6 +71,13 @@ async fn get_document_by_id(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.ok_or(StatusCode::NOT_FOUND)?; .ok_or(StatusCode::NOT_FOUND)?;
// Get labels for this document
let labels = state
.db
.get_document_labels(document_id)
.await
.unwrap_or_else(|_| Vec::new());
// Convert to DocumentResponse // Convert to DocumentResponse
let response = DocumentResponse { let response = DocumentResponse {
id: document.id, id: document.id,
@ -79,6 +88,7 @@ async fn get_document_by_id(
created_at: document.created_at, created_at: document.created_at,
has_ocr_text: document.ocr_text.is_some(), has_ocr_text: document.ocr_text.is_some(),
tags: document.tags, tags: document.tags,
labels,
ocr_confidence: document.ocr_confidence, ocr_confidence: document.ocr_confidence,
ocr_word_count: document.ocr_word_count, ocr_word_count: document.ocr_word_count,
ocr_processing_time_ms: document.ocr_processing_time_ms, ocr_processing_time_ms: document.ocr_processing_time_ms,
@ -118,92 +128,190 @@ async fn upload_document(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
.unwrap_or_else(|| crate::models::Settings::default()); .unwrap_or_else(|| crate::models::Settings::default());
let mut label_ids: Option<Vec<uuid::Uuid>> = None;
let mut file_data: Option<(String, Bytes)> = None;
// First pass: collect all multipart fields
while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? { while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? {
let name = field.name().unwrap_or("").to_string(); let name = field.name().unwrap_or("").to_string();
if name == "file" { tracing::info!("Processing multipart field: {}", name);
if name == "label_ids" {
let label_ids_text = field.text().await.map_err(|_| StatusCode::BAD_REQUEST)?;
tracing::info!("Received label_ids field: {}", label_ids_text);
match serde_json::from_str::<Vec<uuid::Uuid>>(&label_ids_text) {
Ok(ids) => {
tracing::info!("Successfully parsed {} label IDs: {:?}", ids.len(), ids);
label_ids = Some(ids);
},
Err(e) => {
tracing::warn!("Failed to parse label_ids from upload: {} - Error: {}", label_ids_text, e);
}
}
} else if name == "file" {
let filename = field let filename = field
.file_name() .file_name()
.ok_or(StatusCode::BAD_REQUEST)? .ok_or(StatusCode::BAD_REQUEST)?
.to_string(); .to_string();
if !file_service.is_allowed_file_type(&filename, &settings.allowed_file_types) {
return Err(StatusCode::BAD_REQUEST);
}
let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?; let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?;
let file_size = data.len() as i64; let data_len = data.len();
file_data = Some((filename.clone(), data));
// Check file size limit tracing::info!("Received file: {}, size: {} bytes", filename, data_len);
let max_size_bytes = (settings.max_file_size_mb as i64) * 1024 * 1024;
if file_size > max_size_bytes {
return Err(StatusCode::PAYLOAD_TOO_LARGE);
}
// Calculate file hash for deduplication
let file_hash = calculate_file_hash(&data);
// Check if this exact file content already exists using efficient hash lookup
match state.db.get_document_by_user_and_hash(auth_user.user.id, &file_hash).await {
Ok(Some(existing_doc)) => {
// Return the existing document instead of creating a duplicate
return Ok(Json(existing_doc.into()));
}
Ok(None) => {
// No duplicate found, proceed with upload
}
Err(_) => {
// Continue even if duplicate check fails
}
}
let mime_type = mime_guess::from_path(&filename)
.first_or_octet_stream()
.to_string();
let file_path = file_service
.save_file(&filename, &data)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let document = file_service.create_document(
&filename,
&filename,
&file_path,
file_size,
&mime_type,
auth_user.user.id,
Some(file_hash),
);
let saved_document = state
.db
.create_document(document)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let document_id = saved_document.id;
let enable_background_ocr = settings.enable_background_ocr;
if enable_background_ocr {
// Use the shared queue service from AppState instead of creating a new one
// Calculate priority based on file size
let priority = match file_size {
0..=1048576 => 10, // <= 1MB: highest priority
..=5242880 => 8, // 1-5MB: high priority
..=10485760 => 6, // 5-10MB: medium priority
..=52428800 => 4, // 10-50MB: low priority
_ => 2, // > 50MB: lowest priority
};
state.queue_service.enqueue_document(document_id, priority, file_size).await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
}
return Ok(Json(saved_document.into()));
} }
} }
// Process the file after collecting all fields
if let Some((filename, data)) = file_data {
if !file_service.is_allowed_file_type(&filename, &settings.allowed_file_types) {
return Err(StatusCode::BAD_REQUEST);
}
let file_size = data.len() as i64;
// Check file size limit
let max_size_bytes = (settings.max_file_size_mb as i64) * 1024 * 1024;
if file_size > max_size_bytes {
return Err(StatusCode::PAYLOAD_TOO_LARGE);
}
// Calculate file hash for deduplication
let file_hash = calculate_file_hash(&data);
// Check if this exact file content already exists using efficient hash lookup
match state.db.get_document_by_user_and_hash(auth_user.user.id, &file_hash).await {
Ok(Some(existing_doc)) => {
// Return the existing document instead of creating a duplicate
let labels = state
.db
.get_document_labels(existing_doc.id)
.await
.unwrap_or_else(|_| Vec::new());
let mut response: DocumentResponse = existing_doc.into();
response.labels = labels;
return Ok(Json(response));
}
Ok(None) => {
// No duplicate found, proceed with upload
}
Err(_) => {
// Continue even if duplicate check fails
}
}
let mime_type = mime_guess::from_path(&filename)
.first_or_octet_stream()
.to_string();
let file_path = file_service
.save_file(&filename, &data)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let document = file_service.create_document(
&filename,
&filename,
&file_path,
file_size,
&mime_type,
auth_user.user.id,
Some(file_hash),
);
let saved_document = state
.db
.create_document(document)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let document_id = saved_document.id;
let enable_background_ocr = settings.enable_background_ocr;
// Assign labels if provided
tracing::info!("Processing label assignment for document {}, label_ids: {:?}", document_id, label_ids);
if let Some(ref label_ids_vec) = label_ids {
if !label_ids_vec.is_empty() {
tracing::info!("Attempting to assign {} labels to document {}", label_ids_vec.len(), document_id);
// Verify all labels exist and are accessible to the user
let label_count = sqlx::query(
"SELECT COUNT(*) as count FROM labels WHERE id = ANY($1) AND (user_id = $2 OR is_system = TRUE)"
)
.bind(label_ids_vec)
.bind(auth_user.user.id)
.fetch_one(state.db.get_pool())
.await
.map_err(|e| {
tracing::error!("Failed to verify labels during upload: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
})?;
let count: i64 = label_count.try_get("count").unwrap_or(0);
tracing::info!("Label verification: found {} valid labels out of {} requested", count, label_ids_vec.len());
if count as usize == label_ids_vec.len() {
// All labels are valid, assign them
for label_id in label_ids_vec {
match sqlx::query(
"INSERT INTO document_labels (document_id, label_id, assigned_by) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING"
)
.bind(document_id)
.bind(label_id)
.bind(auth_user.user.id)
.execute(state.db.get_pool())
.await
{
Ok(result) => {
tracing::info!("Successfully assigned label {} to document {}, rows affected: {}", label_id, document_id, result.rows_affected());
},
Err(e) => {
tracing::error!("Failed to assign label {} to document {}: {}", label_id, document_id, e);
}
}
}
} else {
tracing::warn!("Label verification failed: Some labels were not accessible to user {} during upload (found {}/{} labels)", auth_user.user.id, count, label_ids_vec.len());
}
} else {
tracing::info!("No labels to assign (empty label_ids vector)");
}
} else {
tracing::info!("No labels to assign (label_ids is None)");
}
if enable_background_ocr {
// Use the shared queue service from AppState instead of creating a new one
// Calculate priority based on file size
let priority = match file_size {
0..=1048576 => 10, // <= 1MB: highest priority
..=5242880 => 8, // 1-5MB: high priority
..=10485760 => 6, // 5-10MB: medium priority
..=52428800 => 4, // 10-50MB: low priority
_ => 2, // > 50MB: lowest priority
};
state.queue_service.enqueue_document(document_id, priority, file_size).await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
}
// Get labels for this document (if any were assigned)
let labels = state
.db
.get_document_labels(document_id)
.await
.unwrap_or_else(|_| Vec::new());
let mut response: DocumentResponse = saved_document.into();
response.labels = labels;
return Ok(Json(response));
}
Err(StatusCode::BAD_REQUEST) Err(StatusCode::BAD_REQUEST)
} }
@ -258,7 +366,19 @@ async fn list_documents(
) )
).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; ).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let documents_response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect(); // Get labels for all documents efficiently
let document_ids: Vec<uuid::Uuid> = documents.iter().map(|doc| doc.id).collect();
let labels_map = state
.db
.get_labels_for_documents(&document_ids)
.await
.unwrap_or_else(|_| std::collections::HashMap::new());
let documents_response: Vec<DocumentResponse> = documents.into_iter().map(|doc| {
let mut response: DocumentResponse = doc.into();
response.labels = labels_map.get(&response.id).cloned().unwrap_or_else(Vec::new);
response
}).collect();
let response = serde_json::json!({ let response = serde_json::json!({
"documents": documents_response, "documents": documents_response,