feat(client): show more fields for Documents

This commit is contained in:
perf3ct 2025-07-10 21:02:15 +00:00
parent d8c58ef302
commit 0465777890
9 changed files with 329 additions and 11 deletions

View File

@ -548,6 +548,66 @@ const DocumentDetailsPage: React.FC = () => {
{formatDate(document.created_at)}
</Typography>
</Box>
{/*
  Source type badge in the overview list; rendered only when source_type is set.
  The label is humanized with a global regex because String.prototype.replace with a
  string pattern swaps only the FIRST underscore (e.g. "local_folder_sync" would
  otherwise be shown as "LOCAL FOLDER_SYNC").
*/}
{document.source_type && (
  <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
    <Typography variant="body2" color="text.secondary">
      Source Type
    </Typography>
    <Chip
      label={document.source_type.replace(/_/g, ' ').toUpperCase()}
      size="small"
      sx={{
        backgroundColor: theme.palette.info.light,
        color: theme.palette.info.dark,
        fontWeight: 600,
      }}
    />
  </Box>
)}
{/*
  Original path row in the overview list; rendered only when source_path is set.
  The value is kept on one line and clipped with an ellipsis at 200px; the full
  path is exposed through the element's title attribute.
*/}
{document.source_path ? (
  <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
    <Typography color="text.secondary" variant="body2">
      Original Path
    </Typography>
    <Typography
      title={document.source_path}
      variant="body2"
      sx={{
        fontWeight: 600,
        maxWidth: '200px',
        overflow: 'hidden',
        textOverflow: 'ellipsis',
        whiteSpace: 'nowrap',
      }}
    >
      {document.source_path}
    </Typography>
  </Box>
) : null}
{/* Original creation timestamp row; rendered only when original_created_at is set. */}
{document.original_created_at ? (
  <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
    <Typography color="text.secondary" variant="body2">
      Original Created
    </Typography>
    <Typography sx={{ fontWeight: 600 }} variant="body2">
      {formatDate(document.original_created_at)}
    </Typography>
  </Box>
) : null}
{/* Original modification timestamp row; rendered only when original_modified_at is set. */}
{document.original_modified_at ? (
  <Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
    <Typography color="text.secondary" variant="body2">
      Original Modified
    </Typography>
    <Typography sx={{ fontWeight: 600 }} variant="body2">
      {formatDate(document.original_modified_at)}
    </Typography>
  </Box>
) : null}
{document.has_ocr_text && (
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
@ -829,6 +889,102 @@ const DocumentDetailsPage: React.FC = () => {
ocrError={ocrData?.ocr_error}
/>
{/*
  Source Information card.
  Shown when at least one of the fields it renders is present. The guard includes
  source_path (the card renders it below) and tests file_permissions against
  null/undefined rather than truthiness, so a permission value of 0 still counts
  as "present" and is never leaked into the DOM as a bare "0" by `0 && <jsx>`.
*/}
{(document.source_type ||
  document.source_path ||
  document.file_permissions != null ||
  document.file_owner ||
  document.file_group) && (
  <Card
    sx={{
      backgroundColor: theme.palette.background.paper,
      backdropFilter: 'blur(10px)',
    }}
  >
    <CardContent sx={{ p: 4 }}>
      <Typography variant="h5" sx={{ mb: 3, fontWeight: 700, display: 'flex', alignItems: 'center' }}>
        <SourceIcon sx={{ mr: 1, color: theme.palette.primary.main }} />
        Source Information
      </Typography>
      <Grid container spacing={3}>
        {/* Global regex: a string pattern would only replace the first underscore. */}
        {document.source_type && (
          <Grid item xs={12} sm={6}>
            <Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
              <Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
                Source Type
              </Typography>
              <Chip
                label={document.source_type.replace(/_/g, ' ').toUpperCase()}
                color="primary"
                variant="outlined"
              />
            </Box>
          </Grid>
        )}
        {/* Permissions are shown in octal followed by the raw decimal value. */}
        {document.file_permissions != null && (
          <Grid item xs={12} sm={6}>
            <Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
              <Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
                File Permissions
              </Typography>
              <Typography variant="body1" sx={{ fontFamily: 'monospace', fontWeight: 600 }}>
                {document.file_permissions.toString(8)} ({document.file_permissions})
              </Typography>
            </Box>
          </Grid>
        )}
        {document.file_owner && (
          <Grid item xs={12} sm={6}>
            <Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
              <Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
                File Owner
              </Typography>
              <Typography variant="body1" sx={{ fontWeight: 600 }}>
                {document.file_owner}
              </Typography>
            </Box>
          </Grid>
        )}
        {document.file_group && (
          <Grid item xs={12} sm={6}>
            <Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
              <Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
                File Group
              </Typography>
              <Typography variant="body1" sx={{ fontWeight: 600 }}>
                {document.file_group}
              </Typography>
            </Box>
          </Grid>
        )}
        {/* Full-width cell: the path is shown unclipped and may wrap at any character. */}
        {document.source_path && (
          <Grid item xs={12}>
            <Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
              <Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
                Original Source Path
              </Typography>
              <Typography
                variant="body1"
                sx={{
                  fontFamily: 'monospace',
                  fontWeight: 600,
                  wordBreak: 'break-all',
                  backgroundColor: theme.palette.background.default,
                  p: 1,
                  borderRadius: 1,
                }}
              >
                {document.source_path}
              </Typography>
            </Box>
          </Grid>
        )}
      </Grid>
    </CardContent>
  </Card>
)}
{/* Enhanced Metadata Display */}
{document.source_metadata && Object.keys(document.source_metadata).length > 0 && (
<Card

View File

@ -14,22 +14,28 @@ export interface Document {
id: string
filename: string
original_filename: string
file_path: string
file_size: number
mime_type: string
tags: string[]
created_at: string
updated_at?: string
user_id?: string
updated_at: string
user_id: string
file_hash?: string
original_created_at?: string
original_modified_at?: string
source_path?: string
source_type?: string
source_id?: string
file_permissions?: number
file_owner?: string
file_group?: string
source_metadata?: Record<string, any>
has_ocr_text: boolean
ocr_confidence?: number
ocr_word_count?: number
ocr_processing_time_ms?: number
ocr_status?: string
// New metadata fields
original_created_at?: string
original_modified_at?: string
source_metadata?: Record<string, any>
}
export interface SearchRequest {

View File

@ -0,0 +1,46 @@
-- Migration: dedicated source-metadata columns on the documents table.
--
-- Commonly used metadata that otherwise lives in the source_metadata JSON gets
-- its own column so it can be queried and indexed directly. Every statement is
-- guarded with IF NOT EXISTS, so re-running the script is harmless. No backfill
-- is performed here: the new columns are added without a default.

-- All five columns are added in a single ALTER TABLE:
--   source_path      original file location in the source system
--   source_type      e.g. 'web_upload', 'filesystem', 'webdav', 's3'
--   file_permissions Unix mode bits from the source system
--   file_owner       username or uid from the source system
--   file_group       groupname or gid from the source system
ALTER TABLE documents
    ADD COLUMN IF NOT EXISTS source_path TEXT,
    ADD COLUMN IF NOT EXISTS source_type TEXT,
    ADD COLUMN IF NOT EXISTS file_permissions INTEGER,
    ADD COLUMN IF NOT EXISTS file_owner TEXT,
    ADD COLUMN IF NOT EXISTS file_group TEXT;

-- Partial indexes: rows where the column is NULL are left out of each index.
CREATE INDEX IF NOT EXISTS idx_documents_source_path
    ON documents (source_path)
    WHERE source_path IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_documents_source_type
    ON documents (source_type)
    WHERE source_type IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_documents_file_permissions
    ON documents (file_permissions)
    WHERE file_permissions IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_documents_file_owner
    ON documents (file_owner)
    WHERE file_owner IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_documents_file_group
    ON documents (file_group)
    WHERE file_group IS NOT NULL;

-- Column descriptions stored in the database catalog.
COMMENT ON COLUMN documents.source_path IS 'Original path where the file was located in the source system';
COMMENT ON COLUMN documents.source_type IS 'Type of source where file was ingested from (web_upload, filesystem, webdav, s3, etc.)';
COMMENT ON COLUMN documents.file_permissions IS 'File permissions from source system (Unix mode bits)';
COMMENT ON COLUMN documents.file_owner IS 'File owner from source system (username or uid)';
COMMENT ON COLUMN documents.file_group IS 'File group from source system (groupname or gid)';

View File

@ -9,7 +9,8 @@ pub const DOCUMENT_FIELDS: &str = r#"
content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms,
ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason,
tags, created_at, updated_at, user_id, file_hash, original_created_at,
original_modified_at, source_metadata
original_modified_at, source_path, source_type, source_id, file_permissions,
file_owner, file_group, source_metadata
"#;
/// Maps a database row to a Document struct
@ -39,6 +40,12 @@ pub fn map_row_to_document(row: &sqlx::postgres::PgRow) -> Document {
file_hash: row.get("file_hash"),
original_created_at: row.get("original_created_at"),
original_modified_at: row.get("original_modified_at"),
source_path: row.get("source_path"),
source_type: row.get("source_type"),
source_id: row.get("source_id"),
file_permissions: row.get("file_permissions"),
file_owner: row.get("file_owner"),
file_group: row.get("file_group"),
source_metadata: row.get("source_metadata"),
}
}

View File

@ -54,6 +54,15 @@ pub struct DocumentIngestionRequest {
/// Optional metadata from source file system
pub original_created_at: Option<chrono::DateTime<chrono::Utc>>,
pub original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
/// Original file path in source system
pub source_path: Option<String>,
/// File permissions from source system (Unix mode bits)
pub file_permissions: Option<i32>,
/// File owner from source system
pub file_owner: Option<String>,
/// File group from source system
pub file_group: Option<String>,
/// Additional metadata from source system (EXIF, PDF metadata, etc.)
pub source_metadata: Option<serde_json::Value>,
}
@ -112,6 +121,9 @@ impl DocumentIngestionService {
pub async fn ingest_document(&self, request: DocumentIngestionRequest) -> Result<IngestionResult, Box<dyn std::error::Error + Send + Sync>> {
let file_hash = self.calculate_file_hash(&request.file_data);
let file_size = request.file_data.len() as i64;
// Clone source_type early for error handling
let source_type_for_error = request.source_type.clone();
debug!(
"Ingesting document: {} for user {} (hash: {}, size: {} bytes, policy: {:?})",
@ -184,7 +196,7 @@ impl DocumentIngestionService {
failure_reason: "storage_error".to_string(),
failure_stage: "storage".to_string(),
existing_document_id: None,
ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
ingestion_source: source_type_for_error.clone().unwrap_or_else(|| "upload".to_string()),
error_message: Some(e.to_string()),
retry_count: Some(0),
last_retry_at: None,
@ -211,6 +223,12 @@ impl DocumentIngestionService {
Some(file_hash.clone()),
request.original_created_at,
request.original_modified_at,
request.source_path,
request.source_type,
request.source_id,
request.file_permissions,
request.file_owner,
request.file_group,
request.source_metadata,
);
@ -264,7 +282,7 @@ impl DocumentIngestionService {
failure_reason: "database_error".to_string(),
failure_stage: "ingestion".to_string(),
existing_document_id: None,
ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
ingestion_source: source_type_for_error.clone().unwrap_or_else(|| "upload".to_string()),
error_message: Some(e.to_string()),
retry_count: Some(0),
last_retry_at: None,
@ -321,6 +339,10 @@ impl DocumentIngestionService {
source_id,
original_created_at,
original_modified_at,
source_path: Some(file_info.path.clone()),
file_permissions: file_info.permissions.map(|p| p as i32),
file_owner: file_info.owner.clone(),
file_group: file_info.group.clone(),
source_metadata,
};
@ -346,6 +368,10 @@ impl DocumentIngestionService {
source_id: None,
original_created_at: None,
original_modified_at: None,
source_path: None, // Direct uploads don't have a source path
file_permissions: None, // Direct uploads don't preserve permissions
file_owner: None, // Direct uploads don't preserve owner
file_group: None, // Direct uploads don't preserve group
source_metadata: None,
};
@ -373,6 +399,10 @@ impl DocumentIngestionService {
source_id: Some(source_id),
original_created_at: None,
original_modified_at: None,
source_path: None, // Source sync files don't have a source path
file_permissions: None, // Source sync files don't preserve permissions
file_owner: None, // Source sync files don't preserve owner
file_group: None, // Source sync files don't preserve group
source_metadata: None,
};
@ -399,6 +429,10 @@ impl DocumentIngestionService {
source_id: Some(webdav_source_id),
original_created_at: None,
original_modified_at: None,
source_path: None, // WebDAV files don't have a source path in this method
file_permissions: None, // WebDAV files don't preserve permissions in this method
file_owner: None, // WebDAV files don't preserve owner in this method
file_group: None, // WebDAV files don't preserve group in this method
source_metadata: None,
};
@ -424,6 +458,10 @@ impl DocumentIngestionService {
source_id: None,
original_created_at: None,
original_modified_at: None,
source_path: None, // Batch files don't have a source path
file_permissions: None, // Batch files don't preserve permissions
file_owner: None, // Batch files don't preserve owner
file_group: None, // Batch files don't preserve group
source_metadata: None,
};

View File

@ -32,7 +32,19 @@ pub struct Document {
pub original_created_at: Option<DateTime<Utc>>,
/// Original file modification timestamp from source system
pub original_modified_at: Option<DateTime<Utc>>,
/// Additional metadata from source system (permissions, attributes, EXIF data, etc.)
/// Original path where the file was located (from source system)
pub source_path: Option<String>,
/// Type of source where file was ingested from (e.g., "web_upload", "filesystem", "webdav")
pub source_type: Option<String>,
/// UUID of the source system/configuration
pub source_id: Option<Uuid>,
/// File permissions from source system (Unix mode bits)
pub file_permissions: Option<i32>,
/// File owner from source system (username or uid)
pub file_owner: Option<String>,
/// File group from source system (groupname or gid)
pub file_group: Option<String>,
/// Additional metadata from source system (EXIF data, PDF metadata, custom attributes, etc.)
pub source_metadata: Option<serde_json::Value>,
}

View File

@ -34,6 +34,8 @@ pub struct DocumentResponse {
pub filename: String,
/// Original filename when uploaded
pub original_filename: String,
/// File path where the document is stored
pub file_path: String,
/// File size in bytes
pub file_size: i64,
/// MIME type of the file
@ -45,6 +47,13 @@ pub struct DocumentResponse {
pub labels: Vec<crate::routes::labels::Label>,
/// When the document was created
pub created_at: DateTime<Utc>,
/// When the document was last updated
pub updated_at: DateTime<Utc>,
/// User who uploaded/owns the document
pub user_id: Uuid,
/// SHA256 hash of the file content
#[serde(skip_serializing_if = "Option::is_none", default)]
pub file_hash: Option<String>,
/// Whether OCR text has been extracted
pub has_ocr_text: bool,
/// OCR confidence score (0-100, higher is better)
@ -61,7 +70,25 @@ pub struct DocumentResponse {
/// Original file modification timestamp from source system
#[serde(skip_serializing_if = "Option::is_none", default)]
pub original_modified_at: Option<DateTime<Utc>>,
/// Additional metadata from source system (permissions, attributes, etc.)
/// Original path where the file was located (from source system)
#[serde(skip_serializing_if = "Option::is_none", default)]
pub source_path: Option<String>,
/// Type of source where file was ingested from
#[serde(skip_serializing_if = "Option::is_none", default)]
pub source_type: Option<String>,
/// UUID of the source system/configuration
#[serde(skip_serializing_if = "Option::is_none", default)]
pub source_id: Option<Uuid>,
/// File permissions from source system (Unix mode bits)
#[serde(skip_serializing_if = "Option::is_none", default)]
pub file_permissions: Option<i32>,
/// File owner from source system
#[serde(skip_serializing_if = "Option::is_none", default)]
pub file_owner: Option<String>,
/// File group from source system
#[serde(skip_serializing_if = "Option::is_none", default)]
pub file_group: Option<String>,
/// Additional metadata from source system (EXIF data, PDF metadata, custom attributes, etc.)
#[serde(skip_serializing_if = "Option::is_none", default)]
pub source_metadata: Option<serde_json::Value>,
}
@ -236,11 +263,15 @@ impl From<Document> for DocumentResponse {
id: doc.id,
filename: doc.filename,
original_filename: doc.original_filename,
file_path: doc.file_path,
file_size: doc.file_size,
mime_type: doc.mime_type,
tags: doc.tags,
labels: Vec::new(), // Labels will be populated separately where needed
created_at: doc.created_at,
updated_at: doc.updated_at,
user_id: doc.user_id,
file_hash: doc.file_hash,
has_ocr_text: doc.ocr_text.is_some(),
ocr_confidence: doc.ocr_confidence,
ocr_word_count: doc.ocr_word_count,
@ -248,6 +279,12 @@ impl From<Document> for DocumentResponse {
ocr_status: doc.ocr_status,
original_created_at: doc.original_created_at,
original_modified_at: doc.original_modified_at,
source_path: doc.source_path,
source_type: doc.source_type,
source_id: doc.source_id,
file_permissions: doc.file_permissions,
file_owner: doc.file_owner,
file_group: doc.file_group,
source_metadata: doc.source_metadata,
}
}

View File

@ -92,6 +92,10 @@ pub async fn upload_document(
deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
original_created_at: None,
original_modified_at: None,
source_path: None, // Web uploads don't have a source path
file_permissions: None, // Web uploads don't preserve permissions
file_owner: None, // Web uploads don't preserve owner
file_group: None, // Web uploads don't preserve group
source_metadata: None,
};

View File

@ -159,6 +159,12 @@ impl FileService {
file_hash: Option<String>,
original_created_at: Option<chrono::DateTime<chrono::Utc>>,
original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
source_path: Option<String>,
source_type: Option<String>,
source_id: Option<Uuid>,
file_permissions: Option<i32>,
file_owner: Option<String>,
file_group: Option<String>,
source_metadata: Option<serde_json::Value>,
) -> Document {
Document {
@ -185,6 +191,12 @@ impl FileService {
file_hash,
original_created_at,
original_modified_at,
source_path,
source_type,
source_id,
file_permissions,
file_owner,
file_group,
source_metadata,
}
}