feat(client): show more fields for Documents
This commit is contained in:
parent
d8c58ef302
commit
0465777890
|
|
@ -548,6 +548,66 @@ const DocumentDetailsPage: React.FC = () => {
|
||||||
{formatDate(document.created_at)}
|
{formatDate(document.created_at)}
|
||||||
</Typography>
|
</Typography>
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
|
{document.source_type && (
|
||||||
|
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
Source Type
|
||||||
|
</Typography>
|
||||||
|
<Chip
|
||||||
|
label={document.source_type.replace('_', ' ').toUpperCase()}
|
||||||
|
size="small"
|
||||||
|
sx={{
|
||||||
|
backgroundColor: theme.palette.info.light,
|
||||||
|
color: theme.palette.info.dark,
|
||||||
|
fontWeight: 600,
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.source_path && (
|
||||||
|
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
Original Path
|
||||||
|
</Typography>
|
||||||
|
<Typography
|
||||||
|
variant="body2"
|
||||||
|
sx={{
|
||||||
|
fontWeight: 600,
|
||||||
|
maxWidth: '200px',
|
||||||
|
overflow: 'hidden',
|
||||||
|
textOverflow: 'ellipsis',
|
||||||
|
whiteSpace: 'nowrap',
|
||||||
|
}}
|
||||||
|
title={document.source_path}
|
||||||
|
>
|
||||||
|
{document.source_path}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.original_created_at && (
|
||||||
|
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
Original Created
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ fontWeight: 600 }}>
|
||||||
|
{formatDate(document.original_created_at)}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.original_modified_at && (
|
||||||
|
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
Original Modified
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body2" sx={{ fontWeight: 600 }}>
|
||||||
|
{formatDate(document.original_modified_at)}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
{document.has_ocr_text && (
|
{document.has_ocr_text && (
|
||||||
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
|
||||||
|
|
@ -829,6 +889,102 @@ const DocumentDetailsPage: React.FC = () => {
|
||||||
ocrError={ocrData?.ocr_error}
|
ocrError={ocrData?.ocr_error}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
{/* Source Information */}
|
||||||
|
{(document.source_type || document.file_permissions || document.file_owner || document.file_group) && (
|
||||||
|
<Card
|
||||||
|
sx={{
|
||||||
|
backgroundColor: theme.palette.background.paper,
|
||||||
|
backdropFilter: 'blur(10px)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<CardContent sx={{ p: 4 }}>
|
||||||
|
<Typography variant="h5" sx={{ mb: 3, fontWeight: 700, display: 'flex', alignItems: 'center' }}>
|
||||||
|
<SourceIcon sx={{ mr: 1, color: theme.palette.primary.main }} />
|
||||||
|
Source Information
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Grid container spacing={3}>
|
||||||
|
{document.source_type && (
|
||||||
|
<Grid item xs={12} sm={6}>
|
||||||
|
<Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
|
||||||
|
<Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
|
||||||
|
Source Type
|
||||||
|
</Typography>
|
||||||
|
<Chip
|
||||||
|
label={document.source_type.replace('_', ' ').toUpperCase()}
|
||||||
|
color="primary"
|
||||||
|
variant="outlined"
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.file_permissions && (
|
||||||
|
<Grid item xs={12} sm={6}>
|
||||||
|
<Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
|
||||||
|
<Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
|
||||||
|
File Permissions
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body1" sx={{ fontFamily: 'monospace', fontWeight: 600 }}>
|
||||||
|
{document.file_permissions.toString(8)} ({document.file_permissions})
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.file_owner && (
|
||||||
|
<Grid item xs={12} sm={6}>
|
||||||
|
<Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
|
||||||
|
<Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
|
||||||
|
File Owner
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body1" sx={{ fontWeight: 600 }}>
|
||||||
|
{document.file_owner}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.file_group && (
|
||||||
|
<Grid item xs={12} sm={6}>
|
||||||
|
<Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
|
||||||
|
<Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
|
||||||
|
File Group
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="body1" sx={{ fontWeight: 600 }}>
|
||||||
|
{document.file_group}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{document.source_path && (
|
||||||
|
<Grid item xs={12}>
|
||||||
|
<Box sx={{ p: 2, borderRadius: 2, backgroundColor: theme.palette.action.hover }}>
|
||||||
|
<Typography variant="subtitle2" color="text.secondary" sx={{ mb: 1 }}>
|
||||||
|
Original Source Path
|
||||||
|
</Typography>
|
||||||
|
<Typography
|
||||||
|
variant="body1"
|
||||||
|
sx={{
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontWeight: 600,
|
||||||
|
wordBreak: 'break-all',
|
||||||
|
backgroundColor: theme.palette.background.default,
|
||||||
|
p: 1,
|
||||||
|
borderRadius: 1,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{document.source_path}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</Grid>
|
||||||
|
)}
|
||||||
|
</Grid>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Enhanced Metadata Display */}
|
{/* Enhanced Metadata Display */}
|
||||||
{document.source_metadata && Object.keys(document.source_metadata).length > 0 && (
|
{document.source_metadata && Object.keys(document.source_metadata).length > 0 && (
|
||||||
<Card
|
<Card
|
||||||
|
|
|
||||||
|
|
@ -14,22 +14,28 @@ export interface Document {
|
||||||
id: string
|
id: string
|
||||||
filename: string
|
filename: string
|
||||||
original_filename: string
|
original_filename: string
|
||||||
|
file_path: string
|
||||||
file_size: number
|
file_size: number
|
||||||
mime_type: string
|
mime_type: string
|
||||||
tags: string[]
|
tags: string[]
|
||||||
created_at: string
|
created_at: string
|
||||||
updated_at?: string
|
updated_at: string
|
||||||
user_id?: string
|
user_id: string
|
||||||
file_hash?: string
|
file_hash?: string
|
||||||
|
original_created_at?: string
|
||||||
|
original_modified_at?: string
|
||||||
|
source_path?: string
|
||||||
|
source_type?: string
|
||||||
|
source_id?: string
|
||||||
|
file_permissions?: number
|
||||||
|
file_owner?: string
|
||||||
|
file_group?: string
|
||||||
|
source_metadata?: Record<string, any>
|
||||||
has_ocr_text: boolean
|
has_ocr_text: boolean
|
||||||
ocr_confidence?: number
|
ocr_confidence?: number
|
||||||
ocr_word_count?: number
|
ocr_word_count?: number
|
||||||
ocr_processing_time_ms?: number
|
ocr_processing_time_ms?: number
|
||||||
ocr_status?: string
|
ocr_status?: string
|
||||||
// New metadata fields
|
|
||||||
original_created_at?: string
|
|
||||||
original_modified_at?: string
|
|
||||||
source_metadata?: Record<string, any>
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SearchRequest {
|
export interface SearchRequest {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
-- Add remaining dedicated metadata fields to documents table
|
||||||
|
-- These fields extract commonly used metadata from source_metadata JSON
|
||||||
|
-- into dedicated columns for better querying and indexing
|
||||||
|
|
||||||
|
-- Add source path (original file location from source system)
|
||||||
|
ALTER TABLE documents
|
||||||
|
ADD COLUMN IF NOT EXISTS source_path TEXT;
|
||||||
|
|
||||||
|
-- Add source type (e.g., 'web_upload', 'filesystem', 'webdav', 's3')
|
||||||
|
ALTER TABLE documents
|
||||||
|
ADD COLUMN IF NOT EXISTS source_type TEXT;
|
||||||
|
|
||||||
|
-- Add file permissions (Unix mode bits from source system)
|
||||||
|
ALTER TABLE documents
|
||||||
|
ADD COLUMN IF NOT EXISTS file_permissions INTEGER;
|
||||||
|
|
||||||
|
-- Add file owner (username or uid from source system)
|
||||||
|
ALTER TABLE documents
|
||||||
|
ADD COLUMN IF NOT EXISTS file_owner TEXT;
|
||||||
|
|
||||||
|
-- Add file group (groupname or gid from source system)
|
||||||
|
ALTER TABLE documents
|
||||||
|
ADD COLUMN IF NOT EXISTS file_group TEXT;
|
||||||
|
|
||||||
|
-- Create indexes for efficient querying
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_documents_source_path ON documents(source_path)
|
||||||
|
WHERE source_path IS NOT NULL;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_documents_source_type ON documents(source_type)
|
||||||
|
WHERE source_type IS NOT NULL;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_documents_file_permissions ON documents(file_permissions)
|
||||||
|
WHERE file_permissions IS NOT NULL;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_documents_file_owner ON documents(file_owner)
|
||||||
|
WHERE file_owner IS NOT NULL;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_documents_file_group ON documents(file_group)
|
||||||
|
WHERE file_group IS NOT NULL;
|
||||||
|
|
||||||
|
-- Add helpful comments
|
||||||
|
COMMENT ON COLUMN documents.source_path IS 'Original path where the file was located in the source system';
|
||||||
|
COMMENT ON COLUMN documents.source_type IS 'Type of source where file was ingested from (web_upload, filesystem, webdav, s3, etc.)';
|
||||||
|
COMMENT ON COLUMN documents.file_permissions IS 'File permissions from source system (Unix mode bits)';
|
||||||
|
COMMENT ON COLUMN documents.file_owner IS 'File owner from source system (username or uid)';
|
||||||
|
COMMENT ON COLUMN documents.file_group IS 'File group from source system (groupname or gid)';
|
||||||
|
|
@ -9,7 +9,8 @@ pub const DOCUMENT_FIELDS: &str = r#"
|
||||||
content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms,
|
content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms,
|
||||||
ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason,
|
ocr_status, ocr_error, ocr_completed_at, ocr_retry_count, ocr_failure_reason,
|
||||||
tags, created_at, updated_at, user_id, file_hash, original_created_at,
|
tags, created_at, updated_at, user_id, file_hash, original_created_at,
|
||||||
original_modified_at, source_metadata
|
original_modified_at, source_path, source_type, source_id, file_permissions,
|
||||||
|
file_owner, file_group, source_metadata
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
/// Maps a database row to a Document struct
|
/// Maps a database row to a Document struct
|
||||||
|
|
@ -39,6 +40,12 @@ pub fn map_row_to_document(row: &sqlx::postgres::PgRow) -> Document {
|
||||||
file_hash: row.get("file_hash"),
|
file_hash: row.get("file_hash"),
|
||||||
original_created_at: row.get("original_created_at"),
|
original_created_at: row.get("original_created_at"),
|
||||||
original_modified_at: row.get("original_modified_at"),
|
original_modified_at: row.get("original_modified_at"),
|
||||||
|
source_path: row.get("source_path"),
|
||||||
|
source_type: row.get("source_type"),
|
||||||
|
source_id: row.get("source_id"),
|
||||||
|
file_permissions: row.get("file_permissions"),
|
||||||
|
file_owner: row.get("file_owner"),
|
||||||
|
file_group: row.get("file_group"),
|
||||||
source_metadata: row.get("source_metadata"),
|
source_metadata: row.get("source_metadata"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,15 @@ pub struct DocumentIngestionRequest {
|
||||||
/// Optional metadata from source file system
|
/// Optional metadata from source file system
|
||||||
pub original_created_at: Option<chrono::DateTime<chrono::Utc>>,
|
pub original_created_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||||
pub original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
|
pub original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||||
|
/// Original file path in source system
|
||||||
|
pub source_path: Option<String>,
|
||||||
|
/// File permissions from source system (Unix mode bits)
|
||||||
|
pub file_permissions: Option<i32>,
|
||||||
|
/// File owner from source system
|
||||||
|
pub file_owner: Option<String>,
|
||||||
|
/// File group from source system
|
||||||
|
pub file_group: Option<String>,
|
||||||
|
/// Additional metadata from source system (EXIF, PDF metadata, etc.)
|
||||||
pub source_metadata: Option<serde_json::Value>,
|
pub source_metadata: Option<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -112,6 +121,9 @@ impl DocumentIngestionService {
|
||||||
pub async fn ingest_document(&self, request: DocumentIngestionRequest) -> Result<IngestionResult, Box<dyn std::error::Error + Send + Sync>> {
|
pub async fn ingest_document(&self, request: DocumentIngestionRequest) -> Result<IngestionResult, Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let file_hash = self.calculate_file_hash(&request.file_data);
|
let file_hash = self.calculate_file_hash(&request.file_data);
|
||||||
let file_size = request.file_data.len() as i64;
|
let file_size = request.file_data.len() as i64;
|
||||||
|
|
||||||
|
// Clone source_type early for error handling
|
||||||
|
let source_type_for_error = request.source_type.clone();
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Ingesting document: {} for user {} (hash: {}, size: {} bytes, policy: {:?})",
|
"Ingesting document: {} for user {} (hash: {}, size: {} bytes, policy: {:?})",
|
||||||
|
|
@ -184,7 +196,7 @@ impl DocumentIngestionService {
|
||||||
failure_reason: "storage_error".to_string(),
|
failure_reason: "storage_error".to_string(),
|
||||||
failure_stage: "storage".to_string(),
|
failure_stage: "storage".to_string(),
|
||||||
existing_document_id: None,
|
existing_document_id: None,
|
||||||
ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
|
ingestion_source: source_type_for_error.clone().unwrap_or_else(|| "upload".to_string()),
|
||||||
error_message: Some(e.to_string()),
|
error_message: Some(e.to_string()),
|
||||||
retry_count: Some(0),
|
retry_count: Some(0),
|
||||||
last_retry_at: None,
|
last_retry_at: None,
|
||||||
|
|
@ -211,6 +223,12 @@ impl DocumentIngestionService {
|
||||||
Some(file_hash.clone()),
|
Some(file_hash.clone()),
|
||||||
request.original_created_at,
|
request.original_created_at,
|
||||||
request.original_modified_at,
|
request.original_modified_at,
|
||||||
|
request.source_path,
|
||||||
|
request.source_type,
|
||||||
|
request.source_id,
|
||||||
|
request.file_permissions,
|
||||||
|
request.file_owner,
|
||||||
|
request.file_group,
|
||||||
request.source_metadata,
|
request.source_metadata,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -264,7 +282,7 @@ impl DocumentIngestionService {
|
||||||
failure_reason: "database_error".to_string(),
|
failure_reason: "database_error".to_string(),
|
||||||
failure_stage: "ingestion".to_string(),
|
failure_stage: "ingestion".to_string(),
|
||||||
existing_document_id: None,
|
existing_document_id: None,
|
||||||
ingestion_source: request.source_type.unwrap_or_else(|| "upload".to_string()),
|
ingestion_source: source_type_for_error.clone().unwrap_or_else(|| "upload".to_string()),
|
||||||
error_message: Some(e.to_string()),
|
error_message: Some(e.to_string()),
|
||||||
retry_count: Some(0),
|
retry_count: Some(0),
|
||||||
last_retry_at: None,
|
last_retry_at: None,
|
||||||
|
|
@ -321,6 +339,10 @@ impl DocumentIngestionService {
|
||||||
source_id,
|
source_id,
|
||||||
original_created_at,
|
original_created_at,
|
||||||
original_modified_at,
|
original_modified_at,
|
||||||
|
source_path: Some(file_info.path.clone()),
|
||||||
|
file_permissions: file_info.permissions.map(|p| p as i32),
|
||||||
|
file_owner: file_info.owner.clone(),
|
||||||
|
file_group: file_info.group.clone(),
|
||||||
source_metadata,
|
source_metadata,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -346,6 +368,10 @@ impl DocumentIngestionService {
|
||||||
source_id: None,
|
source_id: None,
|
||||||
original_created_at: None,
|
original_created_at: None,
|
||||||
original_modified_at: None,
|
original_modified_at: None,
|
||||||
|
source_path: None, // Direct uploads don't have a source path
|
||||||
|
file_permissions: None, // Direct uploads don't preserve permissions
|
||||||
|
file_owner: None, // Direct uploads don't preserve owner
|
||||||
|
file_group: None, // Direct uploads don't preserve group
|
||||||
source_metadata: None,
|
source_metadata: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -373,6 +399,10 @@ impl DocumentIngestionService {
|
||||||
source_id: Some(source_id),
|
source_id: Some(source_id),
|
||||||
original_created_at: None,
|
original_created_at: None,
|
||||||
original_modified_at: None,
|
original_modified_at: None,
|
||||||
|
source_path: None, // Source sync files don't have a source path
|
||||||
|
file_permissions: None, // Source sync files don't preserve permissions
|
||||||
|
file_owner: None, // Source sync files don't preserve owner
|
||||||
|
file_group: None, // Source sync files don't preserve group
|
||||||
source_metadata: None,
|
source_metadata: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -399,6 +429,10 @@ impl DocumentIngestionService {
|
||||||
source_id: Some(webdav_source_id),
|
source_id: Some(webdav_source_id),
|
||||||
original_created_at: None,
|
original_created_at: None,
|
||||||
original_modified_at: None,
|
original_modified_at: None,
|
||||||
|
source_path: None, // WebDAV files don't have a source path in this method
|
||||||
|
file_permissions: None, // WebDAV files don't preserve permissions in this method
|
||||||
|
file_owner: None, // WebDAV files don't preserve owner in this method
|
||||||
|
file_group: None, // WebDAV files don't preserve group in this method
|
||||||
source_metadata: None,
|
source_metadata: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -424,6 +458,10 @@ impl DocumentIngestionService {
|
||||||
source_id: None,
|
source_id: None,
|
||||||
original_created_at: None,
|
original_created_at: None,
|
||||||
original_modified_at: None,
|
original_modified_at: None,
|
||||||
|
source_path: None, // Batch files don't have a source path
|
||||||
|
file_permissions: None, // Batch files don't preserve permissions
|
||||||
|
file_owner: None, // Batch files don't preserve owner
|
||||||
|
file_group: None, // Batch files don't preserve group
|
||||||
source_metadata: None,
|
source_metadata: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,19 @@ pub struct Document {
|
||||||
pub original_created_at: Option<DateTime<Utc>>,
|
pub original_created_at: Option<DateTime<Utc>>,
|
||||||
/// Original file modification timestamp from source system
|
/// Original file modification timestamp from source system
|
||||||
pub original_modified_at: Option<DateTime<Utc>>,
|
pub original_modified_at: Option<DateTime<Utc>>,
|
||||||
/// Additional metadata from source system (permissions, attributes, EXIF data, etc.)
|
/// Original path where the file was located (from source system)
|
||||||
|
pub source_path: Option<String>,
|
||||||
|
/// Type of source where file was ingested from (e.g., "web_upload", "filesystem", "webdav")
|
||||||
|
pub source_type: Option<String>,
|
||||||
|
/// UUID of the source system/configuration
|
||||||
|
pub source_id: Option<Uuid>,
|
||||||
|
/// File permissions from source system (Unix mode bits)
|
||||||
|
pub file_permissions: Option<i32>,
|
||||||
|
/// File owner from source system (username or uid)
|
||||||
|
pub file_owner: Option<String>,
|
||||||
|
/// File group from source system (groupname or gid)
|
||||||
|
pub file_group: Option<String>,
|
||||||
|
/// Additional metadata from source system (EXIF data, PDF metadata, custom attributes, etc.)
|
||||||
pub source_metadata: Option<serde_json::Value>,
|
pub source_metadata: Option<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,8 @@ pub struct DocumentResponse {
|
||||||
pub filename: String,
|
pub filename: String,
|
||||||
/// Original filename when uploaded
|
/// Original filename when uploaded
|
||||||
pub original_filename: String,
|
pub original_filename: String,
|
||||||
|
/// File path where the document is stored
|
||||||
|
pub file_path: String,
|
||||||
/// File size in bytes
|
/// File size in bytes
|
||||||
pub file_size: i64,
|
pub file_size: i64,
|
||||||
/// MIME type of the file
|
/// MIME type of the file
|
||||||
|
|
@ -45,6 +47,13 @@ pub struct DocumentResponse {
|
||||||
pub labels: Vec<crate::routes::labels::Label>,
|
pub labels: Vec<crate::routes::labels::Label>,
|
||||||
/// When the document was created
|
/// When the document was created
|
||||||
pub created_at: DateTime<Utc>,
|
pub created_at: DateTime<Utc>,
|
||||||
|
/// When the document was last updated
|
||||||
|
pub updated_at: DateTime<Utc>,
|
||||||
|
/// User who uploaded/owns the document
|
||||||
|
pub user_id: Uuid,
|
||||||
|
/// SHA256 hash of the file content
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub file_hash: Option<String>,
|
||||||
/// Whether OCR text has been extracted
|
/// Whether OCR text has been extracted
|
||||||
pub has_ocr_text: bool,
|
pub has_ocr_text: bool,
|
||||||
/// OCR confidence score (0-100, higher is better)
|
/// OCR confidence score (0-100, higher is better)
|
||||||
|
|
@ -61,7 +70,25 @@ pub struct DocumentResponse {
|
||||||
/// Original file modification timestamp from source system
|
/// Original file modification timestamp from source system
|
||||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
pub original_modified_at: Option<DateTime<Utc>>,
|
pub original_modified_at: Option<DateTime<Utc>>,
|
||||||
/// Additional metadata from source system (permissions, attributes, etc.)
|
/// Original path where the file was located (from source system)
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub source_path: Option<String>,
|
||||||
|
/// Type of source where file was ingested from
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub source_type: Option<String>,
|
||||||
|
/// UUID of the source system/configuration
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub source_id: Option<Uuid>,
|
||||||
|
/// File permissions from source system (Unix mode bits)
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub file_permissions: Option<i32>,
|
||||||
|
/// File owner from source system
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub file_owner: Option<String>,
|
||||||
|
/// File group from source system
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
|
pub file_group: Option<String>,
|
||||||
|
/// Additional metadata from source system (EXIF data, PDF metadata, custom attributes, etc.)
|
||||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||||
pub source_metadata: Option<serde_json::Value>,
|
pub source_metadata: Option<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
@ -236,11 +263,15 @@ impl From<Document> for DocumentResponse {
|
||||||
id: doc.id,
|
id: doc.id,
|
||||||
filename: doc.filename,
|
filename: doc.filename,
|
||||||
original_filename: doc.original_filename,
|
original_filename: doc.original_filename,
|
||||||
|
file_path: doc.file_path,
|
||||||
file_size: doc.file_size,
|
file_size: doc.file_size,
|
||||||
mime_type: doc.mime_type,
|
mime_type: doc.mime_type,
|
||||||
tags: doc.tags,
|
tags: doc.tags,
|
||||||
labels: Vec::new(), // Labels will be populated separately where needed
|
labels: Vec::new(), // Labels will be populated separately where needed
|
||||||
created_at: doc.created_at,
|
created_at: doc.created_at,
|
||||||
|
updated_at: doc.updated_at,
|
||||||
|
user_id: doc.user_id,
|
||||||
|
file_hash: doc.file_hash,
|
||||||
has_ocr_text: doc.ocr_text.is_some(),
|
has_ocr_text: doc.ocr_text.is_some(),
|
||||||
ocr_confidence: doc.ocr_confidence,
|
ocr_confidence: doc.ocr_confidence,
|
||||||
ocr_word_count: doc.ocr_word_count,
|
ocr_word_count: doc.ocr_word_count,
|
||||||
|
|
@ -248,6 +279,12 @@ impl From<Document> for DocumentResponse {
|
||||||
ocr_status: doc.ocr_status,
|
ocr_status: doc.ocr_status,
|
||||||
original_created_at: doc.original_created_at,
|
original_created_at: doc.original_created_at,
|
||||||
original_modified_at: doc.original_modified_at,
|
original_modified_at: doc.original_modified_at,
|
||||||
|
source_path: doc.source_path,
|
||||||
|
source_type: doc.source_type,
|
||||||
|
source_id: doc.source_id,
|
||||||
|
file_permissions: doc.file_permissions,
|
||||||
|
file_owner: doc.file_owner,
|
||||||
|
file_group: doc.file_group,
|
||||||
source_metadata: doc.source_metadata,
|
source_metadata: doc.source_metadata,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,10 @@ pub async fn upload_document(
|
||||||
deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
|
deduplication_policy: crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
|
||||||
original_created_at: None,
|
original_created_at: None,
|
||||||
original_modified_at: None,
|
original_modified_at: None,
|
||||||
|
source_path: None, // Web uploads don't have a source path
|
||||||
|
file_permissions: None, // Web uploads don't preserve permissions
|
||||||
|
file_owner: None, // Web uploads don't preserve owner
|
||||||
|
file_group: None, // Web uploads don't preserve group
|
||||||
source_metadata: None,
|
source_metadata: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -159,6 +159,12 @@ impl FileService {
|
||||||
file_hash: Option<String>,
|
file_hash: Option<String>,
|
||||||
original_created_at: Option<chrono::DateTime<chrono::Utc>>,
|
original_created_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||||
original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
|
original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||||
|
source_path: Option<String>,
|
||||||
|
source_type: Option<String>,
|
||||||
|
source_id: Option<Uuid>,
|
||||||
|
file_permissions: Option<i32>,
|
||||||
|
file_owner: Option<String>,
|
||||||
|
file_group: Option<String>,
|
||||||
source_metadata: Option<serde_json::Value>,
|
source_metadata: Option<serde_json::Value>,
|
||||||
) -> Document {
|
) -> Document {
|
||||||
Document {
|
Document {
|
||||||
|
|
@ -185,6 +191,12 @@ impl FileService {
|
||||||
file_hash,
|
file_hash,
|
||||||
original_created_at,
|
original_created_at,
|
||||||
original_modified_at,
|
original_modified_at,
|
||||||
|
source_path,
|
||||||
|
source_type,
|
||||||
|
source_id,
|
||||||
|
file_permissions,
|
||||||
|
file_owner,
|
||||||
|
file_group,
|
||||||
source_metadata,
|
source_metadata,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue