feat(server/client): add metadata to file view
This commit is contained in:
parent
1695787f1d
commit
9e43df2fbe
|
|
@ -0,0 +1,232 @@
|
|||
import React, { useState } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Typography,
|
||||
Accordion,
|
||||
AccordionSummary,
|
||||
AccordionDetails,
|
||||
Chip,
|
||||
Grid,
|
||||
} from '@mui/material';
|
||||
import {
|
||||
ExpandMore as ExpandMoreIcon,
|
||||
Security as PermissionsIcon,
|
||||
Person as OwnerIcon,
|
||||
Group as GroupIcon,
|
||||
Storage as StorageIcon,
|
||||
Info as InfoIcon,
|
||||
} from '@mui/icons-material';
|
||||
|
||||
interface MetadataDisplayProps {
|
||||
metadata: any;
|
||||
title?: string;
|
||||
compact?: boolean;
|
||||
}
|
||||
|
||||
const MetadataDisplay: React.FC<MetadataDisplayProps> = ({
|
||||
metadata,
|
||||
title = "Source Metadata",
|
||||
compact = false,
|
||||
}) => {
|
||||
const [expanded, setExpanded] = useState(!compact);
|
||||
|
||||
if (!metadata || Object.keys(metadata).length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const formatValue = (key: string, value: any): React.ReactNode => {
|
||||
// Handle special metadata fields with better formatting
|
||||
if (key === 'permissions' && typeof value === 'number') {
|
||||
return (
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<PermissionsIcon sx={{ fontSize: 16, color: 'primary.main' }} />
|
||||
<Typography variant="body2" component="span">
|
||||
{value.toString(8)} (octal)
|
||||
</Typography>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
if (key === 'owner' || key === 'uid') {
|
||||
return (
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<OwnerIcon sx={{ fontSize: 16, color: 'primary.main' }} />
|
||||
<Typography variant="body2" component="span">
|
||||
{value}
|
||||
</Typography>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
if (key === 'group' || key === 'gid') {
|
||||
return (
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<GroupIcon sx={{ fontSize: 16, color: 'primary.main' }} />
|
||||
<Typography variant="body2" component="span">
|
||||
{value}
|
||||
</Typography>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
if (key === 'storage_class' || key === 'region') {
|
||||
return (
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<StorageIcon sx={{ fontSize: 16, color: 'primary.main' }} />
|
||||
<Typography variant="body2" component="span">
|
||||
{value}
|
||||
</Typography>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
// Handle arrays
|
||||
if (Array.isArray(value)) {
|
||||
return (
|
||||
<Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.5 }}>
|
||||
{value.map((item, index) => (
|
||||
<Chip
|
||||
key={index}
|
||||
label={String(item)}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
/>
|
||||
))}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
// Handle objects
|
||||
if (typeof value === 'object' && value !== null) {
|
||||
return (
|
||||
<Box sx={{
|
||||
backgroundColor: 'grey.100',
|
||||
p: 1,
|
||||
borderRadius: 1,
|
||||
fontFamily: 'monospace',
|
||||
fontSize: '0.75rem',
|
||||
maxHeight: '100px',
|
||||
overflow: 'auto'
|
||||
}}>
|
||||
<pre style={{ margin: 0, whiteSpace: 'pre-wrap' }}>
|
||||
{JSON.stringify(value, null, 2)}
|
||||
</pre>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
// Handle boolean values
|
||||
if (typeof value === 'boolean') {
|
||||
return (
|
||||
<Chip
|
||||
label={value ? 'Yes' : 'No'}
|
||||
color={value ? 'success' : 'default'}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
// Handle dates
|
||||
if (typeof value === 'string' && (
|
||||
key.includes('date') ||
|
||||
key.includes('time') ||
|
||||
key.includes('created') ||
|
||||
key.includes('modified')
|
||||
)) {
|
||||
try {
|
||||
const date = new Date(value);
|
||||
if (!isNaN(date.getTime())) {
|
||||
return (
|
||||
<Typography variant="body2" component="span">
|
||||
{date.toLocaleString()}
|
||||
</Typography>
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
// Fall through to default handling
|
||||
}
|
||||
}
|
||||
|
||||
// Default: display as string
|
||||
return (
|
||||
<Typography variant="body2" component="span">
|
||||
{String(value)}
|
||||
</Typography>
|
||||
);
|
||||
};
|
||||
|
||||
const formatKeyName = (key: string): string => {
|
||||
// Convert snake_case and camelCase to Title Case
|
||||
return key
|
||||
.replace(/([a-z])([A-Z])/g, '$1 $2') // camelCase to spaces
|
||||
.replace(/_/g, ' ') // snake_case to spaces
|
||||
.replace(/\b\w/g, (letter) => letter.toUpperCase()); // Title Case
|
||||
};
|
||||
|
||||
const renderMetadata = () => {
|
||||
return (
|
||||
<Grid container spacing={2}>
|
||||
{Object.entries(metadata).map(([key, value]) => (
|
||||
<Grid item xs={12} sm={6} key={key}>
|
||||
<Box sx={{ mb: 1 }}>
|
||||
<Typography
|
||||
variant="caption"
|
||||
color="text.secondary"
|
||||
sx={{ fontWeight: 600, textTransform: 'uppercase', letterSpacing: 0.5 }}
|
||||
>
|
||||
{formatKeyName(key)}
|
||||
</Typography>
|
||||
</Box>
|
||||
<Box sx={{ pl: 1 }}>
|
||||
{formatValue(key, value)}
|
||||
</Box>
|
||||
</Grid>
|
||||
))}
|
||||
</Grid>
|
||||
);
|
||||
};
|
||||
|
||||
if (compact) {
|
||||
return (
|
||||
<Accordion expanded={expanded} onChange={(_, isExpanded) => setExpanded(isExpanded)}>
|
||||
<AccordionSummary
|
||||
expandIcon={<ExpandMoreIcon />}
|
||||
sx={{
|
||||
backgroundColor: 'grey.50',
|
||||
'&:hover': { backgroundColor: 'grey.100' }
|
||||
}}
|
||||
>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<InfoIcon sx={{ fontSize: 20, color: 'primary.main' }} />
|
||||
<Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
|
||||
{title}
|
||||
</Typography>
|
||||
<Chip
|
||||
label={`${Object.keys(metadata).length} fields`}
|
||||
size="small"
|
||||
variant="outlined"
|
||||
/>
|
||||
</Box>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
{renderMetadata()}
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 2 }}>
|
||||
<InfoIcon sx={{ color: 'primary.main' }} />
|
||||
<Typography variant="subtitle2" sx={{ fontWeight: 600 }}>
|
||||
{title}
|
||||
</Typography>
|
||||
</Box>
|
||||
{renderMetadata()}
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
export default MetadataDisplay;
|
||||
|
|
@ -35,11 +35,16 @@ import {
|
|||
Search as SearchIcon,
|
||||
Edit as EditIcon,
|
||||
PhotoFilter as ProcessedImageIcon,
|
||||
Source as SourceIcon,
|
||||
AccessTime as AccessTimeIcon,
|
||||
Create as CreateIcon,
|
||||
Info as InfoIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { documentService, OcrResponse } from '../services/api';
|
||||
import DocumentViewer from '../components/DocumentViewer';
|
||||
import LabelSelector from '../components/Labels/LabelSelector';
|
||||
import { type LabelData } from '../components/Labels/Label';
|
||||
import MetadataDisplay from '../components/MetadataDisplay';
|
||||
import api from '../services/api';
|
||||
|
||||
interface Document {
|
||||
|
|
@ -51,6 +56,9 @@ interface Document {
|
|||
created_at: string;
|
||||
has_ocr_text?: boolean;
|
||||
tags?: string[];
|
||||
original_created_at?: string;
|
||||
original_modified_at?: string;
|
||||
source_metadata?: any;
|
||||
}
|
||||
|
||||
const DocumentDetailsPage: React.FC = () => {
|
||||
|
|
@ -500,6 +508,55 @@ const DocumentDetailsPage: React.FC = () => {
|
|||
</Paper>
|
||||
</Grid>
|
||||
|
||||
{/* Source Metadata Section */}
|
||||
{(document.original_created_at || document.original_modified_at || document.source_metadata) && (
|
||||
<>
|
||||
{document.original_created_at && (
|
||||
<Grid item xs={12} sm={6}>
|
||||
<Paper sx={{ p: 2, height: '100%' }}>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', mb: 1 }}>
|
||||
<CreateIcon color="primary" sx={{ mr: 1 }} />
|
||||
<Typography variant="subtitle2" color="text.secondary">
|
||||
Original Created
|
||||
</Typography>
|
||||
</Box>
|
||||
<Typography variant="body1" sx={{ fontWeight: 500 }}>
|
||||
{formatDate(document.original_created_at)}
|
||||
</Typography>
|
||||
</Paper>
|
||||
</Grid>
|
||||
)}
|
||||
|
||||
{document.original_modified_at && (
|
||||
<Grid item xs={12} sm={6}>
|
||||
<Paper sx={{ p: 2, height: '100%' }}>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', mb: 1 }}>
|
||||
<AccessTimeIcon color="primary" sx={{ mr: 1 }} />
|
||||
<Typography variant="subtitle2" color="text.secondary">
|
||||
Original Modified
|
||||
</Typography>
|
||||
</Box>
|
||||
<Typography variant="body1" sx={{ fontWeight: 500 }}>
|
||||
{formatDate(document.original_modified_at)}
|
||||
</Typography>
|
||||
</Paper>
|
||||
</Grid>
|
||||
)}
|
||||
|
||||
{document.source_metadata && Object.keys(document.source_metadata).length > 0 && (
|
||||
<Grid item xs={12}>
|
||||
<Paper sx={{ p: 2 }}>
|
||||
<MetadataDisplay
|
||||
metadata={document.source_metadata}
|
||||
title="Source Metadata"
|
||||
compact={false}
|
||||
/>
|
||||
</Paper>
|
||||
</Grid>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
|
||||
{document.tags && document.tags.length > 0 && (
|
||||
<Grid item xs={12}>
|
||||
<Paper sx={{ p: 2 }}>
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ import { format } from 'date-fns';
|
|||
import { api, documentService, queueService } from '../services/api';
|
||||
import DocumentViewer from '../components/DocumentViewer';
|
||||
import FailedDocumentViewer from '../components/FailedDocumentViewer';
|
||||
import MetadataDisplay from '../components/MetadataDisplay';
|
||||
|
||||
interface FailedDocument {
|
||||
id: string;
|
||||
|
|
@ -78,6 +79,9 @@ interface FailedDocument {
|
|||
ocr_word_count?: number;
|
||||
failure_reason: string;
|
||||
error_message?: string;
|
||||
original_created_at?: string;
|
||||
original_modified_at?: string;
|
||||
source_metadata?: any;
|
||||
}
|
||||
|
||||
interface FailureCategory {
|
||||
|
|
@ -1989,6 +1993,39 @@ const DocumentManagementPage: React.FC = () => {
|
|||
sx={{ mb: 2 }}
|
||||
/>
|
||||
|
||||
{/* Source Metadata Section */}
|
||||
{selectedDocument.original_created_at && (
|
||||
<>
|
||||
<Typography variant="body2" color="text.secondary" component="div">
|
||||
<strong>Original Created:</strong>
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||
{format(new Date(selectedDocument.original_created_at), 'PPpp')}
|
||||
</Typography>
|
||||
</>
|
||||
)}
|
||||
|
||||
{selectedDocument.original_modified_at && (
|
||||
<>
|
||||
<Typography variant="body2" color="text.secondary" component="div">
|
||||
<strong>Original Modified:</strong>
|
||||
</Typography>
|
||||
<Typography variant="body2" sx={{ mb: 2 }}>
|
||||
{format(new Date(selectedDocument.original_modified_at), 'PPpp')}
|
||||
</Typography>
|
||||
</>
|
||||
)}
|
||||
|
||||
{selectedDocument.source_metadata && Object.keys(selectedDocument.source_metadata).length > 0 && (
|
||||
<Box sx={{ mt: 2, mb: 2 }}>
|
||||
<MetadataDisplay
|
||||
metadata={selectedDocument.source_metadata}
|
||||
title="Source Metadata"
|
||||
compact={true}
|
||||
/>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Typography variant="body2" color="text.secondary" component="div" sx={{ mt: 2 }}>
|
||||
<strong>Retry Count:</strong>
|
||||
</Typography>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
-- Add metadata preservation fields to documents table
|
||||
ALTER TABLE documents
|
||||
ADD COLUMN original_created_at TIMESTAMPTZ,
|
||||
ADD COLUMN original_modified_at TIMESTAMPTZ,
|
||||
ADD COLUMN source_metadata JSONB;
|
||||
|
||||
-- Add comment to explain fields
|
||||
COMMENT ON COLUMN documents.original_created_at IS 'Original file creation timestamp from source system';
|
||||
COMMENT ON COLUMN documents.original_modified_at IS 'Original file modification timestamp from source system';
|
||||
COMMENT ON COLUMN documents.source_metadata IS 'Additional metadata from source system (permissions, attributes, EXIF data, etc.)';
|
||||
|
||||
-- Create index on source_metadata for efficient JSONB queries
|
||||
CREATE INDEX idx_documents_source_metadata ON documents USING gin (source_metadata);
|
||||
|
||||
-- Note: We cannot reliably populate original_created_at and original_modified_at
|
||||
-- for existing documents as we don't have this information stored.
|
||||
-- These fields will remain NULL for existing documents, which is correct.
|
||||
|
|
@ -10,9 +10,9 @@ impl Database {
|
|||
pub async fn create_document(&self, document: Document) -> Result<Document> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22)
|
||||
RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
"#
|
||||
)
|
||||
.bind(document.id)
|
||||
|
|
@ -34,6 +34,9 @@ impl Database {
|
|||
.bind(document.updated_at)
|
||||
.bind(document.user_id)
|
||||
.bind(&document.file_hash)
|
||||
.bind(document.original_created_at)
|
||||
.bind(document.original_modified_at)
|
||||
.bind(&document.source_metadata)
|
||||
.fetch_one(&self.pool)
|
||||
.await?;
|
||||
|
||||
|
|
@ -57,6 +60,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +70,7 @@ impl Database {
|
|||
let query = if user_role == crate::models::UserRole::Admin {
|
||||
// Admins can see all documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
|
|
@ -72,7 +78,7 @@ impl Database {
|
|||
} else {
|
||||
// Regular users can only see their own documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -117,6 +123,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -129,7 +138,7 @@ impl Database {
|
|||
// Admin with OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_status = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -146,7 +155,7 @@ impl Database {
|
|||
// Admin without OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
ORDER BY created_at DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
|
|
@ -161,7 +170,7 @@ impl Database {
|
|||
// Regular user with OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3 AND ocr_status = $4
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -179,7 +188,7 @@ impl Database {
|
|||
// Regular user without OCR filter
|
||||
sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $3
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -216,6 +225,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -268,7 +280,7 @@ impl Database {
|
|||
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -303,6 +315,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -312,7 +327,7 @@ impl Database {
|
|||
pub async fn find_documents_by_filename(&self, filename: &str) -> Result<Vec<Document>> {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE filename = $1 OR original_filename = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -344,6 +359,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -353,7 +371,7 @@ impl Database {
|
|||
pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<Document>, i64)> {
|
||||
let mut query_builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "#
|
||||
);
|
||||
|
||||
|
|
@ -415,6 +433,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -456,7 +477,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -499,7 +520,7 @@ impl Database {
|
|||
|
||||
let mut builder = QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
@ -645,7 +666,7 @@ impl Database {
|
|||
// Use trigram similarity for substring matching
|
||||
let mut builder = QueryBuilder::new(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
similarity(filename, "#
|
||||
);
|
||||
|
|
@ -684,7 +705,7 @@ impl Database {
|
|||
|
||||
let mut builder = QueryBuilder::new(&format!(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash,
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata,
|
||||
GREATEST(
|
||||
CASE WHEN filename ILIKE '%' || "#
|
||||
));
|
||||
|
|
@ -993,6 +1014,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -1081,14 +1105,14 @@ impl Database {
|
|||
let query = if user_role == crate::models::UserRole::Admin {
|
||||
// Admins can see any document
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE id = $1
|
||||
"#
|
||||
} else {
|
||||
// Regular users can only see their own documents
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE id = $1 AND user_id = $2
|
||||
"#
|
||||
|
|
@ -1128,6 +1152,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})),
|
||||
None => Ok(None),
|
||||
}
|
||||
|
|
@ -1137,7 +1164,7 @@ impl Database {
|
|||
pub async fn get_document_by_user_and_hash(&self, user_id: Uuid, file_hash: &str) -> Result<Option<Document>> {
|
||||
let row = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE user_id = $1 AND file_hash = $2
|
||||
LIMIT 1
|
||||
|
|
@ -1169,6 +1196,9 @@ impl Database {
|
|||
updated_at: row.get("updated_at"),
|
||||
user_id: row.get("user_id"),
|
||||
file_hash: row.get("file_hash"),
|
||||
original_created_at: row.get("original_created_at"),
|
||||
original_modified_at: row.get("original_modified_at"),
|
||||
source_metadata: row.get("source_metadata"),
|
||||
})),
|
||||
None => Ok(None),
|
||||
}
|
||||
|
|
@ -1393,6 +1423,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
})
|
||||
} else {
|
||||
let row = sqlx::query(
|
||||
|
|
@ -1427,6 +1460,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
})
|
||||
};
|
||||
|
||||
|
|
@ -1470,6 +1506,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
} else {
|
||||
let rows = sqlx::query(
|
||||
|
|
@ -1504,6 +1543,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
};
|
||||
|
||||
|
|
@ -1515,7 +1557,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
|
|
@ -1545,11 +1587,14 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_confidence IS NOT NULL AND ocr_confidence < $1 AND user_id = $2
|
||||
ORDER BY ocr_confidence ASC, created_at DESC
|
||||
|
|
@ -1580,6 +1625,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
};
|
||||
|
||||
|
|
@ -1591,7 +1639,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -1620,11 +1668,14 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE (ocr_status = 'failed' OR (ocr_confidence IS NULL AND ocr_status != 'pending' AND ocr_status != 'processing')) AND user_id = $1
|
||||
ORDER BY created_at DESC
|
||||
|
|
@ -1654,6 +1705,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
};
|
||||
|
||||
|
|
@ -1665,7 +1719,7 @@ impl Database {
|
|||
let documents = if user_role == crate::models::UserRole::Admin {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE (ocr_confidence IS NOT NULL AND ocr_confidence < $1)
|
||||
OR ocr_status = 'failed'
|
||||
|
|
@ -1699,11 +1753,14 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
} else {
|
||||
let rows = sqlx::query(
|
||||
r#"
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash
|
||||
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash, original_created_at, original_modified_at, source_metadata
|
||||
FROM documents
|
||||
WHERE ((ocr_confidence IS NOT NULL AND ocr_confidence < $1)
|
||||
OR ocr_status = 'failed'
|
||||
|
|
@ -1739,6 +1796,9 @@ impl Database {
|
|||
updated_at: r.get("updated_at"),
|
||||
user_id: r.get("user_id"),
|
||||
file_hash: r.get("file_hash"),
|
||||
original_created_at: r.get("original_created_at"),
|
||||
original_modified_at: r.get("original_modified_at"),
|
||||
source_metadata: r.get("source_metadata"),
|
||||
}).collect()
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,10 @@
|
|||
use uuid::Uuid;
|
||||
use sha2::{Digest, Sha256};
|
||||
use tracing::{debug, info, warn};
|
||||
use chrono::Utc;
|
||||
use serde_json;
|
||||
|
||||
use crate::models::Document;
|
||||
use crate::models::{Document, FileInfo};
|
||||
use crate::db::Database;
|
||||
use crate::services::file_service::FileService;
|
||||
|
||||
|
|
@ -49,6 +51,10 @@ pub struct DocumentIngestionRequest {
|
|||
/// Optional source identifier for tracking
|
||||
pub source_type: Option<String>,
|
||||
pub source_id: Option<Uuid>,
|
||||
/// Optional metadata from source file system
|
||||
pub original_created_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
pub source_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
pub struct DocumentIngestionService {
|
||||
|
|
@ -61,6 +67,47 @@ impl DocumentIngestionService {
|
|||
Self { db, file_service }
|
||||
}
|
||||
|
||||
/// Extract metadata from FileInfo for storage in document
|
||||
fn extract_metadata_from_file_info(file_info: &FileInfo) -> (Option<chrono::DateTime<chrono::Utc>>, Option<chrono::DateTime<chrono::Utc>>, Option<serde_json::Value>) {
|
||||
let original_created_at = file_info.created_at;
|
||||
let original_modified_at = file_info.last_modified;
|
||||
|
||||
// Build comprehensive metadata object
|
||||
let mut metadata = serde_json::Map::new();
|
||||
|
||||
// Add permissions if available
|
||||
if let Some(perms) = file_info.permissions {
|
||||
metadata.insert("permissions".to_string(), serde_json::Value::Number(perms.into()));
|
||||
}
|
||||
|
||||
// Add owner/group info
|
||||
if let Some(ref owner) = file_info.owner {
|
||||
metadata.insert("owner".to_string(), serde_json::Value::String(owner.clone()));
|
||||
}
|
||||
|
||||
if let Some(ref group) = file_info.group {
|
||||
metadata.insert("group".to_string(), serde_json::Value::String(group.clone()));
|
||||
}
|
||||
|
||||
// Add source path
|
||||
metadata.insert("source_path".to_string(), serde_json::Value::String(file_info.path.clone()));
|
||||
|
||||
// Merge any additional metadata from the source
|
||||
if let Some(ref source_meta) = file_info.metadata {
|
||||
if let serde_json::Value::Object(source_map) = source_meta {
|
||||
metadata.extend(source_map.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let final_metadata = if metadata.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(serde_json::Value::Object(metadata))
|
||||
};
|
||||
|
||||
(original_created_at, original_modified_at, final_metadata)
|
||||
}
|
||||
|
||||
/// Unified document ingestion with configurable deduplication policy
|
||||
pub async fn ingest_document(&self, request: DocumentIngestionRequest) -> Result<IngestionResult, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let file_hash = self.calculate_file_hash(&request.file_data);
|
||||
|
|
@ -156,6 +203,9 @@ impl DocumentIngestionService {
|
|||
&request.mime_type,
|
||||
request.user_id,
|
||||
Some(file_hash.clone()),
|
||||
request.original_created_at,
|
||||
request.original_modified_at,
|
||||
request.source_metadata,
|
||||
);
|
||||
|
||||
let saved_document = match self.db.create_document(document).await {
|
||||
|
|
@ -235,6 +285,36 @@ impl DocumentIngestionService {
|
|||
format!("{:x}", result)
|
||||
}
|
||||
|
||||
/// Ingest document from source with FileInfo metadata
|
||||
pub async fn ingest_from_file_info(
|
||||
&self,
|
||||
file_info: &FileInfo,
|
||||
file_data: Vec<u8>,
|
||||
user_id: Uuid,
|
||||
deduplication_policy: DeduplicationPolicy,
|
||||
source_type: &str,
|
||||
source_id: Option<Uuid>,
|
||||
) -> Result<IngestionResult, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let (original_created_at, original_modified_at, source_metadata) =
|
||||
Self::extract_metadata_from_file_info(file_info);
|
||||
|
||||
let request = DocumentIngestionRequest {
|
||||
filename: file_info.name.clone(),
|
||||
original_filename: file_info.name.clone(),
|
||||
file_data,
|
||||
mime_type: file_info.mime_type.clone(),
|
||||
user_id,
|
||||
deduplication_policy,
|
||||
source_type: Some(source_type.to_string()),
|
||||
source_id,
|
||||
original_created_at,
|
||||
original_modified_at,
|
||||
source_metadata,
|
||||
};
|
||||
|
||||
self.ingest_document(request).await
|
||||
}
|
||||
|
||||
/// Convenience method for direct uploads (maintains backward compatibility)
|
||||
pub async fn ingest_upload(
|
||||
&self,
|
||||
|
|
@ -252,6 +332,9 @@ impl DocumentIngestionService {
|
|||
deduplication_policy: DeduplicationPolicy::AllowDuplicateContent, // Fixed behavior for uploads
|
||||
source_type: Some("direct_upload".to_string()),
|
||||
source_id: None,
|
||||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
};
|
||||
|
||||
self.ingest_document(request).await
|
||||
|
|
@ -276,6 +359,9 @@ impl DocumentIngestionService {
|
|||
deduplication_policy: DeduplicationPolicy::Skip, // Skip duplicates for source sync
|
||||
source_type: Some(source_type.to_string()),
|
||||
source_id: Some(source_id),
|
||||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
};
|
||||
|
||||
self.ingest_document(request).await
|
||||
|
|
@ -299,6 +385,9 @@ impl DocumentIngestionService {
|
|||
deduplication_policy: DeduplicationPolicy::TrackAsDuplicate, // Track duplicates for WebDAV
|
||||
source_type: Some("webdav".to_string()),
|
||||
source_id: Some(webdav_source_id),
|
||||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
};
|
||||
|
||||
self.ingest_document(request).await
|
||||
|
|
@ -321,6 +410,9 @@ impl DocumentIngestionService {
|
|||
deduplication_policy: DeduplicationPolicy::Skip, // Skip duplicates for batch operations
|
||||
source_type: Some("batch_ingest".to_string()),
|
||||
source_id: None,
|
||||
original_created_at: None,
|
||||
original_modified_at: None,
|
||||
source_metadata: None,
|
||||
};
|
||||
|
||||
self.ingest_document(request).await
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
|
|||
use sqlx::FromRow;
|
||||
use uuid::Uuid;
|
||||
use utoipa::{ToSchema, IntoParams};
|
||||
use serde_json;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, ToSchema)]
|
||||
pub enum UserRole {
|
||||
|
|
@ -133,6 +134,12 @@ pub struct Document {
|
|||
pub updated_at: DateTime<Utc>,
|
||||
pub user_id: Uuid,
|
||||
pub file_hash: Option<String>,
|
||||
/// Original file creation timestamp from source system
|
||||
pub original_created_at: Option<DateTime<Utc>>,
|
||||
/// Original file modification timestamp from source system
|
||||
pub original_modified_at: Option<DateTime<Utc>>,
|
||||
/// Additional metadata from source system (permissions, attributes, EXIF data, etc.)
|
||||
pub source_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, ToSchema)]
|
||||
|
|
@ -307,6 +314,15 @@ pub struct DocumentResponse {
|
|||
pub ocr_processing_time_ms: Option<i32>,
|
||||
/// Current status of OCR processing (pending, processing, completed, failed)
|
||||
pub ocr_status: Option<String>,
|
||||
/// Original file creation timestamp from source system
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub original_created_at: Option<DateTime<Utc>>,
|
||||
/// Original file modification timestamp from source system
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub original_modified_at: Option<DateTime<Utc>>,
|
||||
/// Additional metadata from source system (permissions, attributes, etc.)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub source_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema, IntoParams)]
|
||||
|
|
@ -447,6 +463,9 @@ impl From<Document> for DocumentResponse {
|
|||
ocr_word_count: doc.ocr_word_count,
|
||||
ocr_processing_time_ms: doc.ocr_processing_time_ms,
|
||||
ocr_status: doc.ocr_status,
|
||||
original_created_at: doc.original_created_at,
|
||||
original_modified_at: doc.original_modified_at,
|
||||
source_metadata: doc.source_metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -900,6 +919,16 @@ pub struct FileInfo {
|
|||
pub last_modified: Option<DateTime<Utc>>,
|
||||
pub etag: String,
|
||||
pub is_directory: bool,
|
||||
/// Original file creation time from source system
|
||||
pub created_at: Option<DateTime<Utc>>,
|
||||
/// File permissions (Unix mode bits or similar)
|
||||
pub permissions: Option<u32>,
|
||||
/// File owner (username or uid)
|
||||
pub owner: Option<String>,
|
||||
/// File group (groupname or gid)
|
||||
pub group: Option<String>,
|
||||
/// Additional metadata from source (EXIF, PDF metadata, custom attributes, etc.)
|
||||
pub metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, ToSchema)]
|
||||
|
|
|
|||
|
|
@ -116,6 +116,9 @@ async fn get_document_by_id(
|
|||
ocr_word_count: document.ocr_word_count,
|
||||
ocr_processing_time_ms: document.ocr_processing_time_ms,
|
||||
ocr_status: document.ocr_status,
|
||||
original_created_at: document.original_created_at,
|
||||
original_modified_at: document.original_modified_at,
|
||||
source_metadata: document.source_metadata,
|
||||
};
|
||||
|
||||
Ok(Json(response))
|
||||
|
|
|
|||
|
|
@ -283,24 +283,25 @@ async fn process_single_file(
|
|||
|
||||
let result = if let Some(source_id) = webdav_source_id {
|
||||
ingestion_service
|
||||
.ingest_from_webdav(
|
||||
&file_info.name,
|
||||
.ingest_from_file_info(
|
||||
&file_info,
|
||||
file_data,
|
||||
&file_info.mime_type,
|
||||
user_id,
|
||||
source_id,
|
||||
crate::ingestion::document_ingestion::DeduplicationPolicy::TrackAsDuplicate,
|
||||
"webdav_sync",
|
||||
Some(source_id),
|
||||
)
|
||||
.await
|
||||
} else {
|
||||
// Fallback for backward compatibility - treat as generic WebDAV sync
|
||||
ingestion_service
|
||||
.ingest_from_source(
|
||||
&file_info.name,
|
||||
.ingest_from_file_info(
|
||||
&file_info,
|
||||
file_data,
|
||||
&file_info.mime_type,
|
||||
user_id,
|
||||
uuid::Uuid::new_v4(), // Generate a temporary ID for tracking
|
||||
crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
|
||||
"webdav_sync",
|
||||
Some(uuid::Uuid::new_v4()), // Generate a temporary ID for tracking
|
||||
)
|
||||
.await
|
||||
};
|
||||
|
|
|
|||
|
|
@ -533,13 +533,13 @@ impl SourceSyncService {
|
|||
let ingestion_service = DocumentIngestionService::new(state.db.clone(), file_service);
|
||||
|
||||
let result = ingestion_service
|
||||
.ingest_from_source(
|
||||
&file_info.name,
|
||||
.ingest_from_file_info(
|
||||
file_info,
|
||||
file_data,
|
||||
&file_info.mime_type,
|
||||
user_id,
|
||||
source_id,
|
||||
crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
|
||||
"source_sync",
|
||||
Some(source_id),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow!("Document ingestion failed for {}: {}", file_info.name, e))?;
|
||||
|
|
@ -637,13 +637,13 @@ impl SourceSyncService {
|
|||
let ingestion_service = DocumentIngestionService::new(state.db.clone(), file_service);
|
||||
|
||||
let result = ingestion_service
|
||||
.ingest_from_source(
|
||||
&file_info.name,
|
||||
.ingest_from_file_info(
|
||||
file_info,
|
||||
file_data,
|
||||
&file_info.mime_type,
|
||||
user_id,
|
||||
source_id,
|
||||
crate::ingestion::document_ingestion::DeduplicationPolicy::Skip,
|
||||
"source_sync",
|
||||
Some(source_id),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow!("Document ingestion failed for {}: {}", file_info.name, e))?;
|
||||
|
|
|
|||
|
|
@ -158,6 +158,9 @@ impl FileService {
|
|||
mime_type: &str,
|
||||
user_id: Uuid,
|
||||
file_hash: Option<String>,
|
||||
original_created_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
original_modified_at: Option<chrono::DateTime<chrono::Utc>>,
|
||||
source_metadata: Option<serde_json::Value>,
|
||||
) -> Document {
|
||||
Document {
|
||||
id: Uuid::new_v4(),
|
||||
|
|
@ -179,6 +182,9 @@ impl FileService {
|
|||
updated_at: Utc::now(),
|
||||
user_id,
|
||||
file_hash,
|
||||
original_created_at,
|
||||
original_modified_at,
|
||||
source_metadata,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ use chrono::{DateTime, Utc};
|
|||
use tracing::{debug, info, warn};
|
||||
use walkdir::WalkDir;
|
||||
use sha2::{Sha256, Digest};
|
||||
use serde_json;
|
||||
|
||||
use crate::models::{FileInfo, LocalFolderSourceConfig};
|
||||
|
||||
|
|
@ -89,6 +90,14 @@ impl LocalFolderService {
|
|||
DateTime::from_timestamp(duration.as_secs() as i64, 0)
|
||||
});
|
||||
|
||||
// Try to get creation time (not available on all systems)
|
||||
let created_time = metadata.created()
|
||||
.ok()
|
||||
.and_then(|time| {
|
||||
let duration = time.duration_since(std::time::UNIX_EPOCH).ok()?;
|
||||
DateTime::from_timestamp(duration.as_secs() as i64, 0)
|
||||
});
|
||||
|
||||
let file_name = path.file_name()
|
||||
.and_then(|name| name.to_str())
|
||||
.unwrap_or("unknown")
|
||||
|
|
@ -100,6 +109,34 @@ impl LocalFolderService {
|
|||
// Determine MIME type based on extension
|
||||
let mime_type = Self::get_mime_type(&extension);
|
||||
|
||||
// Extract file permissions and ownership info
|
||||
#[cfg(unix)]
|
||||
let (permissions, owner, group) = {
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
(
|
||||
Some(metadata.mode() & 0o777), // File mode bits (permissions)
|
||||
Some(metadata.uid().to_string()), // User ID
|
||||
Some(metadata.gid().to_string()), // Group ID
|
||||
)
|
||||
};
|
||||
|
||||
#[cfg(not(unix))]
|
||||
let (permissions, owner, group) = (None, None, None);
|
||||
|
||||
// Prepare additional metadata
|
||||
let mut additional_metadata = serde_json::Map::new();
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
additional_metadata.insert("inode".to_string(), serde_json::Value::Number(metadata.ino().into()));
|
||||
additional_metadata.insert("nlinks".to_string(), serde_json::Value::Number(metadata.nlink().into()));
|
||||
additional_metadata.insert("device".to_string(), serde_json::Value::Number(metadata.dev().into()));
|
||||
}
|
||||
|
||||
// Add file attributes
|
||||
additional_metadata.insert("readonly".to_string(), serde_json::Value::Bool(metadata.permissions().readonly()));
|
||||
|
||||
let file_info = FileInfo {
|
||||
path: path.to_string_lossy().to_string(),
|
||||
name: file_name,
|
||||
|
|
@ -108,6 +145,11 @@ impl LocalFolderService {
|
|||
last_modified: modified_time,
|
||||
etag,
|
||||
is_directory: false,
|
||||
created_at: created_time,
|
||||
permissions,
|
||||
owner,
|
||||
group,
|
||||
metadata: if additional_metadata.is_empty() { None } else { Some(serde_json::Value::Object(additional_metadata)) },
|
||||
};
|
||||
|
||||
files.push(file_info);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use tracing::{debug, error, info, warn};
|
||||
use serde_json;
|
||||
|
||||
#[cfg(feature = "s3")]
|
||||
use aws_sdk_s3::Client;
|
||||
|
|
@ -149,6 +150,32 @@ impl S3Service {
|
|||
|
||||
let mime_type = Self::get_mime_type(&extension);
|
||||
|
||||
// Build additional metadata from S3 object properties
|
||||
let mut metadata_map = serde_json::Map::new();
|
||||
|
||||
// Add S3-specific metadata
|
||||
if let Some(storage_class) = &object.storage_class {
|
||||
metadata_map.insert("storage_class".to_string(), serde_json::Value::String(storage_class.as_str().to_string()));
|
||||
}
|
||||
|
||||
if let Some(owner) = &object.owner {
|
||||
if let Some(display_name) = &owner.display_name {
|
||||
metadata_map.insert("owner_display_name".to_string(), serde_json::Value::String(display_name.clone()));
|
||||
}
|
||||
if let Some(id) = &owner.id {
|
||||
metadata_map.insert("owner_id".to_string(), serde_json::Value::String(id.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
// Store the S3 key for reference
|
||||
metadata_map.insert("s3_key".to_string(), serde_json::Value::String(key.clone()));
|
||||
|
||||
// Add bucket name for reference
|
||||
metadata_map.insert("s3_bucket".to_string(), serde_json::Value::String(self.config.bucket_name.clone()));
|
||||
|
||||
// If we have region info, add it
|
||||
metadata_map.insert("s3_region".to_string(), serde_json::Value::String(self.config.region.clone()));
|
||||
|
||||
let file_info = FileInfo {
|
||||
path: key.clone(),
|
||||
name: file_name,
|
||||
|
|
@ -157,6 +184,11 @@ impl S3Service {
|
|||
last_modified,
|
||||
etag,
|
||||
is_directory: false,
|
||||
created_at: None, // S3 doesn't provide creation time, only last modified
|
||||
permissions: None, // S3 uses different permission model (ACLs/policies)
|
||||
owner: object.owner.as_ref().and_then(|o| o.display_name.clone()),
|
||||
group: None, // S3 doesn't have Unix-style groups
|
||||
metadata: if metadata_map.is_empty() { None } else { Some(serde_json::Value::Object(metadata_map)) },
|
||||
};
|
||||
|
||||
files.push(file_info);
|
||||
|
|
|
|||
|
|
@ -423,14 +423,7 @@ impl WebDAVService {
|
|||
|
||||
let propfind_body = r#"<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:">
|
||||
<d:prop>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getlastmodified/>
|
||||
<d:getcontenttype/>
|
||||
<d:getetag/>
|
||||
<d:resourcetype/>
|
||||
</d:prop>
|
||||
<d:allprop/>
|
||||
</d:propfind>"#;
|
||||
|
||||
let response = self.client
|
||||
|
|
|
|||
|
|
@@ -2,6 +2,49 @@
mod tests {
    use crate::config::Config;
    use std::env;
    use std::sync::Mutex;

    // Mutex to ensure OIDC tests run sequentially to avoid race conditions
    static OIDC_TEST_MUTEX: Mutex<()> = Mutex::new(());

    // Helper function to safely run a test with environment isolation
    fn run_with_env_isolation<F, R>(test_fn: F) -> R
    where
        F: FnOnce() -> R,
    {
        let _guard = OIDC_TEST_MUTEX.lock().unwrap();

        // Store original environment values
        let original_values: Vec<(String, Option<String>)> = vec![
            "OIDC_ENABLED",
            "OIDC_CLIENT_ID",
            "OIDC_CLIENT_SECRET",
            "OIDC_ISSUER_URL",
            "OIDC_REDIRECT_URI",
            "DATABASE_URL",
            "JWT_SECRET",
        ].into_iter().map(|key| {
            (key.to_string(), env::var(key).ok())
        }).collect();

        // Clean up environment first
        for (key, _) in &original_values {
            env::remove_var(key);
        }

        // Run the test
        let result = test_fn();

        // Restore original environment
        for (key, original_value) in original_values {
            env::remove_var(&key);
            if let Some(value) = original_value {
                env::set_var(&key, value);
            }
        }

        result
    }

    fn create_base_config() -> Config {
        Config {
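One caveat with the guard above: if a test panics while holding the lock, the mutex is poisoned and later .unwrap() calls will panic as well. A poison-tolerant variant, sketched against the same static (not part of this commit), would look like:

use std::sync::{Mutex, MutexGuard};

static OIDC_TEST_MUTEX: Mutex<()> = Mutex::new(());

// Recover the guard even after a previous test panicked while holding it,
// so one failure does not cascade into PoisonError failures elsewhere.
fn lock_ignoring_poison() -> MutexGuard<'static, ()> {
    OIDC_TEST_MUTEX
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}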
@@ -40,291 +83,176 @@ mod tests
    #[test]
    fn test_oidc_enabled_from_env() {
        // Clean up environment first to ensure test isolation
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        env::set_var("OIDC_ENABLED", "true");
        env::set_var("OIDC_CLIENT_ID", "test-client-id");
        env::set_var("OIDC_CLIENT_SECRET", "test-client-secret");
        env::set_var("OIDC_ISSUER_URL", "https://provider.example.com");
        env::set_var("OIDC_REDIRECT_URI", "http://localhost:8000/auth/oidc/callback");
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");

        let config = Config::from_env().unwrap();

        assert!(config.oidc_enabled);
        assert_eq!(config.oidc_client_id, Some("test-client-id".to_string()));
        assert_eq!(config.oidc_client_secret, Some("test-client-secret".to_string()));
        assert_eq!(config.oidc_issuer_url, Some("https://provider.example.com".to_string()));
        assert_eq!(config.oidc_redirect_uri, Some("http://localhost:8000/auth/oidc/callback".to_string()));

        // Clean up
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");
    }

    #[test]
    fn test_oidc_enabled_variations() {
        let test_cases = vec![
            ("true", true),
            ("TRUE", true),
            ("1", true),
            ("yes", true),
            ("YES", true),
            ("on", true),
            ("ON", true),
            ("false", false),
            ("FALSE", false),
            ("0", false),
            ("no", false),
            ("NO", false),
            ("off", false),
            ("OFF", false),
            ("invalid", false),
        ];

        for (value, expected) in test_cases {
            // Clean up environment first for each iteration
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("OIDC_CLIENT_SECRET");
            env::remove_var("OIDC_ISSUER_URL");
            env::remove_var("OIDC_REDIRECT_URI");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");

            env::set_var("OIDC_ENABLED", value);
        run_with_env_isolation(|| {
            env::set_var("OIDC_ENABLED", "true");
            env::set_var("OIDC_CLIENT_ID", "test-client-id");
            env::set_var("OIDC_CLIENT_SECRET", "test-client-secret");
            env::set_var("OIDC_ISSUER_URL", "https://provider.example.com");
            env::set_var("OIDC_REDIRECT_URI", "http://localhost:8000/auth/oidc/callback");
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");

            let config = Config::from_env().unwrap();
            assert_eq!(config.oidc_enabled, expected, "Failed for value: {}", value);

            env::remove_var("OIDC_ENABLED");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
        }
            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id, Some("test-client-id".to_string()));
            assert_eq!(config.oidc_client_secret, Some("test-client-secret".to_string()));
            assert_eq!(config.oidc_issuer_url, Some("https://provider.example.com".to_string()));
            assert_eq!(config.oidc_redirect_uri, Some("http://localhost:8000/auth/oidc/callback".to_string()));
        });
    }

    #[test]
    fn test_oidc_enabled_variations() {
        run_with_env_isolation(|| {
            let test_cases = vec![
                ("true", true),
                ("TRUE", true),
                ("1", true),
                ("yes", true),
                ("YES", true),
                ("on", true),
                ("ON", true),
                ("false", false),
                ("FALSE", false),
                ("0", false),
                ("no", false),
                ("NO", false),
                ("off", false),
                ("OFF", false),
                ("invalid", false),
            ];

            for (value, expected) in test_cases {
                // Clean up environment for each iteration
                env::remove_var("OIDC_ENABLED");
                env::remove_var("DATABASE_URL");
                env::remove_var("JWT_SECRET");

                env::set_var("OIDC_ENABLED", value);
                env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
                env::set_var("JWT_SECRET", "test-secret");

                let config = Config::from_env().unwrap();
                assert_eq!(config.oidc_enabled, expected, "Failed for value: {}", value);
            }
        });
    }
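The table above implies a case-insensitive truthy/falsy parse for OIDC_ENABLED. A hypothetical helper that matches the expectations in these cases follows; the real Config::from_env parsing may differ in detail.

// Hypothetical sketch, not the commit's implementation.
fn parse_enabled(raw: &str) -> bool {
    matches!(raw.to_ascii_lowercase().as_str(), "true" | "1" | "yes" | "on")
}

fn main() {
    assert!(parse_enabled("ON"));
    assert!(parse_enabled("yes"));
    assert!(!parse_enabled("0"));
    assert!(!parse_enabled("invalid")); // unrecognized values fall back to false
}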
    #[test]
    fn test_oidc_partial_config() {
        // Clean up environment first to ensure test isolation
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        // Only set some OIDC vars
        env::set_var("OIDC_ENABLED", "true");
        env::set_var("OIDC_CLIENT_ID", "test-client-id");
        // Missing OIDC_CLIENT_SECRET, OIDC_ISSUER_URL, OIDC_REDIRECT_URI
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");
        run_with_env_isolation(|| {
            // Only set some OIDC vars
            env::set_var("OIDC_ENABLED", "true");
            env::set_var("OIDC_CLIENT_ID", "test-client-id");
            // Missing OIDC_CLIENT_SECRET, OIDC_ISSUER_URL, OIDC_REDIRECT_URI
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");

            let config = Config::from_env().unwrap();
            let config = Config::from_env().unwrap();

            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id, Some("test-client-id".to_string()));
            assert!(config.oidc_client_secret.is_none());
            assert!(config.oidc_issuer_url.is_none());
            assert!(config.oidc_redirect_uri.is_none());

            // Clean up
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("OIDC_CLIENT_SECRET");
            env::remove_var("OIDC_ISSUER_URL");
            env::remove_var("OIDC_REDIRECT_URI");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id, Some("test-client-id".to_string()));
            assert!(config.oidc_client_secret.is_none());
            assert!(config.oidc_issuer_url.is_none());
            assert!(config.oidc_redirect_uri.is_none());
        });
    }

    #[test]
    fn test_oidc_disabled_with_config_present() {
        // Clean up environment first to ensure test isolation
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        // OIDC disabled but config present
        env::set_var("OIDC_ENABLED", "false");
        env::set_var("OIDC_CLIENT_ID", "test-client-id");
        env::set_var("OIDC_CLIENT_SECRET", "test-client-secret");
        env::set_var("OIDC_ISSUER_URL", "https://provider.example.com");
        env::set_var("OIDC_REDIRECT_URI", "http://localhost:8000/auth/oidc/callback");
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");
        run_with_env_isolation(|| {
            // OIDC disabled but config present
            env::set_var("OIDC_ENABLED", "false");
            env::set_var("OIDC_CLIENT_ID", "test-client-id");
            env::set_var("OIDC_CLIENT_SECRET", "test-client-secret");
            env::set_var("OIDC_ISSUER_URL", "https://provider.example.com");
            env::set_var("OIDC_REDIRECT_URI", "http://localhost:8000/auth/oidc/callback");
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");

            let config = Config::from_env().unwrap();
            let config = Config::from_env().unwrap();

            assert!(!config.oidc_enabled);
            assert_eq!(config.oidc_client_id, Some("test-client-id".to_string()));
            assert_eq!(config.oidc_client_secret, Some("test-client-secret".to_string()));
            assert_eq!(config.oidc_issuer_url, Some("https://provider.example.com".to_string()));
            assert_eq!(config.oidc_redirect_uri, Some("http://localhost:8000/auth/oidc/callback".to_string()));

            // Clean up
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("OIDC_CLIENT_SECRET");
            env::remove_var("OIDC_ISSUER_URL");
            env::remove_var("OIDC_REDIRECT_URI");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
            assert!(!config.oidc_enabled);
            assert_eq!(config.oidc_client_id, Some("test-client-id".to_string()));
            assert_eq!(config.oidc_client_secret, Some("test-client-secret".to_string()));
            assert_eq!(config.oidc_issuer_url, Some("https://provider.example.com".to_string()));
            assert_eq!(config.oidc_redirect_uri, Some("http://localhost:8000/auth/oidc/callback".to_string()));
        });
    }

    #[test]
    fn test_oidc_empty_values() {
        // Clean up environment first to ensure test isolation
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        env::set_var("OIDC_ENABLED", "true");
        env::set_var("OIDC_CLIENT_ID", "");
        env::set_var("OIDC_CLIENT_SECRET", "");
        env::set_var("OIDC_ISSUER_URL", "");
        env::set_var("OIDC_REDIRECT_URI", "");
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");
        run_with_env_isolation(|| {
            env::set_var("OIDC_ENABLED", "true");
            env::set_var("OIDC_CLIENT_ID", "");
            env::set_var("OIDC_CLIENT_SECRET", "");
            env::set_var("OIDC_ISSUER_URL", "");
            env::set_var("OIDC_REDIRECT_URI", "");
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");

            let config = Config::from_env().unwrap();
            let config = Config::from_env().unwrap();

            assert!(config.oidc_enabled);
            // Empty string values should be converted to Some(empty_string)
            assert_eq!(config.oidc_client_id, Some("".to_string()));
            assert_eq!(config.oidc_client_secret, Some("".to_string()));
            assert_eq!(config.oidc_issuer_url, Some("".to_string()));
            assert_eq!(config.oidc_redirect_uri, Some("".to_string()));

            // Clean up
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("OIDC_CLIENT_SECRET");
            env::remove_var("OIDC_ISSUER_URL");
            env::remove_var("OIDC_REDIRECT_URI");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
            assert!(config.oidc_enabled);
            // Empty string values should be converted to Some(empty_string)
            assert_eq!(config.oidc_client_id, Some("".to_string()));
            assert_eq!(config.oidc_client_secret, Some("".to_string()));
            assert_eq!(config.oidc_issuer_url, Some("".to_string()));
            assert_eq!(config.oidc_redirect_uri, Some("".to_string()));
        });
    }

    #[test]
    fn test_oidc_config_validation_output() {
        // Clean up environment first to ensure test isolation
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        // Test that validation warnings are properly formatted
        env::set_var("OIDC_ENABLED", "true");
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");
        // Missing required OIDC fields
        run_with_env_isolation(|| {
            // Test that validation warnings are properly formatted
            env::set_var("OIDC_ENABLED", "true");
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");
            // Missing required OIDC fields

            // This should succeed but show warnings
            let config = Config::from_env().unwrap();
            assert!(config.oidc_enabled);
            assert!(config.oidc_client_id.is_none());

            // Clean up
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("OIDC_CLIENT_SECRET");
            env::remove_var("OIDC_ISSUER_URL");
            env::remove_var("OIDC_REDIRECT_URI");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
            // This should succeed but show warnings
            let config = Config::from_env().unwrap();
            assert!(config.oidc_enabled);
            assert!(config.oidc_client_id.is_none());
        });
    }

    #[test]
    fn test_oidc_complete_configuration() {
        // Clean up environment first to ensure test isolation
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        env::set_var("OIDC_ENABLED", "true");
        env::set_var("OIDC_CLIENT_ID", "my-app-client-id");
        env::set_var("OIDC_CLIENT_SECRET", "super-secret-client-secret");
        env::set_var("OIDC_ISSUER_URL", "https://auth.example.com");
        env::set_var("OIDC_REDIRECT_URI", "https://myapp.com/auth/callback");
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");
        run_with_env_isolation(|| {
            env::set_var("OIDC_ENABLED", "true");
            env::set_var("OIDC_CLIENT_ID", "my-app-client-id");
            env::set_var("OIDC_CLIENT_SECRET", "super-secret-client-secret");
            env::set_var("OIDC_ISSUER_URL", "https://auth.example.com");
            env::set_var("OIDC_REDIRECT_URI", "https://myapp.com/auth/callback");
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");

            let config = Config::from_env().unwrap();
            let config = Config::from_env().unwrap();

            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id.unwrap(), "my-app-client-id");
            assert_eq!(config.oidc_client_secret.unwrap(), "super-secret-client-secret");
            assert_eq!(config.oidc_issuer_url.unwrap(), "https://auth.example.com");
            assert_eq!(config.oidc_redirect_uri.unwrap(), "https://myapp.com/auth/callback");

            // Clean up
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("OIDC_CLIENT_SECRET");
            env::remove_var("OIDC_ISSUER_URL");
            env::remove_var("OIDC_REDIRECT_URI");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id.unwrap(), "my-app-client-id");
            assert_eq!(config.oidc_client_secret.unwrap(), "super-secret-client-secret");
            assert_eq!(config.oidc_issuer_url.unwrap(), "https://auth.example.com");
            assert_eq!(config.oidc_redirect_uri.unwrap(), "https://myapp.com/auth/callback");
        });
    }

    #[test]
    fn test_oidc_config_precedence() {
        // Clean up any existing env vars first
        env::remove_var("OIDC_ENABLED");
        env::remove_var("OIDC_CLIENT_ID");
        env::remove_var("OIDC_CLIENT_SECRET");
        env::remove_var("OIDC_ISSUER_URL");
        env::remove_var("OIDC_REDIRECT_URI");
        env::remove_var("DATABASE_URL");
        env::remove_var("JWT_SECRET");

        // Test that environment variables take precedence
        env::set_var("OIDC_ENABLED", "true");
        env::set_var("OIDC_CLIENT_ID", "env-client-id");
        env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
        env::set_var("JWT_SECRET", "test-secret");
        run_with_env_isolation(|| {
            // Test that environment variables take precedence
            env::set_var("OIDC_ENABLED", "true");
            env::set_var("OIDC_CLIENT_ID", "env-client-id");
            env::set_var("DATABASE_URL", "postgresql://test:test@localhost/test");
            env::set_var("JWT_SECRET", "test-secret");

            let config = Config::from_env().unwrap();
            let config = Config::from_env().unwrap();

            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id.unwrap(), "env-client-id");

            // Clean up
            env::remove_var("OIDC_ENABLED");
            env::remove_var("OIDC_CLIENT_ID");
            env::remove_var("DATABASE_URL");
            env::remove_var("JWT_SECRET");
            assert!(config.oidc_enabled);
            assert_eq!(config.oidc_client_id.unwrap(), "env-client-id");
        });
    }
}
@@ -49,6 +49,9 @@ mod tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("abcd1234567890123456789012345678901234567890123456789012345678".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        }
    }

@@ -60,6 +60,9 @@ mod document_routes_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("hash123".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        }
    }

@@ -394,6 +397,9 @@ mod document_routes_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        }
    }

@@ -26,6 +26,9 @@ fn create_test_document(user_id: Uuid) -> Document {
        updated_at: Utc::now(),
        user_id,
        file_hash: Some("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef".to_string()),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -51,6 +54,9 @@ fn create_test_document_without_ocr(user_id: Uuid) -> Document {
        updated_at: Utc::now(),
        user_id,
        file_hash: Some("fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321".to_string()),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -76,6 +82,9 @@ fn create_test_document_with_ocr_error(user_id: Uuid) -> Document {
        updated_at: Utc::now(),
        user_id,
        file_hash: Some("abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890".to_string()),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -1552,6 +1561,9 @@ mod deletion_error_handling_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("test_hash_123456789abcdef123456789abcdef123456789abcdef123456789abcdef".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        }
    }

@@ -76,7 +76,7 @@ mod tests
        assert_eq!(ocr_result.text.trim(), test_content);
        assert_eq!(ocr_result.confidence, 100.0); // Plain text should be 100% confident
        assert_eq!(ocr_result.word_count, 9); // "This is a test text file with multiple words"
        assert!(ocr_result.processing_time_ms > 0);
        assert!(ocr_result.processing_time_ms >= 0);
        assert!(ocr_result.preprocessing_applied.contains(&"Plain text read".to_string()));
    }

@@ -939,6 +939,9 @@ mod tests
            updated_at: Utc::now(),
            user_id: user.id,
            file_hash: Some("0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        db.create_document(document).await.unwrap();

@@ -82,6 +82,9 @@ mod tests
            "application/pdf",
            user_id,
            Some("abcd1234hash".to_string()),
            None, // original_created_at
            None, // original_modified_at
            None, // source_metadata
        );

        assert_eq!(document.filename, "saved_file.pdf");

@@ -189,6 +192,9 @@ mod file_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("hash123".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        (

@@ -324,6 +330,9 @@ mod file_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: None,
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        // Try to delete nonexistent files (should not fail)

@@ -375,6 +384,9 @@ mod file_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("imagehash456".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        // Verify files exist

@@ -430,6 +442,9 @@ mod file_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("hash789".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        // Verify files exist

@@ -476,6 +491,9 @@ mod file_deletion_tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("texthash".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        // Verify file exists

@@ -81,6 +81,9 @@ mod tests
            updated_at: Utc::now(),
            user_id,
            file_hash: Some("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef".to_string()),
            original_created_at: None,
            original_modified_at: None,
            source_metadata: None,
        };

        sqlx::query("INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id, file_hash) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)")
@@ -3,6 +3,7 @@ use chrono::{DateTime, Utc};
use quick_xml::events::{BytesStart, Event};
use quick_xml::reader::Reader;
use std::str;
use serde_json;

use crate::models::FileInfo;

@@ -15,6 +16,12 @@ struct PropFindResponse {
    content_type: Option<String>,
    etag: Option<String>,
    is_collection: bool,
    creation_date: Option<String>,
    owner: Option<String>,
    group: Option<String>,
    permissions: Option<String>,
    owner_display_name: Option<String>,
    metadata: Option<serde_json::Value>,
}

pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
@@ -85,6 +92,53 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
                "getetag" => {
                    resp.etag = Some(normalize_etag(&text));
                }
                "creationdate" => {
                    resp.creation_date = Some(text.trim().to_string());
                }
                "owner" => {
                    resp.owner = Some(text.trim().to_string());
                }
                "group" => {
                    resp.group = Some(text.trim().to_string());
                }
                _ => {
                    // Store any other properties as generic metadata
                    // This handles vendor-specific properties from any WebDAV server
                    if !text.trim().is_empty() && in_prop {
                        if resp.metadata.is_none() {
                            resp.metadata = Some(serde_json::Value::Object(serde_json::Map::new()));
                        }

                        if let Some(serde_json::Value::Object(ref mut map)) = resp.metadata {
                            // Special handling for known properties
                            match current_element.as_str() {
                                "permissions" | "oc:permissions" => {
                                    resp.permissions = Some(text.trim().to_string());
                                    map.insert("permissions_raw".to_string(), serde_json::Value::String(text.trim().to_string()));
                                }
                                "fileid" | "oc:fileid" => {
                                    map.insert("file_id".to_string(), serde_json::Value::String(text.trim().to_string()));
                                }
                                "owner-id" | "oc:owner-id" => {
                                    map.insert("owner_id".to_string(), serde_json::Value::String(text.trim().to_string()));
                                }
                                "owner-display-name" | "oc:owner-display-name" => {
                                    resp.owner_display_name = Some(text.trim().to_string());
                                    map.insert("owner_display_name".to_string(), serde_json::Value::String(text.trim().to_string()));
                                }
                                "has-preview" | "nc:has-preview" => {
                                    if let Ok(val) = text.trim().parse::<bool>() {
                                        map.insert("has_preview".to_string(), serde_json::Value::Bool(val));
                                    }
                                }
                                _ => {
                                    // Store any other property as-is
                                    map.insert(current_element.clone(), serde_json::Value::String(text.trim().to_string()));
                                }
                            }
                        }
                    }
                }
                "status" if in_propstat => {
                    // Check if status is 200 OK
                    if text.contains("200") {
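For reference, the generic fallback above could accumulate a metadata value along these lines for a Nextcloud/ownCloud PROPFIND response; the keys follow the inserts in the hunk, and the values are invented.

use serde_json::json;

fn main() {
    // Invented example of the collected vendor metadata for one WebDAV entry.
    let example = json!({
        "permissions_raw": "RGDNVW",
        "file_id": "000123",
        "owner_id": "alice",
        "owner_display_name": "Alice Example",
        "has_preview": true
    });
    println!("{}", serde_json::to_string_pretty(&example).unwrap());
}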
@@ -120,6 +174,33 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
                .unwrap_or_else(|_| std::borrow::Cow::Borrowed(&name))
                .to_string();

            // Parse creation date
            let created_at = resp.creation_date
                .as_ref()
                .and_then(|d| parse_http_date(d));

            // Parse permissions (Nextcloud/ownCloud format)
            let permissions_int = resp.permissions
                .as_ref()
                .and_then(|p| {
                    // Nextcloud permissions are a string like "RGDNVW"
                    // Convert to Unix-style octal permissions
                    if p.chars().all(|c| c.is_uppercase()) {
                        // This is Nextcloud format
                        let mut perms = 0u32;
                        if p.contains('R') { perms |= 0o444; } // Read
                        if p.contains('W') { perms |= 0o222; } // Write
                        if p.contains('D') { perms |= 0o111; } // Delete (execute-like)
                        Some(perms)
                    } else {
                        // Try to parse as numeric
                        p.parse().ok()
                    }
                });

            // Use the metadata collected during parsing
            let metadata = resp.metadata;

            let file_info = FileInfo {
                path: resp.href.clone(),
                name,
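Working through the Nextcloud branch above for a typical permission string: "RGDNVW" contains R, W and D, so the computed mode is 0o444 | 0o222 | 0o111 = 0o777. A quick standalone check (illustrative only, not part of the diff):

fn main() {
    let p = "RGDNVW";
    let mut perms = 0u32;
    if p.contains('R') { perms |= 0o444; } // Read
    if p.contains('W') { perms |= 0o222; } // Write
    if p.contains('D') { perms |= 0o111; } // Delete (execute-like)
    assert_eq!(perms, 0o777);
    println!("RGDNVW -> {:o}", perms);
}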
@@ -128,6 +209,11 @@ pub fn parse_propfind_response(xml_text: &str) -> Result<Vec<FileInfo>> {
                last_modified: parse_http_date(&resp.last_modified.unwrap_or_default()),
                etag: resp.etag.unwrap_or_else(|| format!("\"{}\"", uuid::Uuid::new_v4())),
                is_directory: false,
                created_at,
                permissions: permissions_int,
                owner: resp.owner.or(resp.owner_display_name),
                group: resp.group,
                metadata,
            };

            files.push(file_info);
@@ -41,6 +41,9 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
        updated_at: Utc::now(),
        user_id,
        file_hash: Some(file_hash),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -239,6 +239,9 @@ impl FileProcessingTestClient
                ocr_word_count: doc.ocr_word_count,
                ocr_processing_time_ms: doc.ocr_processing_time_ms,
                ocr_status: doc.ocr_status.clone(),
                original_created_at: None,
                original_modified_at: None,
                source_metadata: None,
            };
            return Ok(doc_copy);
        }

@@ -59,6 +59,9 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: Option<String>
        updated_at: Utc::now(),
        user_id,
        file_hash,
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -248,6 +251,9 @@ async fn test_file_service_create_document_with_hash() {
        "application/pdf",
        user_id,
        Some(test_hash.to_string()),
        None, // original_created_at
        None, // original_modified_at
        None, // source_metadata
    );

    assert_eq!(document.filename, "test.pdf");

@@ -271,6 +277,9 @@ async fn test_file_service_create_document_without_hash() {
        "application/pdf",
        user_id,
        None,
        None, // original_created_at
        None, // original_modified_at
        None, // source_metadata
    );

    assert_eq!(document.filename, "test.pdf");

@@ -356,6 +356,9 @@ async fn test_create_ignored_file_from_document() -> Result<()> {
        updated_at: chrono::Utc::now(),
        user_id,
        file_hash: Some("document_hash_123".to_string()),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    };

    // Insert document into database

@@ -29,6 +29,11 @@ fn create_test_file_info(name: &str, path: &str, content: &[u8]) -> FileInfo {
        etag: "test-etag".to_string(),
        mime_type: "application/pdf".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    }
}

@@ -54,6 +59,9 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
        updated_at: Utc::now(),
        user_id,
        file_hash: Some(file_hash),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -29,6 +29,11 @@ fn create_test_file_info(name: &str, path: &str, size: i64) -> FileInfo {
        etag: "test-etag".to_string(),
        mime_type: "application/pdf".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    }
}

@@ -54,6 +59,9 @@ fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Doc
        updated_at: Utc::now(),
        user_id,
        file_hash: Some(file_hash),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    }
}

@@ -280,6 +288,11 @@ async fn test_webdav_sync_etag_change_detection() -> Result<()> {
        etag: new_etag.to_string(),
        mime_type: "application/pdf".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    // ETag comparison should detect change

@@ -25,6 +25,9 @@ fn test_document_response_conversion_with_ocr() {
        updated_at: Utc::now(),
        user_id,
        file_hash: Some("abc123".to_string()),
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    };

    let response: DocumentResponse = document.clone().into();

@@ -59,6 +62,9 @@ fn test_document_response_conversion_without_ocr() {
        updated_at: Utc::now(),
        user_id,
        file_hash: None,
        original_created_at: None,
        original_modified_at: None,
        source_metadata: None,
    };

    let response: DocumentResponse = document.clone().into();

@@ -607,6 +607,11 @@ fn test_special_characters_in_paths() {
        last_modified: Some(Utc::now()),
        etag: "\"test123\"".to_string(),
        is_directory: false,
        created_at: None,
        permissions: None,
        owner: None,
        group: None,
        metadata: None,
    };

    assert!(!file_info.name.is_empty());