feat(server): I feel like I'm going to have to come back and fix this later
parent 4aa3d77e40
commit 6898d85981
@@ -218,21 +218,25 @@ async fn process_single_file(
     info!("Processing file: {}", file_info.path);
 
     // Check if we've already processed this file
+    info!("Checking WebDAV tracking for: {}", file_info.path);
     match state.db.get_webdav_file_by_path(user_id, &file_info.path).await {
         Ok(Some(existing_file)) => {
+            info!("Found existing WebDAV file record: {} (current ETag: {}, remote ETag: {})",
+                file_info.path, existing_file.etag, file_info.etag);
 
             // Check if file has changed (compare ETags)
             if existing_file.etag == file_info.etag {
-                info!("Skipping unchanged file: {} (ETag: {})", file_info.path, file_info.etag);
+                info!("Skipping unchanged WebDAV file: {} (ETag: {})", file_info.path, file_info.etag);
                 return Ok(false); // Not processed (no change)
             }
-            info!("File has changed: {} (old ETag: {}, new ETag: {})",
+            info!("WebDAV file has changed: {} (old ETag: {}, new ETag: {})",
                 file_info.path, existing_file.etag, file_info.etag);
         }
         Ok(None) => {
-            info!("New file found: {}", file_info.path);
+            info!("New WebDAV file detected: {}", file_info.path);
         }
         Err(e) => {
-            warn!("Error checking existing file {}: {}", file_info.path, e);
+            warn!("Error checking existing WebDAV file {}: {}", file_info.path, e);
         }
     }
 
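Note: the change detection above boils down to comparing the stored ETag against the one the WebDAV server reports. A minimal sketch of that decision (StoredFile and RemoteFile are hypothetical stand-ins for the tracked DB record and the remote listing entry used in the real code):

// Hypothetical stand-ins for the tracked DB row and the remote listing entry.
struct StoredFile { etag: String }
struct RemoteFile { etag: String }

/// True when the remote file should be (re)downloaded and processed.
fn needs_sync(stored: Option<&StoredFile>, remote: &RemoteFile) -> bool {
    match stored {
        Some(existing) => existing.etag != remote.etag, // ETag changed on the server
        None => true,                                   // never seen this path before
    }
}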
@@ -245,20 +249,29 @@ async fn process_single_file(
     // Calculate file hash for deduplication
     let file_hash = calculate_file_hash(&file_data);
 
-    // Check if this exact file content already exists in the system
+    // Check if this exact file content already exists for this user
     // This prevents downloading and processing duplicate files from WebDAV
+    info!("Checking for duplicate content for user {}: {} (hash: {}, size: {} bytes)",
+        user_id, file_info.name, &file_hash[..8], file_data.len());
+
+    // Query documents with the same file size for this user only
+    let size_filter = file_data.len() as i64;
     if let Ok(existing_docs) = state.db.get_documents_by_user_with_role(user_id, crate::models::UserRole::User, 1000, 0).await {
-        for existing_doc in existing_docs {
-            // Quick size check first (much faster than hash comparison)
-            if existing_doc.file_size == file_data.len() as i64 {
+        let matching_docs: Vec<_> = existing_docs.into_iter()
+            .filter(|doc| doc.file_size == size_filter)
+            .collect();
+
+        info!("Found {} documents with same size for user {}", matching_docs.len(), user_id);
+
+        for existing_doc in matching_docs {
             // Read the existing file and compare hashes
             if let Ok(existing_file_data) = tokio::fs::read(&existing_doc.file_path).await {
                 let existing_hash = calculate_file_hash(&existing_file_data);
                 if file_hash == existing_hash {
-                    info!("Skipping duplicate WebDAV file content: {} (hash: {}, already exists as: {})",
-                        file_info.name, &file_hash[..8], existing_doc.original_filename);
+                    info!("Found duplicate content for user {}: {} matches existing document {}",
+                        user_id, file_info.name, existing_doc.original_filename);
 
-                    // Still record this WebDAV file in the tracking table to prevent re-downloading
+                    // Record this WebDAV file as a duplicate but link to existing document
                    let webdav_file = CreateWebDAVFile {
                        user_id,
                        webdav_path: file_info.path.clone(),
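Note: calculate_file_hash itself is not part of this diff. A plausible implementation, assuming a hex-encoded SHA-256 digest via the sha2 and hex crates (the real helper may differ), which would also explain the &file_hash[..8] prefix used in the log messages:

use sha2::{Digest, Sha256};

// Hex-encoded SHA-256 of the raw file bytes; &hash[..8] then yields the
// first eight hex characters shown in the log lines above.
fn calculate_file_hash(data: &[u8]) -> String {
    let mut hasher = Sha256::new();
    hasher.update(data);
    hex::encode(hasher.finalize())
}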
@@ -275,9 +288,11 @@ async fn process_single_file(
                        error!("Failed to record duplicate WebDAV file: {}", e);
                    }
 
+                    info!("WebDAV file marked as duplicate_content, skipping processing");
                    return Ok(false); // Not processed (duplicate)
                }
-            }
+            } else {
+                warn!("Could not read existing file for hash comparison: {}", existing_doc.file_path);
            }
        }
    }
@@ -325,8 +340,10 @@ async fn process_single_file(
 
    // Queue for OCR processing if enabled
    if enable_background_ocr {
+        info!("Background OCR is enabled, queueing document {} for processing", created_document.id);
+
        match state.db.pool.acquire().await {
-            Ok(conn) => {
+            Ok(_conn) => {
                let queue_service = crate::ocr_queue::OcrQueueService::new(
                    state.db.clone(),
                    state.db.pool.clone(),
@@ -350,6 +367,8 @@ async fn process_single_file(
                error!("Failed to connect to database for OCR queueing: {}", e);
            }
        }
+    } else {
+        info!("Background OCR is disabled, skipping OCR queue for document {}", created_document.id);
    }
 
    Ok(true) // Successfully processed
@@ -13,6 +13,16 @@ use crate::{config::Config, db::Database, file_service::FileService, ocr_queue::
 
 pub async fn start_folder_watcher(config: Config, db: Database) -> Result<()> {
     info!("Starting hybrid folder watcher on: {}", config.watch_folder);
+    info!("Upload path configured as: {}", config.upload_path);
+
+    // Debug: Check if paths resolve correctly
+    let watch_canonical = std::path::Path::new(&config.watch_folder).canonicalize()
+        .unwrap_or_else(|_| std::path::PathBuf::from(&config.watch_folder));
+    let upload_canonical = std::path::Path::new(&config.upload_path).canonicalize()
+        .unwrap_or_else(|_| std::path::PathBuf::from(&config.upload_path));
+
+    info!("Watch folder canonical path: {:?}", watch_canonical);
+    info!("Upload folder canonical path: {:?}", upload_canonical);
 
     // Initialize services with shared database
     let file_service = FileService::new(config.upload_path.clone());
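Note: Path::canonicalize resolves symlinks and relative components but returns an error for paths that do not exist yet, which is why the new debug logging falls back to the raw configured value. A standalone illustration of that fallback:

use std::path::{Path, PathBuf};

// Resolve a configured path to its canonical form, or keep the raw string
// when the path cannot be resolved (e.g. it does not exist yet).
fn canonical_or_raw(configured: &str) -> PathBuf {
    Path::new(configured)
        .canonicalize()
        .unwrap_or_else(|_| PathBuf::from(configured))
}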
@@ -167,6 +177,7 @@ async fn scan_directory(
    {
        if entry.file_type().is_file() {
            let path = entry.path().to_path_buf();
+            debug!("Found file during scan: {:?}", path);
 
            if let Ok(metadata) = entry.metadata() {
                if let Ok(modified) = metadata.modified() {
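Note: the entry.file_type().is_file() / entry.metadata() calls in this scan follow the shape of the walkdir crate's API. A minimal sketch of that kind of traversal, assuming walkdir (the crate actually used is not visible in this hunk):

use std::path::PathBuf;
use std::time::SystemTime;
use walkdir::WalkDir;

// Collect (path, mtime) for every regular file under `root`, skipping
// entries whose metadata cannot be read.
fn scan(root: &str) -> Vec<(PathBuf, SystemTime)> {
    WalkDir::new(root)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter_map(|entry| {
            let modified = entry.metadata().ok()?.modified().ok()?;
            Some((entry.path().to_path_buf(), modified))
        })
        .collect()
}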
@@ -307,12 +318,19 @@ async fn process_file(
    // Calculate file hash for deduplication
    let file_hash = calculate_file_hash(&file_data);
 
-    // Check if this exact file content already exists in the system by comparing
-    // against existing files with the same size (performance optimization)
+    // Check if this exact file content already exists for the admin user
+    debug!("Checking for duplicate content for admin user: {} (hash: {}, size: {} bytes)",
+        filename, &file_hash[..8], file_size);
+
+    // Query documents with the same file size for the admin user only
    if let Ok(existing_docs) = db.get_documents_by_user_with_role(admin_user_id, crate::models::UserRole::Admin, 1000, 0).await {
-        for existing_doc in existing_docs {
-            // Quick size check first (much faster than hash comparison)
-            if existing_doc.file_size == file_size {
+        let matching_docs: Vec<_> = existing_docs.into_iter()
+            .filter(|doc| doc.file_size == file_size)
+            .collect();
+
+        debug!("Found {} documents with same size for admin user", matching_docs.len());
+
+        for existing_doc in matching_docs {
            // Read the existing file and compare hashes
            if let Ok(existing_file_data) = tokio::fs::read(&existing_doc.file_path).await {
                let existing_hash = calculate_file_hash(&existing_file_data);
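Note: both duplicate checks (the per-user WebDAV one above and this admin-user one) share the same pattern: filter candidates by file size first, and only hash the few that match, since hashing requires reading the whole file from disk. A consolidated sketch of that pattern (the Doc struct is a hypothetical stand-in for the document rows; calculate_file_hash is the helper sketched earlier):

struct Doc { file_path: String, file_size: i64 }

// Return the path of the first existing document whose contents match `data`.
// The size filter rejects most candidates without any disk I/O.
async fn find_duplicate(data: &[u8], docs: &[Doc]) -> Option<String> {
    let hash = calculate_file_hash(data);
    let size = data.len() as i64;
    for doc in docs.iter().filter(|d| d.file_size == size) {
        if let Ok(bytes) = tokio::fs::read(&doc.file_path).await {
            if calculate_file_hash(&bytes) == hash {
                return Some(doc.file_path.clone());
            }
        }
    }
    None
}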
@@ -324,7 +342,6 @@ async fn process_file(
            }
        }
    }
-    }
 
    debug!("File content is unique: {} (hash: {})", filename, &file_hash[..8]);
 
@@ -143,11 +143,21 @@ impl WebDAVScheduler {
                let elapsed_minutes = elapsed.num_minutes();
 
                if elapsed_minutes < sync_interval_minutes as i64 {
+                    // Only log this occasionally to avoid spam
+                    if elapsed_minutes % 10 == 0 {
                        info!("Sync not due for user {} (last sync {} minutes ago, interval {} minutes)",
                            user_settings.user_id, elapsed_minutes, sync_interval_minutes);
+                    }
                    return Ok(false);
                }
 
+                info!("Sync is due for user {} (last sync {} minutes ago, interval {} minutes)",
+                    user_settings.user_id, elapsed_minutes, sync_interval_minutes);
+            } else {
+                info!("No previous sync found for user {}, sync is due", user_settings.user_id);
            }
+        } else {
+            info!("No sync state found for user {}, sync is due", user_settings.user_id);
        }
 
        // Sync is due
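Note: the scheduler's due-check reduces to comparing elapsed minutes against the configured interval, with a missing sync record treated as immediately due. A compact sketch, assuming chrono's DateTime<Utc> for the stored last-sync timestamp:

use chrono::{DateTime, Utc};

// True when enough time has passed since the last completed sync;
// a user with no recorded sync is always due.
fn sync_is_due(last_sync: Option<DateTime<Utc>>, interval_minutes: i64) -> bool {
    match last_sync {
        None => true,
        Some(ts) => (Utc::now() - ts).num_minutes() >= interval_minutes,
    }
}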