feat(server/client): update FailedOcrPage, add duplicate management and file hashing

perf3ct 2025-06-17 16:17:23 +00:00
parent 9dccc6d1de
commit bdb136d615
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
12 changed files with 2008 additions and 7 deletions

View File

@ -27,6 +27,8 @@ import {
Collapse,
LinearProgress,
Snackbar,
Tabs,
Tab,
} from '@mui/material';
import {
Refresh as RefreshIcon,
@ -37,6 +39,7 @@ import {
Schedule as ScheduleIcon,
Visibility as VisibilityIcon,
Download as DownloadIcon,
FileCopy as FileCopyIcon,
} from '@mui/icons-material';
import { format } from 'date-fns';
import { api, documentService } from '../services/api';
@ -87,16 +90,54 @@ interface RetryResponse {
estimated_wait_minutes?: number;
}
interface DuplicateDocument {
id: string;
filename: string;
original_filename: string;
file_size: number;
mime_type: string;
created_at: string;
user_id: string;
}
interface DuplicateGroup {
file_hash: string;
duplicate_count: number;
first_uploaded: string;
last_uploaded: string;
documents: DuplicateDocument[];
}
interface DuplicatesResponse {
duplicates: DuplicateGroup[];
pagination: {
total: number;
limit: number;
offset: number;
has_more: boolean;
};
statistics: {
total_duplicate_groups: number;
};
}
const FailedOcrPage: React.FC = () => {
const [currentTab, setCurrentTab] = useState(0);
const [documents, setDocuments] = useState<FailedDocument[]>([]);
const [duplicates, setDuplicates] = useState<DuplicateGroup[]>([]);
const [loading, setLoading] = useState(true);
const [duplicatesLoading, setDuplicatesLoading] = useState(false);
const [retrying, setRetrying] = useState<string | null>(null);
const [statistics, setStatistics] = useState<FailedOcrResponse['statistics'] | null>(null);
const [duplicateStatistics, setDuplicateStatistics] = useState<DuplicatesResponse['statistics'] | null>(null);
const [pagination, setPagination] = useState({ page: 1, limit: 25 });
const [duplicatesPagination, setDuplicatesPagination] = useState({ page: 1, limit: 25 });
const [totalPages, setTotalPages] = useState(0);
const [duplicatesTotalPages, setDuplicatesTotalPages] = useState(0);
const [selectedDocument, setSelectedDocument] = useState<FailedDocument | null>(null);
const [detailsOpen, setDetailsOpen] = useState(false);
const [expandedRows, setExpandedRows] = useState<Set<string>>(new Set());
const [expandedDuplicateGroups, setExpandedDuplicateGroups] = useState<Set<string>>(new Set());
const [snackbar, setSnackbar] = useState<{ open: boolean; message: string; severity: 'success' | 'error' }>({
open: false,
message: '',
@ -124,10 +165,37 @@ const FailedOcrPage: React.FC = () => {
}
};
const fetchDuplicates = async () => {
try {
setDuplicatesLoading(true);
const offset = (duplicatesPagination.page - 1) * duplicatesPagination.limit;
const response = await documentService.getDuplicates(duplicatesPagination.limit, offset);
setDuplicates(response.data.duplicates);
setDuplicateStatistics(response.data.statistics);
setDuplicatesTotalPages(Math.ceil(response.data.pagination.total / duplicatesPagination.limit));
} catch (error) {
console.error('Failed to fetch duplicates:', error);
setSnackbar({
open: true,
message: 'Failed to load duplicate documents',
severity: 'error'
});
} finally {
setDuplicatesLoading(false);
}
};
useEffect(() => {
fetchFailedDocuments();
}, [pagination.page]);
useEffect(() => {
if (currentTab === 1) {
fetchDuplicates();
}
}, [currentTab, duplicatesPagination.page]);
const handleRetryOcr = async (document: FailedDocument) => {
try {
setRetrying(document.id);
@ -200,6 +268,28 @@ const FailedOcrPage: React.FC = () => {
setDetailsOpen(true);
};
const toggleDuplicateGroupExpansion = (groupHash: string) => {
const newExpanded = new Set(expandedDuplicateGroups);
if (newExpanded.has(groupHash)) {
newExpanded.delete(groupHash);
} else {
newExpanded.add(groupHash);
}
setExpandedDuplicateGroups(newExpanded);
};
const handleTabChange = (event: React.SyntheticEvent, newValue: number) => {
setCurrentTab(newValue);
};
const refreshCurrentTab = () => {
if (currentTab === 0) {
fetchFailedDocuments();
} else {
fetchDuplicates();
}
};
if (loading && documents.length === 0) {
return (
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
@ -212,20 +302,38 @@ const FailedOcrPage: React.FC = () => {
<Box sx={{ p: 3 }}>
<Box display="flex" justifyContent="space-between" alignItems="center" mb={3}>
<Typography variant="h4" component="h1">
Failed OCR Documents
Failed OCR & Duplicates
</Typography>
<Button
variant="outlined"
startIcon={<RefreshIcon />}
onClick={fetchFailedDocuments}
disabled={loading}
onClick={refreshCurrentTab}
disabled={loading || duplicatesLoading}
>
Refresh
</Button>
</Box>
{/* Statistics Overview */}
{statistics && (
<Paper sx={{ mb: 3 }}>
<Tabs value={currentTab} onChange={handleTabChange} aria-label="failed ocr and duplicates tabs">
<Tab
icon={<ErrorIcon />}
label={`Failed OCR${statistics ? ` (${statistics.total_failed})` : ''}`}
iconPosition="start"
/>
<Tab
icon={<FileCopyIcon />}
label={`Duplicates${duplicateStatistics ? ` (${duplicateStatistics.total_duplicate_groups})` : ''}`}
iconPosition="start"
/>
</Tabs>
</Paper>
{/* Failed OCR Tab Content */}
{currentTab === 0 && (
<>
{/* Statistics Overview */}
{statistics && (
<Grid container spacing={3} mb={3}>
<Grid item xs={12} md={4}>
<Card>
@ -435,6 +543,174 @@ const FailedOcrPage: React.FC = () => {
)}
</>
)}
</>
)}
{/* Duplicates Tab Content */}
{currentTab === 1 && (
<>
{/* Duplicate Statistics Overview */}
{duplicateStatistics && (
<Grid container spacing={3} mb={3}>
<Grid item xs={12} md={6}>
<Card>
<CardContent>
<Typography variant="h6" color="warning.main">
<FileCopyIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
Total Duplicate Groups
</Typography>
<Typography variant="h3" color="warning.main">
{duplicateStatistics.total_duplicate_groups}
</Typography>
</CardContent>
</Card>
</Grid>
</Grid>
)}
{duplicatesLoading ? (
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
<CircularProgress />
</Box>
) : duplicates.length === 0 ? (
<Alert severity="success" sx={{ mt: 2 }}>
<AlertTitle>No duplicates found!</AlertTitle>
You don't have any duplicate documents. All your files have unique content.
</Alert>
) : (
<>
<Alert severity="info" sx={{ mb: 2 }}>
<AlertTitle>Duplicate Documents</AlertTitle>
These documents have identical content but may have different filenames.
You can click on each group to see all the documents with the same content.
</Alert>
<TableContainer component={Paper}>
<Table>
<TableHead>
<TableRow>
<TableCell />
<TableCell>Content Hash</TableCell>
<TableCell>Duplicate Count</TableCell>
<TableCell>First Uploaded</TableCell>
<TableCell>Last Uploaded</TableCell>
<TableCell>Actions</TableCell>
</TableRow>
</TableHead>
<TableBody>
{duplicates.map((group) => (
<React.Fragment key={group.file_hash}>
<TableRow>
<TableCell>
<IconButton
size="small"
onClick={() => toggleDuplicateGroupExpansion(group.file_hash)}
>
{expandedDuplicateGroups.has(group.file_hash) ? <ExpandLessIcon /> : <ExpandMoreIcon />}
</IconButton>
</TableCell>
<TableCell>
<Typography variant="body2" fontFamily="monospace">
{group.file_hash.substring(0, 16)}...
</Typography>
</TableCell>
<TableCell>
<Chip
label={`${group.duplicate_count} files`}
color="warning"
size="small"
/>
</TableCell>
<TableCell>
<Typography variant="body2">
{format(new Date(group.first_uploaded), 'MMM dd, yyyy')}
</Typography>
</TableCell>
<TableCell>
<Typography variant="body2">
{format(new Date(group.last_uploaded), 'MMM dd, yyyy')}
</Typography>
</TableCell>
<TableCell>
<Typography variant="body2" color="text.secondary">
View files below
</Typography>
</TableCell>
</TableRow>
<TableRow>
<TableCell sx={{ paddingBottom: 0, paddingTop: 0 }} colSpan={6}>
<Collapse in={expandedDuplicateGroups.has(group.file_hash)} timeout="auto" unmountOnExit>
<Box sx={{ margin: 1, p: 2, bgcolor: 'grey.50' }}>
<Typography variant="h6" gutterBottom>
Duplicate Files ({group.duplicate_count} total)
</Typography>
<Grid container spacing={2}>
{group.documents.map((doc, index) => (
<Grid item xs={12} md={6} key={doc.id}>
<Card variant="outlined">
<CardContent>
<Typography variant="body2" fontWeight="bold">
{doc.filename}
</Typography>
{doc.original_filename !== doc.filename && (
<Typography variant="caption" color="text.secondary">
Original: {doc.original_filename}
</Typography>
)}
<Typography variant="caption" display="block" color="text.secondary">
{formatFileSize(doc.file_size)} {doc.mime_type}
</Typography>
<Typography variant="caption" display="block" color="text.secondary">
Uploaded: {format(new Date(doc.created_at), 'MMM dd, yyyy HH:mm')}
</Typography>
<Box mt={1}>
<Tooltip title="View Document">
<IconButton
size="small"
onClick={() => window.open(`/api/documents/${doc.id}/view`, '_blank')}
>
<VisibilityIcon />
</IconButton>
</Tooltip>
<Tooltip title="Download Document">
<IconButton
size="small"
onClick={() => window.open(`/api/documents/${doc.id}/download`, '_blank')}
>
<DownloadIcon />
</IconButton>
</Tooltip>
</Box>
</CardContent>
</Card>
</Grid>
))}
</Grid>
</Box>
</Collapse>
</TableCell>
</TableRow>
</React.Fragment>
))}
</TableBody>
</Table>
</TableContainer>
{/* Duplicates Pagination */}
{duplicatesTotalPages > 1 && (
<Box display="flex" justifyContent="center" mt={3}>
<Pagination
count={duplicatesTotalPages}
page={duplicatesPagination.page}
onChange={(_, page) => setDuplicatesPagination(prev => ({ ...prev, page }))}
color="primary"
/>
</Box>
)}
</>
)}
</>
)}
{/* Document Details Dialog */}
<Dialog

View File

@ -183,6 +183,12 @@ export const documentService = {
})
},
getDuplicates: (limit = 25, offset = 0) => {
return api.get(`/documents/duplicates`, {
params: { limit, offset },
})
},
search: (searchRequest: SearchRequest) => {
return api.get<SearchResponse>('/search', {
params: searchRequest,

View File

@ -991,6 +991,7 @@ impl Database {
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
user_id: row.get("user_id"),
file_hash: row.get("file_hash"),
});
}
@ -1125,6 +1126,7 @@ impl Database {
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
user_id: row.get("user_id"),
file_hash: row.get("file_hash"),
})),
None => Ok(None),
}
@ -1170,4 +1172,124 @@ impl Database {
None => Ok(None),
}
}
/// Get documents grouped by duplicate hashes for a user
pub async fn get_user_duplicates(&self, user_id: Uuid, user_role: crate::models::UserRole, limit: i64, offset: i64) -> Result<(Vec<serde_json::Value>, i64)> {
let (docs_query, count_query) = if user_role == crate::models::UserRole::Admin {
// Admins can see all duplicates
(
r#"
SELECT
file_hash,
COUNT(*) as duplicate_count,
MIN(created_at) as first_uploaded,
MAX(created_at) as last_uploaded,
json_agg(
json_build_object(
'id', id,
'filename', filename,
'original_filename', original_filename,
'file_size', file_size,
'mime_type', mime_type,
'created_at', created_at,
'user_id', user_id
) ORDER BY created_at
) as documents
FROM documents
WHERE file_hash IS NOT NULL
GROUP BY file_hash
HAVING COUNT(*) > 1
ORDER BY duplicate_count DESC, first_uploaded DESC
LIMIT $1 OFFSET $2
"#,
r#"
SELECT COUNT(*) as total FROM (
SELECT file_hash
FROM documents
WHERE file_hash IS NOT NULL
GROUP BY file_hash
HAVING COUNT(*) > 1
) as duplicate_groups
"#
)
} else {
// Regular users see only their own duplicates
(
r#"
SELECT
file_hash,
COUNT(*) as duplicate_count,
MIN(created_at) as first_uploaded,
MAX(created_at) as last_uploaded,
json_agg(
json_build_object(
'id', id,
'filename', filename,
'original_filename', original_filename,
'file_size', file_size,
'mime_type', mime_type,
'created_at', created_at,
'user_id', user_id
) ORDER BY created_at
) as documents
FROM documents
WHERE user_id = $3 AND file_hash IS NOT NULL
GROUP BY file_hash
HAVING COUNT(*) > 1
ORDER BY duplicate_count DESC, first_uploaded DESC
LIMIT $1 OFFSET $2
"#,
r#"
SELECT COUNT(*) as total FROM (
SELECT file_hash
FROM documents
WHERE user_id = $1 AND file_hash IS NOT NULL
GROUP BY file_hash
HAVING COUNT(*) > 1
) as duplicate_groups
"#
)
};
let rows = if user_role == crate::models::UserRole::Admin {
sqlx::query(docs_query)
.bind(limit)
.bind(offset)
.fetch_all(&self.pool)
.await?
} else {
sqlx::query(docs_query)
.bind(limit)
.bind(offset)
.bind(user_id)
.fetch_all(&self.pool)
.await?
};
let duplicates: Vec<serde_json::Value> = rows
.into_iter()
.map(|row| {
serde_json::json!({
"file_hash": row.get::<String, _>("file_hash"),
"duplicate_count": row.get::<i64, _>("duplicate_count"),
"first_uploaded": row.get::<chrono::DateTime<chrono::Utc>, _>("first_uploaded"),
"last_uploaded": row.get::<chrono::DateTime<chrono::Utc>, _>("last_uploaded"),
"documents": row.get::<serde_json::Value, _>("documents")
})
})
.collect();
let total = if user_role == crate::models::UserRole::Admin {
sqlx::query_scalar::<_, i64>(count_query)
.fetch_one(&self.pool)
.await?
} else {
sqlx::query_scalar::<_, i64>(count_query)
.bind(user_id)
.fetch_one(&self.pool)
.await?
};
Ok((duplicates, total))
}
}

View File

@ -37,6 +37,7 @@ pub fn router() -> Router<Arc<AppState>> {
.route("/{id}/processed-image", get(get_processed_image))
.route("/{id}/retry-ocr", post(retry_ocr))
.route("/failed-ocr", get(get_failed_ocr_documents))
.route("/duplicates", get(get_user_duplicates))
}
#[utoipa::path(
@ -226,7 +227,7 @@ fn calculate_file_hash(data: &[u8]) -> String {
("ocr_status" = Option<String>, Query, description = "Filter by OCR status (pending, processing, completed, failed)")
),
responses(
(status = 200, description = "List of user documents", body = Vec<DocumentResponse>),
(status = 200, description = "Paginated list of user documents with metadata", body = String),
(status = 401, description = "Unauthorized")
)
)]
@ -809,4 +810,50 @@ async fn get_failure_statistics(
.collect();
Ok(serde_json::json!(categories))
}
#[utoipa::path(
get,
path = "/api/documents/duplicates",
tag = "documents",
security(
("bearer_auth" = [])
),
params(
("limit" = Option<i64>, Query, description = "Number of duplicate groups to return per page"),
("offset" = Option<i64>, Query, description = "Number of duplicate groups to skip")
),
responses(
(status = 200, description = "User's duplicate documents grouped by hash", body = String),
(status = 401, description = "Unauthorized")
)
)]
async fn get_user_duplicates(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Query(query): Query<PaginationQuery>,
) -> Result<Json<serde_json::Value>, StatusCode> {
let limit = query.limit.unwrap_or(25);
let offset = query.offset.unwrap_or(0);
let (duplicates, total_count) = state
.db
.get_user_duplicates(auth_user.user.id, auth_user.user.role, limit, offset)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let response = serde_json::json!({
"duplicates": duplicates,
"pagination": {
"total": total_count,
"limit": limit,
"offset": offset,
"has_more": offset + limit < total_count
},
"statistics": {
"total_duplicate_groups": total_count
}
});
Ok(Json(response))
}

View File

@ -8,7 +8,12 @@ use crate::{
models::{
CreateUser, LoginRequest, LoginResponse, UserResponse, UpdateUser,
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange,
FacetItem, SearchFacetsResponse, Notification, NotificationSummary, CreateNotification,
Source, SourceResponse, CreateSource, UpdateSource, SourceWithStats,
WebDAVSourceConfig, LocalFolderSourceConfig, S3SourceConfig,
WebDAVCrawlEstimate, WebDAVTestConnection, WebDAVConnectionResult, WebDAVSyncStatus,
ProcessedImage, CreateProcessedImage
},
routes::metrics::{
SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
@ -26,10 +31,19 @@ use crate::{
// Document endpoints
crate::routes::documents::upload_document,
crate::routes::documents::list_documents,
crate::routes::documents::get_document_by_id,
crate::routes::documents::download_document,
crate::routes::documents::view_document,
crate::routes::documents::get_document_thumbnail,
crate::routes::documents::get_document_ocr,
crate::routes::documents::get_processed_image,
crate::routes::documents::retry_ocr,
crate::routes::documents::get_failed_ocr_documents,
crate::routes::documents::get_user_duplicates,
// Search endpoints
crate::routes::search::search_documents,
crate::routes::search::enhanced_search_documents,
crate::routes::search::get_search_facets,
// Settings endpoints
crate::routes::settings::get_settings,
crate::routes::settings::update_settings,
@ -42,14 +56,46 @@ use crate::{
// Queue endpoints
crate::routes::queue::get_queue_stats,
crate::routes::queue::requeue_failed,
crate::routes::queue::get_ocr_status,
crate::routes::queue::pause_ocr_processing,
crate::routes::queue::resume_ocr_processing,
// Metrics endpoints
crate::routes::metrics::get_system_metrics,
// Notifications endpoints
crate::routes::notifications::get_notifications,
crate::routes::notifications::get_notification_summary,
crate::routes::notifications::mark_notification_read,
crate::routes::notifications::mark_all_notifications_read,
crate::routes::notifications::delete_notification,
// Sources endpoints
crate::routes::sources::list_sources,
crate::routes::sources::create_source,
crate::routes::sources::get_source,
crate::routes::sources::update_source,
crate::routes::sources::delete_source,
crate::routes::sources::trigger_sync,
crate::routes::sources::stop_sync,
crate::routes::sources::test_connection,
crate::routes::sources::estimate_crawl,
crate::routes::sources::estimate_crawl_with_config,
crate::routes::sources::test_connection_with_config,
// WebDAV endpoints
crate::routes::webdav::start_webdav_sync,
crate::routes::webdav::cancel_webdav_sync,
crate::routes::webdav::get_webdav_sync_status,
crate::routes::webdav::test_webdav_connection,
crate::routes::webdav::estimate_webdav_crawl,
),
components(
schemas(
CreateUser, LoginRequest, LoginResponse, UserResponse, UpdateUser,
DocumentResponse, SearchRequest, SearchResponse, EnhancedDocumentResponse,
SettingsResponse, UpdateSettings, SearchMode, SearchSnippet, HighlightRange,
FacetItem, SearchFacetsResponse, Notification, NotificationSummary, CreateNotification,
Source, SourceResponse, CreateSource, UpdateSource, SourceWithStats,
WebDAVSourceConfig, LocalFolderSourceConfig, S3SourceConfig,
WebDAVCrawlEstimate, WebDAVTestConnection, WebDAVConnectionResult, WebDAVSyncStatus,
ProcessedImage, CreateProcessedImage,
SystemMetrics, DatabaseMetrics, OcrMetrics, DocumentMetrics, UserMetrics, GeneralSystemMetrics
)
),
@ -61,6 +107,9 @@ use crate::{
(name = "users", description = "User management endpoints"),
(name = "queue", description = "OCR queue management endpoints"),
(name = "metrics", description = "System metrics and monitoring endpoints"),
(name = "notifications", description = "User notification endpoints"),
(name = "sources", description = "Document source management endpoints"),
(name = "webdav", description = "WebDAV synchronization endpoints"),
),
modifiers(&SecurityAddon),
info(

View File

@ -48,6 +48,7 @@ mod tests {
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some("abcd1234567890123456789012345678901234567890123456789012345678".to_string()),
}
}

View File

@ -25,6 +25,7 @@ mod tests {
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some("1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef".to_string()),
}
}
@ -48,6 +49,7 @@ mod tests {
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some("fedcba0987654321fedcba0987654321fedcba0987654321fedcba0987654321".to_string()),
}
}
@ -71,6 +73,7 @@ mod tests {
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some("abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890".to_string()),
}
}

View File

@ -938,6 +938,7 @@ mod tests {
created_at: Utc::now(),
updated_at: Utc::now(),
user_id: user.id,
file_hash: Some("0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef".to_string()),
};
db.create_document(document).await.unwrap();

View File

@ -0,0 +1,391 @@
use anyhow::Result;
use chrono::Utc;
use std::sync::Arc;
use uuid::Uuid;
use sha2::{Sha256, Digest};
use readur::{
AppState,
db::Database,
config::Config,
models::{Document, CreateUser, UserRole},
};
// Helper function to calculate file hash
fn calculate_file_hash(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
let result = hasher.finalize();
format!("{:x}", result)
}
// Helper function to create test document
fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Document {
Document {
id: Uuid::new_v4(),
filename: filename.to_string(),
original_filename: filename.to_string(),
file_path: format!("/tmp/{}", filename),
file_size: 1024,
mime_type: "application/pdf".to_string(),
content: None,
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some(file_hash),
}
}
// Helper function to create test user with unique identifier
fn create_test_user_with_suffix(suffix: &str) -> CreateUser {
CreateUser {
username: format!("testuser_{}", suffix),
email: format!("test_{}@example.com", suffix),
password: "test_password".to_string(),
role: Some(UserRole::User),
}
}
async fn create_test_app_state() -> Result<Arc<AppState>> {
let config = Config::from_env().unwrap_or_else(|_| {
// Create a test config if env fails
Config {
database_url: "postgresql://readur:readur@localhost:5432/readur".to_string(),
server_address: "127.0.0.1:8000".to_string(),
jwt_secret: "test-secret".to_string(),
upload_path: "./test-uploads".to_string(),
watch_folder: "./test-watch".to_string(),
allowed_file_types: vec!["pdf".to_string(), "txt".to_string()],
watch_interval_seconds: Some(30),
file_stability_check_ms: Some(500),
max_file_age_hours: None,
ocr_language: "eng".to_string(),
concurrent_ocr_jobs: 2,
ocr_timeout_seconds: 60,
max_file_size_mb: 10,
memory_limit_mb: 256,
cpu_priority: "normal".to_string(),
}
});
let db = Database::new("postgresql://readur:readur@localhost:5432/readur").await?;
let queue_service = std::sync::Arc::new(
readur::ocr_queue::OcrQueueService::new(db.clone(), db.get_pool().clone(), 1)
);
Ok(Arc::new(AppState {
db,
config,
webdav_scheduler: None,
source_scheduler: None,
queue_service,
}))
}
#[tokio::test]
async fn test_document_upload_duplicate_detection_returns_existing() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
// Test content
let test_content = b"This is test PDF content for upload duplicate detection";
let file_hash = calculate_file_hash(test_content);
// Create existing document with same hash
let existing_doc = create_test_document(user_id, "existing.pdf", file_hash.clone());
let created_doc = state.db.create_document(existing_doc).await?;
// Test that the hash lookup would find the existing document
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &file_hash).await?;
assert!(duplicate_check.is_some(), "Should find existing document with same hash");
let found_doc = duplicate_check.unwrap();
assert_eq!(found_doc.id, created_doc.id);
assert_eq!(found_doc.file_hash, Some(file_hash));
Ok(())
}
#[tokio::test]
async fn test_document_upload_unique_content_processed() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
// Test content
let test_content = b"This is unique PDF content for upload processing";
let file_hash = calculate_file_hash(test_content);
// Verify no existing document with this hash
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &file_hash).await?;
assert!(duplicate_check.is_none(), "Should not find any existing document with this hash");
Ok(())
}
#[tokio::test]
async fn test_document_upload_different_users_same_content() -> Result<()> {
let state = create_test_app_state().await?;
// Create two users
let user1 = create_test_user_with_suffix(&format!("different_users_1_{}", Uuid::new_v4().simple()));
let created_user1 = state.db.create_user(user1).await?;
let user1_id = created_user1.id;
let user2 = create_test_user_with_suffix(&format!("different_users_2_{}", Uuid::new_v4().simple()));
let created_user2 = state.db.create_user(user2).await?;
let user2_id = created_user2.id;
// Test content
let test_content = b"Shared content between different users for upload";
let file_hash = calculate_file_hash(test_content);
// Create document for user1 with this hash
let user1_doc = create_test_document(user1_id, "user1.pdf", file_hash.clone());
state.db.create_document(user1_doc).await?;
// Check that user2 doesn't see user1's document as duplicate
let duplicate_check = state.db.get_document_by_user_and_hash(user2_id, &file_hash).await?;
assert!(duplicate_check.is_none(), "User2 should not see user1's document as duplicate");
// User2 should be able to create their own document with same hash
let user2_doc = create_test_document(user2_id, "user2.pdf", file_hash.clone());
let result = state.db.create_document(user2_doc).await;
assert!(result.is_ok(), "User2 should be able to create document with same hash");
Ok(())
}
#[tokio::test]
async fn test_document_upload_hash_calculation_accuracy() -> Result<()> {
// Test various file contents and ensure hash calculation is accurate
let test_cases = vec![
(b"" as &[u8], "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), // Empty
(b"a", "ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb"), // Single char
(b"Hello, World!", "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f"), // Text
];
for (content, expected_hash) in test_cases {
let calculated_hash = calculate_file_hash(content);
assert_eq!(calculated_hash, expected_hash, "Hash mismatch for content: {:?}", content);
}
Ok(())
}
#[tokio::test]
async fn test_document_upload_large_file_hash() -> Result<()> {
// Test hash calculation for larger files
let large_content = vec![b'X'; 1_000_000]; // 1MB of 'X' characters
let hash1 = calculate_file_hash(&large_content);
let hash2 = calculate_file_hash(&large_content);
// Hash should be consistent
assert_eq!(hash1, hash2);
assert_eq!(hash1.len(), 64); // SHA256 hex length
assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));
Ok(())
}
#[tokio::test]
async fn test_document_upload_binary_content_hash() -> Result<()> {
// Test hash calculation for binary content
let mut binary_content = Vec::new();
for i in 0..256 {
binary_content.push(i as u8);
}
let hash = calculate_file_hash(&binary_content);
assert_eq!(hash.len(), 64);
assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
// Same binary content should produce same hash
let hash2 = calculate_file_hash(&binary_content);
assert_eq!(hash, hash2);
Ok(())
}
#[tokio::test]
async fn test_document_upload_duplicate_prevention_database_constraint() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database and get the created user
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
let test_hash = "duplicate_upload_test_hash_123456789012345678901234567890123456";
// Create first document with the hash
let doc1 = create_test_document(user_id, "test1.pdf", test_hash.to_string());
let result1 = state.db.create_document(doc1).await;
assert!(result1.is_ok(), "First document should be created successfully");
// Try to create second document with same hash for same user
let doc2 = create_test_document(user_id, "test2.pdf", test_hash.to_string());
let result2 = state.db.create_document(doc2).await;
// This should fail due to unique constraint
assert!(result2.is_err(), "Second document with same hash should fail");
Ok(())
}
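The unique constraint that this test (and the race-condition tests later in this commit) relies on is created by a migration that is not part of the hunks shown here. As a rough sketch only, assuming PostgreSQL, such a constraint could be a partial unique index over (user_id, file_hash): it rejects a second document with the same hash for the same user, while still allowing identical content across users and any number of documents with a NULL hash. The index name and helper below are illustrative, not the actual migration from this commit.
// Hypothetical migration sketch -- the real migration is not shown in this diff.
async fn apply_file_hash_constraint(pool: &sqlx::PgPool) -> anyhow::Result<()> {
    sqlx::query(
        r#"
        CREATE UNIQUE INDEX IF NOT EXISTS idx_documents_user_id_file_hash
        ON documents (user_id, file_hash)
        WHERE file_hash IS NOT NULL
        "#,
    )
    .execute(pool)
    .await?;
    Ok(())
}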
#[tokio::test]
async fn test_document_upload_filename_vs_content_duplicate() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database and get the created user
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
// Same content, different filenames
let content = b"Same content, different names";
let hash = calculate_file_hash(content);
// Create first document
let doc1 = create_test_document(user_id, "document_v1.pdf", hash.clone());
state.db.create_document(doc1).await?;
// Check that same content is detected as duplicate regardless of filename
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &hash).await?;
assert!(duplicate_check.is_some(), "Same content should be detected as duplicate regardless of filename");
Ok(())
}
#[tokio::test]
async fn test_document_upload_unicode_content_hash() -> Result<()> {
// Test hash calculation with unicode content
let unicode_content = "Hello 世界 🌍 café naïve résumé".as_bytes();
let hash1 = calculate_file_hash(unicode_content);
let hash2 = calculate_file_hash(unicode_content);
// Hash should be consistent for unicode content
assert_eq!(hash1, hash2);
assert_eq!(hash1.len(), 64);
assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));
Ok(())
}
#[tokio::test]
async fn test_document_upload_concurrent_same_content() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database and get the created user
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
let test_content = b"Concurrent upload test content";
let file_hash = calculate_file_hash(test_content);
// Simulate concurrent uploads of same content
let mut handles = Vec::new();
for i in 0..5 {
let state_clone = state.clone();
let hash_clone = file_hash.clone();
let handle = tokio::spawn(async move {
let doc = create_test_document(user_id, &format!("concurrent{}.pdf", i), hash_clone);
state_clone.db.create_document(doc).await
});
handles.push(handle);
}
// Wait for all operations and count results
let mut success_count = 0;
let mut error_count = 0;
for handle in handles {
match handle.await? {
Ok(_) => success_count += 1,
Err(_) => error_count += 1,
}
}
// Only one should succeed due to unique constraint
assert_eq!(success_count, 1, "Only one document should be created successfully");
assert_eq!(error_count, 4, "Four operations should fail due to duplicate hash");
Ok(())
}
#[tokio::test]
async fn test_document_upload_mime_type_independence() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database and get the created user
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
let content = b"Same content, different perceived types";
let hash = calculate_file_hash(content);
// Create document as PDF
let mut pdf_doc = create_test_document(user_id, "test.pdf", hash.clone());
pdf_doc.mime_type = "application/pdf".to_string();
state.db.create_document(pdf_doc).await?;
// Try to upload same content as text file - should be detected as duplicate
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &hash).await?;
assert!(duplicate_check.is_some(), "Same content should be detected as duplicate regardless of MIME type");
Ok(())
}
#[tokio::test]
async fn test_document_upload_performance_hash_lookup() -> Result<()> {
let state = create_test_app_state().await?;
let user = create_test_user_with_suffix(&format!("upload_{}", uuid::Uuid::new_v4().simple()));
// Create user in database and get the created user
let created_user = state.db.create_user(user).await?;
let user_id = created_user.id;
// Create multiple documents with different hashes
let mut test_hashes = Vec::new();
for i in 0..50 {
let content = format!("Performance test content {}", i);
let hash = calculate_file_hash(content.as_bytes());
test_hashes.push(hash.clone());
let doc = create_test_document(user_id, &format!("perf_test_{}.pdf", i), hash);
state.db.create_document(doc).await?;
}
// Measure hash lookup performance
let start = std::time::Instant::now();
for hash in &test_hashes {
let result = state.db.get_document_by_user_and_hash(user_id, hash).await?;
assert!(result.is_some(), "Should find document with hash: {}", hash);
}
let duration = start.elapsed();
// Hash lookups should be very fast
assert!(duration.as_millis() < 2000, "Hash lookups should be fast even with many documents: {:?}", duration);
Ok(())
}
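Taken together, the tests in this file exercise the upload-side duplicate check: hash the incoming bytes, then look for an existing document with the same hash owned by the same user. A minimal sketch of that check, reusing calculate_file_hash and the imports already present in this file, might look like the following (illustrative only; the actual upload handler lives in server code not shown in this hunk):
// Hypothetical helper mirroring the duplicate check the tests above exercise.
// Returns the already-stored document if this user has uploaded identical content before.
async fn find_existing_by_content(
    db: &Database,
    user_id: Uuid,
    data: &[u8],
) -> anyhow::Result<Option<Document>> {
    // Same SHA256 hex digest produced by calculate_file_hash above
    let file_hash = calculate_file_hash(data);
    // The lookup is scoped to the user, so identical content owned by another user is not returned
    Ok(db.get_document_by_user_and_hash(user_id, &file_hash).await?)
}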

View File

@ -0,0 +1,276 @@
use anyhow::Result;
use chrono::Utc;
use uuid::Uuid;
use sha2::{Sha256, Digest};
use tempfile::TempDir;
use readur::{
db::Database,
file_service::FileService,
models::{Document, CreateUser, UserRole},
};
const TEST_DB_URL: &str = "postgresql://readur:readur@localhost:5432/readur";
// Helper function to create a test user with unique identifier
async fn create_test_user(db: &Database, username: &str) -> Result<Uuid> {
let unique_suffix = Uuid::new_v4().simple();
let user = CreateUser {
username: format!("{}_{}", username, unique_suffix),
email: format!("{}_{}@example.com", username, unique_suffix),
password: "password123".to_string(),
role: Some(UserRole::User),
};
let created_user = db.create_user(user).await?;
Ok(created_user.id)
}
// Helper function to calculate file hash
fn calculate_file_hash(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
let result = hasher.finalize();
format!("{:x}", result)
}
// Helper function to create a test document
fn create_test_document(user_id: Uuid, filename: &str, file_hash: Option<String>) -> Document {
Document {
id: Uuid::new_v4(),
filename: filename.to_string(),
original_filename: filename.to_string(),
file_path: format!("/tmp/{}", filename),
file_size: 1024,
mime_type: "application/pdf".to_string(),
content: None,
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash,
}
}
#[tokio::test]
async fn test_get_document_by_user_and_hash_found() -> Result<()> {
let db = Database::new(TEST_DB_URL).await?;
let user_id = create_test_user(&db, "testuser1").await?;
let file_hash = "abcd1234567890";
// Create a document with the hash
let document = create_test_document(user_id, "test.pdf", Some(file_hash.to_string()));
let created_doc = db.create_document(document).await?;
// Test finding the document by hash
let found_doc = db.get_document_by_user_and_hash(user_id, file_hash).await?;
assert!(found_doc.is_some());
let found_doc = found_doc.unwrap();
assert_eq!(found_doc.id, created_doc.id);
assert_eq!(found_doc.file_hash, Some(file_hash.to_string()));
assert_eq!(found_doc.user_id, user_id);
Ok(())
}
#[tokio::test]
async fn test_get_document_by_user_and_hash_not_found() -> Result<()> {
let db = Database::new(TEST_DB_URL).await?;
let user_id = Uuid::new_v4();
let non_existent_hash = "nonexistent1234567890";
// Test finding a non-existent hash
let found_doc = db.get_document_by_user_and_hash(user_id, non_existent_hash).await?;
assert!(found_doc.is_none());
Ok(())
}
#[tokio::test]
async fn test_get_document_by_user_and_hash_different_user() -> Result<()> {
let db = Database::new(TEST_DB_URL).await?;
let user1_id = create_test_user(&db, "testuser2").await?;
let user2_id = create_test_user(&db, "testuser3").await?;
let file_hash = "shared_hash_1234567890";
// Create a document for user1 with the hash
let document = create_test_document(user1_id, "test.pdf", Some(file_hash.to_string()));
db.create_document(document).await?;
// Test that user2 cannot find user1's document by hash
let found_doc = db.get_document_by_user_and_hash(user2_id, file_hash).await?;
assert!(found_doc.is_none(), "User should not be able to access another user's documents");
Ok(())
}
#[tokio::test]
async fn test_duplicate_hash_prevention_same_user() -> Result<()> {
let db = Database::new(TEST_DB_URL).await?;
let user_id = create_test_user(&db, "testuser4").await?;
let file_hash = "duplicate_hash_1234567890";
// Create first document with the hash
let document1 = create_test_document(user_id, "test1.pdf", Some(file_hash.to_string()));
let result1 = db.create_document(document1).await;
assert!(result1.is_ok(), "First document with hash should be created successfully");
// Try to create second document with same hash for same user
let document2 = create_test_document(user_id, "test2.pdf", Some(file_hash.to_string()));
let result2 = db.create_document(document2).await;
// This should fail due to unique constraint
assert!(result2.is_err(), "Second document with same hash for same user should fail");
Ok(())
}
#[tokio::test]
async fn test_same_hash_different_users_allowed() -> Result<()> {
let db = Database::new(TEST_DB_URL).await?;
let user1_id = create_test_user(&db, "testuser5").await?;
let user2_id = create_test_user(&db, "testuser6").await?;
let file_hash = "shared_content_hash_1234567890";
// Create document for user1 with the hash
let document1 = create_test_document(user1_id, "test1.pdf", Some(file_hash.to_string()));
let result1 = db.create_document(document1).await;
assert!(result1.is_ok(), "First user's document should be created successfully");
// Create document for user2 with same hash
let document2 = create_test_document(user2_id, "test2.pdf", Some(file_hash.to_string()));
let result2 = db.create_document(document2).await;
assert!(result2.is_ok(), "Second user's document with same hash should be allowed");
// Verify both users can find their respective documents
let found_doc1 = db.get_document_by_user_and_hash(user1_id, file_hash).await?;
let found_doc2 = db.get_document_by_user_and_hash(user2_id, file_hash).await?;
assert!(found_doc1.is_some());
assert!(found_doc2.is_some());
assert_ne!(found_doc1.unwrap().id, found_doc2.unwrap().id);
Ok(())
}
#[tokio::test]
async fn test_null_hash_allowed_multiple() -> Result<()> {
let db = Database::new(TEST_DB_URL).await?;
let user_id = create_test_user(&db, "testuser7").await?;
// Create multiple documents with null hash (should be allowed)
let document1 = create_test_document(user_id, "test1.pdf", None);
let result1 = db.create_document(document1).await;
assert!(result1.is_ok(), "First document with null hash should be created");
let document2 = create_test_document(user_id, "test2.pdf", None);
let result2 = db.create_document(document2).await;
assert!(result2.is_ok(), "Second document with null hash should be created");
Ok(())
}
#[test]
fn test_calculate_file_hash_consistency() {
let test_data = b"Hello, World! This is test content for hash calculation.";
// Calculate hash multiple times
let hash1 = calculate_file_hash(test_data);
let hash2 = calculate_file_hash(test_data);
let hash3 = calculate_file_hash(test_data);
// All hashes should be identical
assert_eq!(hash1, hash2);
assert_eq!(hash2, hash3);
// Hash should be 64 characters (SHA256 hex)
assert_eq!(hash1.len(), 64);
// Should be valid hex
assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));
}
#[test]
fn test_calculate_file_hash_different_content() {
let data1 = b"Content 1";
let data2 = b"Content 2";
let data3 = b"content 1"; // Different case
let hash1 = calculate_file_hash(data1);
let hash2 = calculate_file_hash(data2);
let hash3 = calculate_file_hash(data3);
// All hashes should be different
assert_ne!(hash1, hash2);
assert_ne!(hash1, hash3);
assert_ne!(hash2, hash3);
}
#[test]
fn test_calculate_file_hash_empty_content() {
let empty_data = b"";
let hash = calculate_file_hash(empty_data);
// Should produce a valid hash even for empty content
assert_eq!(hash.len(), 64);
assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
// Known SHA256 hash of empty string
assert_eq!(hash, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
}
#[tokio::test]
async fn test_file_service_create_document_with_hash() {
let temp_dir = TempDir::new().unwrap();
let upload_path = temp_dir.path().to_string_lossy().to_string();
let file_service = FileService::new(upload_path);
let user_id = Uuid::new_v4();
let test_hash = "test_hash_1234567890";
let document = file_service.create_document(
"test.pdf",
"original.pdf",
"/path/to/file.pdf",
1024,
"application/pdf",
user_id,
Some(test_hash.to_string()),
);
assert_eq!(document.filename, "test.pdf");
assert_eq!(document.original_filename, "original.pdf");
assert_eq!(document.file_hash, Some(test_hash.to_string()));
assert_eq!(document.user_id, user_id);
}
#[tokio::test]
async fn test_file_service_create_document_without_hash() {
let temp_dir = TempDir::new().unwrap();
let upload_path = temp_dir.path().to_string_lossy().to_string();
let file_service = FileService::new(upload_path);
let user_id = Uuid::new_v4();
let document = file_service.create_document(
"test.pdf",
"original.pdf",
"/path/to/file.pdf",
1024,
"application/pdf",
user_id,
None,
);
assert_eq!(document.filename, "test.pdf");
assert_eq!(document.original_filename, "original.pdf");
assert_eq!(document.file_hash, None);
assert_eq!(document.user_id, user_id);
}

View File

@ -0,0 +1,440 @@
use anyhow::Result;
use chrono::Utc;
use std::sync::Arc;
use uuid::Uuid;
use sha2::{Sha256, Digest};
use readur::{
AppState,
db::Database,
config::Config,
models::{FileInfo, Document, Source, SourceType, SourceStatus},
};
// Helper function to calculate file hash
fn calculate_file_hash(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
let result = hasher.finalize();
format!("{:x}", result)
}
// Helper function to create test file info
fn create_test_file_info(name: &str, path: &str, content: &[u8]) -> FileInfo {
FileInfo {
name: name.to_string(),
path: path.to_string(),
size: content.len() as i64,
last_modified: Some(Utc::now()),
etag: "test-etag".to_string(),
mime_type: "application/pdf".to_string(),
is_directory: false,
}
}
// Helper function to create test document
fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Document {
Document {
id: Uuid::new_v4(),
filename: filename.to_string(),
original_filename: filename.to_string(),
file_path: format!("/tmp/{}", filename),
file_size: 1024,
mime_type: "application/pdf".to_string(),
content: None,
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some(file_hash),
}
}
// Helper function to create test source
fn create_test_source(user_id: Uuid, source_type: SourceType) -> Source {
Source {
id: Uuid::new_v4(),
user_id,
name: "Test Source".to_string(),
source_type,
config: serde_json::json!({}),
status: SourceStatus::Idle,
enabled: true,
last_sync_at: None,
last_error: None,
last_error_at: None,
total_files_synced: 0,
total_files_pending: 0,
total_size_bytes: 0,
created_at: Utc::now(),
updated_at: Utc::now(),
}
}
// Helper function to create a test user with unique identifier
async fn create_test_user(db: &Database, username: &str) -> Result<Uuid> {
use readur::models::{CreateUser, UserRole};
let unique_suffix = Uuid::new_v4().simple();
let user = CreateUser {
username: format!("{}_{}", username, unique_suffix),
email: format!("{}_{}@example.com", username, unique_suffix),
password: "password123".to_string(),
role: Some(UserRole::User),
};
let created_user = db.create_user(user).await?;
Ok(created_user.id)
}
async fn create_test_app_state() -> Result<Arc<AppState>> {
let config = Config::from_env().unwrap_or_else(|_| {
Config {
database_url: "postgresql://readur:readur@localhost:5432/readur".to_string(),
server_address: "127.0.0.1:8000".to_string(),
jwt_secret: "test-secret".to_string(),
upload_path: "./test-uploads".to_string(),
watch_folder: "./test-watch".to_string(),
allowed_file_types: vec!["pdf".to_string(), "txt".to_string()],
watch_interval_seconds: Some(30),
file_stability_check_ms: Some(500),
max_file_age_hours: None,
ocr_language: "eng".to_string(),
concurrent_ocr_jobs: 2,
ocr_timeout_seconds: 60,
max_file_size_mb: 10,
memory_limit_mb: 256,
cpu_priority: "normal".to_string(),
}
});
let db = Database::new("postgresql://readur:readur@localhost:5432/readur").await?;
let queue_service = std::sync::Arc::new(
readur::ocr_queue::OcrQueueService::new(db.clone(), db.get_pool().clone(), 1)
);
Ok(Arc::new(AppState {
db,
config,
webdav_scheduler: None,
source_scheduler: None,
queue_service,
}))
}
#[tokio::test]
async fn test_source_sync_duplicate_detection_skips_duplicate() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Test content
let test_content = b"This is test content for source sync duplicate detection";
let file_hash = calculate_file_hash(test_content);
// Create existing document with same hash
let existing_doc = create_test_document(user_id, "existing.pdf", file_hash.clone());
state.db.create_document(existing_doc).await?;
// Check if duplicate exists using the efficient method
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &file_hash).await?;
assert!(duplicate_check.is_some(), "Should find existing document with same hash");
let found_doc = duplicate_check.unwrap();
assert_eq!(found_doc.file_hash, Some(file_hash));
assert_eq!(found_doc.user_id, user_id);
Ok(())
}
#[tokio::test]
async fn test_source_sync_duplicate_detection_processes_unique() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Test content
let test_content = b"This is unique content that should be processed by source sync";
let file_hash = calculate_file_hash(test_content);
// Verify no existing document with this hash
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &file_hash).await?;
assert!(duplicate_check.is_none(), "Should not find any existing document with this hash");
// This indicates the file would be processed normally
Ok(())
}
#[tokio::test]
async fn test_source_sync_duplicate_different_users() -> Result<()> {
let state = create_test_app_state().await?;
let user1_id = create_test_user(&state.db, "source_sync_user1").await?;
let user2_id = create_test_user(&state.db, "source_sync_user2").await?;
// Test content
let test_content = b"Shared content between different users in source sync";
let file_hash = calculate_file_hash(test_content);
// Create document for user1 with this hash
let user1_doc = create_test_document(user1_id, "user1.pdf", file_hash.clone());
state.db.create_document(user1_doc).await?;
// Check that user2 doesn't see user1's document as duplicate
let duplicate_check = state.db.get_document_by_user_and_hash(user2_id, &file_hash).await?;
assert!(duplicate_check.is_none(), "User2 should not see user1's document as duplicate");
// User2 should be able to create their own document with same hash
let user2_doc = create_test_document(user2_id, "user2.pdf", file_hash.clone());
let result = state.db.create_document(user2_doc).await;
assert!(result.is_ok(), "User2 should be able to create document with same hash");
Ok(())
}
#[tokio::test]
async fn test_source_sync_hash_calculation_consistency() -> Result<()> {
let test_content = b"Test content for hash consistency in source sync";
// Calculate hash multiple times
let hash1 = calculate_file_hash(test_content);
let hash2 = calculate_file_hash(test_content);
let hash3 = calculate_file_hash(test_content);
// All hashes should be identical
assert_eq!(hash1, hash2);
assert_eq!(hash2, hash3);
// Hash should be 64 characters (SHA256 hex)
assert_eq!(hash1.len(), 64);
// Should be valid hex
assert!(hash1.chars().all(|c| c.is_ascii_hexdigit()));
Ok(())
}
#[tokio::test]
async fn test_source_sync_duplicate_detection_performance() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Create multiple documents with different hashes
let mut created_hashes = Vec::new();
for i in 0..10 {
let content = format!("Test content number {}", i);
let hash = calculate_file_hash(content.as_bytes());
created_hashes.push(hash.clone());
let doc = create_test_document(user_id, &format!("test{}.pdf", i), hash);
state.db.create_document(doc).await?;
}
// Test lookup performance - should be fast even with multiple documents
let start = std::time::Instant::now();
for hash in &created_hashes {
let result = state.db.get_document_by_user_and_hash(user_id, hash).await?;
assert!(result.is_some(), "Should find document with hash: {}", hash);
}
let duration = start.elapsed();
assert!(duration.as_millis() < 1000, "Hash lookups should be fast: {:?}", duration);
Ok(())
}
#[tokio::test]
async fn test_source_sync_file_modification_detection() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Original content
let original_content = b"Original file content";
let original_hash = calculate_file_hash(original_content);
// Modified content (same file, different content)
let modified_content = b"Modified file content";
let modified_hash = calculate_file_hash(modified_content);
// Create document with original content
let original_doc = create_test_document(user_id, "test.pdf", original_hash.clone());
state.db.create_document(original_doc).await?;
// Check original content is found
let original_check = state.db.get_document_by_user_and_hash(user_id, &original_hash).await?;
assert!(original_check.is_some(), "Should find document with original hash");
// Check modified content is not found (different hash)
let modified_check = state.db.get_document_by_user_and_hash(user_id, &modified_hash).await?;
assert!(modified_check.is_none(), "Should not find document with modified hash");
// Verify hashes are actually different
assert_ne!(original_hash, modified_hash, "Original and modified content should have different hashes");
Ok(())
}
#[tokio::test]
async fn test_source_sync_edge_case_empty_files() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Empty file content
let empty_content = b"";
let empty_hash = calculate_file_hash(empty_content);
// Create document with empty content
let empty_doc = create_test_document(user_id, "empty.pdf", empty_hash.clone());
state.db.create_document(empty_doc).await?;
// Check empty file is found
let empty_check = state.db.get_document_by_user_and_hash(user_id, &empty_hash).await?;
assert!(empty_check.is_some(), "Should find document with empty content hash");
// Verify empty hash is the known SHA256 empty string hash
assert_eq!(empty_hash, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
Ok(())
}
#[tokio::test]
async fn test_source_sync_large_file_hash_consistency() -> Result<()> {
// Simulate large file content
let large_content = vec![b'A'; 10_000_000]; // 10MB of 'A' characters
// Calculate hash
let hash = calculate_file_hash(&large_content);
// Hash should still be 64 characters
assert_eq!(hash.len(), 64);
assert!(hash.chars().all(|c| c.is_ascii_hexdigit()));
// Calculate same hash again to ensure consistency
let hash2 = calculate_file_hash(&large_content);
assert_eq!(hash, hash2);
Ok(())
}
#[tokio::test]
async fn test_source_sync_binary_file_handling() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Binary content (PDF header + some binary data)
let mut binary_content = b"%PDF-1.4\n".to_vec();
binary_content.extend_from_slice(&[0u8, 1u8, 2u8, 3u8, 255u8, 254u8, 253u8]);
let binary_hash = calculate_file_hash(&binary_content);
// Create document with binary content
let binary_doc = create_test_document(user_id, "binary.pdf", binary_hash.clone());
state.db.create_document(binary_doc).await?;
// Check binary file is found
let binary_check = state.db.get_document_by_user_and_hash(user_id, &binary_hash).await?;
assert!(binary_check.is_some(), "Should find document with binary content hash");
Ok(())
}
#[tokio::test]
async fn test_source_sync_unicode_filename_handling() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Unicode content and filename
let unicode_content = "Test content with unicode: 测试内容 🚀 café".as_bytes();
let unicode_hash = calculate_file_hash(unicode_content);
// Create document with unicode filename
let unicode_doc = create_test_document(user_id, "测试文档🚀.pdf", unicode_hash.clone());
state.db.create_document(unicode_doc).await?;
// Check unicode file is found
let unicode_check = state.db.get_document_by_user_and_hash(user_id, &unicode_hash).await?;
assert!(unicode_check.is_some(), "Should find document with unicode content hash");
let found_doc = unicode_check.unwrap();
assert_eq!(found_doc.filename, "测试文档🚀.pdf");
Ok(())
}
#[tokio::test]
async fn test_source_sync_concurrent_hash_operations() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
// Create multiple concurrent hash lookup operations
let mut handles = Vec::new();
for i in 0..20 {
let state_clone = state.clone();
let hash = format!("{}test_hash_concurrent_{}", "a".repeat(40), i);
let handle = tokio::spawn(async move {
state_clone.db.get_document_by_user_and_hash(user_id, &hash).await
});
handles.push(handle);
}
// Wait for all concurrent operations
let mut results = Vec::new();
for handle in handles {
let result = handle.await??;
results.push(result);
}
// All should return None (no documents exist with these hashes)
for (i, result) in results.iter().enumerate() {
assert!(result.is_none(), "Concurrent operation {} should return None", i);
}
Ok(())
}
#[tokio::test]
async fn test_source_sync_duplicate_prevention_race_condition() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "source_sync_test").await?;
let test_hash = "race_condition_test_hash_123456789012345678901234567890123456";
// Try to create multiple documents with same hash concurrently
let mut handles = Vec::new();
for i in 0..5 {
let state_clone = state.clone();
let hash_clone = test_hash.to_string();
let handle = tokio::spawn(async move {
let doc = create_test_document(user_id, &format!("test{}.pdf", i), hash_clone);
state_clone.db.create_document(doc).await
});
handles.push(handle);
}
// Wait for all operations and count successes
let mut success_count = 0;
let mut error_count = 0;
for handle in handles {
match handle.await? {
Ok(_) => success_count += 1,
Err(_) => error_count += 1,
}
}
// Only one should succeed due to unique constraint
assert_eq!(success_count, 1, "Only one document should be created successfully");
assert_eq!(error_count, 4, "Four operations should fail due to duplicate hash");
Ok(())
}
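The race-condition test above shows that two concurrent inserts of the same content can both pass a pre-check, and that the database constraint is what ultimately prevents the duplicate. A sync implementation therefore has to treat the losing insert as benign. The wrapper below is a hypothetical sketch of that idea, not code from this commit; a stricter version would inspect the error code to confirm it is a unique-constraint violation before ignoring it.
// Hypothetical wrapper -- not part of this diff. If another task stored identical
// content between the duplicate pre-check and the insert, the failed insert is
// treated as "already ingested" rather than as a hard error.
async fn create_or_skip(db: &Database, doc: Document) -> anyhow::Result<bool> {
    match db.create_document(doc).await {
        // Newly ingested: the caller can continue with OCR queueing etc.
        Ok(_) => Ok(true),
        Err(err) => {
            // Assumed to be the (user_id, file_hash) unique-constraint violation
            println!("duplicate insert ignored: {}", err);
            Ok(false)
        }
    }
}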

View File

@ -0,0 +1,389 @@
use anyhow::Result;
use chrono::Utc;
use std::sync::Arc;
use uuid::Uuid;
use sha2::{Sha256, Digest};
use readur::{
AppState,
db::Database,
config::Config,
models::{FileInfo, CreateWebDAVFile, Document},
};
// Helper function to calculate file hash
fn calculate_file_hash(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
let result = hasher.finalize();
format!("{:x}", result)
}
// Helper function to create test file info
fn create_test_file_info(name: &str, path: &str, size: i64) -> FileInfo {
FileInfo {
name: name.to_string(),
path: path.to_string(),
size,
last_modified: Some(Utc::now()),
etag: "test-etag".to_string(),
mime_type: "application/pdf".to_string(),
is_directory: false,
}
}
// Helper function to create test document
fn create_test_document(user_id: Uuid, filename: &str, file_hash: String) -> Document {
Document {
id: Uuid::new_v4(),
filename: filename.to_string(),
original_filename: filename.to_string(),
file_path: format!("/tmp/{}", filename),
file_size: 1024,
mime_type: "application/pdf".to_string(),
content: None,
ocr_text: None,
ocr_confidence: None,
ocr_word_count: None,
ocr_processing_time_ms: None,
ocr_status: Some("pending".to_string()),
ocr_error: None,
ocr_completed_at: None,
tags: Vec::new(),
created_at: Utc::now(),
updated_at: Utc::now(),
user_id,
file_hash: Some(file_hash),
}
}
// Mock WebDAV service for testing
#[derive(Clone)]
struct MockWebDAVService {
pub test_files: std::collections::HashMap<String, Vec<u8>>,
}
impl MockWebDAVService {
fn new() -> Self {
Self {
test_files: std::collections::HashMap::new(),
}
}
fn add_test_file(&mut self, path: &str, content: Vec<u8>) {
self.test_files.insert(path.to_string(), content);
}
async fn download_file(&self, path: &str) -> Result<Vec<u8>> {
self.test_files
.get(path)
.cloned()
.ok_or_else(|| anyhow::anyhow!("File not found: {}", path))
}
}
// Helper function to create a test user with a unique identifier
async fn create_test_user(db: &Database, username: &str) -> Result<Uuid> {
use readur::models::{CreateUser, UserRole};
let unique_suffix = Uuid::new_v4().simple();
let user = CreateUser {
username: format!("{}_{}", username, unique_suffix),
email: format!("{}_{}@example.com", username, unique_suffix),
password: "password123".to_string(),
role: Some(UserRole::User),
};
let created_user = db.create_user(user).await?;
Ok(created_user.id)
}
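// Helper function to build a test AppState backed by the local test database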
async fn create_test_app_state() -> Result<Arc<AppState>> {
let config = Config::from_env().unwrap_or_else(|_| {
Config {
database_url: "postgresql://readur:readur@localhost:5432/readur".to_string(),
server_address: "127.0.0.1:8000".to_string(),
jwt_secret: "test-secret".to_string(),
upload_path: "./test-uploads".to_string(),
watch_folder: "./test-watch".to_string(),
allowed_file_types: vec!["pdf".to_string(), "txt".to_string()],
watch_interval_seconds: Some(30),
file_stability_check_ms: Some(500),
max_file_age_hours: None,
ocr_language: "eng".to_string(),
concurrent_ocr_jobs: 2,
ocr_timeout_seconds: 60,
max_file_size_mb: 10,
memory_limit_mb: 256,
cpu_priority: "normal".to_string(),
}
});
let db = Database::new("postgresql://readur:readur@localhost:5432/readur").await?;
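// A single OCR worker is enough for these duplicate-detection tests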
let queue_service = std::sync::Arc::new(
readur::ocr_queue::OcrQueueService::new(db.clone(), db.get_pool().clone(), 1)
);
Ok(Arc::new(AppState {
db,
config,
webdav_scheduler: None,
source_scheduler: None,
queue_service,
}))
}
#[tokio::test]
async fn test_webdav_sync_duplicate_detection_skips_duplicate() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
// Test content
let test_content = b"This is test PDF content for duplicate detection";
let file_hash = calculate_file_hash(test_content);
// Create existing document with same hash
let existing_doc = create_test_document(user_id, "existing.pdf", file_hash.clone());
state.db.create_document(existing_doc).await?;
// Setup mock WebDAV service
let mut webdav_service = MockWebDAVService::new();
webdav_service.add_test_file("/test/duplicate.pdf", test_content.to_vec());
// Create file info for the duplicate file
let file_info = create_test_file_info("duplicate.pdf", "/test/duplicate.pdf", test_content.len() as i64);
// The sync's process_single_file function is private, so we exercise the
// duplicate detection logic it relies on directly
// Check whether a duplicate exists using the new hash-based lookup
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &file_hash).await?;
assert!(duplicate_check.is_some(), "Should find existing document with same hash");
let found_doc = duplicate_check.unwrap();
assert_eq!(found_doc.file_hash, Some(file_hash));
assert_eq!(found_doc.user_id, user_id);
// Verify that WebDAV tracking would record this as a duplicate
let webdav_file = CreateWebDAVFile {
user_id,
webdav_path: file_info.path.clone(),
etag: file_info.etag.clone(),
last_modified: file_info.last_modified,
file_size: file_info.size,
mime_type: file_info.mime_type.clone(),
document_id: Some(found_doc.id),
sync_status: "duplicate_content".to_string(),
sync_error: None,
};
let created_webdav_file = state.db.create_or_update_webdav_file(&webdav_file).await?;
assert_eq!(created_webdav_file.sync_status, "duplicate_content");
assert_eq!(created_webdav_file.document_id, Some(found_doc.id));
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_duplicate_detection_processes_unique() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
// Test content
let test_content = b"This is unique PDF content that should be processed";
let file_hash = calculate_file_hash(test_content);
// Verify no existing document with this hash
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &file_hash).await?;
assert!(duplicate_check.is_none(), "Should not find any existing document with this hash");
// This indicates the file would be processed normally
// In the actual sync, this would proceed to save the file and create a new document
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_duplicate_different_users() -> Result<()> {
let state = create_test_app_state().await?;
let user1_id = create_test_user(&state.db, "webdav_user1").await?;
let user2_id = create_test_user(&state.db, "webdav_user2").await?;
// Test content
let test_content = b"Shared content between different users";
let file_hash = calculate_file_hash(test_content);
// Create document for user1 with this hash
let user1_doc = create_test_document(user1_id, "user1.pdf", file_hash.clone());
state.db.create_document(user1_doc).await?;
// Check that user2 doesn't see user1's document as duplicate
let duplicate_check = state.db.get_document_by_user_and_hash(user2_id, &file_hash).await?;
assert!(duplicate_check.is_none(), "User2 should not see user1's document as duplicate");
// User2 should be able to create their own document with same hash
let user2_doc = create_test_document(user2_id, "user2.pdf", file_hash.clone());
let result = state.db.create_document(user2_doc).await;
assert!(result.is_ok(), "User2 should be able to create document with same hash");
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_etag_change_detection() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
let webdav_path = "/test/updated.pdf";
let old_etag = "old-etag-123";
let new_etag = "new-etag-456";
// Create a document first
let test_doc = create_test_document(user_id, "updated.pdf", "etag_test_hash_1234567890".to_string());
let created_doc = state.db.create_document(test_doc).await?;
// Create initial WebDAV file record
let initial_webdav_file = CreateWebDAVFile {
user_id,
webdav_path: webdav_path.to_string(),
etag: old_etag.to_string(),
last_modified: Some(Utc::now()),
file_size: 1024,
mime_type: "application/pdf".to_string(),
document_id: Some(created_doc.id),
sync_status: "synced".to_string(),
sync_error: None,
};
state.db.create_or_update_webdav_file(&initial_webdav_file).await?;
// Check existing WebDAV file
let existing_file = state.db.get_webdav_file_by_path(user_id, webdav_path).await?;
assert!(existing_file.is_some());
let existing_file = existing_file.unwrap();
assert_eq!(existing_file.etag, old_etag);
// Simulate file with new ETag (indicating change)
let file_info = FileInfo {
name: "updated.pdf".to_string(),
path: webdav_path.to_string(),
size: 1024,
last_modified: Some(Utc::now()),
etag: new_etag.to_string(),
mime_type: "application/pdf".to_string(),
is_directory: false,
};
// ETag comparison should detect change
assert_ne!(existing_file.etag, file_info.etag, "ETag change should be detected");
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_hash_collision_prevention() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
// Create document with specific hash
let test_hash = "abcd1234567890123456789012345678901234567890123456789012345678";
let document = create_test_document(user_id, "original.pdf", test_hash.to_string());
state.db.create_document(document).await?;
// Try to create another document with same hash (should fail due to unique constraint)
let duplicate_document = create_test_document(user_id, "duplicate.pdf", test_hash.to_string());
let result = state.db.create_document(duplicate_document).await;
assert!(result.is_err(), "Should not be able to create duplicate hash for same user");
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_file_content_vs_metadata_change() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
// Original content and hash
let original_content = b"Original file content";
let original_hash = calculate_file_hash(original_content);
// Create original document
let original_doc = create_test_document(user_id, "test.pdf", original_hash.clone());
state.db.create_document(original_doc).await?;
// Same content but different metadata (name, etc.) - should still be detected as duplicate
let duplicate_check = state.db.get_document_by_user_and_hash(user_id, &original_hash).await?;
assert!(duplicate_check.is_some(), "Same content should be detected as duplicate regardless of filename");
// Different content - should not be duplicate
let different_content = b"Different file content";
let different_hash = calculate_file_hash(different_content);
let unique_check = state.db.get_document_by_user_and_hash(user_id, &different_hash).await?;
assert!(unique_check.is_none(), "Different content should not be detected as duplicate");
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_error_handling_invalid_hash() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
// Test with invalid hash formats
let invalid_g_hash = "g".repeat(64);
let invalid_hashes = vec![
"", // Empty
"short", // Too short
"invalid_characters_!@#$", // Invalid characters
&invalid_g_hash, // Invalid hex (contains 'g')
];
for invalid_hash in invalid_hashes {
let result = state.db.get_document_by_user_and_hash(user_id, invalid_hash).await;
// Should handle gracefully - either return None or proper error
match result {
Ok(doc) => assert!(doc.is_none(), "Invalid hash should not match any document"),
Err(_) => {} // Acceptable to return error for invalid input
}
}
Ok(())
}
#[tokio::test]
async fn test_webdav_sync_concurrent_duplicate_detection() -> Result<()> {
let state = create_test_app_state().await?;
let user_id = create_test_user(&state.db, "webdav_test").await?;
let test_content = b"Concurrent test content";
let file_hash = calculate_file_hash(test_content);
// Simulate concurrent duplicate checks
let mut handles = Vec::new();
for _ in 0..5 {
let state_clone = state.clone();
let hash_clone = file_hash.clone();
let handle = tokio::spawn(async move {
state_clone.db.get_document_by_user_and_hash(user_id, &hash_clone).await
});
handles.push(handle);
}
// Wait for all concurrent operations
let mut all_none = true;
for handle in handles {
let result = handle.await??;
if result.is_some() {
all_none = false;
}
}
// Since no document exists with this hash, all should return None
assert!(all_none, "All concurrent checks should return None for non-existent hash");
Ok(())
}