feat(server/client): add pagination in client, resolve race condition in server

2025-06-15 21:48:59 +00:00 · 2025-06-15 21:48:59 +00:00 · a7cf67f90d
parent 42bc72ded4
commit a7cf67f90d
5 changed files with 823 additions and 25 deletions
--- a/frontend/src/pages/DocumentsPage.tsx
+++ b/frontend/src/pages/DocumentsPage.tsx
@ -22,6 +22,10 @@ import {
  Divider,
  CircularProgress,
  Alert,
  Pagination,
  FormControl,
  InputLabel,
  Select,
 } from '@mui/material';
 import {
  GridView as GridViewIcon,
@ -38,6 +42,8 @@ import {
  CalendarToday as DateIcon,
  Storage as SizeIcon,
  Visibility as ViewIcon,
  ChevronLeft as ChevronLeftIcon,
  ChevronRight as ChevronRightIcon,
 } from '@mui/icons-material';
 import { documentService } from '../services/api';
 import DocumentThumbnail from '../components/DocumentThumbnail';
@ -50,9 +56,23 @@ interface Document {
  mime_type: string;
  created_at: string;
  has_ocr_text?: boolean;
  ocr_status?: string;
  ocr_confidence?: number;
  tags: string[];
 }
 interface PaginationInfo {
  total: number;
  limit: number;
  offset: number;
  has_more: boolean;
 }
 interface DocumentsResponse {
  documents: Document[];
  pagination: PaginationInfo;
 }
 type ViewMode = 'grid' | 'list';
 type SortField = 'created_at' | 'original_filename' | 'file_size';
 type SortOrder = 'asc' | 'desc';
@ -60,12 +80,14 @@ type SortOrder = 'asc' | 'desc';
 const DocumentsPage: React.FC = () => {
  const navigate = useNavigate();
  const [documents, setDocuments] = useState<Document[]>([]);
  const [pagination, setPagination] = useState<PaginationInfo>({ total: 0, limit: 20, offset: 0, has_more: false });
  const [loading, setLoading] = useState<boolean>(true);
  const [error, setError] = useState<string | null>(null);
  const [viewMode, setViewMode] = useState<ViewMode>('grid');
  const [searchQuery, setSearchQuery] = useState<string>('');
  const [sortBy, setSortBy] = useState<SortField>('created_at');
  const [sortOrder, setSortOrder] = useState<SortOrder>('desc');
  const [ocrFilter, setOcrFilter] = useState<string>('');
  // Menu states
  const [sortMenuAnchor, setSortMenuAnchor] = useState<null | HTMLElement>(null);
@ -74,13 +96,18 @@ const DocumentsPage: React.FC = () => {
  useEffect(() => {
    fetchDocuments();
-  }, []);
+  }, [pagination.limit, pagination.offset, ocrFilter]);
  const fetchDocuments = async (): Promise<void> => {
    try {
      setLoading(true);
-      const response = await documentService.list(100, 0);
+      const response = await documentService.listWithPagination(
-      setDocuments(response.data);
+        pagination.limit, 
        pagination.offset, 
        ocrFilter || undefined
      );
      setDocuments(response.data.documents);
      setPagination(response.data.pagination);
    } catch (err) {
      setError('Failed to load documents');
      console.error(err);
@ -179,6 +206,39 @@ const DocumentsPage: React.FC = () => {
    handleSortMenuClose();
  };
  const handlePageChange = (event: React.ChangeEvent<unknown>, page: number): void => {
    const newOffset = (page - 1) * pagination.limit;
    setPagination(prev => ({ ...prev, offset: newOffset }));
  };
  const handleOcrFilterChange = (event: React.ChangeEvent<HTMLInputElement>): void => {
    setOcrFilter(event.target.value);
    setPagination(prev => ({ ...prev, offset: 0 })); // Reset to first page when filtering
  };
  const getOcrStatusChip = (doc: Document) => {
    if (!doc.ocr_status) return null;
    const statusConfig = {
      'completed': { color: 'success' as const, label: doc.ocr_confidence ? `OCR ${Math.round(doc.ocr_confidence)}%` : 'OCR Done' },
      'processing': { color: 'warning' as const, label: 'Processing...' },
      'failed': { color: 'error' as const, label: 'OCR Failed' },
      'pending': { color: 'default' as const, label: 'Pending' },
    };
    const config = statusConfig[doc.ocr_status as keyof typeof statusConfig];
    if (!config) return null;
    return (
      <Chip 
        label={config.label}
        size="small" 
        color={config.color}
        variant="outlined"
      />
    );
  };
  if (loading) {
    return (
      <Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
@ -257,6 +317,22 @@ const DocumentsPage: React.FC = () => {
          </ToggleButton>
        </ToggleButtonGroup>
        {/* OCR Filter */}
        <FormControl size="small" sx={{ minWidth: 120 }}>
          <InputLabel>OCR Status</InputLabel>
          <Select
            value={ocrFilter}
            label="OCR Status"
            onChange={handleOcrFilterChange}
          >
            <MenuItem value="">All</MenuItem>
            <MenuItem value="completed">Completed</MenuItem>
            <MenuItem value="processing">Processing</MenuItem>
            <MenuItem value="failed">Failed</MenuItem>
            <MenuItem value="pending">Pending</MenuItem>
          </Select>
        </FormControl>
        {/* Sort Button */}
        <Button
          variant="outlined"
@ -418,14 +494,7 @@ const DocumentsPage: React.FC = () => {
                          size="small" 
                          variant="outlined"
                        />
-                        {doc.has_ocr_text && (
+                        {getOcrStatusChip(doc)}
                          <Chip 
                            label="OCR" 
                            size="small" 
                            color="success"
                            variant="outlined"
                          />
                        )}
                      </Stack>
                      {doc.tags.length > 0 && (
@ -489,12 +558,29 @@ const DocumentsPage: React.FC = () => {
        </Grid>
      )}
-      {/* Results count */}
+      {/* Results count and pagination */}
-      <Box sx={{ mt: 3, textAlign: 'center' }}>
+      <Box sx={{ mt: 3 }}>
-        <Typography variant="body2" color="text.secondary">
+        <Box sx={{ textAlign: 'center', mb: 2 }}>
-          Showing {sortedDocuments.length} of {documents.length} documents
+          <Typography variant="body2" color="text.secondary">
-          {searchQuery && ` matching "${searchQuery}"`}
+            Showing {pagination.offset + 1}-{Math.min(pagination.offset + pagination.limit, pagination.total)} of {pagination.total} documents
-        </Typography>
+            {ocrFilter && ` with OCR status: ${ocrFilter}`}
            {searchQuery && ` matching "${searchQuery}"`}
          </Typography>
        </Box>
        {pagination.total > pagination.limit && (
          <Box sx={{ display: 'flex', justifyContent: 'center' }}>
            <Pagination
              count={Math.ceil(pagination.total / pagination.limit)}
              page={Math.floor(pagination.offset / pagination.limit) + 1}
              onChange={handlePageChange}
              color="primary"
              size="large"
              showFirstButton
              showLastButton
            />
          </Box>
        )}
      </Box>
    </Box>
  );
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@ -111,6 +111,16 @@ export const documentService = {
    })
  },
  listWithPagination: (limit = 20, offset = 0, ocrStatus?: string) => {
    const params: any = { limit, offset };
    if (ocrStatus) {
      params.ocr_status = ocrStatus;
    }
    return api.get<{documents: Document[], pagination: {total: number, limit: number, offset: number, has_more: boolean}}>('/documents', {
      params,
    })
  },
  getById: (id: string) => {
    // Use the document list endpoint with pagination to find the specific document
    // This is a temporary solution until we have a proper document details endpoint
--- a/src/db.rs
+++ b/src/db.rs
@ -503,6 +503,147 @@ impl Database {
        Ok(documents)
    }
    pub async fn get_documents_by_user_with_role_and_filter(&self, user_id: Uuid, user_role: crate::models::UserRole, limit: i64, offset: i64, ocr_status: Option<&str>) -> Result<Vec<Document>> {
        let rows = match (user_role == crate::models::UserRole::Admin, ocr_status) {
            (true, Some(status)) => {
                // Admin with OCR filter
                sqlx::query(
                    r#"
                    SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
                    FROM documents 
                    WHERE ocr_status = $3
                    ORDER BY created_at DESC 
                    LIMIT $1 OFFSET $2
                    "#
                )
                .bind(limit)
                .bind(offset)
                .bind(status)
                .fetch_all(&self.pool)
                .await?
            }
            (true, None) => {
                // Admin without OCR filter
                sqlx::query(
                    r#"
                    SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
                    FROM documents 
                    ORDER BY created_at DESC 
                    LIMIT $1 OFFSET $2
                    "#
                )
                .bind(limit)
                .bind(offset)
                .fetch_all(&self.pool)
                .await?
            }
            (false, Some(status)) => {
                // Regular user with OCR filter
                sqlx::query(
                    r#"
                    SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
                    FROM documents 
                    WHERE user_id = $3 AND ocr_status = $4
                    ORDER BY created_at DESC 
                    LIMIT $1 OFFSET $2
                    "#
                )
                .bind(limit)
                .bind(offset)
                .bind(user_id)
                .bind(status)
                .fetch_all(&self.pool)
                .await?
            }
            (false, None) => {
                // Regular user without OCR filter
                sqlx::query(
                    r#"
                    SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
                    FROM documents 
                    WHERE user_id = $3 
                    ORDER BY created_at DESC 
                    LIMIT $1 OFFSET $2
                    "#
                )
                .bind(limit)
                .bind(offset)
                .bind(user_id)
                .fetch_all(&self.pool)
                .await?
            }
        };
        let documents = rows
            .into_iter()
            .map(|row| Document {
                id: row.get("id"),
                filename: row.get("filename"),
                original_filename: row.get("original_filename"),
                file_path: row.get("file_path"),
                file_size: row.get("file_size"),
                mime_type: row.get("mime_type"),
                content: row.get("content"),
                ocr_text: row.get("ocr_text"),
                ocr_confidence: row.get("ocr_confidence"),
                ocr_word_count: row.get("ocr_word_count"),
                ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
                ocr_status: row.get("ocr_status"),
                ocr_error: row.get("ocr_error"),
                ocr_completed_at: row.get("ocr_completed_at"),
                tags: row.get("tags"),
                created_at: row.get("created_at"),
                updated_at: row.get("updated_at"),
                user_id: row.get("user_id"),
            })
            .collect();
        Ok(documents)
    }
    pub async fn get_documents_count_with_role_and_filter(&self, user_id: Uuid, user_role: crate::models::UserRole, ocr_status: Option<&str>) -> Result<i64> {
        let count = match (user_role == crate::models::UserRole::Admin, ocr_status) {
            (true, Some(status)) => {
                // Admin with OCR filter
                sqlx::query_scalar::<_, i64>(
                    "SELECT COUNT(*) FROM documents WHERE ocr_status = $1"
                )
                .bind(status)
                .fetch_one(&self.pool)
                .await?
            }
            (true, None) => {
                // Admin without OCR filter
                sqlx::query_scalar::<_, i64>(
                    "SELECT COUNT(*) FROM documents"
                )
                .fetch_one(&self.pool)
                .await?
            }
            (false, Some(status)) => {
                // Regular user with OCR filter
                sqlx::query_scalar::<_, i64>(
                    "SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = $2"
                )
                .bind(user_id)
                .bind(status)
                .fetch_one(&self.pool)
                .await?
            }
            (false, None) => {
                // Regular user without OCR filter
                sqlx::query_scalar::<_, i64>(
                    "SELECT COUNT(*) FROM documents WHERE user_id = $1"
                )
                .bind(user_id)
                .fetch_one(&self.pool)
                .await?
            }
        };
        Ok(count)
    }
    pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
        let rows = sqlx::query(
            r#"
--- a/src/routes/documents.rs
+++ b/src/routes/documents.rs
@ -22,6 +22,7 @@ use crate::{
 struct PaginationQuery {
    limit: Option<i64>,
    offset: Option<i64>,
    ocr_status: Option<String>,
 }
 pub fn router() -> Router<Arc<AppState>> {
@ -174,7 +175,8 @@ fn calculate_file_hash(data: &[u8]) -> String {
    ),
    params(
        ("limit" = Option<i64>, Query, description = "Number of documents to return (default: 50)"),
-        ("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)")
+        ("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)"),
        ("ocr_status" = Option<String>, Query, description = "Filter by OCR status (pending, processing, completed, failed)")
    ),
    responses(
        (status = 200, description = "List of user documents", body = Vec<DocumentResponse>),
@ -185,17 +187,40 @@ async fn list_documents(
    State(state): State<Arc<AppState>>,
    auth_user: AuthUser,
    Query(pagination): Query<PaginationQuery>,
-) -> Result<Json<Vec<DocumentResponse>>, StatusCode> {
+) -> Result<Json<serde_json::Value>, StatusCode> {
    let limit = pagination.limit.unwrap_or(50);
    let offset = pagination.offset.unwrap_or(0);
-    let documents = state
+    let user_id = auth_user.user.id;
-        .db
+    let user_role = auth_user.user.role;
-        .get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, limit, offset)
+    let ocr_filter = pagination.ocr_status.as_deref();
        .await
        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-    let response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
+    let (documents, total_count) = tokio::try_join!(
        state.db.get_documents_by_user_with_role_and_filter(
            user_id, 
            user_role.clone(), 
            limit, 
            offset, 
            ocr_filter
        ),
        state.db.get_documents_count_with_role_and_filter(
            user_id,
            user_role,
            ocr_filter
        )
    ).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
    let documents_response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
    let response = serde_json::json!({
        "documents": documents_response,
        "pagination": {
            "total": total_count,
            "limit": limit,
            "offset": offset,
            "has_more": offset + limit < total_count
        }
    });
    Ok(Json(response))
 }
--- a/tests/ocr_corruption_tests.rs
+++ b/tests/ocr_corruption_tests.rs
@ -0,0 +1,536 @@
 /*!
 * OCR Corruption Integration Tests
 * 
 * Tests for diagnosing and reproducing the issue where FileA's OCR text
 * gets corrupted when FileB is processed simultaneously.
 */
 use reqwest::Client;
 use serde_json::{json, Value};
 use std::time::{Duration, Instant};
 use tokio::time::sleep;
 use uuid::Uuid;
 use readur::models::{DocumentResponse, CreateUser, LoginRequest, LoginResponse};
 const BASE_URL: &str = "http://localhost:8000";
 const TIMEOUT: Duration = Duration::from_secs(60);
 /// Test client for OCR corruption scenarios
 struct OcrTestClient {
    client: Client,
    token: Option<String>,
    user_id: Option<Uuid>,
 }
 impl OcrTestClient {
    fn new() -> Self {
        Self {
            client: Client::new(),
            token: None,
            user_id: None,
        }
    }
    async fn check_server_health(&self) -> Result<(), Box<dyn std::error::Error>> {
        let response = self.client
            .get(&format!("{}/api/health", BASE_URL))
            .timeout(Duration::from_secs(5))
            .send()
            .await?;
        if !response.status().is_success() {
            return Err("Server health check failed".into());
        }
        Ok(())
    }
    async fn register_and_login(&mut self, username: &str, email: &str, password: &str) -> Result<String, Box<dyn std::error::Error>> {
        let user_data = CreateUser {
            username: username.to_string(),
            email: email.to_string(),
            password: password.to_string(),
            role: Some(readur::models::UserRole::User),
        };
        let register_response = self.client
            .post(&format!("{}/api/auth/register", BASE_URL))
            .json(&user_data)
            .send()
            .await?;
        if !register_response.status().is_success() {
            return Err(format!("Registration failed: {}", register_response.text().await?).into());
        }
        let login_data = LoginRequest {
            username: username.to_string(),
            password: password.to_string(),
        };
        let login_response = self.client
            .post(&format!("{}/api/auth/login", BASE_URL))
            .json(&login_data)
            .send()
            .await?;
        if !login_response.status().is_success() {
            return Err(format!("Login failed: {}", login_response.text().await?).into());
        }
        let login_result: LoginResponse = login_response.json().await?;
        self.token = Some(login_result.token.clone());
        Ok(login_result.token)
    }
    /// Upload a document and return its ID and expected content
    async fn upload_document(&self, content: &str, filename: &str) -> Result<(Uuid, String), Box<dyn std::error::Error>> {
        let token = self.token.as_ref().ok_or("Not authenticated")?;
        let part = reqwest::multipart::Part::text(content.to_string())
            .file_name(filename.to_string())
            .mime_str("text/plain")?;
        let form = reqwest::multipart::Form::new()
            .part("file", part);
        let response = self.client
            .post(&format!("{}/api/documents", BASE_URL))
            .header("Authorization", format!("Bearer {}", token))
            .multipart(form)
            .send()
            .await?;
        if !response.status().is_success() {
            return Err(format!("Upload failed: {}", response.text().await?).into());
        }
        let document: DocumentResponse = response.json().await?;
        Ok((document.id, content.to_string()))
    }
    /// Get document details including OCR status
    async fn get_document_details(&self, doc_id: Uuid) -> Result<Value, Box<dyn std::error::Error>> {
        let token = self.token.as_ref().ok_or("Not authenticated")?;
        let response = self.client
            .get(&format!("{}/api/documents/{}/ocr", BASE_URL, doc_id))
            .header("Authorization", format!("Bearer {}", token))
            .send()
            .await?;
        if !response.status().is_success() {
            return Err(format!("Failed to get document details: {}", response.text().await?).into());
        }
        let doc_data: Value = response.json().await?;
        Ok(doc_data)
    }
    /// Wait for OCR to complete for a document
    async fn wait_for_ocr(&self, doc_id: Uuid) -> Result<Value, Box<dyn std::error::Error>> {
        let start = Instant::now();
        while start.elapsed() < TIMEOUT {
            let doc_data = self.get_document_details(doc_id).await?;
            match doc_data["ocr_status"].as_str() {
                Some("completed") => {
                    println!("✅ OCR completed for document {}", doc_id);
                    return Ok(doc_data);
                },
                Some("failed") => {
                    return Err(format!("OCR failed for document {}: {}", 
                                     doc_id, 
                                     doc_data["ocr_error"].as_str().unwrap_or("unknown error")).into());
                },
                Some("processing") => {
                    println!("⏳ OCR still processing for document {}", doc_id);
                },
                _ => {
                    println!("📋 Document {} queued for OCR", doc_id);
                }
            }
            sleep(Duration::from_millis(200)).await;
        }
        Err(format!("OCR did not complete within {} seconds for document {}", TIMEOUT.as_secs(), doc_id).into())
    }
    /// Upload multiple documents simultaneously and track their OCR results
    async fn upload_documents_simultaneously(&self, documents: Vec<(&str, &str)>) -> Result<Vec<(Uuid, String, Value)>, Box<dyn std::error::Error>> {
        let mut upload_tasks = Vec::new();
        // Upload all documents simultaneously
        for (content, filename) in documents {
            let content_owned = content.to_string();
            let filename_owned = filename.to_string();
            let client_ref = self;
            let task = async move {
                client_ref.upload_document(&content_owned, &filename_owned).await
            };
            upload_tasks.push(task);
        }
        // Wait for all uploads to complete
        let mut uploaded_docs = Vec::new();
        for task in upload_tasks {
            let (doc_id, expected_content) = task.await?;
            uploaded_docs.push((doc_id, expected_content));
            println!("📄 Uploaded document: {}", doc_id);
        }
        // Now wait for OCR processing on all documents
        let mut ocr_tasks = Vec::new();
        for (doc_id, expected_content) in uploaded_docs {
            let client_ref = self;
            let task = async move {
                let ocr_result = client_ref.wait_for_ocr(doc_id).await?;
                Ok::<(Uuid, String, Value), Box<dyn std::error::Error>>((doc_id, expected_content, ocr_result))
            };
            ocr_tasks.push(task);
        }
        // Wait for all OCR to complete
        let mut results = Vec::new();
        for task in ocr_tasks {
            let result = task.await?;
            results.push(result);
        }
        Ok(results)
    }
 }
 #[tokio::test]
 async fn test_concurrent_ocr_corruption() {
    println!("🧪 Starting OCR corruption test with concurrent file processing");
    let mut client = OcrTestClient::new();
    // Check server health
    if let Err(e) = client.check_server_health().await {
        panic!("Server not running at {}: {}", BASE_URL, e);
    }
    // Create test user
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_millis();
    let username = format!("ocr_corruption_test_{}", timestamp);
    let email = format!("ocr_corruption_{}@test.com", timestamp);
    let _token = client.register_and_login(&username, &email, "testpass123").await
        .expect("Failed to register and login");
    println!("✅ User registered: {}", username);
    // Create test documents with distinctive content
    let file_a_content = r#"
 === DOCUMENT A - IMPORTANT CONTRACT ===
 Contract Number: CONTRACT-A-001
 Party 1: Alice Corporation
 Party 2: Bob Industries
 Date: 2024-01-15
 Amount: $50,000
 Terms: This is the content for Document A. It contains specific legal text 
 that should remain associated with Document A only. Any corruption would 
 be immediately visible.
 DOCUMENT A SIGNATURE: Alice Smith, CEO
 UNIQUE IDENTIFIER FOR A: ALPHA-BRAVO-CHARLIE-001
 "#;
    let file_b_content = r#"
 === DOCUMENT B - TECHNICAL SPECIFICATION ===
 Specification ID: SPEC-B-002
 Product: Widget Manufacturing System
 Version: 2.0
 Author: Technical Team B
 Date: 2024-01-16
 This is Document B containing technical specifications. It has completely 
 different content from Document A. If OCR corruption occurs, Document A 
 might end up with this technical content instead of its contract text.
 DOCUMENT B SIGNATURE: Bob Johnson, CTO
 UNIQUE IDENTIFIER FOR B: DELTA-ECHO-FOXTROT-002
 "#;
    // Test documents to upload simultaneously
    let documents = vec![
        (file_a_content, "contract_a.txt"),
        (file_b_content, "specification_b.txt"),
    ];
    println!("📤 Uploading documents simultaneously...");
    let results = client.upload_documents_simultaneously(documents).await
        .expect("Failed to upload documents simultaneously");
    println!("🔍 Analyzing OCR results for corruption...");
    let mut corruption_detected = false;
    for (doc_id, expected_content, ocr_result) in results {
        let actual_ocr_text = ocr_result["ocr_text"].as_str().unwrap_or("");
        let filename = ocr_result["filename"].as_str().unwrap_or("unknown");
        println!("\n📋 Document: {} ({})", doc_id, filename);
        println!("📄 Expected content length: {} chars", expected_content.len());
        println!("🔤 Actual OCR text length: {} chars", actual_ocr_text.len());
        // Check for content mismatch (corruption)
        if filename.contains("contract_a") {
            // Document A should contain contract-specific terms
            let has_contract_content = actual_ocr_text.contains("CONTRACT-A-001") 
                && actual_ocr_text.contains("Alice Corporation")
                && actual_ocr_text.contains("ALPHA-BRAVO-CHARLIE-001");
            let has_spec_content = actual_ocr_text.contains("SPEC-B-002")
                || actual_ocr_text.contains("Widget Manufacturing")
                || actual_ocr_text.contains("DELTA-ECHO-FOXTROT-002");
            if !has_contract_content {
                println!("❌ CORRUPTION DETECTED: Document A missing its original contract content!");
                corruption_detected = true;
            }
            if has_spec_content {
                println!("❌ CORRUPTION DETECTED: Document A contains Document B's specification content!");
                corruption_detected = true;
            }
            if has_contract_content && !has_spec_content {
                println!("✅ Document A has correct content");
            }
        } else if filename.contains("specification_b") {
            // Document B should contain specification-specific terms
            let has_spec_content = actual_ocr_text.contains("SPEC-B-002")
                && actual_ocr_text.contains("Widget Manufacturing")
                && actual_ocr_text.contains("DELTA-ECHO-FOXTROT-002");
            let has_contract_content = actual_ocr_text.contains("CONTRACT-A-001")
                || actual_ocr_text.contains("Alice Corporation")
                || actual_ocr_text.contains("ALPHA-BRAVO-CHARLIE-001");
            if !has_spec_content {
                println!("❌ CORRUPTION DETECTED: Document B missing its original specification content!");
                corruption_detected = true;
            }
            if has_contract_content {
                println!("❌ CORRUPTION DETECTED: Document B contains Document A's contract content!");
                corruption_detected = true;
            }
            if has_spec_content && !has_contract_content {
                println!("✅ Document B has correct content");
            }
        }
        // Additional integrity checks
        if let Some(confidence) = ocr_result["ocr_confidence"].as_f64() {
            println!("📊 OCR Confidence: {:.1}%", confidence);
            if confidence < 50.0 {
                println!("⚠️  Low OCR confidence may indicate processing issues");
            }
        }
        if let Some(word_count) = ocr_result["ocr_word_count"].as_i64() {
            println!("📝 OCR Word Count: {}", word_count);
        }
        if let Some(processing_time) = ocr_result["ocr_processing_time_ms"].as_i64() {
            println!("⏱️  OCR Processing Time: {}ms", processing_time);
        }
    }
    if corruption_detected {
        panic!("🚨 OCR CORRUPTION DETECTED! FileA's content was overwritten with FileB's data or vice versa.");
    } else {
        println!("\n🎉 No OCR corruption detected - all documents retained their correct content!");
    }
 }
 #[tokio::test]
 async fn test_high_volume_concurrent_ocr() {
    println!("🧪 Starting high-volume concurrent OCR test");
    let mut client = OcrTestClient::new();
    if let Err(e) = client.check_server_health().await {
        panic!("Server not running at {}: {}", BASE_URL, e);
    }
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_millis();
    let username = format!("high_volume_test_{}", timestamp);
    let email = format!("high_volume_{}@test.com", timestamp);
    let _token = client.register_and_login(&username, &email, "testpass123").await
        .expect("Failed to register and login");
    // Create 5 documents with unique identifiable content
    let mut documents = Vec::new();
    for i in 1..=5 {
        let content = format!(r#"
 === DOCUMENT {} - UNIQUE CONTENT ===
 Document Number: DOC-{:03}
 Unique Signature: SIGNATURE-{}-{}-{}
 Content: This is document number {} with completely unique content.
 Every document should retain its own unique signature and number.
 Any mixing of content between documents indicates corruption.
 Random data: {}
 End of Document {}
 "#, i, i, timestamp, i*7, i, timestamp * i, i, i);
        documents.push((content, format!("doc_{}.txt", i)));
    }
    println!("📤 Uploading {} documents simultaneously...", documents.len());
    let documents_ref: Vec<(&str, &str)> = documents.iter()
        .map(|(content, filename)| (content.as_str(), filename.as_str()))
        .collect();
    let results = client.upload_documents_simultaneously(documents_ref).await
        .expect("Failed to upload documents simultaneously");
    println!("🔍 Analyzing results for content mixing...");
    let mut all_signatures = Vec::new();
    let mut corruption_found = false;
    // Extract all unique signatures
    for i in 1..=5 {
        all_signatures.push(format!("SIGNATURE-{}-{}-{}", i, timestamp, i*7));
    }
    // Check each document for corruption
    for (doc_id, expected_content, ocr_result) in results {
        let actual_ocr_text = ocr_result["ocr_text"].as_str().unwrap_or("");
        let filename = ocr_result["filename"].as_str().unwrap_or("unknown");
        // Determine which document this should be based on filename
        if let Some(doc_num_str) = filename.strip_prefix("doc_").and_then(|s| s.strip_suffix(".txt")) {
            if let Ok(doc_num) = doc_num_str.parse::<i32>() {
                let expected_signature = format!("SIGNATURE-{}-{}-{}", doc_num, timestamp, doc_num*7);
                println!("\n📋 Checking Document {} ({})", doc_num, doc_id);
                // Check if it has its own signature
                let has_own_signature = actual_ocr_text.contains(&expected_signature);
                // Check if it has any other document's signature
                let mut has_other_signatures = Vec::new();
                for (i, sig) in all_signatures.iter().enumerate() {
                    if i + 1 != doc_num as usize && actual_ocr_text.contains(sig) {
                        has_other_signatures.push(i + 1);
                    }
                }
                if !has_own_signature {
                    println!("❌ CORRUPTION: Document {} missing its own signature!", doc_num);
                    corruption_found = true;
                }
                if !has_other_signatures.is_empty() {
                    println!("❌ CORRUPTION: Document {} contains signatures from documents: {:?}", doc_num, has_other_signatures);
                    corruption_found = true;
                }
                if has_own_signature && has_other_signatures.is_empty() {
                    println!("✅ Document {} has correct content", doc_num);
                }
            }
        }
    }
    if corruption_found {
        panic!("🚨 CONTENT CORRUPTION DETECTED in high-volume test!");
    } else {
        println!("\n🎉 High-volume test passed - no corruption detected!");
    }
 }
 #[tokio::test]
 async fn test_rapid_sequential_uploads() {
    println!("🧪 Testing rapid sequential uploads for race conditions");
    let mut client = OcrTestClient::new();
    if let Err(e) = client.check_server_health().await {
        panic!("Server not running at {}: {}", BASE_URL, e);
    }
    let timestamp = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_millis();
    let username = format!("rapid_test_{}", timestamp);
    let email = format!("rapid_{}@test.com", timestamp);
    let _token = client.register_and_login(&username, &email, "testpass123").await
        .expect("Failed to register and login");
    println!("📤 Uploading documents in rapid sequence...");
    // Upload documents one after another with minimal delay
    let mut doc_ids = Vec::new();
    let mut expected_contents = Vec::new();
    for i in 1..=3 {
        let content = format!("RAPID-TEST-DOCUMENT-{}-{}-UNIQUE-CONTENT", i, timestamp);
        let filename = format!("rapid_{}.txt", i);
        let (doc_id, expected) = client.upload_document(&content, &filename).await
            .expect("Failed to upload document");
        doc_ids.push(doc_id);
        expected_contents.push(expected);
        println!("📄 Uploaded rapid document {}: {}", i, doc_id);
        // Very short delay to create timing pressure
        sleep(Duration::from_millis(50)).await;
    }
    println!("⏳ Waiting for all OCR to complete...");
    // Wait for all to complete and check for corruption
    for (i, doc_id) in doc_ids.iter().enumerate() {
        let ocr_result = client.wait_for_ocr(*doc_id).await
            .expect("Failed to wait for OCR");
        let actual_text = ocr_result["ocr_text"].as_str().unwrap_or("");
        let expected_marker = format!("RAPID-TEST-DOCUMENT-{}", i + 1);
        if !actual_text.contains(&expected_marker) {
            panic!("🚨 RAPID UPLOAD CORRUPTION: Document {} missing its unique marker '{}'", 
                   doc_id, expected_marker);
        }
        // Check it doesn't contain other documents' markers
        for j in 1..=3 {
            if j != (i + 1) {
                let other_marker = format!("RAPID-TEST-DOCUMENT-{}", j);
                if actual_text.contains(&other_marker) {
                    panic!("🚨 RAPID UPLOAD CORRUPTION: Document {} contains marker from document {}", 
                           doc_id, j);
                }
            }
        }
        println!("✅ Rapid document {} has correct content", i + 1);
    }
    println!("🎉 Rapid sequential upload test passed!");
 }