feat(server/client): add pagination in client, resolve race condition in server

This commit is contained in:
perf3ct 2025-06-15 21:48:59 +00:00
parent 42bc72ded4
commit a7cf67f90d
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
5 changed files with 823 additions and 25 deletions

View File

@ -22,6 +22,10 @@ import {
Divider,
CircularProgress,
Alert,
Pagination,
FormControl,
InputLabel,
Select,
} from '@mui/material';
import {
GridView as GridViewIcon,
@ -38,6 +42,8 @@ import {
CalendarToday as DateIcon,
Storage as SizeIcon,
Visibility as ViewIcon,
ChevronLeft as ChevronLeftIcon,
ChevronRight as ChevronRightIcon,
} from '@mui/icons-material';
import { documentService } from '../services/api';
import DocumentThumbnail from '../components/DocumentThumbnail';
@ -50,9 +56,23 @@ interface Document {
mime_type: string;
created_at: string;
has_ocr_text?: boolean;
ocr_status?: string;
ocr_confidence?: number;
tags: string[];
}
interface PaginationInfo {
total: number;
limit: number;
offset: number;
has_more: boolean;
}
interface DocumentsResponse {
documents: Document[];
pagination: PaginationInfo;
}
type ViewMode = 'grid' | 'list';
type SortField = 'created_at' | 'original_filename' | 'file_size';
type SortOrder = 'asc' | 'desc';
@ -60,12 +80,14 @@ type SortOrder = 'asc' | 'desc';
const DocumentsPage: React.FC = () => {
const navigate = useNavigate();
const [documents, setDocuments] = useState<Document[]>([]);
const [pagination, setPagination] = useState<PaginationInfo>({ total: 0, limit: 20, offset: 0, has_more: false });
const [loading, setLoading] = useState<boolean>(true);
const [error, setError] = useState<string | null>(null);
const [viewMode, setViewMode] = useState<ViewMode>('grid');
const [searchQuery, setSearchQuery] = useState<string>('');
const [sortBy, setSortBy] = useState<SortField>('created_at');
const [sortOrder, setSortOrder] = useState<SortOrder>('desc');
const [ocrFilter, setOcrFilter] = useState<string>('');
// Menu states
const [sortMenuAnchor, setSortMenuAnchor] = useState<null | HTMLElement>(null);
@ -74,13 +96,18 @@ const DocumentsPage: React.FC = () => {
useEffect(() => {
fetchDocuments();
}, []);
}, [pagination.limit, pagination.offset, ocrFilter]);
const fetchDocuments = async (): Promise<void> => {
try {
setLoading(true);
const response = await documentService.list(100, 0);
setDocuments(response.data);
const response = await documentService.listWithPagination(
pagination.limit,
pagination.offset,
ocrFilter || undefined
);
setDocuments(response.data.documents);
setPagination(response.data.pagination);
} catch (err) {
setError('Failed to load documents');
console.error(err);
@ -179,6 +206,39 @@ const DocumentsPage: React.FC = () => {
handleSortMenuClose();
};
const handlePageChange = (event: React.ChangeEvent<unknown>, page: number): void => {
const newOffset = (page - 1) * pagination.limit;
setPagination(prev => ({ ...prev, offset: newOffset }));
};
const handleOcrFilterChange = (event: React.ChangeEvent<HTMLInputElement>): void => {
setOcrFilter(event.target.value);
setPagination(prev => ({ ...prev, offset: 0 })); // Reset to first page when filtering
};
const getOcrStatusChip = (doc: Document) => {
if (!doc.ocr_status) return null;
const statusConfig = {
'completed': { color: 'success' as const, label: doc.ocr_confidence ? `OCR ${Math.round(doc.ocr_confidence)}%` : 'OCR Done' },
'processing': { color: 'warning' as const, label: 'Processing...' },
'failed': { color: 'error' as const, label: 'OCR Failed' },
'pending': { color: 'default' as const, label: 'Pending' },
};
const config = statusConfig[doc.ocr_status as keyof typeof statusConfig];
if (!config) return null;
return (
<Chip
label={config.label}
size="small"
color={config.color}
variant="outlined"
/>
);
};
if (loading) {
return (
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
@ -257,6 +317,22 @@ const DocumentsPage: React.FC = () => {
</ToggleButton>
</ToggleButtonGroup>
{/* OCR Filter */}
<FormControl size="small" sx={{ minWidth: 120 }}>
<InputLabel>OCR Status</InputLabel>
<Select
value={ocrFilter}
label="OCR Status"
onChange={handleOcrFilterChange}
>
<MenuItem value="">All</MenuItem>
<MenuItem value="completed">Completed</MenuItem>
<MenuItem value="processing">Processing</MenuItem>
<MenuItem value="failed">Failed</MenuItem>
<MenuItem value="pending">Pending</MenuItem>
</Select>
</FormControl>
{/* Sort Button */}
<Button
variant="outlined"
@ -418,14 +494,7 @@ const DocumentsPage: React.FC = () => {
size="small"
variant="outlined"
/>
{doc.has_ocr_text && (
<Chip
label="OCR"
size="small"
color="success"
variant="outlined"
/>
)}
{getOcrStatusChip(doc)}
</Stack>
{doc.tags.length > 0 && (
@ -489,12 +558,29 @@ const DocumentsPage: React.FC = () => {
</Grid>
)}
{/* Results count */}
<Box sx={{ mt: 3, textAlign: 'center' }}>
<Typography variant="body2" color="text.secondary">
Showing {sortedDocuments.length} of {documents.length} documents
{searchQuery && ` matching "${searchQuery}"`}
</Typography>
{/* Results count and pagination */}
<Box sx={{ mt: 3 }}>
<Box sx={{ textAlign: 'center', mb: 2 }}>
<Typography variant="body2" color="text.secondary">
Showing {pagination.offset + 1}-{Math.min(pagination.offset + pagination.limit, pagination.total)} of {pagination.total} documents
{ocrFilter && ` with OCR status: ${ocrFilter}`}
{searchQuery && ` matching "${searchQuery}"`}
</Typography>
</Box>
{pagination.total > pagination.limit && (
<Box sx={{ display: 'flex', justifyContent: 'center' }}>
<Pagination
count={Math.ceil(pagination.total / pagination.limit)}
page={Math.floor(pagination.offset / pagination.limit) + 1}
onChange={handlePageChange}
color="primary"
size="large"
showFirstButton
showLastButton
/>
</Box>
)}
</Box>
</Box>
);

View File

@ -111,6 +111,16 @@ export const documentService = {
})
},
listWithPagination: (limit = 20, offset = 0, ocrStatus?: string) => {
const params: any = { limit, offset };
if (ocrStatus) {
params.ocr_status = ocrStatus;
}
return api.get<{documents: Document[], pagination: {total: number, limit: number, offset: number, has_more: boolean}}>('/documents', {
params,
})
},
getById: (id: string) => {
// Use the document list endpoint with pagination to find the specific document
// This is a temporary solution until we have a proper document details endpoint

141
src/db.rs
View File

@ -503,6 +503,147 @@ impl Database {
Ok(documents)
}
pub async fn get_documents_by_user_with_role_and_filter(&self, user_id: Uuid, user_role: crate::models::UserRole, limit: i64, offset: i64, ocr_status: Option<&str>) -> Result<Vec<Document>> {
let rows = match (user_role == crate::models::UserRole::Admin, ocr_status) {
(true, Some(status)) => {
// Admin with OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE ocr_status = $3
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#
)
.bind(limit)
.bind(offset)
.bind(status)
.fetch_all(&self.pool)
.await?
}
(true, None) => {
// Admin without OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#
)
.bind(limit)
.bind(offset)
.fetch_all(&self.pool)
.await?
}
(false, Some(status)) => {
// Regular user with OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE user_id = $3 AND ocr_status = $4
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#
)
.bind(limit)
.bind(offset)
.bind(user_id)
.bind(status)
.fetch_all(&self.pool)
.await?
}
(false, None) => {
// Regular user without OCR filter
sqlx::query(
r#"
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
FROM documents
WHERE user_id = $3
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
"#
)
.bind(limit)
.bind(offset)
.bind(user_id)
.fetch_all(&self.pool)
.await?
}
};
let documents = rows
.into_iter()
.map(|row| Document {
id: row.get("id"),
filename: row.get("filename"),
original_filename: row.get("original_filename"),
file_path: row.get("file_path"),
file_size: row.get("file_size"),
mime_type: row.get("mime_type"),
content: row.get("content"),
ocr_text: row.get("ocr_text"),
ocr_confidence: row.get("ocr_confidence"),
ocr_word_count: row.get("ocr_word_count"),
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
ocr_status: row.get("ocr_status"),
ocr_error: row.get("ocr_error"),
ocr_completed_at: row.get("ocr_completed_at"),
tags: row.get("tags"),
created_at: row.get("created_at"),
updated_at: row.get("updated_at"),
user_id: row.get("user_id"),
})
.collect();
Ok(documents)
}
pub async fn get_documents_count_with_role_and_filter(&self, user_id: Uuid, user_role: crate::models::UserRole, ocr_status: Option<&str>) -> Result<i64> {
let count = match (user_role == crate::models::UserRole::Admin, ocr_status) {
(true, Some(status)) => {
// Admin with OCR filter
sqlx::query_scalar::<_, i64>(
"SELECT COUNT(*) FROM documents WHERE ocr_status = $1"
)
.bind(status)
.fetch_one(&self.pool)
.await?
}
(true, None) => {
// Admin without OCR filter
sqlx::query_scalar::<_, i64>(
"SELECT COUNT(*) FROM documents"
)
.fetch_one(&self.pool)
.await?
}
(false, Some(status)) => {
// Regular user with OCR filter
sqlx::query_scalar::<_, i64>(
"SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = $2"
)
.bind(user_id)
.bind(status)
.fetch_one(&self.pool)
.await?
}
(false, None) => {
// Regular user without OCR filter
sqlx::query_scalar::<_, i64>(
"SELECT COUNT(*) FROM documents WHERE user_id = $1"
)
.bind(user_id)
.fetch_one(&self.pool)
.await?
}
};
Ok(count)
}
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
let rows = sqlx::query(
r#"

View File

@ -22,6 +22,7 @@ use crate::{
struct PaginationQuery {
limit: Option<i64>,
offset: Option<i64>,
ocr_status: Option<String>,
}
pub fn router() -> Router<Arc<AppState>> {
@ -174,7 +175,8 @@ fn calculate_file_hash(data: &[u8]) -> String {
),
params(
("limit" = Option<i64>, Query, description = "Number of documents to return (default: 50)"),
("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)")
("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)"),
("ocr_status" = Option<String>, Query, description = "Filter by OCR status (pending, processing, completed, failed)")
),
responses(
(status = 200, description = "List of user documents", body = Vec<DocumentResponse>),
@ -185,17 +187,40 @@ async fn list_documents(
State(state): State<Arc<AppState>>,
auth_user: AuthUser,
Query(pagination): Query<PaginationQuery>,
) -> Result<Json<Vec<DocumentResponse>>, StatusCode> {
) -> Result<Json<serde_json::Value>, StatusCode> {
let limit = pagination.limit.unwrap_or(50);
let offset = pagination.offset.unwrap_or(0);
let documents = state
.db
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, limit, offset)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let user_id = auth_user.user.id;
let user_role = auth_user.user.role;
let ocr_filter = pagination.ocr_status.as_deref();
let response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
let (documents, total_count) = tokio::try_join!(
state.db.get_documents_by_user_with_role_and_filter(
user_id,
user_role.clone(),
limit,
offset,
ocr_filter
),
state.db.get_documents_count_with_role_and_filter(
user_id,
user_role,
ocr_filter
)
).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let documents_response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
let response = serde_json::json!({
"documents": documents_response,
"pagination": {
"total": total_count,
"limit": limit,
"offset": offset,
"has_more": offset + limit < total_count
}
});
Ok(Json(response))
}

View File

@ -0,0 +1,536 @@
/*!
* OCR Corruption Integration Tests
*
* Tests for diagnosing and reproducing the issue where FileA's OCR text
* gets corrupted when FileB is processed simultaneously.
*/
use reqwest::Client;
use serde_json::{json, Value};
use std::time::{Duration, Instant};
use tokio::time::sleep;
use uuid::Uuid;
use readur::models::{DocumentResponse, CreateUser, LoginRequest, LoginResponse};
const BASE_URL: &str = "http://localhost:8000";
const TIMEOUT: Duration = Duration::from_secs(60);
/// Test client for OCR corruption scenarios
struct OcrTestClient {
client: Client,
token: Option<String>,
user_id: Option<Uuid>,
}
impl OcrTestClient {
fn new() -> Self {
Self {
client: Client::new(),
token: None,
user_id: None,
}
}
async fn check_server_health(&self) -> Result<(), Box<dyn std::error::Error>> {
let response = self.client
.get(&format!("{}/api/health", BASE_URL))
.timeout(Duration::from_secs(5))
.send()
.await?;
if !response.status().is_success() {
return Err("Server health check failed".into());
}
Ok(())
}
async fn register_and_login(&mut self, username: &str, email: &str, password: &str) -> Result<String, Box<dyn std::error::Error>> {
let user_data = CreateUser {
username: username.to_string(),
email: email.to_string(),
password: password.to_string(),
role: Some(readur::models::UserRole::User),
};
let register_response = self.client
.post(&format!("{}/api/auth/register", BASE_URL))
.json(&user_data)
.send()
.await?;
if !register_response.status().is_success() {
return Err(format!("Registration failed: {}", register_response.text().await?).into());
}
let login_data = LoginRequest {
username: username.to_string(),
password: password.to_string(),
};
let login_response = self.client
.post(&format!("{}/api/auth/login", BASE_URL))
.json(&login_data)
.send()
.await?;
if !login_response.status().is_success() {
return Err(format!("Login failed: {}", login_response.text().await?).into());
}
let login_result: LoginResponse = login_response.json().await?;
self.token = Some(login_result.token.clone());
Ok(login_result.token)
}
/// Upload a document and return its ID and expected content
async fn upload_document(&self, content: &str, filename: &str) -> Result<(Uuid, String), Box<dyn std::error::Error>> {
let token = self.token.as_ref().ok_or("Not authenticated")?;
let part = reqwest::multipart::Part::text(content.to_string())
.file_name(filename.to_string())
.mime_str("text/plain")?;
let form = reqwest::multipart::Form::new()
.part("file", part);
let response = self.client
.post(&format!("{}/api/documents", BASE_URL))
.header("Authorization", format!("Bearer {}", token))
.multipart(form)
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Upload failed: {}", response.text().await?).into());
}
let document: DocumentResponse = response.json().await?;
Ok((document.id, content.to_string()))
}
/// Get document details including OCR status
async fn get_document_details(&self, doc_id: Uuid) -> Result<Value, Box<dyn std::error::Error>> {
let token = self.token.as_ref().ok_or("Not authenticated")?;
let response = self.client
.get(&format!("{}/api/documents/{}/ocr", BASE_URL, doc_id))
.header("Authorization", format!("Bearer {}", token))
.send()
.await?;
if !response.status().is_success() {
return Err(format!("Failed to get document details: {}", response.text().await?).into());
}
let doc_data: Value = response.json().await?;
Ok(doc_data)
}
/// Wait for OCR to complete for a document
async fn wait_for_ocr(&self, doc_id: Uuid) -> Result<Value, Box<dyn std::error::Error>> {
let start = Instant::now();
while start.elapsed() < TIMEOUT {
let doc_data = self.get_document_details(doc_id).await?;
match doc_data["ocr_status"].as_str() {
Some("completed") => {
println!("✅ OCR completed for document {}", doc_id);
return Ok(doc_data);
},
Some("failed") => {
return Err(format!("OCR failed for document {}: {}",
doc_id,
doc_data["ocr_error"].as_str().unwrap_or("unknown error")).into());
},
Some("processing") => {
println!("⏳ OCR still processing for document {}", doc_id);
},
_ => {
println!("📋 Document {} queued for OCR", doc_id);
}
}
sleep(Duration::from_millis(200)).await;
}
Err(format!("OCR did not complete within {} seconds for document {}", TIMEOUT.as_secs(), doc_id).into())
}
/// Upload multiple documents simultaneously and track their OCR results
async fn upload_documents_simultaneously(&self, documents: Vec<(&str, &str)>) -> Result<Vec<(Uuid, String, Value)>, Box<dyn std::error::Error>> {
let mut upload_tasks = Vec::new();
// Upload all documents simultaneously
for (content, filename) in documents {
let content_owned = content.to_string();
let filename_owned = filename.to_string();
let client_ref = self;
let task = async move {
client_ref.upload_document(&content_owned, &filename_owned).await
};
upload_tasks.push(task);
}
// Wait for all uploads to complete
let mut uploaded_docs = Vec::new();
for task in upload_tasks {
let (doc_id, expected_content) = task.await?;
uploaded_docs.push((doc_id, expected_content));
println!("📄 Uploaded document: {}", doc_id);
}
// Now wait for OCR processing on all documents
let mut ocr_tasks = Vec::new();
for (doc_id, expected_content) in uploaded_docs {
let client_ref = self;
let task = async move {
let ocr_result = client_ref.wait_for_ocr(doc_id).await?;
Ok::<(Uuid, String, Value), Box<dyn std::error::Error>>((doc_id, expected_content, ocr_result))
};
ocr_tasks.push(task);
}
// Wait for all OCR to complete
let mut results = Vec::new();
for task in ocr_tasks {
let result = task.await?;
results.push(result);
}
Ok(results)
}
}
#[tokio::test]
async fn test_concurrent_ocr_corruption() {
println!("🧪 Starting OCR corruption test with concurrent file processing");
let mut client = OcrTestClient::new();
// Check server health
if let Err(e) = client.check_server_health().await {
panic!("Server not running at {}: {}", BASE_URL, e);
}
// Create test user
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis();
let username = format!("ocr_corruption_test_{}", timestamp);
let email = format!("ocr_corruption_{}@test.com", timestamp);
let _token = client.register_and_login(&username, &email, "testpass123").await
.expect("Failed to register and login");
println!("✅ User registered: {}", username);
// Create test documents with distinctive content
let file_a_content = r#"
=== DOCUMENT A - IMPORTANT CONTRACT ===
Contract Number: CONTRACT-A-001
Party 1: Alice Corporation
Party 2: Bob Industries
Date: 2024-01-15
Amount: $50,000
Terms: This is the content for Document A. It contains specific legal text
that should remain associated with Document A only. Any corruption would
be immediately visible.
DOCUMENT A SIGNATURE: Alice Smith, CEO
UNIQUE IDENTIFIER FOR A: ALPHA-BRAVO-CHARLIE-001
"#;
let file_b_content = r#"
=== DOCUMENT B - TECHNICAL SPECIFICATION ===
Specification ID: SPEC-B-002
Product: Widget Manufacturing System
Version: 2.0
Author: Technical Team B
Date: 2024-01-16
This is Document B containing technical specifications. It has completely
different content from Document A. If OCR corruption occurs, Document A
might end up with this technical content instead of its contract text.
DOCUMENT B SIGNATURE: Bob Johnson, CTO
UNIQUE IDENTIFIER FOR B: DELTA-ECHO-FOXTROT-002
"#;
// Test documents to upload simultaneously
let documents = vec![
(file_a_content, "contract_a.txt"),
(file_b_content, "specification_b.txt"),
];
println!("📤 Uploading documents simultaneously...");
let results = client.upload_documents_simultaneously(documents).await
.expect("Failed to upload documents simultaneously");
println!("🔍 Analyzing OCR results for corruption...");
let mut corruption_detected = false;
for (doc_id, expected_content, ocr_result) in results {
let actual_ocr_text = ocr_result["ocr_text"].as_str().unwrap_or("");
let filename = ocr_result["filename"].as_str().unwrap_or("unknown");
println!("\n📋 Document: {} ({})", doc_id, filename);
println!("📄 Expected content length: {} chars", expected_content.len());
println!("🔤 Actual OCR text length: {} chars", actual_ocr_text.len());
// Check for content mismatch (corruption)
if filename.contains("contract_a") {
// Document A should contain contract-specific terms
let has_contract_content = actual_ocr_text.contains("CONTRACT-A-001")
&& actual_ocr_text.contains("Alice Corporation")
&& actual_ocr_text.contains("ALPHA-BRAVO-CHARLIE-001");
let has_spec_content = actual_ocr_text.contains("SPEC-B-002")
|| actual_ocr_text.contains("Widget Manufacturing")
|| actual_ocr_text.contains("DELTA-ECHO-FOXTROT-002");
if !has_contract_content {
println!("❌ CORRUPTION DETECTED: Document A missing its original contract content!");
corruption_detected = true;
}
if has_spec_content {
println!("❌ CORRUPTION DETECTED: Document A contains Document B's specification content!");
corruption_detected = true;
}
if has_contract_content && !has_spec_content {
println!("✅ Document A has correct content");
}
} else if filename.contains("specification_b") {
// Document B should contain specification-specific terms
let has_spec_content = actual_ocr_text.contains("SPEC-B-002")
&& actual_ocr_text.contains("Widget Manufacturing")
&& actual_ocr_text.contains("DELTA-ECHO-FOXTROT-002");
let has_contract_content = actual_ocr_text.contains("CONTRACT-A-001")
|| actual_ocr_text.contains("Alice Corporation")
|| actual_ocr_text.contains("ALPHA-BRAVO-CHARLIE-001");
if !has_spec_content {
println!("❌ CORRUPTION DETECTED: Document B missing its original specification content!");
corruption_detected = true;
}
if has_contract_content {
println!("❌ CORRUPTION DETECTED: Document B contains Document A's contract content!");
corruption_detected = true;
}
if has_spec_content && !has_contract_content {
println!("✅ Document B has correct content");
}
}
// Additional integrity checks
if let Some(confidence) = ocr_result["ocr_confidence"].as_f64() {
println!("📊 OCR Confidence: {:.1}%", confidence);
if confidence < 50.0 {
println!("⚠️ Low OCR confidence may indicate processing issues");
}
}
if let Some(word_count) = ocr_result["ocr_word_count"].as_i64() {
println!("📝 OCR Word Count: {}", word_count);
}
if let Some(processing_time) = ocr_result["ocr_processing_time_ms"].as_i64() {
println!("⏱️ OCR Processing Time: {}ms", processing_time);
}
}
if corruption_detected {
panic!("🚨 OCR CORRUPTION DETECTED! FileA's content was overwritten with FileB's data or vice versa.");
} else {
println!("\n🎉 No OCR corruption detected - all documents retained their correct content!");
}
}
#[tokio::test]
async fn test_high_volume_concurrent_ocr() {
println!("🧪 Starting high-volume concurrent OCR test");
let mut client = OcrTestClient::new();
if let Err(e) = client.check_server_health().await {
panic!("Server not running at {}: {}", BASE_URL, e);
}
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis();
let username = format!("high_volume_test_{}", timestamp);
let email = format!("high_volume_{}@test.com", timestamp);
let _token = client.register_and_login(&username, &email, "testpass123").await
.expect("Failed to register and login");
// Create 5 documents with unique identifiable content
let mut documents = Vec::new();
for i in 1..=5 {
let content = format!(r#"
=== DOCUMENT {} - UNIQUE CONTENT ===
Document Number: DOC-{:03}
Unique Signature: SIGNATURE-{}-{}-{}
Content: This is document number {} with completely unique content.
Every document should retain its own unique signature and number.
Any mixing of content between documents indicates corruption.
Random data: {}
End of Document {}
"#, i, i, timestamp, i*7, i, timestamp * i, i, i);
documents.push((content, format!("doc_{}.txt", i)));
}
println!("📤 Uploading {} documents simultaneously...", documents.len());
let documents_ref: Vec<(&str, &str)> = documents.iter()
.map(|(content, filename)| (content.as_str(), filename.as_str()))
.collect();
let results = client.upload_documents_simultaneously(documents_ref).await
.expect("Failed to upload documents simultaneously");
println!("🔍 Analyzing results for content mixing...");
let mut all_signatures = Vec::new();
let mut corruption_found = false;
// Extract all unique signatures
for i in 1..=5 {
all_signatures.push(format!("SIGNATURE-{}-{}-{}", i, timestamp, i*7));
}
// Check each document for corruption
for (doc_id, expected_content, ocr_result) in results {
let actual_ocr_text = ocr_result["ocr_text"].as_str().unwrap_or("");
let filename = ocr_result["filename"].as_str().unwrap_or("unknown");
// Determine which document this should be based on filename
if let Some(doc_num_str) = filename.strip_prefix("doc_").and_then(|s| s.strip_suffix(".txt")) {
if let Ok(doc_num) = doc_num_str.parse::<i32>() {
let expected_signature = format!("SIGNATURE-{}-{}-{}", doc_num, timestamp, doc_num*7);
println!("\n📋 Checking Document {} ({})", doc_num, doc_id);
// Check if it has its own signature
let has_own_signature = actual_ocr_text.contains(&expected_signature);
// Check if it has any other document's signature
let mut has_other_signatures = Vec::new();
for (i, sig) in all_signatures.iter().enumerate() {
if i + 1 != doc_num as usize && actual_ocr_text.contains(sig) {
has_other_signatures.push(i + 1);
}
}
if !has_own_signature {
println!("❌ CORRUPTION: Document {} missing its own signature!", doc_num);
corruption_found = true;
}
if !has_other_signatures.is_empty() {
println!("❌ CORRUPTION: Document {} contains signatures from documents: {:?}", doc_num, has_other_signatures);
corruption_found = true;
}
if has_own_signature && has_other_signatures.is_empty() {
println!("✅ Document {} has correct content", doc_num);
}
}
}
}
if corruption_found {
panic!("🚨 CONTENT CORRUPTION DETECTED in high-volume test!");
} else {
println!("\n🎉 High-volume test passed - no corruption detected!");
}
}
#[tokio::test]
async fn test_rapid_sequential_uploads() {
println!("🧪 Testing rapid sequential uploads for race conditions");
let mut client = OcrTestClient::new();
if let Err(e) = client.check_server_health().await {
panic!("Server not running at {}: {}", BASE_URL, e);
}
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_millis();
let username = format!("rapid_test_{}", timestamp);
let email = format!("rapid_{}@test.com", timestamp);
let _token = client.register_and_login(&username, &email, "testpass123").await
.expect("Failed to register and login");
println!("📤 Uploading documents in rapid sequence...");
// Upload documents one after another with minimal delay
let mut doc_ids = Vec::new();
let mut expected_contents = Vec::new();
for i in 1..=3 {
let content = format!("RAPID-TEST-DOCUMENT-{}-{}-UNIQUE-CONTENT", i, timestamp);
let filename = format!("rapid_{}.txt", i);
let (doc_id, expected) = client.upload_document(&content, &filename).await
.expect("Failed to upload document");
doc_ids.push(doc_id);
expected_contents.push(expected);
println!("📄 Uploaded rapid document {}: {}", i, doc_id);
// Very short delay to create timing pressure
sleep(Duration::from_millis(50)).await;
}
println!("⏳ Waiting for all OCR to complete...");
// Wait for all to complete and check for corruption
for (i, doc_id) in doc_ids.iter().enumerate() {
let ocr_result = client.wait_for_ocr(*doc_id).await
.expect("Failed to wait for OCR");
let actual_text = ocr_result["ocr_text"].as_str().unwrap_or("");
let expected_marker = format!("RAPID-TEST-DOCUMENT-{}", i + 1);
if !actual_text.contains(&expected_marker) {
panic!("🚨 RAPID UPLOAD CORRUPTION: Document {} missing its unique marker '{}'",
doc_id, expected_marker);
}
// Check it doesn't contain other documents' markers
for j in 1..=3 {
if j != (i + 1) {
let other_marker = format!("RAPID-TEST-DOCUMENT-{}", j);
if actual_text.contains(&other_marker) {
panic!("🚨 RAPID UPLOAD CORRUPTION: Document {} contains marker from document {}",
doc_id, j);
}
}
}
println!("✅ Rapid document {} has correct content", i + 1);
}
println!("🎉 Rapid sequential upload test passed!");
}