feat(server/client): add pagination in client, resolve race condition in server
This commit is contained in:
parent
42bc72ded4
commit
a7cf67f90d
|
|
@ -22,6 +22,10 @@ import {
|
||||||
Divider,
|
Divider,
|
||||||
CircularProgress,
|
CircularProgress,
|
||||||
Alert,
|
Alert,
|
||||||
|
Pagination,
|
||||||
|
FormControl,
|
||||||
|
InputLabel,
|
||||||
|
Select,
|
||||||
} from '@mui/material';
|
} from '@mui/material';
|
||||||
import {
|
import {
|
||||||
GridView as GridViewIcon,
|
GridView as GridViewIcon,
|
||||||
|
|
@ -38,6 +42,8 @@ import {
|
||||||
CalendarToday as DateIcon,
|
CalendarToday as DateIcon,
|
||||||
Storage as SizeIcon,
|
Storage as SizeIcon,
|
||||||
Visibility as ViewIcon,
|
Visibility as ViewIcon,
|
||||||
|
ChevronLeft as ChevronLeftIcon,
|
||||||
|
ChevronRight as ChevronRightIcon,
|
||||||
} from '@mui/icons-material';
|
} from '@mui/icons-material';
|
||||||
import { documentService } from '../services/api';
|
import { documentService } from '../services/api';
|
||||||
import DocumentThumbnail from '../components/DocumentThumbnail';
|
import DocumentThumbnail from '../components/DocumentThumbnail';
|
||||||
|
|
@ -50,9 +56,23 @@ interface Document {
|
||||||
mime_type: string;
|
mime_type: string;
|
||||||
created_at: string;
|
created_at: string;
|
||||||
has_ocr_text?: boolean;
|
has_ocr_text?: boolean;
|
||||||
|
ocr_status?: string;
|
||||||
|
ocr_confidence?: number;
|
||||||
tags: string[];
|
tags: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface PaginationInfo {
|
||||||
|
total: number;
|
||||||
|
limit: number;
|
||||||
|
offset: number;
|
||||||
|
has_more: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface DocumentsResponse {
|
||||||
|
documents: Document[];
|
||||||
|
pagination: PaginationInfo;
|
||||||
|
}
|
||||||
|
|
||||||
type ViewMode = 'grid' | 'list';
|
type ViewMode = 'grid' | 'list';
|
||||||
type SortField = 'created_at' | 'original_filename' | 'file_size';
|
type SortField = 'created_at' | 'original_filename' | 'file_size';
|
||||||
type SortOrder = 'asc' | 'desc';
|
type SortOrder = 'asc' | 'desc';
|
||||||
|
|
@ -60,12 +80,14 @@ type SortOrder = 'asc' | 'desc';
|
||||||
const DocumentsPage: React.FC = () => {
|
const DocumentsPage: React.FC = () => {
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
const [documents, setDocuments] = useState<Document[]>([]);
|
const [documents, setDocuments] = useState<Document[]>([]);
|
||||||
|
const [pagination, setPagination] = useState<PaginationInfo>({ total: 0, limit: 20, offset: 0, has_more: false });
|
||||||
const [loading, setLoading] = useState<boolean>(true);
|
const [loading, setLoading] = useState<boolean>(true);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
const [viewMode, setViewMode] = useState<ViewMode>('grid');
|
const [viewMode, setViewMode] = useState<ViewMode>('grid');
|
||||||
const [searchQuery, setSearchQuery] = useState<string>('');
|
const [searchQuery, setSearchQuery] = useState<string>('');
|
||||||
const [sortBy, setSortBy] = useState<SortField>('created_at');
|
const [sortBy, setSortBy] = useState<SortField>('created_at');
|
||||||
const [sortOrder, setSortOrder] = useState<SortOrder>('desc');
|
const [sortOrder, setSortOrder] = useState<SortOrder>('desc');
|
||||||
|
const [ocrFilter, setOcrFilter] = useState<string>('');
|
||||||
|
|
||||||
// Menu states
|
// Menu states
|
||||||
const [sortMenuAnchor, setSortMenuAnchor] = useState<null | HTMLElement>(null);
|
const [sortMenuAnchor, setSortMenuAnchor] = useState<null | HTMLElement>(null);
|
||||||
|
|
@ -74,13 +96,18 @@ const DocumentsPage: React.FC = () => {
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
fetchDocuments();
|
fetchDocuments();
|
||||||
}, []);
|
}, [pagination.limit, pagination.offset, ocrFilter]);
|
||||||
|
|
||||||
const fetchDocuments = async (): Promise<void> => {
|
const fetchDocuments = async (): Promise<void> => {
|
||||||
try {
|
try {
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
const response = await documentService.list(100, 0);
|
const response = await documentService.listWithPagination(
|
||||||
setDocuments(response.data);
|
pagination.limit,
|
||||||
|
pagination.offset,
|
||||||
|
ocrFilter || undefined
|
||||||
|
);
|
||||||
|
setDocuments(response.data.documents);
|
||||||
|
setPagination(response.data.pagination);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setError('Failed to load documents');
|
setError('Failed to load documents');
|
||||||
console.error(err);
|
console.error(err);
|
||||||
|
|
@ -179,6 +206,39 @@ const DocumentsPage: React.FC = () => {
|
||||||
handleSortMenuClose();
|
handleSortMenuClose();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const handlePageChange = (event: React.ChangeEvent<unknown>, page: number): void => {
|
||||||
|
const newOffset = (page - 1) * pagination.limit;
|
||||||
|
setPagination(prev => ({ ...prev, offset: newOffset }));
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleOcrFilterChange = (event: React.ChangeEvent<HTMLInputElement>): void => {
|
||||||
|
setOcrFilter(event.target.value);
|
||||||
|
setPagination(prev => ({ ...prev, offset: 0 })); // Reset to first page when filtering
|
||||||
|
};
|
||||||
|
|
||||||
|
const getOcrStatusChip = (doc: Document) => {
|
||||||
|
if (!doc.ocr_status) return null;
|
||||||
|
|
||||||
|
const statusConfig = {
|
||||||
|
'completed': { color: 'success' as const, label: doc.ocr_confidence ? `OCR ${Math.round(doc.ocr_confidence)}%` : 'OCR Done' },
|
||||||
|
'processing': { color: 'warning' as const, label: 'Processing...' },
|
||||||
|
'failed': { color: 'error' as const, label: 'OCR Failed' },
|
||||||
|
'pending': { color: 'default' as const, label: 'Pending' },
|
||||||
|
};
|
||||||
|
|
||||||
|
const config = statusConfig[doc.ocr_status as keyof typeof statusConfig];
|
||||||
|
if (!config) return null;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Chip
|
||||||
|
label={config.label}
|
||||||
|
size="small"
|
||||||
|
color={config.color}
|
||||||
|
variant="outlined"
|
||||||
|
/>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
if (loading) {
|
if (loading) {
|
||||||
return (
|
return (
|
||||||
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
|
<Box display="flex" justifyContent="center" alignItems="center" minHeight="400px">
|
||||||
|
|
@ -257,6 +317,22 @@ const DocumentsPage: React.FC = () => {
|
||||||
</ToggleButton>
|
</ToggleButton>
|
||||||
</ToggleButtonGroup>
|
</ToggleButtonGroup>
|
||||||
|
|
||||||
|
{/* OCR Filter */}
|
||||||
|
<FormControl size="small" sx={{ minWidth: 120 }}>
|
||||||
|
<InputLabel>OCR Status</InputLabel>
|
||||||
|
<Select
|
||||||
|
value={ocrFilter}
|
||||||
|
label="OCR Status"
|
||||||
|
onChange={handleOcrFilterChange}
|
||||||
|
>
|
||||||
|
<MenuItem value="">All</MenuItem>
|
||||||
|
<MenuItem value="completed">Completed</MenuItem>
|
||||||
|
<MenuItem value="processing">Processing</MenuItem>
|
||||||
|
<MenuItem value="failed">Failed</MenuItem>
|
||||||
|
<MenuItem value="pending">Pending</MenuItem>
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
|
||||||
{/* Sort Button */}
|
{/* Sort Button */}
|
||||||
<Button
|
<Button
|
||||||
variant="outlined"
|
variant="outlined"
|
||||||
|
|
@ -418,14 +494,7 @@ const DocumentsPage: React.FC = () => {
|
||||||
size="small"
|
size="small"
|
||||||
variant="outlined"
|
variant="outlined"
|
||||||
/>
|
/>
|
||||||
{doc.has_ocr_text && (
|
{getOcrStatusChip(doc)}
|
||||||
<Chip
|
|
||||||
label="OCR"
|
|
||||||
size="small"
|
|
||||||
color="success"
|
|
||||||
variant="outlined"
|
|
||||||
/>
|
|
||||||
)}
|
|
||||||
</Stack>
|
</Stack>
|
||||||
|
|
||||||
{doc.tags.length > 0 && (
|
{doc.tags.length > 0 && (
|
||||||
|
|
@ -489,12 +558,29 @@ const DocumentsPage: React.FC = () => {
|
||||||
</Grid>
|
</Grid>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Results count */}
|
{/* Results count and pagination */}
|
||||||
<Box sx={{ mt: 3, textAlign: 'center' }}>
|
<Box sx={{ mt: 3 }}>
|
||||||
<Typography variant="body2" color="text.secondary">
|
<Box sx={{ textAlign: 'center', mb: 2 }}>
|
||||||
Showing {sortedDocuments.length} of {documents.length} documents
|
<Typography variant="body2" color="text.secondary">
|
||||||
{searchQuery && ` matching "${searchQuery}"`}
|
Showing {pagination.offset + 1}-{Math.min(pagination.offset + pagination.limit, pagination.total)} of {pagination.total} documents
|
||||||
</Typography>
|
{ocrFilter && ` with OCR status: ${ocrFilter}`}
|
||||||
|
{searchQuery && ` matching "${searchQuery}"`}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{pagination.total > pagination.limit && (
|
||||||
|
<Box sx={{ display: 'flex', justifyContent: 'center' }}>
|
||||||
|
<Pagination
|
||||||
|
count={Math.ceil(pagination.total / pagination.limit)}
|
||||||
|
page={Math.floor(pagination.offset / pagination.limit) + 1}
|
||||||
|
onChange={handlePageChange}
|
||||||
|
color="primary"
|
||||||
|
size="large"
|
||||||
|
showFirstButton
|
||||||
|
showLastButton
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
</Box>
|
</Box>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,16 @@ export const documentService = {
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
|
|
||||||
|
listWithPagination: (limit = 20, offset = 0, ocrStatus?: string) => {
|
||||||
|
const params: any = { limit, offset };
|
||||||
|
if (ocrStatus) {
|
||||||
|
params.ocr_status = ocrStatus;
|
||||||
|
}
|
||||||
|
return api.get<{documents: Document[], pagination: {total: number, limit: number, offset: number, has_more: boolean}}>('/documents', {
|
||||||
|
params,
|
||||||
|
})
|
||||||
|
},
|
||||||
|
|
||||||
getById: (id: string) => {
|
getById: (id: string) => {
|
||||||
// Use the document list endpoint with pagination to find the specific document
|
// Use the document list endpoint with pagination to find the specific document
|
||||||
// This is a temporary solution until we have a proper document details endpoint
|
// This is a temporary solution until we have a proper document details endpoint
|
||||||
|
|
|
||||||
141
src/db.rs
141
src/db.rs
|
|
@ -503,6 +503,147 @@ impl Database {
|
||||||
Ok(documents)
|
Ok(documents)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn get_documents_by_user_with_role_and_filter(&self, user_id: Uuid, user_role: crate::models::UserRole, limit: i64, offset: i64, ocr_status: Option<&str>) -> Result<Vec<Document>> {
|
||||||
|
let rows = match (user_role == crate::models::UserRole::Admin, ocr_status) {
|
||||||
|
(true, Some(status)) => {
|
||||||
|
// Admin with OCR filter
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
|
||||||
|
FROM documents
|
||||||
|
WHERE ocr_status = $3
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $1 OFFSET $2
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.bind(status)
|
||||||
|
.fetch_all(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
(true, None) => {
|
||||||
|
// Admin without OCR filter
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
|
||||||
|
FROM documents
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $1 OFFSET $2
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.fetch_all(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
(false, Some(status)) => {
|
||||||
|
// Regular user with OCR filter
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
|
||||||
|
FROM documents
|
||||||
|
WHERE user_id = $3 AND ocr_status = $4
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $1 OFFSET $2
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(status)
|
||||||
|
.fetch_all(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
(false, None) => {
|
||||||
|
// Regular user without OCR filter
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, ocr_confidence, ocr_word_count, ocr_processing_time_ms, ocr_status, ocr_error, ocr_completed_at, tags, created_at, updated_at, user_id
|
||||||
|
FROM documents
|
||||||
|
WHERE user_id = $3
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $1 OFFSET $2
|
||||||
|
"#
|
||||||
|
)
|
||||||
|
.bind(limit)
|
||||||
|
.bind(offset)
|
||||||
|
.bind(user_id)
|
||||||
|
.fetch_all(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let documents = rows
|
||||||
|
.into_iter()
|
||||||
|
.map(|row| Document {
|
||||||
|
id: row.get("id"),
|
||||||
|
filename: row.get("filename"),
|
||||||
|
original_filename: row.get("original_filename"),
|
||||||
|
file_path: row.get("file_path"),
|
||||||
|
file_size: row.get("file_size"),
|
||||||
|
mime_type: row.get("mime_type"),
|
||||||
|
content: row.get("content"),
|
||||||
|
ocr_text: row.get("ocr_text"),
|
||||||
|
ocr_confidence: row.get("ocr_confidence"),
|
||||||
|
ocr_word_count: row.get("ocr_word_count"),
|
||||||
|
ocr_processing_time_ms: row.get("ocr_processing_time_ms"),
|
||||||
|
ocr_status: row.get("ocr_status"),
|
||||||
|
ocr_error: row.get("ocr_error"),
|
||||||
|
ocr_completed_at: row.get("ocr_completed_at"),
|
||||||
|
tags: row.get("tags"),
|
||||||
|
created_at: row.get("created_at"),
|
||||||
|
updated_at: row.get("updated_at"),
|
||||||
|
user_id: row.get("user_id"),
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(documents)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get_documents_count_with_role_and_filter(&self, user_id: Uuid, user_role: crate::models::UserRole, ocr_status: Option<&str>) -> Result<i64> {
|
||||||
|
let count = match (user_role == crate::models::UserRole::Admin, ocr_status) {
|
||||||
|
(true, Some(status)) => {
|
||||||
|
// Admin with OCR filter
|
||||||
|
sqlx::query_scalar::<_, i64>(
|
||||||
|
"SELECT COUNT(*) FROM documents WHERE ocr_status = $1"
|
||||||
|
)
|
||||||
|
.bind(status)
|
||||||
|
.fetch_one(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
(true, None) => {
|
||||||
|
// Admin without OCR filter
|
||||||
|
sqlx::query_scalar::<_, i64>(
|
||||||
|
"SELECT COUNT(*) FROM documents"
|
||||||
|
)
|
||||||
|
.fetch_one(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
(false, Some(status)) => {
|
||||||
|
// Regular user with OCR filter
|
||||||
|
sqlx::query_scalar::<_, i64>(
|
||||||
|
"SELECT COUNT(*) FROM documents WHERE user_id = $1 AND ocr_status = $2"
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.bind(status)
|
||||||
|
.fetch_one(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
(false, None) => {
|
||||||
|
// Regular user without OCR filter
|
||||||
|
sqlx::query_scalar::<_, i64>(
|
||||||
|
"SELECT COUNT(*) FROM documents WHERE user_id = $1"
|
||||||
|
)
|
||||||
|
.bind(user_id)
|
||||||
|
.fetch_one(&self.pool)
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(count)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
|
pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result<Vec<Document>> {
|
||||||
let rows = sqlx::query(
|
let rows = sqlx::query(
|
||||||
r#"
|
r#"
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ use crate::{
|
||||||
struct PaginationQuery {
|
struct PaginationQuery {
|
||||||
limit: Option<i64>,
|
limit: Option<i64>,
|
||||||
offset: Option<i64>,
|
offset: Option<i64>,
|
||||||
|
ocr_status: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn router() -> Router<Arc<AppState>> {
|
pub fn router() -> Router<Arc<AppState>> {
|
||||||
|
|
@ -174,7 +175,8 @@ fn calculate_file_hash(data: &[u8]) -> String {
|
||||||
),
|
),
|
||||||
params(
|
params(
|
||||||
("limit" = Option<i64>, Query, description = "Number of documents to return (default: 50)"),
|
("limit" = Option<i64>, Query, description = "Number of documents to return (default: 50)"),
|
||||||
("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)")
|
("offset" = Option<i64>, Query, description = "Number of documents to skip (default: 0)"),
|
||||||
|
("ocr_status" = Option<String>, Query, description = "Filter by OCR status (pending, processing, completed, failed)")
|
||||||
),
|
),
|
||||||
responses(
|
responses(
|
||||||
(status = 200, description = "List of user documents", body = Vec<DocumentResponse>),
|
(status = 200, description = "List of user documents", body = Vec<DocumentResponse>),
|
||||||
|
|
@ -185,17 +187,40 @@ async fn list_documents(
|
||||||
State(state): State<Arc<AppState>>,
|
State(state): State<Arc<AppState>>,
|
||||||
auth_user: AuthUser,
|
auth_user: AuthUser,
|
||||||
Query(pagination): Query<PaginationQuery>,
|
Query(pagination): Query<PaginationQuery>,
|
||||||
) -> Result<Json<Vec<DocumentResponse>>, StatusCode> {
|
) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||||
let limit = pagination.limit.unwrap_or(50);
|
let limit = pagination.limit.unwrap_or(50);
|
||||||
let offset = pagination.offset.unwrap_or(0);
|
let offset = pagination.offset.unwrap_or(0);
|
||||||
|
|
||||||
let documents = state
|
let user_id = auth_user.user.id;
|
||||||
.db
|
let user_role = auth_user.user.role;
|
||||||
.get_documents_by_user_with_role(auth_user.user.id, auth_user.user.role, limit, offset)
|
let ocr_filter = pagination.ocr_status.as_deref();
|
||||||
.await
|
|
||||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
|
||||||
|
|
||||||
let response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
|
let (documents, total_count) = tokio::try_join!(
|
||||||
|
state.db.get_documents_by_user_with_role_and_filter(
|
||||||
|
user_id,
|
||||||
|
user_role.clone(),
|
||||||
|
limit,
|
||||||
|
offset,
|
||||||
|
ocr_filter
|
||||||
|
),
|
||||||
|
state.db.get_documents_count_with_role_and_filter(
|
||||||
|
user_id,
|
||||||
|
user_role,
|
||||||
|
ocr_filter
|
||||||
|
)
|
||||||
|
).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||||
|
|
||||||
|
let documents_response: Vec<DocumentResponse> = documents.into_iter().map(|doc| doc.into()).collect();
|
||||||
|
|
||||||
|
let response = serde_json::json!({
|
||||||
|
"documents": documents_response,
|
||||||
|
"pagination": {
|
||||||
|
"total": total_count,
|
||||||
|
"limit": limit,
|
||||||
|
"offset": offset,
|
||||||
|
"has_more": offset + limit < total_count
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
Ok(Json(response))
|
Ok(Json(response))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,536 @@
|
||||||
|
/*!
|
||||||
|
* OCR Corruption Integration Tests
|
||||||
|
*
|
||||||
|
* Tests for diagnosing and reproducing the issue where FileA's OCR text
|
||||||
|
* gets corrupted when FileB is processed simultaneously.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use tokio::time::sleep;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use readur::models::{DocumentResponse, CreateUser, LoginRequest, LoginResponse};
|
||||||
|
|
||||||
|
const BASE_URL: &str = "http://localhost:8000";
|
||||||
|
const TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
|
|
||||||
|
/// Test client for OCR corruption scenarios
|
||||||
|
struct OcrTestClient {
|
||||||
|
client: Client,
|
||||||
|
token: Option<String>,
|
||||||
|
user_id: Option<Uuid>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OcrTestClient {
|
||||||
|
fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
client: Client::new(),
|
||||||
|
token: None,
|
||||||
|
user_id: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn check_server_health(&self) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let response = self.client
|
||||||
|
.get(&format!("{}/api/health", BASE_URL))
|
||||||
|
.timeout(Duration::from_secs(5))
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
return Err("Server health check failed".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn register_and_login(&mut self, username: &str, email: &str, password: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||||
|
let user_data = CreateUser {
|
||||||
|
username: username.to_string(),
|
||||||
|
email: email.to_string(),
|
||||||
|
password: password.to_string(),
|
||||||
|
role: Some(readur::models::UserRole::User),
|
||||||
|
};
|
||||||
|
|
||||||
|
let register_response = self.client
|
||||||
|
.post(&format!("{}/api/auth/register", BASE_URL))
|
||||||
|
.json(&user_data)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !register_response.status().is_success() {
|
||||||
|
return Err(format!("Registration failed: {}", register_response.text().await?).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let login_data = LoginRequest {
|
||||||
|
username: username.to_string(),
|
||||||
|
password: password.to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let login_response = self.client
|
||||||
|
.post(&format!("{}/api/auth/login", BASE_URL))
|
||||||
|
.json(&login_data)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !login_response.status().is_success() {
|
||||||
|
return Err(format!("Login failed: {}", login_response.text().await?).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let login_result: LoginResponse = login_response.json().await?;
|
||||||
|
self.token = Some(login_result.token.clone());
|
||||||
|
|
||||||
|
Ok(login_result.token)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Upload a document and return its ID and expected content
|
||||||
|
async fn upload_document(&self, content: &str, filename: &str) -> Result<(Uuid, String), Box<dyn std::error::Error>> {
|
||||||
|
let token = self.token.as_ref().ok_or("Not authenticated")?;
|
||||||
|
|
||||||
|
let part = reqwest::multipart::Part::text(content.to_string())
|
||||||
|
.file_name(filename.to_string())
|
||||||
|
.mime_str("text/plain")?;
|
||||||
|
let form = reqwest::multipart::Form::new()
|
||||||
|
.part("file", part);
|
||||||
|
|
||||||
|
let response = self.client
|
||||||
|
.post(&format!("{}/api/documents", BASE_URL))
|
||||||
|
.header("Authorization", format!("Bearer {}", token))
|
||||||
|
.multipart(form)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
return Err(format!("Upload failed: {}", response.text().await?).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let document: DocumentResponse = response.json().await?;
|
||||||
|
Ok((document.id, content.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get document details including OCR status
|
||||||
|
async fn get_document_details(&self, doc_id: Uuid) -> Result<Value, Box<dyn std::error::Error>> {
|
||||||
|
let token = self.token.as_ref().ok_or("Not authenticated")?;
|
||||||
|
|
||||||
|
let response = self.client
|
||||||
|
.get(&format!("{}/api/documents/{}/ocr", BASE_URL, doc_id))
|
||||||
|
.header("Authorization", format!("Bearer {}", token))
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
return Err(format!("Failed to get document details: {}", response.text().await?).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let doc_data: Value = response.json().await?;
|
||||||
|
Ok(doc_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wait for OCR to complete for a document
|
||||||
|
async fn wait_for_ocr(&self, doc_id: Uuid) -> Result<Value, Box<dyn std::error::Error>> {
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
while start.elapsed() < TIMEOUT {
|
||||||
|
let doc_data = self.get_document_details(doc_id).await?;
|
||||||
|
|
||||||
|
match doc_data["ocr_status"].as_str() {
|
||||||
|
Some("completed") => {
|
||||||
|
println!("✅ OCR completed for document {}", doc_id);
|
||||||
|
return Ok(doc_data);
|
||||||
|
},
|
||||||
|
Some("failed") => {
|
||||||
|
return Err(format!("OCR failed for document {}: {}",
|
||||||
|
doc_id,
|
||||||
|
doc_data["ocr_error"].as_str().unwrap_or("unknown error")).into());
|
||||||
|
},
|
||||||
|
Some("processing") => {
|
||||||
|
println!("⏳ OCR still processing for document {}", doc_id);
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
println!("📋 Document {} queued for OCR", doc_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(200)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!("OCR did not complete within {} seconds for document {}", TIMEOUT.as_secs(), doc_id).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Upload multiple documents simultaneously and track their OCR results
|
||||||
|
async fn upload_documents_simultaneously(&self, documents: Vec<(&str, &str)>) -> Result<Vec<(Uuid, String, Value)>, Box<dyn std::error::Error>> {
|
||||||
|
let mut upload_tasks = Vec::new();
|
||||||
|
|
||||||
|
// Upload all documents simultaneously
|
||||||
|
for (content, filename) in documents {
|
||||||
|
let content_owned = content.to_string();
|
||||||
|
let filename_owned = filename.to_string();
|
||||||
|
let client_ref = self;
|
||||||
|
|
||||||
|
let task = async move {
|
||||||
|
client_ref.upload_document(&content_owned, &filename_owned).await
|
||||||
|
};
|
||||||
|
|
||||||
|
upload_tasks.push(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all uploads to complete
|
||||||
|
let mut uploaded_docs = Vec::new();
|
||||||
|
for task in upload_tasks {
|
||||||
|
let (doc_id, expected_content) = task.await?;
|
||||||
|
uploaded_docs.push((doc_id, expected_content));
|
||||||
|
println!("📄 Uploaded document: {}", doc_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now wait for OCR processing on all documents
|
||||||
|
let mut ocr_tasks = Vec::new();
|
||||||
|
for (doc_id, expected_content) in uploaded_docs {
|
||||||
|
let client_ref = self;
|
||||||
|
let task = async move {
|
||||||
|
let ocr_result = client_ref.wait_for_ocr(doc_id).await?;
|
||||||
|
Ok::<(Uuid, String, Value), Box<dyn std::error::Error>>((doc_id, expected_content, ocr_result))
|
||||||
|
};
|
||||||
|
ocr_tasks.push(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all OCR to complete
|
||||||
|
let mut results = Vec::new();
|
||||||
|
for task in ocr_tasks {
|
||||||
|
let result = task.await?;
|
||||||
|
results.push(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(results)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_concurrent_ocr_corruption() {
|
||||||
|
println!("🧪 Starting OCR corruption test with concurrent file processing");
|
||||||
|
|
||||||
|
let mut client = OcrTestClient::new();
|
||||||
|
|
||||||
|
// Check server health
|
||||||
|
if let Err(e) = client.check_server_health().await {
|
||||||
|
panic!("Server not running at {}: {}", BASE_URL, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create test user
|
||||||
|
let timestamp = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_millis();
|
||||||
|
let username = format!("ocr_corruption_test_{}", timestamp);
|
||||||
|
let email = format!("ocr_corruption_{}@test.com", timestamp);
|
||||||
|
|
||||||
|
let _token = client.register_and_login(&username, &email, "testpass123").await
|
||||||
|
.expect("Failed to register and login");
|
||||||
|
|
||||||
|
println!("✅ User registered: {}", username);
|
||||||
|
|
||||||
|
// Create test documents with distinctive content
|
||||||
|
let file_a_content = r#"
|
||||||
|
=== DOCUMENT A - IMPORTANT CONTRACT ===
|
||||||
|
Contract Number: CONTRACT-A-001
|
||||||
|
Party 1: Alice Corporation
|
||||||
|
Party 2: Bob Industries
|
||||||
|
Date: 2024-01-15
|
||||||
|
Amount: $50,000
|
||||||
|
Terms: This is the content for Document A. It contains specific legal text
|
||||||
|
that should remain associated with Document A only. Any corruption would
|
||||||
|
be immediately visible.
|
||||||
|
|
||||||
|
DOCUMENT A SIGNATURE: Alice Smith, CEO
|
||||||
|
UNIQUE IDENTIFIER FOR A: ALPHA-BRAVO-CHARLIE-001
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let file_b_content = r#"
|
||||||
|
=== DOCUMENT B - TECHNICAL SPECIFICATION ===
|
||||||
|
Specification ID: SPEC-B-002
|
||||||
|
Product: Widget Manufacturing System
|
||||||
|
Version: 2.0
|
||||||
|
Author: Technical Team B
|
||||||
|
Date: 2024-01-16
|
||||||
|
|
||||||
|
This is Document B containing technical specifications. It has completely
|
||||||
|
different content from Document A. If OCR corruption occurs, Document A
|
||||||
|
might end up with this technical content instead of its contract text.
|
||||||
|
|
||||||
|
DOCUMENT B SIGNATURE: Bob Johnson, CTO
|
||||||
|
UNIQUE IDENTIFIER FOR B: DELTA-ECHO-FOXTROT-002
|
||||||
|
"#;
|
||||||
|
|
||||||
|
// Test documents to upload simultaneously
|
||||||
|
let documents = vec![
|
||||||
|
(file_a_content, "contract_a.txt"),
|
||||||
|
(file_b_content, "specification_b.txt"),
|
||||||
|
];
|
||||||
|
|
||||||
|
println!("📤 Uploading documents simultaneously...");
|
||||||
|
|
||||||
|
let results = client.upload_documents_simultaneously(documents).await
|
||||||
|
.expect("Failed to upload documents simultaneously");
|
||||||
|
|
||||||
|
println!("🔍 Analyzing OCR results for corruption...");
|
||||||
|
|
||||||
|
let mut corruption_detected = false;
|
||||||
|
|
||||||
|
for (doc_id, expected_content, ocr_result) in results {
|
||||||
|
let actual_ocr_text = ocr_result["ocr_text"].as_str().unwrap_or("");
|
||||||
|
let filename = ocr_result["filename"].as_str().unwrap_or("unknown");
|
||||||
|
|
||||||
|
println!("\n📋 Document: {} ({})", doc_id, filename);
|
||||||
|
println!("📄 Expected content length: {} chars", expected_content.len());
|
||||||
|
println!("🔤 Actual OCR text length: {} chars", actual_ocr_text.len());
|
||||||
|
|
||||||
|
// Check for content mismatch (corruption)
|
||||||
|
if filename.contains("contract_a") {
|
||||||
|
// Document A should contain contract-specific terms
|
||||||
|
let has_contract_content = actual_ocr_text.contains("CONTRACT-A-001")
|
||||||
|
&& actual_ocr_text.contains("Alice Corporation")
|
||||||
|
&& actual_ocr_text.contains("ALPHA-BRAVO-CHARLIE-001");
|
||||||
|
|
||||||
|
let has_spec_content = actual_ocr_text.contains("SPEC-B-002")
|
||||||
|
|| actual_ocr_text.contains("Widget Manufacturing")
|
||||||
|
|| actual_ocr_text.contains("DELTA-ECHO-FOXTROT-002");
|
||||||
|
|
||||||
|
if !has_contract_content {
|
||||||
|
println!("❌ CORRUPTION DETECTED: Document A missing its original contract content!");
|
||||||
|
corruption_detected = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_spec_content {
|
||||||
|
println!("❌ CORRUPTION DETECTED: Document A contains Document B's specification content!");
|
||||||
|
corruption_detected = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_contract_content && !has_spec_content {
|
||||||
|
println!("✅ Document A has correct content");
|
||||||
|
}
|
||||||
|
} else if filename.contains("specification_b") {
|
||||||
|
// Document B should contain specification-specific terms
|
||||||
|
let has_spec_content = actual_ocr_text.contains("SPEC-B-002")
|
||||||
|
&& actual_ocr_text.contains("Widget Manufacturing")
|
||||||
|
&& actual_ocr_text.contains("DELTA-ECHO-FOXTROT-002");
|
||||||
|
|
||||||
|
let has_contract_content = actual_ocr_text.contains("CONTRACT-A-001")
|
||||||
|
|| actual_ocr_text.contains("Alice Corporation")
|
||||||
|
|| actual_ocr_text.contains("ALPHA-BRAVO-CHARLIE-001");
|
||||||
|
|
||||||
|
if !has_spec_content {
|
||||||
|
println!("❌ CORRUPTION DETECTED: Document B missing its original specification content!");
|
||||||
|
corruption_detected = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_contract_content {
|
||||||
|
println!("❌ CORRUPTION DETECTED: Document B contains Document A's contract content!");
|
||||||
|
corruption_detected = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_spec_content && !has_contract_content {
|
||||||
|
println!("✅ Document B has correct content");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Additional integrity checks
|
||||||
|
if let Some(confidence) = ocr_result["ocr_confidence"].as_f64() {
|
||||||
|
println!("📊 OCR Confidence: {:.1}%", confidence);
|
||||||
|
if confidence < 50.0 {
|
||||||
|
println!("⚠️ Low OCR confidence may indicate processing issues");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(word_count) = ocr_result["ocr_word_count"].as_i64() {
|
||||||
|
println!("📝 OCR Word Count: {}", word_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(processing_time) = ocr_result["ocr_processing_time_ms"].as_i64() {
|
||||||
|
println!("⏱️ OCR Processing Time: {}ms", processing_time);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if corruption_detected {
|
||||||
|
panic!("🚨 OCR CORRUPTION DETECTED! FileA's content was overwritten with FileB's data or vice versa.");
|
||||||
|
} else {
|
||||||
|
println!("\n🎉 No OCR corruption detected - all documents retained their correct content!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_high_volume_concurrent_ocr() {
|
||||||
|
println!("🧪 Starting high-volume concurrent OCR test");
|
||||||
|
|
||||||
|
let mut client = OcrTestClient::new();
|
||||||
|
|
||||||
|
if let Err(e) = client.check_server_health().await {
|
||||||
|
panic!("Server not running at {}: {}", BASE_URL, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
let timestamp = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_millis();
|
||||||
|
let username = format!("high_volume_test_{}", timestamp);
|
||||||
|
let email = format!("high_volume_{}@test.com", timestamp);
|
||||||
|
|
||||||
|
let _token = client.register_and_login(&username, &email, "testpass123").await
|
||||||
|
.expect("Failed to register and login");
|
||||||
|
|
||||||
|
// Create 5 documents with unique identifiable content
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
for i in 1..=5 {
|
||||||
|
let content = format!(r#"
|
||||||
|
=== DOCUMENT {} - UNIQUE CONTENT ===
|
||||||
|
Document Number: DOC-{:03}
|
||||||
|
Unique Signature: SIGNATURE-{}-{}-{}
|
||||||
|
Content: This is document number {} with completely unique content.
|
||||||
|
Every document should retain its own unique signature and number.
|
||||||
|
Any mixing of content between documents indicates corruption.
|
||||||
|
Random data: {}
|
||||||
|
End of Document {}
|
||||||
|
"#, i, i, timestamp, i*7, i, timestamp * i, i, i);
|
||||||
|
|
||||||
|
documents.push((content, format!("doc_{}.txt", i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("📤 Uploading {} documents simultaneously...", documents.len());
|
||||||
|
|
||||||
|
let documents_ref: Vec<(&str, &str)> = documents.iter()
|
||||||
|
.map(|(content, filename)| (content.as_str(), filename.as_str()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let results = client.upload_documents_simultaneously(documents_ref).await
|
||||||
|
.expect("Failed to upload documents simultaneously");
|
||||||
|
|
||||||
|
println!("🔍 Analyzing results for content mixing...");
|
||||||
|
|
||||||
|
let mut all_signatures = Vec::new();
|
||||||
|
let mut corruption_found = false;
|
||||||
|
|
||||||
|
// Extract all unique signatures
|
||||||
|
for i in 1..=5 {
|
||||||
|
all_signatures.push(format!("SIGNATURE-{}-{}-{}", i, timestamp, i*7));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check each document for corruption
|
||||||
|
for (doc_id, expected_content, ocr_result) in results {
|
||||||
|
let actual_ocr_text = ocr_result["ocr_text"].as_str().unwrap_or("");
|
||||||
|
let filename = ocr_result["filename"].as_str().unwrap_or("unknown");
|
||||||
|
|
||||||
|
// Determine which document this should be based on filename
|
||||||
|
if let Some(doc_num_str) = filename.strip_prefix("doc_").and_then(|s| s.strip_suffix(".txt")) {
|
||||||
|
if let Ok(doc_num) = doc_num_str.parse::<i32>() {
|
||||||
|
let expected_signature = format!("SIGNATURE-{}-{}-{}", doc_num, timestamp, doc_num*7);
|
||||||
|
|
||||||
|
println!("\n📋 Checking Document {} ({})", doc_num, doc_id);
|
||||||
|
|
||||||
|
// Check if it has its own signature
|
||||||
|
let has_own_signature = actual_ocr_text.contains(&expected_signature);
|
||||||
|
|
||||||
|
// Check if it has any other document's signature
|
||||||
|
let mut has_other_signatures = Vec::new();
|
||||||
|
for (i, sig) in all_signatures.iter().enumerate() {
|
||||||
|
if i + 1 != doc_num as usize && actual_ocr_text.contains(sig) {
|
||||||
|
has_other_signatures.push(i + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !has_own_signature {
|
||||||
|
println!("❌ CORRUPTION: Document {} missing its own signature!", doc_num);
|
||||||
|
corruption_found = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !has_other_signatures.is_empty() {
|
||||||
|
println!("❌ CORRUPTION: Document {} contains signatures from documents: {:?}", doc_num, has_other_signatures);
|
||||||
|
corruption_found = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if has_own_signature && has_other_signatures.is_empty() {
|
||||||
|
println!("✅ Document {} has correct content", doc_num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if corruption_found {
|
||||||
|
panic!("🚨 CONTENT CORRUPTION DETECTED in high-volume test!");
|
||||||
|
} else {
|
||||||
|
println!("\n🎉 High-volume test passed - no corruption detected!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_rapid_sequential_uploads() {
|
||||||
|
println!("🧪 Testing rapid sequential uploads for race conditions");
|
||||||
|
|
||||||
|
let mut client = OcrTestClient::new();
|
||||||
|
|
||||||
|
if let Err(e) = client.check_server_health().await {
|
||||||
|
panic!("Server not running at {}: {}", BASE_URL, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
let timestamp = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_millis();
|
||||||
|
let username = format!("rapid_test_{}", timestamp);
|
||||||
|
let email = format!("rapid_{}@test.com", timestamp);
|
||||||
|
|
||||||
|
let _token = client.register_and_login(&username, &email, "testpass123").await
|
||||||
|
.expect("Failed to register and login");
|
||||||
|
|
||||||
|
println!("📤 Uploading documents in rapid sequence...");
|
||||||
|
|
||||||
|
// Upload documents one after another with minimal delay
|
||||||
|
let mut doc_ids = Vec::new();
|
||||||
|
let mut expected_contents = Vec::new();
|
||||||
|
|
||||||
|
for i in 1..=3 {
|
||||||
|
let content = format!("RAPID-TEST-DOCUMENT-{}-{}-UNIQUE-CONTENT", i, timestamp);
|
||||||
|
let filename = format!("rapid_{}.txt", i);
|
||||||
|
|
||||||
|
let (doc_id, expected) = client.upload_document(&content, &filename).await
|
||||||
|
.expect("Failed to upload document");
|
||||||
|
|
||||||
|
doc_ids.push(doc_id);
|
||||||
|
expected_contents.push(expected);
|
||||||
|
|
||||||
|
println!("📄 Uploaded rapid document {}: {}", i, doc_id);
|
||||||
|
|
||||||
|
// Very short delay to create timing pressure
|
||||||
|
sleep(Duration::from_millis(50)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("⏳ Waiting for all OCR to complete...");
|
||||||
|
|
||||||
|
// Wait for all to complete and check for corruption
|
||||||
|
for (i, doc_id) in doc_ids.iter().enumerate() {
|
||||||
|
let ocr_result = client.wait_for_ocr(*doc_id).await
|
||||||
|
.expect("Failed to wait for OCR");
|
||||||
|
|
||||||
|
let actual_text = ocr_result["ocr_text"].as_str().unwrap_or("");
|
||||||
|
let expected_marker = format!("RAPID-TEST-DOCUMENT-{}", i + 1);
|
||||||
|
|
||||||
|
if !actual_text.contains(&expected_marker) {
|
||||||
|
panic!("🚨 RAPID UPLOAD CORRUPTION: Document {} missing its unique marker '{}'",
|
||||||
|
doc_id, expected_marker);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check it doesn't contain other documents' markers
|
||||||
|
for j in 1..=3 {
|
||||||
|
if j != (i + 1) {
|
||||||
|
let other_marker = format!("RAPID-TEST-DOCUMENT-{}", j);
|
||||||
|
if actual_text.contains(&other_marker) {
|
||||||
|
panic!("🚨 RAPID UPLOAD CORRUPTION: Document {} contains marker from document {}",
|
||||||
|
doc_id, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("✅ Rapid document {} has correct content", i + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("🎉 Rapid sequential upload test passed!");
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue