fix(ci): try to fix the ocr_corruption_tests
This commit is contained in:
parent
48ab02b3a3
commit
44878acb0e
|
|
@ -163,45 +163,110 @@ impl OcrTestClient {
|
||||||
|
|
||||||
/// Upload multiple documents simultaneously and track their OCR results
|
/// Upload multiple documents simultaneously and track their OCR results
|
||||||
async fn upload_documents_simultaneously(&self, documents: Vec<(&str, &str)>) -> Result<Vec<(Uuid, String, Value)>, Box<dyn std::error::Error>> {
|
async fn upload_documents_simultaneously(&self, documents: Vec<(&str, &str)>) -> Result<Vec<(Uuid, String, Value)>, Box<dyn std::error::Error>> {
|
||||||
let mut upload_tasks = Vec::new();
|
use futures::future::join_all;
|
||||||
|
|
||||||
// Upload all documents simultaneously
|
let token = self.token.as_ref().ok_or("Not authenticated")?.clone();
|
||||||
for (content, filename) in documents {
|
|
||||||
|
// Create upload futures
|
||||||
|
let upload_futures: Vec<_> = documents.into_iter()
|
||||||
|
.map(|(content, filename)| {
|
||||||
let content_owned = content.to_string();
|
let content_owned = content.to_string();
|
||||||
let filename_owned = filename.to_string();
|
let filename_owned = filename.to_string();
|
||||||
let client_ref = self;
|
let client = self.client.clone();
|
||||||
|
let token = token.clone();
|
||||||
|
let base_url = get_base_url();
|
||||||
|
|
||||||
let task = async move {
|
async move {
|
||||||
client_ref.upload_document(&content_owned, &filename_owned).await
|
// Create multipart form
|
||||||
};
|
let form = reqwest::multipart::Form::new()
|
||||||
|
.text("file", content_owned.clone())
|
||||||
|
.text("filename", filename_owned);
|
||||||
|
|
||||||
upload_tasks.push(task);
|
let response = client
|
||||||
|
.post(&format!("{}/api/documents", base_url))
|
||||||
|
.header("Authorization", format!("Bearer {}", token))
|
||||||
|
.multipart(form)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
return Err(format!("Upload failed: {}", response.text().await?).into());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for all uploads to complete
|
let document: DocumentResponse = response.json().await?;
|
||||||
|
Ok::<(Uuid, String), Box<dyn std::error::Error>>((document.id, content_owned))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Execute all uploads concurrently
|
||||||
|
let upload_results = join_all(upload_futures).await;
|
||||||
|
|
||||||
|
// Collect successfully uploaded documents
|
||||||
let mut uploaded_docs = Vec::new();
|
let mut uploaded_docs = Vec::new();
|
||||||
for task in upload_tasks {
|
for result in upload_results {
|
||||||
let (doc_id, expected_content) = task.await?;
|
let (doc_id, expected_content) = result?;
|
||||||
uploaded_docs.push((doc_id, expected_content));
|
|
||||||
println!("📄 Uploaded document: {}", doc_id);
|
println!("📄 Uploaded document: {}", doc_id);
|
||||||
|
uploaded_docs.push((doc_id, expected_content));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now wait for OCR processing on all documents
|
// Create OCR waiting futures
|
||||||
let mut ocr_tasks = Vec::new();
|
let ocr_futures: Vec<_> = uploaded_docs.into_iter()
|
||||||
for (doc_id, expected_content) in uploaded_docs {
|
.map(|(doc_id, expected_content)| {
|
||||||
let client_ref = self;
|
let client = self.client.clone();
|
||||||
let task = async move {
|
let token = token.clone();
|
||||||
let ocr_result = client_ref.wait_for_ocr(doc_id).await?;
|
let base_url = get_base_url();
|
||||||
Ok::<(Uuid, String, Value), Box<dyn std::error::Error>>((doc_id, expected_content, ocr_result))
|
|
||||||
};
|
async move {
|
||||||
ocr_tasks.push(task);
|
// Wait for OCR with polling
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
while start.elapsed() < TIMEOUT {
|
||||||
|
let response = client
|
||||||
|
.get(&format!("{}/api/documents/{}/ocr", base_url, doc_id))
|
||||||
|
.header("Authorization", format!("Bearer {}", token))
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
return Err(format!("Failed to get document details: {}", response.text().await?).into());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for all OCR to complete
|
let doc_data: Value = response.json().await?;
|
||||||
|
|
||||||
|
match doc_data["ocr_status"].as_str() {
|
||||||
|
Some("completed") => {
|
||||||
|
println!("✅ OCR completed for document {}", doc_id);
|
||||||
|
return Ok::<(Uuid, String, Value), Box<dyn std::error::Error>>((doc_id, expected_content, doc_data));
|
||||||
|
},
|
||||||
|
Some("failed") => {
|
||||||
|
return Err(format!("OCR failed for document {}: {}",
|
||||||
|
doc_id,
|
||||||
|
doc_data["ocr_error"].as_str().unwrap_or("unknown error")).into());
|
||||||
|
},
|
||||||
|
Some("processing") => {
|
||||||
|
println!("⏳ OCR still processing for document {}", doc_id);
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
println!("📋 Document {} queued for OCR", doc_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(200)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!("OCR did not complete within {} seconds for document {}", TIMEOUT.as_secs(), doc_id).into())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Execute all OCR waiting concurrently
|
||||||
|
let ocr_results = join_all(ocr_futures).await;
|
||||||
|
|
||||||
|
// Collect results
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
for task in ocr_tasks {
|
for result in ocr_results {
|
||||||
let result = task.await?;
|
results.push(result?);
|
||||||
results.push(result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue