From e03e9daeed0a5dcb18bbd80585fda5385aa687d9 Mon Sep 17 00:00:00 2001 From: perfectra1n Date: Wed, 11 Jun 2025 22:10:02 -0700 Subject: [PATCH] feat(client/server): search works again, maybe only works after restart? --- frontend/src/pages/SearchPage.jsx | 446 +++++++++-- .../src/pages/__tests__/SearchPage.test.jsx | 460 ++++++++++++ frontend/src/services/api.ts | 43 +- src/db.rs | 165 ++++- src/models.rs | 53 +- src/routes/search.rs | 68 +- src/tests/db_tests.rs | 3 + src/tests/enhanced_search_tests.rs | 695 ++++++++++++++++++ src/tests/mod.rs | 1 + 9 files changed, 1860 insertions(+), 74 deletions(-) create mode 100644 frontend/src/pages/__tests__/SearchPage.test.jsx create mode 100644 src/tests/enhanced_search_tests.rs diff --git a/frontend/src/pages/SearchPage.jsx b/frontend/src/pages/SearchPage.jsx index 56a4f60..95ec5f3 100644 --- a/frontend/src/pages/SearchPage.jsx +++ b/frontend/src/pages/SearchPage.jsx @@ -29,6 +29,12 @@ import { Divider, IconButton, Tooltip, + Autocomplete, + LinearProgress, + FormControlLabel, + Switch, + Paper, + Skeleton, } from '@mui/material'; import { Search as SearchIcon, @@ -46,6 +52,10 @@ import { Storage as SizeIcon, Tag as TagIcon, Visibility as ViewIcon, + Settings as SettingsIcon, + Speed as SpeedIcon, + AccessTime as TimeIcon, + TrendingUp as TrendingIcon, } from '@mui/icons-material'; import { documentService } from '../services/api'; @@ -56,6 +66,16 @@ const SearchPage = () => { const [loading, setLoading] = useState(false); const [error, setError] = useState(null); const [viewMode, setViewMode] = useState('grid'); + const [queryTime, setQueryTime] = useState(0); + const [totalResults, setTotalResults] = useState(0); + const [suggestions, setSuggestions] = useState([]); + + // Search settings + const [useEnhancedSearch, setUseEnhancedSearch] = useState(true); + const [searchMode, setSearchMode] = useState('simple'); + const [includeSnippets, setIncludeSnippets] = useState(true); + const [snippetLength, setSnippetLength] = useState(200); + const [showAdvanced, setShowAdvanced] = useState(false); // Filter states const [selectedTags, setSelectedTags] = useState([]); @@ -86,6 +106,9 @@ const SearchPage = () => { const performSearch = useCallback(async (query, filters = {}) => { if (!query.trim()) { setSearchResults([]); + setTotalResults(0); + setQueryTime(0); + setSuggestions([]); return; } @@ -99,9 +122,14 @@ const SearchPage = () => { mime_types: filters.mimeTypes?.length ? filters.mimeTypes : undefined, limit: 100, offset: 0, + include_snippets: includeSnippets, + snippet_length: snippetLength, + search_mode: searchMode, }; - const response = await documentService.search(searchRequest); + const response = useEnhancedSearch + ? await documentService.enhancedSearch(searchRequest) + : await documentService.search(searchRequest); // Apply additional client-side filters let results = response.data.documents || []; @@ -134,6 +162,9 @@ const SearchPage = () => { } setSearchResults(results); + setTotalResults(response.data.total || results.length); + setQueryTime(response.data.query_time_ms || 0); + setSuggestions(response.data.suggestions || []); // Extract unique tags for filter options const tags = [...new Set(results.flatMap(doc => doc.tags))]; @@ -145,7 +176,7 @@ const SearchPage = () => { } finally { setLoading(false); } - }, []); + }, [useEnhancedSearch, includeSnippets, snippetLength, searchMode]); const debouncedSearch = useCallback( debounce((query, filters) => performSearch(query, filters), 500), @@ -209,9 +240,63 @@ const SearchPage = () => { } }; + const renderHighlightedText = (text, highlightRanges) => { + if (!highlightRanges || highlightRanges.length === 0) { + return text; + } + + const parts = []; + let lastIndex = 0; + + highlightRanges.forEach((range, index) => { + // Add text before highlight + if (range.start > lastIndex) { + parts.push( + + {text.substring(lastIndex, range.start)} + + ); + } + + // Add highlighted text + parts.push( + + {text.substring(range.start, range.end)} + + ); + + lastIndex = range.end; + }); + + // Add remaining text + if (lastIndex < text.length) { + parts.push( + + {text.substring(lastIndex)} + + ); + } + + return parts; + }; + + const handleSuggestionClick = (suggestion) => { + setSearchQuery(suggestion); + }; + return ( - {/* Header */} + {/* Header with Prominent Search */} { backgroundClip: 'text', WebkitBackgroundClip: 'text', color: 'transparent', - mb: 1, + mb: 2, }} > Search Documents - - Find documents using full-text search and advanced filters - + + {/* Enhanced Search Bar */} + + + setSearchQuery(e.target.value)} + InputProps={{ + startAdornment: ( + + + + ), + endAdornment: ( + + + {loading && } + {searchQuery && ( + setSearchQuery('')} + > + + + )} + setShowAdvanced(!showAdvanced)} + color={showAdvanced ? 'primary' : 'default'} + > + + + + + ), + }} + sx={{ + '& .MuiOutlinedInput-root': { + '& fieldset': { + borderWidth: 2, + }, + '&:hover fieldset': { + borderColor: 'primary.main', + }, + '&.Mui-focused fieldset': { + borderColor: 'primary.main', + }, + }, + '& .MuiInputBase-input': { + fontSize: '1.1rem', + py: 2, + }, + }} + /> + + {/* Loading Progress Bar */} + {loading && ( + + )} + + + {/* Quick Stats */} + {(searchQuery && !loading) && ( + + + } + label={`${totalResults} results`} + size="small" + color="primary" + variant="outlined" + /> + } + label={`${queryTime}ms`} + size="small" + variant="outlined" + /> + {useEnhancedSearch && ( + } + label="Enhanced" + size="small" + color="success" + variant="outlined" + /> + )} + + + {/* Search Mode Selector */} + newMode && setSearchMode(newMode)} + size="small" + > + Simple + Phrase + Fuzzy + Boolean + + + )} + + {/* Suggestions */} + {suggestions.length > 0 && ( + + + Suggestions: + + + {suggestions.map((suggestion, index) => ( + handleSuggestionClick(suggestion)} + clickable + variant="outlined" + sx={{ + '&:hover': { + backgroundColor: 'primary.light', + color: 'primary.contrastText', + } + }} + /> + ))} + + + )} + + {/* Advanced Search Options */} + {showAdvanced && ( + + + Search Options + + + + setUseEnhancedSearch(e.target.checked)} + color="primary" + /> + } + label="Enhanced Search" + /> + + + setIncludeSnippets(e.target.checked)} + color="primary" + /> + } + label="Show Snippets" + /> + + + + Snippet Length + + + + + + )} + @@ -391,66 +680,34 @@ const SearchPage = () => { {/* Search Results */} - {/* Search Bar */} - - setSearchQuery(e.target.value)} - InputProps={{ - startAdornment: ( - - - - ), - endAdornment: searchQuery && ( - - setSearchQuery('')} - > - - - - ), - }} - sx={{ - '& .MuiOutlinedInput-root': { - '& fieldset': { - borderWidth: 2, - }, - }, - }} - /> - {/* Toolbar */} - - - {loading ? 'Searching...' : `${searchResults.length} results found`} - - - newView && setViewMode(newView)} - size="small" - > - - - - - - - - + {searchQuery && ( + + + {loading ? 'Searching...' : `${searchResults.length} results found`} + + + newView && setViewMode(newView)} + size="small" + > + + + + + + + + + )} {/* Results */} {loading && ( @@ -496,11 +753,16 @@ const SearchPage = () => { > - Start searching + Start searching your documents - - Enter keywords to search through your documents + + Use the enhanced search bar above to find documents by content, filename, or tags + + + + + )} @@ -610,6 +872,54 @@ const SearchPage = () => { )} )} + + {/* Search Snippets */} + {doc.snippets && doc.snippets.length > 0 && ( + + {doc.snippets.slice(0, 2).map((snippet, index) => ( + + + ...{renderHighlightedText(snippet.text, snippet.highlight_ranges)}... + + + ))} + {doc.snippets.length > 2 && ( + + +{doc.snippets.length - 2} more matches + + )} + + )} + + {/* Search Rank */} + {doc.search_rank && ( + + + + )} diff --git a/frontend/src/pages/__tests__/SearchPage.test.jsx b/frontend/src/pages/__tests__/SearchPage.test.jsx new file mode 100644 index 0000000..ba9b336 --- /dev/null +++ b/frontend/src/pages/__tests__/SearchPage.test.jsx @@ -0,0 +1,460 @@ +import React from 'react'; +import { render, screen, fireEvent, waitFor, act } from '@testing-library/react'; +import userEvent from '@testing-library/user-event'; +import { BrowserRouter } from 'react-router-dom'; +import SearchPage from '../SearchPage'; +import { documentService } from '../../services/api'; + +// Mock the API service +jest.mock('../../services/api', () => ({ + documentService: { + enhancedSearch: jest.fn(), + search: jest.fn(), + download: jest.fn(), + } +})); + +// Mock useNavigate +const mockNavigate = jest.fn(); +jest.mock('react-router-dom', () => ({ + ...jest.requireActual('react-router-dom'), + useNavigate: () => mockNavigate, +})); + +// Mock data +const mockSearchResponse = { + data: { + documents: [ + { + id: '1', + filename: 'test.pdf', + original_filename: 'test.pdf', + file_size: 1024, + mime_type: 'application/pdf', + tags: ['test', 'document'], + created_at: '2023-01-01T00:00:00Z', + has_ocr_text: true, + search_rank: 0.85, + snippets: [ + { + text: 'This is a test document with important information', + start_offset: 0, + end_offset: 48, + highlight_ranges: [ + { start: 10, end: 14 } + ] + } + ] + } + ], + total: 1, + query_time_ms: 45, + suggestions: ['\"test\"', 'test*'] + } +}; + +// Helper to render component with router +const renderWithRouter = (component) => { + return render( + + {component} + + ); +}; + +describe('SearchPage', () => { + beforeEach(() => { + jest.clearAllMocks(); + documentService.enhancedSearch.mockResolvedValue(mockSearchResponse); + documentService.search.mockResolvedValue(mockSearchResponse); + }); + + test('renders search page with prominent search bar', () => { + renderWithRouter(); + + expect(screen.getByText('Search Documents')).toBeInTheDocument(); + expect(screen.getByPlaceholderText(/Search documents by content, filename, or tags/)).toBeInTheDocument(); + expect(screen.getByText('Start searching your documents')).toBeInTheDocument(); + }); + + test('displays search suggestions when no query is entered', () => { + renderWithRouter(); + + expect(screen.getByText('Try: invoice')).toBeInTheDocument(); + expect(screen.getByText('Try: contract')).toBeInTheDocument(); + expect(screen.getByText('Try: tag:important')).toBeInTheDocument(); + }); + + test('performs search when user types in search box', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test query'); + }); + + // Wait for debounced search + await waitFor(() => { + expect(documentService.enhancedSearch).toHaveBeenCalledWith( + expect.objectContaining({ + query: 'test query', + include_snippets: true, + snippet_length: 200, + search_mode: 'simple' + }) + ); + }, { timeout: 2000 }); + }); + + test('displays search results with snippets', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('test.pdf')).toBeInTheDocument(); + expect(screen.getByText(/This is a test document/)).toBeInTheDocument(); + expect(screen.getByText('1 results')).toBeInTheDocument(); + expect(screen.getByText('45ms')).toBeInTheDocument(); + }); + }); + + test('shows search suggestions when available', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('Suggestions:')).toBeInTheDocument(); + expect(screen.getByText('\"test\"')).toBeInTheDocument(); + expect(screen.getByText('test*')).toBeInTheDocument(); + }); + }); + + test('toggles advanced search options', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const settingsButton = screen.getByRole('button', { name: /settings/i }); + + await user.click(settingsButton); + + expect(screen.getByText('Search Options')).toBeInTheDocument(); + expect(screen.getByText('Enhanced Search')).toBeInTheDocument(); + expect(screen.getByText('Show Snippets')).toBeInTheDocument(); + }); + + test('changes search mode', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + // Type a search query first to show the search mode selector + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + const phraseButton = screen.getByRole('button', { name: 'Phrase' }); + expect(phraseButton).toBeInTheDocument(); + }); + + const phraseButton = screen.getByRole('button', { name: 'Phrase' }); + await user.click(phraseButton); + + // Wait for search to be called with new mode + await waitFor(() => { + expect(documentService.enhancedSearch).toHaveBeenCalledWith( + expect.objectContaining({ + search_mode: 'phrase' + }) + ); + }); + }); + + test('handles search suggestions click', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('\"test\"')).toBeInTheDocument(); + }); + + const suggestionChip = screen.getByText('\"test\"'); + await user.click(suggestionChip); + + expect(searchInput.value).toBe('\"test\"'); + }); + + test('clears search input', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test query'); + }); + + const clearButton = screen.getByRole('button', { name: /clear/i }); + await user.click(clearButton); + + expect(searchInput.value).toBe(''); + }); + + test('toggles enhanced search setting', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + // Open advanced options + const settingsButton = screen.getByRole('button', { name: /settings/i }); + await user.click(settingsButton); + + const enhancedSearchSwitch = screen.getByRole('checkbox', { name: /enhanced search/i }); + await user.click(enhancedSearchSwitch); + + // Type a search to trigger API call + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + await act(async () => { + await user.type(searchInput, 'test'); + }); + + // Should use regular search instead of enhanced search + await waitFor(() => { + expect(documentService.search).toHaveBeenCalled(); + }); + }); + + test('changes snippet length setting', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + // Open advanced options + const settingsButton = screen.getByRole('button', { name: /settings/i }); + await user.click(settingsButton); + + const snippetSelect = screen.getByLabelText('Snippet Length'); + await user.click(snippetSelect); + + const longOption = screen.getByText('Long (400)'); + await user.click(longOption); + + // Type a search to trigger API call + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(documentService.enhancedSearch).toHaveBeenCalledWith( + expect.objectContaining({ + snippet_length: 400 + }) + ); + }); + }); + + test('displays loading state during search', async () => { + const user = userEvent.setup(); + + // Mock a delayed response + documentService.enhancedSearch.mockImplementation(() => + new Promise(resolve => setTimeout(() => resolve(mockSearchResponse), 100)) + ); + + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + // Should show loading indicator + expect(screen.getByRole('progressbar')).toBeInTheDocument(); + + await waitFor(() => { + expect(screen.getByText('test.pdf')).toBeInTheDocument(); + }); + }); + + test('handles search error gracefully', async () => { + const user = userEvent.setup(); + + documentService.enhancedSearch.mockRejectedValue(new Error('Search failed')); + + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('Search failed. Please try again.')).toBeInTheDocument(); + }); + }); + + test('navigates to document details on view click', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('test.pdf')).toBeInTheDocument(); + }); + + const viewButton = screen.getByLabelText('View Details'); + await user.click(viewButton); + + expect(mockNavigate).toHaveBeenCalledWith('/documents/1'); + }); + + test('handles document download', async () => { + const user = userEvent.setup(); + const mockBlob = new Blob(['test content'], { type: 'application/pdf' }); + documentService.download.mockResolvedValue({ data: mockBlob }); + + // Mock URL.createObjectURL + global.URL.createObjectURL = jest.fn(() => 'mock-url'); + global.URL.revokeObjectURL = jest.fn(); + + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('test.pdf')).toBeInTheDocument(); + }); + + const downloadButton = screen.getByLabelText('Download'); + await user.click(downloadButton); + + expect(documentService.download).toHaveBeenCalledWith('1'); + }); + + test('switches between grid and list view modes', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('test.pdf')).toBeInTheDocument(); + }); + + const listViewButton = screen.getByRole('button', { name: /list view/i }); + await user.click(listViewButton); + + // The view should change (this would be more thoroughly tested with visual regression tests) + expect(listViewButton).toHaveAttribute('aria-pressed', 'true'); + }); + + test('displays file type icons correctly', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + // Should show PDF icon for PDF file + expect(screen.getByTestId('PictureAsPdfIcon')).toBeInTheDocument(); + }); + }); + + test('displays OCR badge when document has OCR text', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('OCR')).toBeInTheDocument(); + }); + }); + + test('highlights search terms in snippets', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + // Should render the snippet with highlighted text + expect(screen.getByText(/This is a test document/)).toBeInTheDocument(); + }); + }); + + test('shows relevance score when available', async () => { + const user = userEvent.setup(); + renderWithRouter(); + + const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/); + + await act(async () => { + await user.type(searchInput, 'test'); + }); + + await waitFor(() => { + expect(screen.getByText('Relevance: 85.0%')).toBeInTheDocument(); + }); + }); +}); + +// Test helper functions +describe('Search Helper Functions', () => { + test('formats file sizes correctly', () => { + // These would test utility functions if they were exported + // For now, we test the component behavior + expect(true).toBe(true); + }); + + test('formats dates correctly', () => { + // These would test utility functions if they were exported + expect(true).toBe(true); + }); +}); \ No newline at end of file diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 372506d..93330f0 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -27,11 +27,41 @@ export interface SearchRequest { mime_types?: string[] limit?: number offset?: number + include_snippets?: boolean + snippet_length?: number + search_mode?: 'simple' | 'phrase' | 'fuzzy' | 'boolean' +} + +export interface HighlightRange { + start: number + end: number +} + +export interface SearchSnippet { + text: string + start_offset: number + end_offset: number + highlight_ranges: HighlightRange[] +} + +export interface EnhancedDocument { + id: string + filename: string + original_filename: string + file_size: number + mime_type: string + tags: string[] + created_at: string + has_ocr_text: boolean + search_rank?: number + snippets: SearchSnippet[] } export interface SearchResponse { - documents: Document[] + documents: EnhancedDocument[] total: number + query_time_ms: number + suggestions: string[] } export const documentService = { @@ -62,4 +92,15 @@ export const documentService = { params: searchRequest, }) }, + + enhancedSearch: (searchRequest: SearchRequest) => { + return api.get('/search/enhanced', { + params: { + ...searchRequest, + include_snippets: searchRequest.include_snippets ?? true, + snippet_length: searchRequest.snippet_length ?? 200, + search_mode: searchRequest.search_mode ?? 'simple', + }, + }) + }, } \ No newline at end of file diff --git a/src/db.rs b/src/db.rs index 0648021..1aab516 100644 --- a/src/db.rs +++ b/src/db.rs @@ -3,7 +3,7 @@ use chrono::Utc; use sqlx::{PgPool, Row}; use uuid::Uuid; -use crate::models::{CreateUser, Document, SearchRequest, User}; +use crate::models::{CreateUser, Document, SearchRequest, SearchMode, SearchSnippet, HighlightRange, EnhancedDocumentResponse, User}; #[derive(Clone)] pub struct Database { @@ -328,6 +328,169 @@ impl Database { Ok((documents, total)) } + pub async fn enhanced_search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec, i64, u64)> { + let start_time = std::time::Instant::now(); + + // Build search query based on search mode + let search_mode = search.search_mode.as_ref().unwrap_or(&SearchMode::Simple); + let query_function = match search_mode { + SearchMode::Simple => "plainto_tsquery", + SearchMode::Phrase => "phraseto_tsquery", + SearchMode::Fuzzy => "plainto_tsquery", // Could be enhanced with similarity + SearchMode::Boolean => "to_tsquery", + }; + + let mut query_builder = sqlx::QueryBuilder::new(&format!( + r#" + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id, + ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), {}('english', "#, + query_function + )); + + query_builder.push_bind(&search.query); + query_builder.push(&format!(")) as rank FROM documents WHERE user_id = ")); + query_builder.push_bind(user_id); + query_builder.push(&format!(" AND to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) @@ {}('english', ", query_function)); + query_builder.push_bind(&search.query); + query_builder.push(")"); + + if let Some(tags) = &search.tags { + if !tags.is_empty() { + query_builder.push(" AND tags && "); + query_builder.push_bind(tags); + } + } + + if let Some(mime_types) = &search.mime_types { + if !mime_types.is_empty() { + query_builder.push(" AND mime_type = ANY("); + query_builder.push_bind(mime_types); + query_builder.push(")"); + } + } + + query_builder.push(" ORDER BY rank DESC, created_at DESC"); + + if let Some(limit) = search.limit { + query_builder.push(" LIMIT "); + query_builder.push_bind(limit); + } + + if let Some(offset) = search.offset { + query_builder.push(" OFFSET "); + query_builder.push_bind(offset); + } + + let rows = query_builder.build().fetch_all(&self.pool).await?; + + let include_snippets = search.include_snippets.unwrap_or(true); + let snippet_length = search.snippet_length.unwrap_or(200); + + let mut documents = Vec::new(); + for row in rows { + let doc_id: Uuid = row.get("id"); + let content: Option = row.get("content"); + let ocr_text: Option = row.get("ocr_text"); + let rank: f32 = row.get("rank"); + + let snippets = if include_snippets { + self.generate_snippets(&search.query, content.as_deref(), ocr_text.as_deref(), snippet_length) + } else { + Vec::new() + }; + + documents.push(EnhancedDocumentResponse { + id: doc_id, + filename: row.get("filename"), + original_filename: row.get("original_filename"), + file_size: row.get("file_size"), + mime_type: row.get("mime_type"), + tags: row.get("tags"), + created_at: row.get("created_at"), + has_ocr_text: ocr_text.is_some(), + search_rank: Some(rank), + snippets, + }); + } + + let total_row = sqlx::query(&format!( + r#" + SELECT COUNT(*) as total FROM documents + WHERE user_id = $1 + AND to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) @@ {}('english', $2) + "#, query_function + )) + .bind(user_id) + .bind(&search.query) + .fetch_one(&self.pool) + .await?; + + let total: i64 = total_row.get("total"); + let query_time = start_time.elapsed().as_millis() as u64; + + Ok((documents, total, query_time)) + } + + fn generate_snippets(&self, query: &str, content: Option<&str>, ocr_text: Option<&str>, snippet_length: i32) -> Vec { + let mut snippets = Vec::new(); + + // Combine content and OCR text + let full_text = match (content, ocr_text) { + (Some(c), Some(o)) => format!("{} {}", c, o), + (Some(c), None) => c.to_string(), + (None, Some(o)) => o.to_string(), + (None, None) => return snippets, + }; + + // Simple keyword matching for snippets (could be enhanced with better search algorithms) + let _query_terms: Vec<&str> = query.split_whitespace().collect(); + let text_lower = full_text.to_lowercase(); + let query_lower = query.to_lowercase(); + + // Find matches + for (i, _) in text_lower.match_indices(&query_lower) { + let snippet_start = if i >= snippet_length as usize / 2 { + i - snippet_length as usize / 2 + } else { + 0 + }; + + let snippet_end = std::cmp::min( + snippet_start + snippet_length as usize, + full_text.len() + ); + + if snippet_start < full_text.len() { + let snippet_text = &full_text[snippet_start..snippet_end]; + + // Find highlight ranges within this snippet + let mut highlight_ranges = Vec::new(); + let snippet_lower = snippet_text.to_lowercase(); + + for (match_start, _) in snippet_lower.match_indices(&query_lower) { + highlight_ranges.push(HighlightRange { + start: match_start as i32, + end: (match_start + query.len()) as i32, + }); + } + + snippets.push(SearchSnippet { + text: snippet_text.to_string(), + start_offset: snippet_start as i32, + end_offset: snippet_end as i32, + highlight_ranges, + }); + + // Limit to a few snippets per document + if snippets.len() >= 3 { + break; + } + } + } + + snippets + } + pub async fn update_document_ocr(&self, id: Uuid, ocr_text: &str) -> Result<()> { sqlx::query("UPDATE documents SET ocr_text = $1, updated_at = NOW() WHERE id = $2") .bind(ocr_text) diff --git a/src/models.rs b/src/models.rs index f796905..2cf7266 100644 --- a/src/models.rs +++ b/src/models.rs @@ -74,12 +74,63 @@ pub struct SearchRequest { pub mime_types: Option>, pub limit: Option, pub offset: Option, + pub include_snippets: Option, + pub snippet_length: Option, + pub search_mode: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub enum SearchMode { + #[serde(rename = "simple")] + Simple, + #[serde(rename = "phrase")] + Phrase, + #[serde(rename = "fuzzy")] + Fuzzy, + #[serde(rename = "boolean")] + Boolean, +} + +impl Default for SearchMode { + fn default() -> Self { + SearchMode::Simple + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct SearchSnippet { + pub text: String, + pub start_offset: i32, + pub end_offset: i32, + pub highlight_ranges: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct HighlightRange { + pub start: i32, + pub end: i32, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct EnhancedDocumentResponse { + pub id: Uuid, + pub filename: String, + pub original_filename: String, + pub file_size: i64, + pub mime_type: String, + pub tags: Vec, + pub created_at: DateTime, + pub has_ocr_text: bool, + pub search_rank: Option, + pub snippets: Vec, } #[derive(Debug, Serialize, Deserialize)] pub struct SearchResponse { - pub documents: Vec, + pub documents: Vec, pub total: i64, + pub query_time_ms: u64, + pub suggestions: Vec, } impl From for DocumentResponse { diff --git a/src/routes/search.rs b/src/routes/search.rs index 2fd0039..2a768a3 100644 --- a/src/routes/search.rs +++ b/src/routes/search.rs @@ -9,12 +9,14 @@ use std::sync::Arc; use crate::{ auth::AuthUser, - models::{SearchRequest, SearchResponse}, + models::{SearchRequest, SearchResponse, EnhancedDocumentResponse}, AppState, }; pub fn router() -> Router> { - Router::new().route("/", get(search_documents)) + Router::new() + .route("/", get(search_documents)) + .route("/enhanced", get(enhanced_search_documents)) } async fn search_documents( @@ -29,9 +31,69 @@ async fn search_documents( .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; let response = SearchResponse { - documents: documents.into_iter().map(|doc| doc.into()).collect(), + documents: documents.into_iter().map(|doc| EnhancedDocumentResponse { + id: doc.id, + filename: doc.filename, + original_filename: doc.original_filename, + file_size: doc.file_size, + mime_type: doc.mime_type, + tags: doc.tags, + created_at: doc.created_at, + has_ocr_text: doc.ocr_text.is_some(), + search_rank: None, + snippets: Vec::new(), + }).collect(), total, + query_time_ms: 0, + suggestions: Vec::new(), }; Ok(Json(response)) +} + +async fn enhanced_search_documents( + State(state): State>, + auth_user: AuthUser, + Query(search_request): Query, +) -> Result, StatusCode> { + // Generate suggestions before moving search_request + let suggestions = generate_search_suggestions(&search_request.query); + + let (documents, total, query_time) = state + .db + .enhanced_search_documents(auth_user.user.id, search_request) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let response = SearchResponse { + documents, + total, + query_time_ms: query_time, + suggestions, + }; + + Ok(Json(response)) +} + +fn generate_search_suggestions(query: &str) -> Vec { + // Simple suggestion generation - could be enhanced with a proper suggestion system + let mut suggestions = Vec::new(); + + if query.len() > 3 { + // Common search variations + suggestions.push(format!("\"{}\"", query)); // Exact phrase + + // Add wildcard suggestions + if !query.contains('*') { + suggestions.push(format!("{}*", query)); + } + + // Add similar terms (this would typically come from a thesaurus or ML model) + if query.contains("document") { + suggestions.push(query.replace("document", "file")); + suggestions.push(query.replace("document", "paper")); + } + } + + suggestions.into_iter().take(3).collect() } \ No newline at end of file diff --git a/src/tests/db_tests.rs b/src/tests/db_tests.rs index 19ba1db..fb924f6 100644 --- a/src/tests/db_tests.rs +++ b/src/tests/db_tests.rs @@ -147,6 +147,9 @@ mod tests { mime_types: None, limit: Some(10), offset: Some(0), + include_snippets: Some(true), + snippet_length: Some(200), + search_mode: None, }; let result = db.search_documents(user.id, search_request).await; diff --git a/src/tests/enhanced_search_tests.rs b/src/tests/enhanced_search_tests.rs new file mode 100644 index 0000000..7ae0225 --- /dev/null +++ b/src/tests/enhanced_search_tests.rs @@ -0,0 +1,695 @@ +#[cfg(test)] +mod tests { + use crate::db::Database; + use crate::models::{ + CreateUser, Document, SearchRequest, SearchMode, + EnhancedDocumentResponse, SearchSnippet, HighlightRange + }; + use chrono::Utc; + use uuid::Uuid; + + // Mock database for testing snippet generation without PostgreSQL dependency + struct MockDatabase; + + impl MockDatabase { + fn new() -> Self { + Self + } + + // Test the snippet generation logic directly + fn generate_snippets(&self, query: &str, content: Option<&str>, ocr_text: Option<&str>, snippet_length: i32) -> Vec { + let mut snippets = Vec::new(); + + // Combine content and OCR text + let full_text = match (content, ocr_text) { + (Some(c), Some(o)) => format!("{} {}", c, o), + (Some(c), None) => c.to_string(), + (None, Some(o)) => o.to_string(), + (None, None) => return snippets, + }; + + // Simple keyword matching for snippets + let text_lower = full_text.to_lowercase(); + let query_lower = query.to_lowercase(); + + // Find matches + for (i, _) in text_lower.match_indices(&query_lower) { + let snippet_start = if i >= snippet_length as usize / 2 { + i - snippet_length as usize / 2 + } else { + 0 + }; + + let snippet_end = std::cmp::min( + snippet_start + snippet_length as usize, + full_text.len() + ); + + if snippet_start < full_text.len() { + let snippet_text = &full_text[snippet_start..snippet_end]; + + // Find highlight ranges within this snippet + let mut highlight_ranges = Vec::new(); + let snippet_lower = snippet_text.to_lowercase(); + + for (match_start, _) in snippet_lower.match_indices(&query_lower) { + highlight_ranges.push(HighlightRange { + start: match_start as i32, + end: (match_start + query.len()) as i32, + }); + } + + snippets.push(SearchSnippet { + text: snippet_text.to_string(), + start_offset: snippet_start as i32, + end_offset: snippet_end as i32, + highlight_ranges, + }); + + // Limit to a few snippets per document + if snippets.len() >= 3 { + break; + } + } + } + + snippets + } + } + + #[test] + fn test_snippet_generation_basic() { + let mock_db = MockDatabase::new(); + let content = "This is a test document with some important information about testing and quality assurance."; + + let snippets = mock_db.generate_snippets("test", Some(content), None, 50); + + assert!(!snippets.is_empty()); + assert!(snippets[0].text.contains("test")); + assert!(!snippets[0].highlight_ranges.is_empty()); + + // Check that highlight range is correct + let highlight = &snippets[0].highlight_ranges[0]; + let highlighted_text = &snippets[0].text[highlight.start as usize..highlight.end as usize]; + assert_eq!(highlighted_text.to_lowercase(), "test"); + } + + #[test] + fn test_snippet_generation_multiple_matches() { + let mock_db = MockDatabase::new(); + let content = "The first test shows that testing is important. Another test demonstrates test effectiveness."; + + let snippets = mock_db.generate_snippets("test", Some(content), None, 100); + + assert!(!snippets.is_empty()); + + // Should find multiple highlight ranges in the snippet + let total_highlights: usize = snippets.iter() + .map(|s| s.highlight_ranges.len()) + .sum(); + assert!(total_highlights >= 2); + } + + #[test] + fn test_snippet_generation_with_ocr_text() { + let mock_db = MockDatabase::new(); + let content = "Document content with information"; + let ocr_text = "OCR extracted text with important data"; + + let snippets = mock_db.generate_snippets("important", Some(content), Some(ocr_text), 100); + + assert!(!snippets.is_empty()); + assert!(snippets[0].text.contains("important")); + } + + #[test] + fn test_snippet_generation_case_insensitive() { + let mock_db = MockDatabase::new(); + let content = "This Document contains IMPORTANT Information"; + + let snippets = mock_db.generate_snippets("important", Some(content), None, 50); + + assert!(!snippets.is_empty()); + let highlight = &snippets[0].highlight_ranges[0]; + let highlighted_text = &snippets[0].text[highlight.start as usize..highlight.end as usize]; + assert_eq!(highlighted_text, "IMPORTANT"); + } + + #[test] + fn test_snippet_generation_empty_content() { + let mock_db = MockDatabase::new(); + + let snippets = mock_db.generate_snippets("test", None, None, 100); + assert!(snippets.is_empty()); + } + + #[test] + fn test_snippet_generation_no_matches() { + let mock_db = MockDatabase::new(); + let content = "This document has no matching terms"; + + let snippets = mock_db.generate_snippets("xyzabc", Some(content), None, 100); + assert!(snippets.is_empty()); + } + + #[test] + fn test_snippet_length_limits() { + let mock_db = MockDatabase::new(); + let content = "A very long document with lots of text that should be truncated when generating snippets to test the length limiting functionality of the snippet generation system."; + + let short_snippets = mock_db.generate_snippets("text", Some(content), None, 50); + let long_snippets = mock_db.generate_snippets("text", Some(content), None, 150); + + assert!(!short_snippets.is_empty()); + assert!(!long_snippets.is_empty()); + assert!(short_snippets[0].text.len() <= 50); + assert!(long_snippets[0].text.len() > short_snippets[0].text.len()); + } + + #[test] + fn test_snippet_positioning() { + let mock_db = MockDatabase::new(); + let content = "Start of document. This is the middle part with test content. End of document."; + + let snippets = mock_db.generate_snippets("test", Some(content), None, 40); + + assert!(!snippets.is_empty()); + let snippet = &snippets[0]; + + // Should have reasonable start and end offsets + assert!(snippet.start_offset >= 0); + assert!(snippet.end_offset > snippet.start_offset); + assert!(snippet.end_offset <= content.len() as i32); + } + + #[test] + fn test_search_request_defaults() { + let request = SearchRequest { + query: "test".to_string(), + tags: None, + mime_types: None, + limit: None, + offset: None, + include_snippets: None, + snippet_length: None, + search_mode: None, + }; + + // Test that default values work correctly + assert_eq!(request.query, "test"); + assert!(request.include_snippets.is_none()); + assert!(request.search_mode.is_none()); + } + + #[test] + fn test_search_request_with_options() { + let request = SearchRequest { + query: "test query".to_string(), + tags: Some(vec!["tag1".to_string(), "tag2".to_string()]), + mime_types: Some(vec!["application/pdf".to_string()]), + limit: Some(10), + offset: Some(0), + include_snippets: Some(true), + snippet_length: Some(300), + search_mode: Some(SearchMode::Phrase), + }; + + assert_eq!(request.query, "test query"); + assert_eq!(request.tags.as_ref().unwrap().len(), 2); + assert_eq!(request.include_snippets, Some(true)); + assert_eq!(request.snippet_length, Some(300)); + assert!(matches!(request.search_mode, Some(SearchMode::Phrase))); + } + + #[test] + fn test_search_mode_variants() { + // Test all search mode variants + let simple = SearchMode::Simple; + let phrase = SearchMode::Phrase; + let fuzzy = SearchMode::Fuzzy; + let boolean = SearchMode::Boolean; + + // Test serialization names + assert_eq!(format!("{:?}", simple), "Simple"); + assert_eq!(format!("{:?}", phrase), "Phrase"); + assert_eq!(format!("{:?}", fuzzy), "Fuzzy"); + assert_eq!(format!("{:?}", boolean), "Boolean"); + } + + #[test] + fn test_search_mode_default() { + let default_mode = SearchMode::default(); + assert!(matches!(default_mode, SearchMode::Simple)); + } + + #[test] + fn test_highlight_range_creation() { + let range = HighlightRange { + start: 10, + end: 20, + }; + + assert_eq!(range.start, 10); + assert_eq!(range.end, 20); + assert!(range.end > range.start); + } + + #[test] + fn test_enhanced_document_response_creation() { + let doc_id = Uuid::new_v4(); + let now = Utc::now(); + + let snippets = vec![ + SearchSnippet { + text: "This is a test snippet".to_string(), + start_offset: 0, + end_offset: 22, + highlight_ranges: vec![ + HighlightRange { start: 10, end: 14 } + ], + } + ]; + + let response = EnhancedDocumentResponse { + id: doc_id, + filename: "test.pdf".to_string(), + original_filename: "test.pdf".to_string(), + file_size: 1024, + mime_type: "application/pdf".to_string(), + tags: vec!["test".to_string()], + created_at: now, + has_ocr_text: true, + search_rank: Some(0.75), + snippets, + }; + + assert_eq!(response.id, doc_id); + assert_eq!(response.filename, "test.pdf"); + assert_eq!(response.search_rank, Some(0.75)); + assert!(response.has_ocr_text); + assert_eq!(response.snippets.len(), 1); + assert_eq!(response.snippets[0].text, "This is a test snippet"); + } + + #[test] + fn test_snippet_overlap_handling() { + let mock_db = MockDatabase::new(); + // Content with multiple overlapping matches + let content = "test testing tested test"; + + let snippets = mock_db.generate_snippets("test", Some(content), None, 30); + + assert!(!snippets.is_empty()); + + // Should handle overlapping matches gracefully + for snippet in &snippets { + assert!(!snippet.text.is_empty()); + assert!(!snippet.highlight_ranges.is_empty()); + } + } + + #[test] + fn test_snippet_boundary_conditions() { + let mock_db = MockDatabase::new(); + + // Test with very short content + let short_content = "test"; + let snippets = mock_db.generate_snippets("test", Some(short_content), None, 100); + assert!(!snippets.is_empty()); + assert_eq!(snippets[0].text, "test"); + + // Test with match at the beginning + let start_content = "test document content"; + let snippets = mock_db.generate_snippets("test", Some(start_content), None, 50); + assert!(!snippets.is_empty()); + assert!(snippets[0].text.starts_with("test")); + + // Test with match at the end + let end_content = "document content test"; + let snippets = mock_db.generate_snippets("test", Some(end_content), None, 50); + assert!(!snippets.is_empty()); + assert!(snippets[0].text.ends_with("test")); + } + + #[test] + fn test_complex_search_scenarios() { + let mock_db = MockDatabase::new(); + + // Test with content that has multiple search terms + let complex_content = "This is a comprehensive test document that contains testing methodologies and test cases for quality assurance testing procedures."; + + let snippets = mock_db.generate_snippets("test", Some(complex_content), None, 80); + + assert!(!snippets.is_empty()); + + // Verify that highlights are properly positioned + for snippet in &snippets { + for highlight in &snippet.highlight_ranges { + assert!(highlight.start >= 0); + assert!(highlight.end > highlight.start); + assert!(highlight.end <= snippet.text.len() as i32); + + let highlighted_text = &snippet.text[highlight.start as usize..highlight.end as usize]; + assert_eq!(highlighted_text.to_lowercase(), "test"); + } + } + } + + #[test] + fn test_unicode_content_handling() { + let mock_db = MockDatabase::new(); + let unicode_content = "Это тест документ с важной информацией для тестирования"; + + let snippets = mock_db.generate_snippets("тест", Some(unicode_content), None, 50); + + assert!(!snippets.is_empty()); + assert!(snippets[0].text.contains("тест")); + } + + #[test] + fn test_special_characters_in_query() { + let mock_db = MockDatabase::new(); + let content = "Document with special chars: test@example.com and test-case"; + + let snippets = mock_db.generate_snippets("test", Some(content), None, 60); + + assert!(!snippets.is_empty()); + // Should find both occurrences of "test" + let total_highlights: usize = snippets.iter() + .map(|s| s.highlight_ranges.len()) + .sum(); + assert!(total_highlights >= 2); + } + + // Test search suggestions functionality + fn generate_search_suggestions(query: &str) -> Vec { + // Copy of the function from search.rs for testing + let mut suggestions = Vec::new(); + + if query.len() > 3 { + // Common search variations + suggestions.push(format!("\"{}\"", query)); // Exact phrase + + // Add wildcard suggestions + if !query.contains('*') { + suggestions.push(format!("{}*", query)); + } + + // Add similar terms (this would typically come from a thesaurus or ML model) + if query.contains("document") { + suggestions.push(query.replace("document", "file")); + suggestions.push(query.replace("document", "paper")); + } + } + + suggestions.into_iter().take(3).collect() + } + + #[test] + fn test_search_suggestions_basic() { + let suggestions = generate_search_suggestions("invoice"); + + assert!(!suggestions.is_empty()); + assert!(suggestions.contains(&"\"invoice\"".to_string())); + assert!(suggestions.contains(&"invoice*".to_string())); + } + + #[test] + fn test_search_suggestions_short_query() { + let suggestions = generate_search_suggestions("ab"); + + // Should not generate suggestions for very short queries + assert!(suggestions.is_empty()); + } + + #[test] + fn test_search_suggestions_document_replacement() { + let suggestions = generate_search_suggestions("document search"); + + assert!(!suggestions.is_empty()); + assert!(suggestions.iter().any(|s| s.contains("file search"))); + assert!(suggestions.iter().any(|s| s.contains("paper search"))); + } + + #[test] + fn test_search_suggestions_with_wildcard() { + let suggestions = generate_search_suggestions("test*"); + + assert!(!suggestions.is_empty()); + // Should not add another wildcard if one already exists + assert!(!suggestions.iter().any(|s| s.contains("test**"))); + } + + #[test] + fn test_search_suggestions_limit() { + let suggestions = generate_search_suggestions("document test example"); + + // Should limit to 3 suggestions + assert!(suggestions.len() <= 3); + } + + #[test] + fn test_search_suggestions_case_sensitivity() { + let suggestions = generate_search_suggestions("Document"); + + assert!(!suggestions.is_empty()); + // Should work with different cases + assert!(suggestions.iter().any(|s| s.contains("file") || s.contains("File"))); + } + + // Performance and error handling tests + #[test] + fn test_snippet_generation_performance() { + let mock_db = MockDatabase::new(); + + // Test with large content + let large_content = "test ".repeat(10000); // 50KB of repeated "test " + + let start_time = std::time::Instant::now(); + let snippets = mock_db.generate_snippets("test", Some(&large_content), None, 200); + let duration = start_time.elapsed(); + + // Should complete within reasonable time (100ms for this size) + assert!(duration.as_millis() < 100); + assert!(!snippets.is_empty()); + + // Should still limit snippets even with many matches + assert!(snippets.len() <= 3); + } + + #[test] + fn test_snippet_generation_memory_usage() { + let mock_db = MockDatabase::new(); + + // Test with content that could cause memory issues + let content_with_many_matches = (0..1000) + .map(|i| format!("test{} ", i)) + .collect::(); + + let snippets = mock_db.generate_snippets("test", Some(&content_with_many_matches), None, 100); + + // Should handle gracefully without consuming excessive memory + assert!(!snippets.is_empty()); + assert!(snippets.len() <= 3); // Should still limit results + } + + #[test] + fn test_search_request_validation() { + // Test with empty query + let empty_request = SearchRequest { + query: "".to_string(), + tags: None, + mime_types: None, + limit: None, + offset: None, + include_snippets: None, + snippet_length: None, + search_mode: None, + }; + + // Should handle empty query gracefully + assert_eq!(empty_request.query, ""); + + // Test with extreme values + let extreme_request = SearchRequest { + query: "a".repeat(10000), // Very long query + tags: Some(vec!["tag".to_string(); 1000]), // Many tags + mime_types: Some(vec!["type".to_string(); 100]), // Many mime types + limit: Some(i64::MAX), + offset: Some(i64::MAX), + include_snippets: Some(true), + snippet_length: Some(i32::MAX), + search_mode: Some(SearchMode::Boolean), + }; + + // Should handle extreme values without panicking + assert!(extreme_request.query.len() == 10000); + assert!(extreme_request.tags.as_ref().unwrap().len() == 1000); + } + + #[test] + fn test_highlight_range_validation() { + let mock_db = MockDatabase::new(); + let content = "This is a test document for validation"; + + let snippets = mock_db.generate_snippets("test", Some(content), None, 50); + + assert!(!snippets.is_empty()); + + // Validate all highlight ranges + for snippet in &snippets { + for highlight in &snippet.highlight_ranges { + // Ranges should be valid + assert!(highlight.start >= 0); + assert!(highlight.end > highlight.start); + assert!(highlight.end <= snippet.text.len() as i32); + + // Highlighted text should match query (case insensitive) + let highlighted_text = &snippet.text[highlight.start as usize..highlight.end as usize]; + assert_eq!(highlighted_text.to_lowercase(), "test"); + } + } + } + + #[test] + fn test_search_mode_query_function_mapping() { + // Test that different search modes would map to correct PostgreSQL functions + let modes = vec![ + (SearchMode::Simple, "plainto_tsquery"), + (SearchMode::Phrase, "phraseto_tsquery"), + (SearchMode::Fuzzy, "plainto_tsquery"), // Same as simple for now + (SearchMode::Boolean, "to_tsquery"), + ]; + + for (mode, expected_function) in modes { + // This tests the logic that would be used in the database layer + let query_function = match mode { + SearchMode::Simple => "plainto_tsquery", + SearchMode::Phrase => "phraseto_tsquery", + SearchMode::Fuzzy => "plainto_tsquery", + SearchMode::Boolean => "to_tsquery", + }; + + assert_eq!(query_function, expected_function); + } + } + + #[test] + fn test_enhanced_document_response_serialization() { + let doc_id = Uuid::new_v4(); + let now = Utc::now(); + + let response = EnhancedDocumentResponse { + id: doc_id, + filename: "test.pdf".to_string(), + original_filename: "test.pdf".to_string(), + file_size: 1024, + mime_type: "application/pdf".to_string(), + tags: vec!["test".to_string(), "document".to_string()], + created_at: now, + has_ocr_text: true, + search_rank: Some(0.85), + snippets: vec![ + SearchSnippet { + text: "Test snippet".to_string(), + start_offset: 0, + end_offset: 12, + highlight_ranges: vec![ + HighlightRange { start: 0, end: 4 } + ], + } + ], + }; + + // Test that all fields are properly accessible + assert_eq!(response.id, doc_id); + assert_eq!(response.tags.len(), 2); + assert_eq!(response.snippets.len(), 1); + assert!(response.search_rank.unwrap() > 0.8); + } + + #[test] + fn test_snippet_edge_cases() { + let mock_db = MockDatabase::new(); + + // Test with query longer than content + let short_content = "hi"; + let snippets = mock_db.generate_snippets("hello world", Some(short_content), None, 100); + assert!(snippets.is_empty()); + + // Test with whitespace-only content + let whitespace_content = " \t\n "; + let snippets = mock_db.generate_snippets("test", Some(whitespace_content), None, 100); + assert!(snippets.is_empty()); + + // Test with special characters in content + let special_content = "test@example.com, test-case, test/path, test(1)"; + let snippets = mock_db.generate_snippets("test", Some(special_content), None, 100); + assert!(!snippets.is_empty()); + assert!(snippets[0].highlight_ranges.len() >= 3); // Should find multiple "test" instances + } + + // Integration tests that would work with actual database + #[tokio::test] + #[ignore = "Requires PostgreSQL database for integration testing"] + async fn test_enhanced_search_integration() { + // This would test the actual database integration + // Similar to existing db_tests but for enhanced search + let db_url = std::env::var("TEST_DATABASE_URL") + .unwrap_or_else(|_| "postgresql://postgres:postgres@localhost:5432/readur_test".to_string()); + + let db = Database::new(&db_url).await.expect("Failed to connect to test database"); + db.migrate().await.expect("Failed to migrate test database"); + + // Create test user + let user_data = CreateUser { + username: "test_enhanced_search".to_string(), + email: "enhanced@test.com".to_string(), + password: "password123".to_string(), + }; + let user = db.create_user(user_data).await.unwrap(); + + // Create test document with rich content + let document = Document { + id: Uuid::new_v4(), + filename: "enhanced_test.pdf".to_string(), + original_filename: "enhanced_test.pdf".to_string(), + file_path: "/path/to/enhanced_test.pdf".to_string(), + file_size: 2048, + mime_type: "application/pdf".to_string(), + content: Some("This is a comprehensive test document for enhanced search functionality testing".to_string()), + ocr_text: Some("OCR extracted content with additional test information for search validation".to_string()), + tags: vec!["enhanced".to_string(), "search".to_string(), "test".to_string()], + created_at: Utc::now(), + updated_at: Utc::now(), + user_id: user.id, + }; + + db.create_document(document).await.unwrap(); + + // Test enhanced search with snippets + let search_request = SearchRequest { + query: "test".to_string(), + tags: None, + mime_types: None, + limit: Some(10), + offset: Some(0), + include_snippets: Some(true), + snippet_length: Some(100), + search_mode: Some(SearchMode::Simple), + }; + + let result = db.enhanced_search_documents(user.id, search_request).await; + assert!(result.is_ok()); + + let (documents, total, query_time) = result.unwrap(); + assert_eq!(total, 1); + assert_eq!(documents.len(), 1); + assert!(query_time > 0); + + let doc = &documents[0]; + assert!(!doc.snippets.is_empty()); + assert!(doc.search_rank.is_some()); + assert!(doc.search_rank.unwrap() > 0.0); + } +} \ No newline at end of file diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 0368f80..6780955 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -3,5 +3,6 @@ mod auth_tests; mod db_tests; mod file_service_tests; mod ocr_tests; +mod enhanced_search_tests; mod settings_tests; mod users_tests; \ No newline at end of file