diff --git a/frontend/src/pages/SearchPage.jsx b/frontend/src/pages/SearchPage.jsx
index 56a4f60..95ec5f3 100644
--- a/frontend/src/pages/SearchPage.jsx
+++ b/frontend/src/pages/SearchPage.jsx
@@ -29,6 +29,12 @@ import {
Divider,
IconButton,
Tooltip,
+ Autocomplete,
+ LinearProgress,
+ FormControlLabel,
+ Switch,
+ Paper,
+ Skeleton,
} from '@mui/material';
import {
Search as SearchIcon,
@@ -46,6 +52,10 @@ import {
Storage as SizeIcon,
Tag as TagIcon,
Visibility as ViewIcon,
+ Settings as SettingsIcon,
+ Speed as SpeedIcon,
+ AccessTime as TimeIcon,
+ TrendingUp as TrendingIcon,
} from '@mui/icons-material';
import { documentService } from '../services/api';
@@ -56,6 +66,16 @@ const SearchPage = () => {
const [loading, setLoading] = useState(false);
const [error, setError] = useState(null);
const [viewMode, setViewMode] = useState('grid');
+ const [queryTime, setQueryTime] = useState(0); // `query_time_ms` reported by the last search response (0 when absent)
+ const [totalResults, setTotalResults] = useState(0); // `total` from the last response, falling back to the filtered result count
+ const [suggestions, setSuggestions] = useState([]); // alternative queries returned with the last response
+
+ // Search settings (the first four are dependencies of performSearch)
+ const [useEnhancedSearch, setUseEnhancedSearch] = useState(true); // true: documentService.enhancedSearch, false: documentService.search
+ const [searchMode, setSearchMode] = useState('simple'); // sent to the API as `search_mode`
+ const [includeSnippets, setIncludeSnippets] = useState(true); // sent to the API as `include_snippets`
+ const [snippetLength, setSnippetLength] = useState(200); // sent to the API as `snippet_length`
+ const [showAdvanced, setShowAdvanced] = useState(false); // whether the "Search Options" panel is rendered
// Filter states
const [selectedTags, setSelectedTags] = useState([]);
@@ -86,6 +106,9 @@ const SearchPage = () => {
const performSearch = useCallback(async (query, filters = {}) => {
if (!query.trim()) {
setSearchResults([]);
+ setTotalResults(0);
+ setQueryTime(0);
+ setSuggestions([]);
return;
}
@@ -99,9 +122,14 @@ const SearchPage = () => {
mime_types: filters.mimeTypes?.length ? filters.mimeTypes : undefined,
limit: 100,
offset: 0,
+ include_snippets: includeSnippets,
+ snippet_length: snippetLength,
+ search_mode: searchMode,
};
- const response = await documentService.search(searchRequest);
+ const response = useEnhancedSearch
+ ? await documentService.enhancedSearch(searchRequest)
+ : await documentService.search(searchRequest);
// Apply additional client-side filters
let results = response.data.documents || [];
@@ -134,6 +162,9 @@ const SearchPage = () => {
}
setSearchResults(results);
+ setTotalResults(response.data.total || results.length);
+ setQueryTime(response.data.query_time_ms || 0);
+ setSuggestions(response.data.suggestions || []);
// Extract unique tags for filter options
const tags = [...new Set(results.flatMap(doc => doc.tags))];
@@ -145,7 +176,7 @@ const SearchPage = () => {
} finally {
setLoading(false);
}
- }, []);
+ }, [useEnhancedSearch, includeSnippets, snippetLength, searchMode]);
const debouncedSearch = useCallback(
debounce((query, filters) => performSearch(query, filters), 500),
@@ -209,9 +240,63 @@ const SearchPage = () => {
}
};
+ const renderHighlightedText = (text, highlightRanges) => { // Splits `text` into plain and highlighted parts; returns `text` itself when there are no ranges.
+ if (!highlightRanges || highlightRanges.length === 0) {
+ return text;
+ }
+
+ const parts = [];
+ let lastIndex = 0; // index in `text` just past the last range consumed
+
+ highlightRanges.forEach((range, index) => {
+ // Plain text between the previous range and this one. NOTE(review): assumes ranges are sorted and non-overlapping; otherwise text is repeated or skipped - confirm against the API.
+ if (range.start > lastIndex) {
+ parts.push(
+
+ {text.substring(lastIndex, range.start)}
+
+ );
+ }
+
+ // Highlighted text. NOTE(review): substring counts UTF-16 code units, while src/db.rs emits byte offsets - they only agree for ASCII snippets.
+ parts.push(
+
+ {text.substring(range.start, range.end)}
+
+ );
+
+ lastIndex = range.end;
+ });
+
+ // Trailing plain text after the final range
+ if (lastIndex < text.length) {
+ parts.push(
+
+ {text.substring(lastIndex)}
+
+ );
+ }
+
+ return parts; // array of rendered parts, in document order
+ };
+
+ const handleSuggestionClick = (suggestion) => { // Replaces the current query text with the clicked suggestion.
+ setSearchQuery(suggestion);
+ };
+
return (
- {/* Header */}
+ {/* Header with Prominent Search */}
{
backgroundClip: 'text',
WebkitBackgroundClip: 'text',
color: 'transparent',
- mb: 1,
+ mb: 2,
}}
>
Search Documents
-
- Find documents using full-text search and advanced filters
-
+
+ {/* Enhanced Search Bar */}
+
+
+ setSearchQuery(e.target.value)}
+ InputProps={{
+ startAdornment: (
+
+
+
+ ),
+ endAdornment: (
+
+
+ {loading && }
+ {searchQuery && (
+ setSearchQuery('')}
+ >
+
+
+ )}
+ setShowAdvanced(!showAdvanced)}
+ color={showAdvanced ? 'primary' : 'default'}
+ >
+
+
+
+
+ ),
+ }}
+ sx={{
+ '& .MuiOutlinedInput-root': {
+ '& fieldset': {
+ borderWidth: 2,
+ },
+ '&:hover fieldset': {
+ borderColor: 'primary.main',
+ },
+ '&.Mui-focused fieldset': {
+ borderColor: 'primary.main',
+ },
+ },
+ '& .MuiInputBase-input': {
+ fontSize: '1.1rem',
+ py: 2,
+ },
+ }}
+ />
+
+ {/* Loading Progress Bar */}
+ {loading && (
+
+ )}
+
+
+ {/* Quick Stats */}
+ {(searchQuery && !loading) && (
+
+
+ }
+ label={`${totalResults} results`}
+ size="small"
+ color="primary"
+ variant="outlined"
+ />
+ }
+ label={`${queryTime}ms`}
+ size="small"
+ variant="outlined"
+ />
+ {useEnhancedSearch && (
+ }
+ label="Enhanced"
+ size="small"
+ color="success"
+ variant="outlined"
+ />
+ )}
+
+
+ {/* Search Mode Selector */}
+ newMode && setSearchMode(newMode)}
+ size="small"
+ >
+ Simple
+ Phrase
+ Fuzzy
+ Boolean
+
+
+ )}
+
+ {/* Suggestions */}
+ {suggestions.length > 0 && (
+
+
+ Suggestions:
+
+
+ {suggestions.map((suggestion, index) => (
+ handleSuggestionClick(suggestion)}
+ clickable
+ variant="outlined"
+ sx={{
+ '&:hover': {
+ backgroundColor: 'primary.light',
+ color: 'primary.contrastText',
+ }
+ }}
+ />
+ ))}
+
+
+ )}
+
+ {/* Advanced Search Options */}
+ {showAdvanced && (
+
+
+ Search Options
+
+
+
+ setUseEnhancedSearch(e.target.checked)}
+ color="primary"
+ />
+ }
+ label="Enhanced Search"
+ />
+
+
+ setIncludeSnippets(e.target.checked)}
+ color="primary"
+ />
+ }
+ label="Show Snippets"
+ />
+
+
+
+ Snippet Length
+
+
+
+
+
+ )}
+
@@ -391,66 +680,34 @@ const SearchPage = () => {
{/* Search Results */}
- {/* Search Bar */}
-
- setSearchQuery(e.target.value)}
- InputProps={{
- startAdornment: (
-
-
-
- ),
- endAdornment: searchQuery && (
-
- setSearchQuery('')}
- >
-
-
-
- ),
- }}
- sx={{
- '& .MuiOutlinedInput-root': {
- '& fieldset': {
- borderWidth: 2,
- },
- },
- }}
- />
-
{/* Toolbar */}
-
-
- {loading ? 'Searching...' : `${searchResults.length} results found`}
-
-
- newView && setViewMode(newView)}
- size="small"
- >
-
-
-
-
-
-
-
-
+ {searchQuery && (
+
+
+ {loading ? 'Searching...' : `${searchResults.length} results found`}
+
+
+ newView && setViewMode(newView)}
+ size="small"
+ >
+
+
+
+
+
+
+
+
+ )}
{/* Results */}
{loading && (
@@ -496,11 +753,16 @@ const SearchPage = () => {
>
- Start searching
+ Start searching your documents
-
- Enter keywords to search through your documents
+
+ Use the enhanced search bar above to find documents by content, filename, or tags
+
+
+
+
+
)}
@@ -610,6 +872,54 @@ const SearchPage = () => {
)}
)}
+
+ {/* Search Snippets */}
+ {doc.snippets && doc.snippets.length > 0 && (
+
+ {doc.snippets.slice(0, 2).map((snippet, index) => (
+
+
+ ...{renderHighlightedText(snippet.text, snippet.highlight_ranges)}...
+
+
+ ))}
+ {doc.snippets.length > 2 && (
+
+ +{doc.snippets.length - 2} more matches
+
+ )}
+
+ )}
+
+ {/* Search Rank */}
+ {doc.search_rank && (
+
+
+
+ )}
diff --git a/frontend/src/pages/__tests__/SearchPage.test.jsx b/frontend/src/pages/__tests__/SearchPage.test.jsx
new file mode 100644
index 0000000..ba9b336
--- /dev/null
+++ b/frontend/src/pages/__tests__/SearchPage.test.jsx
@@ -0,0 +1,460 @@
+import React from 'react';
+import { render, screen, fireEvent, waitFor, act } from '@testing-library/react';
+import userEvent from '@testing-library/user-event';
+import { BrowserRouter } from 'react-router-dom';
+import SearchPage from '../SearchPage';
+import { documentService } from '../../services/api';
+
+// Mock the API service
+jest.mock('../../services/api', () => ({
+ documentService: {
+ enhancedSearch: jest.fn(),
+ search: jest.fn(),
+ download: jest.fn(),
+ }
+}));
+
+// Mock useNavigate
+const mockNavigate = jest.fn();
+jest.mock('react-router-dom', () => ({
+ ...jest.requireActual('react-router-dom'),
+ useNavigate: () => mockNavigate,
+}));
+
+// Canned search payload; beforeEach makes both mocked endpoints (enhancedSearch and search) resolve to it
+const mockSearchResponse = {
+ data: {
+ documents: [
+ {
+ id: '1',
+ filename: 'test.pdf',
+ original_filename: 'test.pdf',
+ file_size: 1024,
+ mime_type: 'application/pdf',
+ tags: ['test', 'document'],
+ created_at: '2023-01-01T00:00:00Z',
+ has_ocr_text: true,
+ search_rank: 0.85, // asserted below as 'Relevance: 85.0%'
+ snippets: [
+ {
+ text: 'This is a test document with important information',
+ start_offset: 0,
+ end_offset: 48, // NOTE(review): `text` is 50 characters long, so 48 does not match its end
+ highlight_ranges: [
+ { start: 10, end: 14 } // covers the word "test" in `text`
+ ]
+ }
+ ]
+ }
+ ],
+ total: 1,
+ query_time_ms: 45,
+ suggestions: ['\"test\"', 'test*'] // the \" escapes are redundant inside single quotes; the first value is "test" with quotes
+ }
+};
+
+// Renders `component` through Testing Library's render. NOTE(review): the wrapping element is not visible in this view - presumably BrowserRouter (imported above); confirm.
+const renderWithRouter = (component) => {
+ return render(
+
+ {component}
+
+ );
+};
+
+describe('SearchPage', () => {
+ beforeEach(() => {
+ jest.clearAllMocks();
+ documentService.enhancedSearch.mockResolvedValue(mockSearchResponse);
+ documentService.search.mockResolvedValue(mockSearchResponse);
+ });
+
+ test('renders search page with prominent search bar', () => {
+ renderWithRouter();
+
+ expect(screen.getByText('Search Documents')).toBeInTheDocument();
+ expect(screen.getByPlaceholderText(/Search documents by content, filename, or tags/)).toBeInTheDocument();
+ expect(screen.getByText('Start searching your documents')).toBeInTheDocument();
+ });
+
+ test('displays search suggestions when no query is entered', () => {
+ renderWithRouter();
+
+ expect(screen.getByText('Try: invoice')).toBeInTheDocument();
+ expect(screen.getByText('Try: contract')).toBeInTheDocument();
+ expect(screen.getByText('Try: tag:important')).toBeInTheDocument();
+ });
+
+ test('performs search when user types in search box', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test query');
+ });
+
+ // Wait for debounced search
+ await waitFor(() => {
+ expect(documentService.enhancedSearch).toHaveBeenCalledWith(
+ expect.objectContaining({
+ query: 'test query',
+ include_snippets: true,
+ snippet_length: 200,
+ search_mode: 'simple'
+ })
+ );
+ }, { timeout: 2000 });
+ });
+
+ test('displays search results with snippets', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('test.pdf')).toBeInTheDocument();
+ expect(screen.getByText(/This is a test document/)).toBeInTheDocument();
+ expect(screen.getByText('1 results')).toBeInTheDocument();
+ expect(screen.getByText('45ms')).toBeInTheDocument();
+ });
+ });
+
+ test('shows search suggestions when available', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('Suggestions:')).toBeInTheDocument();
+ expect(screen.getByText('\"test\"')).toBeInTheDocument();
+ expect(screen.getByText('test*')).toBeInTheDocument();
+ });
+ });
+
+ test('toggles advanced search options', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const settingsButton = screen.getByRole('button', { name: /settings/i });
+
+ await user.click(settingsButton);
+
+ expect(screen.getByText('Search Options')).toBeInTheDocument();
+ expect(screen.getByText('Enhanced Search')).toBeInTheDocument();
+ expect(screen.getByText('Show Snippets')).toBeInTheDocument();
+ });
+
+ test('changes search mode', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ // Type a search query first to show the search mode selector
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ const phraseButton = screen.getByRole('button', { name: 'Phrase' });
+ expect(phraseButton).toBeInTheDocument();
+ });
+
+ const phraseButton = screen.getByRole('button', { name: 'Phrase' });
+ await user.click(phraseButton);
+
+ // Wait for search to be called with new mode
+ await waitFor(() => {
+ expect(documentService.enhancedSearch).toHaveBeenCalledWith(
+ expect.objectContaining({
+ search_mode: 'phrase'
+ })
+ );
+ });
+ });
+
+ test('handles search suggestions click', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('\"test\"')).toBeInTheDocument();
+ });
+
+ const suggestionChip = screen.getByText('\"test\"');
+ await user.click(suggestionChip);
+
+ expect(searchInput.value).toBe('\"test\"');
+ });
+
+ test('clears search input', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test query');
+ });
+
+ const clearButton = screen.getByRole('button', { name: /clear/i });
+ await user.click(clearButton);
+
+ expect(searchInput.value).toBe('');
+ });
+
+ test('toggles enhanced search setting', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ // Open advanced options
+ const settingsButton = screen.getByRole('button', { name: /settings/i });
+ await user.click(settingsButton);
+
+ const enhancedSearchSwitch = screen.getByRole('checkbox', { name: /enhanced search/i });
+ await user.click(enhancedSearchSwitch);
+
+ // Type a search to trigger API call
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ // Should use regular search instead of enhanced search
+ await waitFor(() => {
+ expect(documentService.search).toHaveBeenCalled();
+ });
+ });
+
+ test('changes snippet length setting', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ // Open advanced options
+ const settingsButton = screen.getByRole('button', { name: /settings/i });
+ await user.click(settingsButton);
+
+ const snippetSelect = screen.getByLabelText('Snippet Length');
+ await user.click(snippetSelect);
+
+ const longOption = screen.getByText('Long (400)');
+ await user.click(longOption);
+
+ // Type a search to trigger API call
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(documentService.enhancedSearch).toHaveBeenCalledWith(
+ expect.objectContaining({
+ snippet_length: 400
+ })
+ );
+ });
+ });
+
+ test('displays loading state during search', async () => {
+ const user = userEvent.setup();
+
+ // Mock a delayed response
+ documentService.enhancedSearch.mockImplementation(() =>
+ new Promise(resolve => setTimeout(() => resolve(mockSearchResponse), 100))
+ );
+
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ // Should show loading indicator
+ expect(screen.getByRole('progressbar')).toBeInTheDocument();
+
+ await waitFor(() => {
+ expect(screen.getByText('test.pdf')).toBeInTheDocument();
+ });
+ });
+
+ test('handles search error gracefully', async () => {
+ const user = userEvent.setup();
+
+ documentService.enhancedSearch.mockRejectedValue(new Error('Search failed'));
+
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('Search failed. Please try again.')).toBeInTheDocument();
+ });
+ });
+
+ test('navigates to document details on view click', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('test.pdf')).toBeInTheDocument();
+ });
+
+ const viewButton = screen.getByLabelText('View Details');
+ await user.click(viewButton);
+
+ expect(mockNavigate).toHaveBeenCalledWith('/documents/1');
+ });
+
+ test('handles document download', async () => {
+ const user = userEvent.setup();
+ const mockBlob = new Blob(['test content'], { type: 'application/pdf' });
+ documentService.download.mockResolvedValue({ data: mockBlob });
+
+ // Mock URL.createObjectURL
+ global.URL.createObjectURL = jest.fn(() => 'mock-url');
+ global.URL.revokeObjectURL = jest.fn();
+
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('test.pdf')).toBeInTheDocument();
+ });
+
+ const downloadButton = screen.getByLabelText('Download');
+ await user.click(downloadButton);
+
+ expect(documentService.download).toHaveBeenCalledWith('1');
+ });
+
+ test('switches between grid and list view modes', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('test.pdf')).toBeInTheDocument();
+ });
+
+ const listViewButton = screen.getByRole('button', { name: /list view/i });
+ await user.click(listViewButton);
+
+ // The view should change (this would be more thoroughly tested with visual regression tests)
+ expect(listViewButton).toHaveAttribute('aria-pressed', 'true');
+ });
+
+ test('displays file type icons correctly', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ // Should show PDF icon for PDF file
+ expect(screen.getByTestId('PictureAsPdfIcon')).toBeInTheDocument();
+ });
+ });
+
+ test('displays OCR badge when document has OCR text', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('OCR')).toBeInTheDocument();
+ });
+ });
+
+ test('highlights search terms in snippets', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ // Should render the snippet with highlighted text
+ expect(screen.getByText(/This is a test document/)).toBeInTheDocument();
+ });
+ });
+
+ test('shows relevance score when available', async () => {
+ const user = userEvent.setup();
+ renderWithRouter();
+
+ const searchInput = screen.getByPlaceholderText(/Search documents by content, filename, or tags/);
+
+ await act(async () => {
+ await user.type(searchInput, 'test');
+ });
+
+ await waitFor(() => {
+ expect(screen.getByText('Relevance: 85.0%')).toBeInTheDocument();
+ });
+ });
+});
+
+// Placeholder suite for the formatting helpers - neither test below exercises any code yet
+describe('Search Helper Functions', () => {
+ test('formats file sizes correctly', () => {
+ // TODO: replace with real assertions once the file-size formatter is exported
+ // Until then this asserts nothing about SearchPage and always passes
+ expect(true).toBe(true);
+ });
+
+ test('formats dates correctly', () => {
+ // TODO: replace with real assertions once the date formatter is exported
+ expect(true).toBe(true);
+ });
+});
\ No newline at end of file
diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts
index 372506d..93330f0 100644
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@@ -27,11 +27,41 @@ export interface SearchRequest {
mime_types?: string[]
limit?: number
offset?: number
+ include_snippets?: boolean
+ snippet_length?: number
+ search_mode?: 'simple' | 'phrase' | 'fuzzy' | 'boolean'
+}
+
+export interface HighlightRange { // One highlighted span inside a snippet's `text`
+ start: number // offset of the first highlighted position; SearchPage passes it to text.substring
+ end: number // offset just past the highlight (exclusive, as used by text.substring)
+}
+
+export interface SearchSnippet { // Excerpt of a matching document plus the spans to highlight in it
+ text: string // excerpt shown in the result card
+ start_offset: number // where the excerpt starts in the server-side document text
+ end_offset: number // where the excerpt ends in the server-side document text
+ highlight_ranges: HighlightRange[] // relative to `text`, not to the whole document
+}
+
+export interface EnhancedDocument { // Search hit as returned by both search endpoints
+ id: string
+ filename: string
+ original_filename: string
+ file_size: number
+ mime_type: string
+ tags: string[]
+ created_at: string
+ has_ocr_text: boolean // true when the server has OCR text stored for the document
+ search_rank?: number // relevance score; the plain search route sets no rank (None on the server)
+ snippets: SearchSnippet[] // empty for plain search or when snippets are disabled
}
export interface SearchResponse {
- documents: Document[]
+ documents: EnhancedDocument[] // hits for the requested page
total: number
+ query_time_ms: number // server-side elapsed time; the plain search route always reports 0
+ suggestions: string[] // alternative queries; the plain search route always returns an empty list
}
export const documentService = {
@@ -62,4 +92,15 @@ export const documentService = {
params: searchRequest,
})
},
+
+ enhancedSearch: (searchRequest: SearchRequest) => { // GETs /search/enhanced, filling in defaults for any snippet/mode option the caller left out
+ return api.get('/search/enhanced', {
+ params: {
+ ...searchRequest,
+ include_snippets: searchRequest.include_snippets ?? true, // ?? keeps an explicit `false`
+ snippet_length: searchRequest.snippet_length ?? 200,
+ search_mode: searchRequest.search_mode ?? 'simple',
+ },
+ })
+ },
}
\ No newline at end of file
diff --git a/src/db.rs b/src/db.rs
index 0648021..1aab516 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -3,7 +3,7 @@ use chrono::Utc;
use sqlx::{PgPool, Row};
use uuid::Uuid;
-use crate::models::{CreateUser, Document, SearchRequest, User};
+use crate::models::{CreateUser, Document, SearchRequest, SearchMode, SearchSnippet, HighlightRange, EnhancedDocumentResponse, User};
#[derive(Clone)]
pub struct Database {
@@ -328,6 +328,169 @@ impl Database {
Ok((documents, total))
}
+    /// Runs a ranked full-text search over one user's documents.
+    ///
+    /// The query is matched against `content` and `ocr_text` with PostgreSQL full-text
+    /// search using the 'english' configuration. `search.search_mode` selects the tsquery
+    /// parser (defaulting to `Simple`); results are ordered by `ts_rank`, then newest
+    /// first, after applying the optional tag / MIME-type filters, `limit` and `offset`.
+    ///
+    /// Returns `(documents, total, query_time_ms)`. `total` counts every document that
+    /// matches the query and the same tag / MIME-type filters, ignoring `limit` and
+    /// `offset`. The elapsed time covers both queries plus snippet generation.
+    ///
+    /// # Errors
+    /// Propagates any database error from either query.
+    pub async fn enhanced_search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec<EnhancedDocumentResponse>, i64, u64)> {
+        // Appends the optional tag and MIME-type filters. Used for both the page query
+        // and the count query so that `total` always describes the same result set.
+        fn push_filters<'a>(builder: &mut sqlx::QueryBuilder<'a, sqlx::Postgres>, search: &'a SearchRequest) {
+            if let Some(tags) = &search.tags {
+                if !tags.is_empty() {
+                    builder.push(" AND tags && ");
+                    builder.push_bind(tags);
+                }
+            }
+
+            if let Some(mime_types) = &search.mime_types {
+                if !mime_types.is_empty() {
+                    builder.push(" AND mime_type = ANY(");
+                    builder.push_bind(mime_types);
+                    builder.push(")");
+                }
+            }
+        }
+
+        let start_time = std::time::Instant::now();
+
+        // Pick the tsquery parser for the requested mode. Only the function name is
+        // interpolated into SQL; the user's query text is always sent as a bind parameter.
+        // NOTE(review): `to_tsquery` expects tsquery operator syntax; presumably malformed
+        // input surfaces as a database error here - confirm and consider validating upstream.
+        let search_mode = search.search_mode.as_ref().unwrap_or(&SearchMode::Simple);
+        let query_function = match search_mode {
+            SearchMode::Simple => "plainto_tsquery",
+            SearchMode::Phrase => "phraseto_tsquery",
+            SearchMode::Fuzzy => "plainto_tsquery", // Could be enhanced with similarity
+            SearchMode::Boolean => "to_tsquery",
+        };
+
+        // Document vector shared by the rank expression, the WHERE clause and the count query.
+        let document_vector = "to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, ''))";
+
+        let mut query_builder: sqlx::QueryBuilder<'_, sqlx::Postgres> = sqlx::QueryBuilder::new(format!(
+            "SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id, ts_rank({}, {}('english', ",
+            document_vector, query_function
+        ));
+        query_builder.push_bind(&search.query);
+        query_builder.push(")) as rank FROM documents WHERE user_id = ");
+        query_builder.push_bind(user_id);
+        query_builder.push(format!(" AND {} @@ {}('english', ", document_vector, query_function));
+        query_builder.push_bind(&search.query);
+        query_builder.push(")");
+        push_filters(&mut query_builder, &search);
+
+        query_builder.push(" ORDER BY rank DESC, created_at DESC");
+
+        if let Some(limit) = search.limit {
+            query_builder.push(" LIMIT ");
+            query_builder.push_bind(limit);
+        }
+
+        if let Some(offset) = search.offset {
+            query_builder.push(" OFFSET ");
+            query_builder.push_bind(offset);
+        }
+
+        let rows = query_builder.build().fetch_all(&self.pool).await?;
+
+        let include_snippets = search.include_snippets.unwrap_or(true);
+        let snippet_length = search.snippet_length.unwrap_or(200);
+
+        let mut documents = Vec::with_capacity(rows.len());
+        for row in rows {
+            let doc_id: Uuid = row.get("id");
+            let content: Option<String> = row.get("content");
+            let ocr_text: Option<String> = row.get("ocr_text");
+            let rank: f32 = row.get("rank");
+
+            let snippets = if include_snippets {
+                self.generate_snippets(&search.query, content.as_deref(), ocr_text.as_deref(), snippet_length)
+            } else {
+                Vec::new()
+            };
+
+            documents.push(EnhancedDocumentResponse {
+                id: doc_id,
+                filename: row.get("filename"),
+                original_filename: row.get("original_filename"),
+                file_size: row.get("file_size"),
+                mime_type: row.get("mime_type"),
+                tags: row.get("tags"),
+                created_at: row.get("created_at"),
+                has_ocr_text: ocr_text.is_some(),
+                search_rank: Some(rank),
+                snippets,
+            });
+        }
+
+        // Count with the same predicate and filters as the page query. The previous
+        // version skipped the tag / MIME-type filters, so `total` overstated the result
+        // set whenever a filter was active.
+        let mut count_builder: sqlx::QueryBuilder<'_, sqlx::Postgres> =
+            sqlx::QueryBuilder::new("SELECT COUNT(*) as total FROM documents WHERE user_id = ");
+        count_builder.push_bind(user_id);
+        count_builder.push(format!(" AND {} @@ {}('english', ", document_vector, query_function));
+        count_builder.push_bind(&search.query);
+        count_builder.push(")");
+        push_filters(&mut count_builder, &search);
+
+        let total_row = count_builder.build().fetch_one(&self.pool).await?;
+
+        let total: i64 = total_row.get("total");
+        let query_time = start_time.elapsed().as_millis() as u64;
+
+        Ok((documents, total, query_time))
+    }
+
+    /// Builds up to three context snippets around case-insensitive occurrences of `query`.
+    ///
+    /// `content` and `ocr_text` are joined with a single space (either may be absent) and
+    /// the whole `query` string is searched for as one literal phrase. Each snippet is a
+    /// window of about `snippet_length` bytes that starts `snippet_length / 2` bytes before
+    /// its match (clamped to the start of the text) and lists a highlight range for every
+    /// occurrence of the query inside that window.
+    ///
+    /// `start_offset` / `end_offset` are byte offsets into the combined text and highlight
+    /// ranges are byte offsets into the snippet text; all of them fall on UTF-8 character
+    /// boundaries. Returns an empty vector when there is no text, `query` is empty, or
+    /// `snippet_length` is not positive.
+    fn generate_snippets(&self, query: &str, content: Option<&str>, ocr_text: Option<&str>, snippet_length: i32) -> Vec<SearchSnippet> {
+        // Returns the byte ranges, in `haystack` itself, of the first `limit`
+        // non-overlapping case-insensitive matches of `needle_lower` (already lowercased).
+        // Matching runs on a lowercased copy; because lowercasing can change a character's
+        // byte length, positions are mapped back by walking the original characters in
+        // step with their lowercase expansions.
+        fn match_ranges(haystack: &str, needle_lower: &str, limit: usize) -> Vec<(usize, usize)> {
+            let lowered = haystack.to_lowercase();
+            let mut ranges = Vec::new();
+            let mut chars = haystack.chars();
+            let mut orig_pos = 0usize; // byte offset reached in `haystack`
+            let mut lower_pos = 0usize; // corresponding byte offset in `lowered`
+
+            // Advances to the first original character boundary at or after `target`
+            // (an offset in `lowered`) and returns the matching offset in `haystack`.
+            let mut seek = |target: usize| -> usize {
+                while lower_pos < target {
+                    match chars.next() {
+                        Some(c) => {
+                            orig_pos += c.len_utf8();
+                            lower_pos += c.to_lowercase().map(char::len_utf8).sum::<usize>();
+                        }
+                        None => break,
+                    }
+                }
+                orig_pos
+            };
+
+            for (lower_start, matched) in lowered.match_indices(needle_lower).take(limit) {
+                let start = seek(lower_start);
+                let end = seek(lower_start + matched.len());
+                ranges.push((start, end));
+            }
+
+            ranges
+        }
+
+        let mut snippets = Vec::new();
+
+        // An empty needle matches at every position and a non-positive length would wrap
+        // to a huge `usize`; neither can yield a meaningful snippet.
+        if query.is_empty() || snippet_length <= 0 {
+            return snippets;
+        }
+
+        // Combine content and OCR text
+        let full_text = match (content, ocr_text) {
+            (Some(c), Some(o)) => format!("{} {}", c, o),
+            (Some(c), None) => c.to_string(),
+            (None, Some(o)) => o.to_string(),
+            (None, None) => return snippets,
+        };
+
+        let window = snippet_length as usize;
+        let query_lower = query.to_lowercase();
+
+        // Simple whole-phrase matching (could be enhanced with per-term matching).
+        // Limit to a few snippets per document: one per match, for the first three matches.
+        // NOTE(review): the web client slices snippets with JavaScript `substring`, which
+        // counts UTF-16 code units, so these byte offsets only line up for ASCII text -
+        // confirm which unit the API should expose.
+        for (match_start, _) in match_ranges(&full_text, &query_lower, 3) {
+            // Slicing a `str` off a character boundary panics, so widen the window
+            // outwards to the nearest boundaries (a no-op for ASCII text).
+            let mut snippet_start = match_start.saturating_sub(window / 2);
+            while !full_text.is_char_boundary(snippet_start) {
+                snippet_start -= 1;
+            }
+
+            let mut snippet_end = std::cmp::min(snippet_start + window, full_text.len());
+            while !full_text.is_char_boundary(snippet_end) {
+                snippet_end += 1;
+            }
+
+            let snippet_text = &full_text[snippet_start..snippet_end];
+
+            // Find highlight ranges within this snippet
+            let highlight_ranges = match_ranges(snippet_text, &query_lower, usize::MAX)
+                .into_iter()
+                .map(|(start, end)| HighlightRange {
+                    start: start as i32,
+                    end: end as i32,
+                })
+                .collect();
+
+            snippets.push(SearchSnippet {
+                text: snippet_text.to_string(),
+                start_offset: snippet_start as i32,
+                end_offset: snippet_end as i32,
+                highlight_ranges,
+            });
+        }
+
+        snippets
+    }
+
pub async fn update_document_ocr(&self, id: Uuid, ocr_text: &str) -> Result<()> {
sqlx::query("UPDATE documents SET ocr_text = $1, updated_at = NOW() WHERE id = $2")
.bind(ocr_text)
diff --git a/src/models.rs b/src/models.rs
index f796905..2cf7266 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -74,12 +74,63 @@ pub struct SearchRequest {
pub mime_types: Option>,
pub limit: Option,
pub offset: Option,
+ pub include_snippets: Option,
+ pub snippet_length: Option,
+ pub search_mode: Option,
+}
+
+/// How the search text is turned into a PostgreSQL tsquery.
+///
+/// Variants serialize as their lowercase names: `simple`, `phrase`, `fuzzy`, `boolean`.
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum SearchMode {
+    /// Parsed with `plainto_tsquery`.
+    Simple,
+    /// Parsed with `phraseto_tsquery`.
+    Phrase,
+    /// Currently parsed with `plainto_tsquery`, the same as `Simple`.
+    Fuzzy,
+    /// Parsed with `to_tsquery`.
+    Boolean,
+}
+
+impl Default for SearchMode {
+    /// `Simple` is the mode used when a request does not specify one.
+    fn default() -> Self {
+        Self::Simple
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct SearchSnippet { // Excerpt of a matching document plus the spans to highlight in it
+ pub text: String, // excerpt cut from the document's combined content and OCR text
+ pub start_offset: i32, // byte offset of the excerpt's start in the combined text
+ pub end_offset: i32, // byte offset of the excerpt's end (exclusive) in the combined text
+ pub highlight_ranges: Vec, // relative to `text`, not to the whole document
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct HighlightRange { // One highlighted span inside a snippet's `text`
+ pub start: i32, // byte offset of the match start within the snippet text
+ pub end: i32, // byte offset just past the match (exclusive)
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct EnhancedDocumentResponse { // Search hit returned by both search routes
+ pub id: Uuid,
+ pub filename: String,
+ pub original_filename: String,
+ pub file_size: i64,
+ pub mime_type: String,
+ pub tags: Vec,
+ pub created_at: DateTime,
+ pub has_ocr_text: bool, // true when the document row has OCR text stored
+ pub search_rank: Option, // ts_rank score for enhanced search; None for the plain search route
+ pub snippets: Vec, // empty for the plain search route or when snippets are disabled
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SearchResponse {
- pub documents: Vec,
+ pub documents: Vec, // hits for the requested page
pub total: i64,
+ pub query_time_ms: u64, // elapsed server time; the plain search route always reports 0
+ pub suggestions: Vec, // alternative queries; always empty for the plain search route
}
impl From for DocumentResponse {
diff --git a/src/routes/search.rs b/src/routes/search.rs
index 2fd0039..2a768a3 100644
--- a/src/routes/search.rs
+++ b/src/routes/search.rs
@@ -9,12 +9,14 @@ use std::sync::Arc;
use crate::{
auth::AuthUser,
- models::{SearchRequest, SearchResponse},
+ models::{SearchRequest, SearchResponse, EnhancedDocumentResponse},
AppState,
};
pub fn router() -> Router> {
- Router::new().route("/", get(search_documents))
+ Router::new() // search routes, relative to wherever this router is mounted
+ .route("/", get(search_documents)) // plain search: no rank, snippets or suggestions
+ .route("/enhanced", get(enhanced_search_documents)) // ranked search with snippets and suggestions
}
async fn search_documents(
@@ -29,9 +31,69 @@ async fn search_documents(
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let response = SearchResponse {
- documents: documents.into_iter().map(|doc| doc.into()).collect(),
+ documents: documents.into_iter().map(|doc| EnhancedDocumentResponse {
+ id: doc.id,
+ filename: doc.filename,
+ original_filename: doc.original_filename,
+ file_size: doc.file_size,
+ mime_type: doc.mime_type,
+ tags: doc.tags,
+ created_at: doc.created_at,
+ has_ocr_text: doc.ocr_text.is_some(),
+ search_rank: None,
+ snippets: Vec::new(),
+ }).collect(),
total,
+ query_time_ms: 0,
+ suggestions: Vec::new(),
};
Ok(Json(response))
+}
+
+async fn enhanced_search_documents( // GET handler: ranked search for the authenticated user, with query suggestions attached
+ State(state): State>,
+ auth_user: AuthUser,
+ Query(search_request): Query,
+) -> Result, StatusCode> {
+ // Suggestions only need the query text; build them first because search_request is moved into the database call below
+ let suggestions = generate_search_suggestions(&search_request.query);
+
+ let (documents, total, query_time) = state
+ .db
+ .enhanced_search_documents(auth_user.user.id, search_request)
+ .await
+ .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; // every database error becomes a 500 and its cause is discarded
+
+ let response = SearchResponse {
+ documents,
+ total,
+ query_time_ms: query_time,
+ suggestions,
+ };
+
+ Ok(Json(response))
+}
+
+fn generate_search_suggestions(query: &str) -> Vec {
+ // Returns at most three alternative query strings derived from `query` by fixed rules - could be enhanced with a proper suggestion system
+ let mut suggestions = Vec::new();
+
+ if query.len() > 3 { // len() counts bytes, so queries of 3 bytes or fewer get no suggestions
+ // Common search variations
+ suggestions.push(format!("\"{}\"", query)); // Exact phrase
+
+ // Wildcard suggestion, skipped when the query already contains '*'. NOTE(review): confirm the chosen tsquery parsers give '*' and quotes any meaning.
+ if !query.contains('*') {
+ suggestions.push(format!("{}*", query));
+ }
+
+ // Add similar terms (this would typically come from a thesaurus or ML model)
+ if query.contains("document") {
+ suggestions.push(query.replace("document", "file"));
+ suggestions.push(query.replace("document", "paper"));
+ }
+ }
+
+ suggestions.into_iter().take(3).collect() // cap at three, so the "paper" variant is dropped whenever the wildcard was added
}
\ No newline at end of file
diff --git a/src/tests/db_tests.rs b/src/tests/db_tests.rs
index 19ba1db..fb924f6 100644
--- a/src/tests/db_tests.rs
+++ b/src/tests/db_tests.rs
@@ -147,6 +147,9 @@ mod tests {
mime_types: None,
limit: Some(10),
offset: Some(0),
+ include_snippets: Some(true),
+ snippet_length: Some(200),
+ search_mode: None,
};
let result = db.search_documents(user.id, search_request).await;
diff --git a/src/tests/enhanced_search_tests.rs b/src/tests/enhanced_search_tests.rs
new file mode 100644
index 0000000..7ae0225
--- /dev/null
+++ b/src/tests/enhanced_search_tests.rs
@@ -0,0 +1,695 @@
+#[cfg(test)]
+mod tests {
+ use crate::db::Database;
+ use crate::models::{
+ CreateUser, Document, SearchRequest, SearchMode,
+ EnhancedDocumentResponse, SearchSnippet, HighlightRange
+ };
+ use chrono::Utc;
+ use uuid::Uuid;
+
+    // Mock database for testing snippet generation without PostgreSQL dependency
+    struct MockDatabase;
+
+    impl MockDatabase {
+        fn new() -> Self {
+            Self
+        }
+
+        /// Returns the largest char boundary of `text` that is `<= index`
+        /// (clamped to `text.len()`), so the result is always safe to slice at.
+        fn floor_boundary(text: &str, index: usize) -> usize {
+            let mut idx = index.min(text.len());
+            // Offset 0 is always a boundary, so this loop terminates.
+            while !text.is_char_boundary(idx) {
+                idx -= 1;
+            }
+            idx
+        }
+
+        /// If the text starting at byte `start` of `haystack` lowercases to
+        /// `needle_lower`, returns the byte offset just past that match.
+        fn match_end_at(haystack: &str, start: usize, needle_lower: &str) -> Option<usize> {
+            let mut needle = needle_lower.chars();
+            let mut expected = needle.next();
+
+            for (offset, ch) in haystack[start..].char_indices() {
+                if expected.is_none() {
+                    return Some(start + offset);
+                }
+                // A single char may lowercase to several chars; all must line up.
+                for lowered in ch.to_lowercase() {
+                    if expected != Some(lowered) {
+                        return None;
+                    }
+                    expected = needle.next();
+                }
+            }
+
+            if expected.is_none() {
+                Some(haystack.len())
+            } else {
+                None
+            }
+        }
+
+        /// Collects up to `limit` non-overlapping, left-to-right case-insensitive
+        /// matches of `needle_lower` as `(start, end)` byte ranges into `haystack`.
+        fn find_matches(haystack: &str, needle_lower: &str, limit: usize) -> Vec<(usize, usize)> {
+            let mut found = Vec::new();
+            if needle_lower.is_empty() {
+                return found;
+            }
+
+            let mut cursor = 0;
+            while cursor < haystack.len() && found.len() < limit {
+                match Self::match_end_at(haystack, cursor, needle_lower) {
+                    Some(end) => {
+                        found.push((cursor, end));
+                        cursor = end;
+                    }
+                    None => {
+                        // Advance by one whole character, never into the middle of one.
+                        cursor += haystack[cursor..].chars().next().map_or(1, char::len_utf8);
+                    }
+                }
+            }
+            found
+        }
+
+        /// Test the snippet generation logic directly.
+        ///
+        /// Joins `content` and `ocr_text` (space separated when both are present),
+        /// finds case-insensitive occurrences of `query`, and returns up to three
+        /// snippets of at most `snippet_length` bytes, each starting roughly half a
+        /// window before its match. Offsets and highlight ranges are byte offsets;
+        /// highlight ranges are relative to the snippet text.
+        ///
+        /// All offsets are computed on the original text and snapped down to char
+        /// boundaries, so multi-byte input cannot cause a slicing panic. An empty
+        /// query or a non-positive `snippet_length` yields no snippets.
+        fn generate_snippets(&self, query: &str, content: Option<&str>, ocr_text: Option<&str>, snippet_length: i32) -> Vec<SearchSnippet> {
+            // Limit to a few snippets per document
+            const MAX_SNIPPETS: usize = 3;
+
+            // Combine content and OCR text
+            let full_text = match (content, ocr_text) {
+                (Some(c), Some(o)) => format!("{} {}", c, o),
+                (Some(c), None) => c.to_string(),
+                (None, Some(o)) => o.to_string(),
+                (None, None) => return Vec::new(),
+            };
+
+            // Lowercase char by char so the needle is built the same way the
+            // haystack is compared in `match_end_at`.
+            let needle: String = query.chars().flat_map(char::to_lowercase).collect();
+            if needle.is_empty() || snippet_length <= 0 {
+                return Vec::new();
+            }
+            let window = snippet_length as usize;
+
+            Self::find_matches(&full_text, &needle, MAX_SNIPPETS)
+                .into_iter()
+                .map(|(match_start, _)| {
+                    let start = Self::floor_boundary(&full_text, match_start.saturating_sub(window / 2));
+                    let end = Self::floor_boundary(&full_text, start.saturating_add(window));
+                    let text = &full_text[start..end];
+
+                    // Find highlight ranges within this snippet
+                    let highlight_ranges = Self::find_matches(text, &needle, usize::MAX)
+                        .into_iter()
+                        .map(|(from, to)| HighlightRange {
+                            start: from as i32,
+                            end: to as i32,
+                        })
+                        .collect();
+
+                    SearchSnippet {
+                        text: text.to_string(),
+                        start_offset: start as i32,
+                        end_offset: end as i32,
+                        highlight_ranges,
+                    }
+                })
+                .collect()
+        }
+    }
+
+    /// A plain ASCII query yields a snippet whose first highlight slices out the query term.
+    #[test]
+    fn test_snippet_generation_basic() {
+        let text = "This is a test document with some important information about testing and quality assurance.";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 50);
+
+        assert!(!results.is_empty());
+        let first = &results[0];
+        assert!(first.text.contains("test"));
+        assert!(!first.highlight_ranges.is_empty());
+
+        // The first highlight must cover exactly the matched word.
+        let range = &first.highlight_ranges[0];
+        let (from, to) = (range.start as usize, range.end as usize);
+        assert_eq!(first.text[from..to].to_lowercase(), "test");
+    }
+
+    /// Repeated occurrences of the query produce at least two highlights overall.
+    #[test]
+    fn test_snippet_generation_multiple_matches() {
+        let text = "The first test shows that testing is important. Another test demonstrates test effectiveness.";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 100);
+
+        assert!(!results.is_empty());
+
+        // Tally highlights across every returned snippet.
+        let mut highlight_count = 0usize;
+        for snippet in &results {
+            highlight_count += snippet.highlight_ranges.len();
+        }
+        assert!(highlight_count >= 2);
+    }
+
+    /// A term that appears only in the OCR text is still found.
+    #[test]
+    fn test_snippet_generation_with_ocr_text() {
+        let body = "Document content with information";
+        let scanned = "OCR extracted text with important data";
+
+        let results = MockDatabase::new().generate_snippets("important", Some(body), Some(scanned), 100);
+
+        assert!(!results.is_empty());
+        let first = &results[0];
+        assert!(first.text.contains("important"));
+    }
+
+    /// A lowercase query matches upper-case text, and the highlight keeps the original casing.
+    #[test]
+    fn test_snippet_generation_case_insensitive() {
+        let text = "This Document contains IMPORTANT Information";
+        let results = MockDatabase::new().generate_snippets("important", Some(text), None, 50);
+
+        assert!(!results.is_empty());
+        let first = &results[0];
+        let range = &first.highlight_ranges[0];
+        let matched = &first.text[range.start as usize..range.end as usize];
+        assert_eq!(matched, "IMPORTANT");
+    }
+
+    /// With neither content nor OCR text there is nothing to build a snippet from.
+    #[test]
+    fn test_snippet_generation_empty_content() {
+        let results = MockDatabase::new().generate_snippets("test", None, None, 100);
+        assert!(results.is_empty());
+    }
+
+    /// A query that does not occur in the text yields no snippets.
+    #[test]
+    fn test_snippet_generation_no_matches() {
+        let text = "This document has no matching terms";
+        let results = MockDatabase::new().generate_snippets("xyzabc", Some(text), None, 100);
+        assert!(results.is_empty());
+    }
+
+    /// The requested snippet length caps the snippet size, and a larger request gives a longer snippet.
+    #[test]
+    fn test_snippet_length_limits() {
+        let db = MockDatabase::new();
+        let text = "A very long document with lots of text that should be truncated when generating snippets to test the length limiting functionality of the snippet generation system.";
+
+        let narrow = db.generate_snippets("text", Some(text), None, 50);
+        let wide = db.generate_snippets("text", Some(text), None, 150);
+
+        assert!(!narrow.is_empty());
+        assert!(!wide.is_empty());
+
+        let narrow_len = narrow[0].text.len();
+        assert!(narrow_len <= 50);
+        assert!(wide[0].text.len() > narrow_len);
+    }
+
+    /// Snippet offsets are non-negative, ordered, and stay within the source text.
+    #[test]
+    fn test_snippet_positioning() {
+        let text = "Start of document. This is the middle part with test content. End of document.";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 40);
+
+        assert!(!results.is_empty());
+
+        // Sanity-check the reported window of the first snippet.
+        let first = &results[0];
+        let (from, to) = (first.start_offset, first.end_offset);
+        assert!(from >= 0);
+        assert!(to > from);
+        assert!(to <= text.len() as i32);
+    }
+
+    /// A request built with every optional field unset keeps those fields as `None`.
+    #[test]
+    fn test_search_request_defaults() {
+        let bare = SearchRequest {
+            query: "test".to_string(),
+            tags: None,
+            mime_types: None,
+            limit: None,
+            offset: None,
+            include_snippets: None,
+            snippet_length: None,
+            search_mode: None,
+        };
+
+        // Only the query is populated; the optional settings stay absent.
+        assert_eq!(bare.query, "test");
+        assert!(bare.include_snippets.is_none());
+        assert!(bare.search_mode.is_none());
+    }
+
+    /// A fully populated request exposes each option exactly as it was supplied.
+    #[test]
+    fn test_search_request_with_options() {
+        let tags = vec!["tag1".to_string(), "tag2".to_string()];
+        let mime_types = vec!["application/pdf".to_string()];
+
+        let populated = SearchRequest {
+            query: "test query".to_string(),
+            tags: Some(tags),
+            mime_types: Some(mime_types),
+            limit: Some(10),
+            offset: Some(0),
+            include_snippets: Some(true),
+            snippet_length: Some(300),
+            search_mode: Some(SearchMode::Phrase),
+        };
+
+        assert_eq!(populated.query, "test query");
+        assert_eq!(populated.tags.as_ref().unwrap().len(), 2);
+        assert_eq!(populated.include_snippets, Some(true));
+        assert_eq!(populated.snippet_length, Some(300));
+        assert!(matches!(populated.search_mode, Some(SearchMode::Phrase)));
+    }
+
+    /// Every `SearchMode` variant prints its own name through `Debug` formatting.
+    #[test]
+    fn test_search_mode_variants() {
+        // Variant paired with the text `{:?}` is expected to produce for it.
+        let expectations = [
+            (SearchMode::Simple, "Simple"),
+            (SearchMode::Phrase, "Phrase"),
+            (SearchMode::Fuzzy, "Fuzzy"),
+            (SearchMode::Boolean, "Boolean"),
+        ];
+
+        for (mode, debug_name) in expectations.iter() {
+            assert_eq!(format!("{:?}", mode), *debug_name);
+        }
+    }
+
+    /// `SearchMode::default()` is the `Simple` variant.
+    #[test]
+    fn test_search_mode_default() {
+        assert!(matches!(SearchMode::default(), SearchMode::Simple));
+    }
+
+    /// A `HighlightRange` stores the start and end it was built with.
+    #[test]
+    fn test_highlight_range_creation() {
+        let span = HighlightRange { start: 10, end: 20 };
+
+        assert_eq!(span.start, 10);
+        assert_eq!(span.end, 20);
+        assert!(span.end > span.start);
+    }
+
+    /// An `EnhancedDocumentResponse` built by hand reads back the values it was given.
+    #[test]
+    fn test_enhanced_document_response_creation() {
+        let expected_id = Uuid::new_v4();
+        let created = Utc::now();
+
+        // One snippet carrying a single highlight.
+        let only_snippet = SearchSnippet {
+            text: "This is a test snippet".to_string(),
+            start_offset: 0,
+            end_offset: 22,
+            highlight_ranges: vec![HighlightRange { start: 10, end: 14 }],
+        };
+
+        let doc = EnhancedDocumentResponse {
+            id: expected_id,
+            filename: "test.pdf".to_string(),
+            original_filename: "test.pdf".to_string(),
+            file_size: 1024,
+            mime_type: "application/pdf".to_string(),
+            tags: vec!["test".to_string()],
+            created_at: created,
+            has_ocr_text: true,
+            search_rank: Some(0.75),
+            snippets: vec![only_snippet],
+        };
+
+        assert_eq!(doc.id, expected_id);
+        assert_eq!(doc.filename, "test.pdf");
+        assert_eq!(doc.search_rank, Some(0.75));
+        assert!(doc.has_ocr_text);
+        assert_eq!(doc.snippets.len(), 1);
+        assert_eq!(doc.snippets[0].text, "This is a test snippet");
+    }
+
+    /// Closely packed matches still give snippets that are non-empty and highlighted.
+    #[test]
+    fn test_snippet_overlap_handling() {
+        // Every word here contains the query, so the snippet windows overlap heavily.
+        let text = "test testing tested test";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 30);
+
+        assert!(!results.is_empty());
+
+        results.iter().for_each(|snippet| {
+            assert!(!snippet.text.is_empty());
+            assert!(!snippet.highlight_ranges.is_empty());
+        });
+    }
+
+    /// Matches at the very start or end of the text, or spanning all of it, are windowed correctly.
+    #[test]
+    fn test_snippet_boundary_conditions() {
+        let db = MockDatabase::new();
+
+        // The content is nothing but the query itself.
+        let whole = db.generate_snippets("test", Some("test"), None, 100);
+        assert!(!whole.is_empty());
+        assert_eq!(whole[0].text, "test");
+
+        // The match is the first word.
+        let leading = db.generate_snippets("test", Some("test document content"), None, 50);
+        assert!(!leading.is_empty());
+        assert!(leading[0].text.starts_with("test"));
+
+        // The match is the last word.
+        let trailing = db.generate_snippets("test", Some("document content test"), None, 50);
+        assert!(!trailing.is_empty());
+        assert!(trailing[0].text.ends_with("test"));
+    }
+
+    /// In a longer text with many hits, every highlight is in bounds and covers the query term.
+    #[test]
+    fn test_complex_search_scenarios() {
+        let text = "This is a comprehensive test document that contains testing methodologies and test cases for quality assurance testing procedures.";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 80);
+
+        assert!(!results.is_empty());
+
+        // Walk every (snippet, highlight) pair and validate the range against its snippet.
+        let pairs = results
+            .iter()
+            .flat_map(|snippet| snippet.highlight_ranges.iter().map(move |range| (snippet, range)));
+
+        for (snippet, range) in pairs {
+            assert!(range.start >= 0);
+            assert!(range.end > range.start);
+            assert!(range.end <= snippet.text.len() as i32);
+
+            let matched = &snippet.text[range.start as usize..range.end as usize];
+            assert_eq!(matched.to_lowercase(), "test");
+        }
+    }
+
+    /// Multi-byte (Cyrillic) text and query still produce a snippet containing the term.
+    #[test]
+    fn test_unicode_content_handling() {
+        let cyrillic_text = "Это тест документ с важной информацией для тестирования";
+        let results = MockDatabase::new().generate_snippets("тест", Some(cyrillic_text), None, 50);
+
+        assert!(!results.is_empty());
+        let first = &results[0];
+        assert!(first.text.contains("тест"));
+    }
+
+    /// Punctuation next to the term ("test@…", "test-case") does not hide the match.
+    #[test]
+    fn test_special_characters_in_query() {
+        let text = "Document with special chars: test@example.com and test-case";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 60);
+
+        assert!(!results.is_empty());
+
+        // Both occurrences of "test" should be highlighted somewhere in the results.
+        let highlight_count = results
+            .iter()
+            .fold(0usize, |count, snippet| count + snippet.highlight_ranges.len());
+        assert!(highlight_count >= 2);
+    }
+
+    // Test search suggestions functionality
+    //
+    // Local copy of the suggestion helper from search.rs, exercised by the tests below.
+    // Yields at most three candidates in this order: quoted phrase, trailing-wildcard
+    // form (skipped when the query already contains '*'), then the "file" and "paper"
+    // substitutions for lowercase "document". Queries of three bytes or fewer yield nothing.
+    fn generate_search_suggestions(query: &str) -> Vec<String> {
+        if query.len() <= 3 {
+            return Vec::new();
+        }
+
+        let phrase = Some(format!("\"{}\"", query));
+
+        let wildcard = if query.contains('*') {
+            None
+        } else {
+            Some(format!("{}*", query))
+        };
+
+        // Synonyms are only produced when the query mentions "document".
+        let synonyms = ["file", "paper"]
+            .iter()
+            .filter(|_| query.contains("document"))
+            .map(|word| query.replace("document", word));
+
+        phrase
+            .into_iter()
+            .chain(wildcard)
+            .chain(synonyms)
+            .take(3)
+            .collect()
+    }
+
+    /// An ordinary word gets both the quoted-phrase and the wildcard suggestion.
+    #[test]
+    fn test_search_suggestions_basic() {
+        let offered = generate_search_suggestions("invoice");
+
+        assert!(!offered.is_empty());
+        for expected in ["\"invoice\"", "invoice*"].iter() {
+            assert!(offered.contains(&expected.to_string()));
+        }
+    }
+
+    /// Very short queries produce no suggestions at all.
+    #[test]
+    fn test_search_suggestions_short_query() {
+        let offered = generate_search_suggestions("ab");
+        assert!(offered.is_empty());
+    }
+
+    /// Synonym suggestions share the three-entry cap with the phrase and wildcard forms.
+    ///
+    /// The previous version of this test demanded both a "file" and a "paper"
+    /// variant for a query without `*`. That can never hold: the helper emits
+    /// phrase, wildcard, "file", "paper" in that order and keeps only the first three.
+    #[test]
+    fn test_search_suggestions_document_replacement() {
+        let suggestions = generate_search_suggestions("document search");
+
+        // Four candidates are generated; the trailing "paper search" is cut by the cap.
+        assert_eq!(
+            suggestions,
+            vec![
+                "\"document search\"".to_string(),
+                "document search*".to_string(),
+                "file search".to_string(),
+            ]
+        );
+
+        // When the query already has a wildcard, the wildcard form is skipped and
+        // both synonym variants fit under the cap.
+        let with_wildcard = generate_search_suggestions("document*");
+        assert!(with_wildcard.iter().any(|s| s == "file*"));
+        assert!(with_wildcard.iter().any(|s| s == "paper*"));
+    }
+
+    /// A query that already ends in `*` is not given a second wildcard.
+    #[test]
+    fn test_search_suggestions_with_wildcard() {
+        let offered = generate_search_suggestions("test*");
+
+        assert!(!offered.is_empty());
+        let doubled = offered.iter().any(|s| s.contains("test**"));
+        assert!(!doubled);
+    }
+
+    /// No query ever yields more than three suggestions.
+    #[test]
+    fn test_search_suggestions_limit() {
+        let offered = generate_search_suggestions("document test example");
+        assert!(offered.len() <= 3);
+    }
+
+    /// Pins how query casing affects suggestions.
+    ///
+    /// The synonym lookup matches the lowercase text "document" only, so a
+    /// capitalised "Document" receives the phrase and wildcard forms (with its
+    /// casing preserved) but no synonym. The earlier assertion expected a "file"
+    /// suggestion for "Document", which the helper never produces.
+    #[test]
+    fn test_search_suggestions_case_sensitivity() {
+        let suggestions = generate_search_suggestions("Document");
+
+        assert_eq!(
+            suggestions,
+            vec!["\"Document\"".to_string(), "Document*".to_string()]
+        );
+
+        // The lowercase spelling does trigger the synonym substitution.
+        let lowercase = generate_search_suggestions("document");
+        assert!(lowercase.iter().any(|s| s == "file"));
+    }
+
+    // Performance and error handling tests
+
+    /// Snippet generation over ~50KB of matching text finishes within 100ms and stays capped at three snippets.
+    #[test]
+    fn test_snippet_generation_performance() {
+        let db = MockDatabase::new();
+
+        // 10,000 repetitions of "test " — every word is a hit.
+        let haystack = "test ".repeat(10000);
+
+        let started = std::time::Instant::now();
+        let results = db.generate_snippets("test", Some(&haystack), None, 200);
+        let elapsed_ms = started.elapsed().as_millis();
+
+        // Wall-clock budget for this input size.
+        assert!(elapsed_ms < 100);
+        assert!(!results.is_empty());
+
+        // The snippet cap applies no matter how many matches exist.
+        assert!(results.len() <= 3);
+    }
+
+    /// A text with a thousand distinct hits still returns a small, capped set of snippets.
+    #[test]
+    fn test_snippet_generation_memory_usage() {
+        // Build "test0 test1 ... test999 " so every token contains the query.
+        let mut haystack = String::new();
+        for i in 0..1000 {
+            haystack.push_str(&format!("test{} ", i));
+        }
+
+        let results = MockDatabase::new().generate_snippets("test", Some(&haystack), None, 100);
+
+        assert!(!results.is_empty());
+        // The result set stays bounded regardless of the number of matches.
+        assert!(results.len() <= 3);
+    }
+
+    /// `SearchRequest` can be constructed with an empty query and with extreme field values.
+    #[test]
+    fn test_search_request_validation() {
+        // Empty query, everything else unset.
+        let blank = SearchRequest {
+            query: String::new(),
+            tags: None,
+            mime_types: None,
+            limit: None,
+            offset: None,
+            include_snippets: None,
+            snippet_length: None,
+            search_mode: None,
+        };
+        assert_eq!(blank.query, "");
+
+        // Oversized inputs: a 10,000-char query, 1,000 tags, 100 mime types, max numeric values.
+        let huge_query = "a".repeat(10000);
+        let many_tags = vec!["tag".to_string(); 1000];
+        let many_mime_types = vec!["type".to_string(); 100];
+
+        let oversized = SearchRequest {
+            query: huge_query,
+            tags: Some(many_tags),
+            mime_types: Some(many_mime_types),
+            limit: Some(i64::MAX),
+            offset: Some(i64::MAX),
+            include_snippets: Some(true),
+            snippet_length: Some(i32::MAX),
+            search_mode: Some(SearchMode::Boolean),
+        };
+
+        // Construction alone must not panic, and the sizes must be preserved.
+        assert!(oversized.query.len() == 10000);
+        assert!(oversized.tags.as_ref().unwrap().len() == 1000);
+    }
+
+    /// Every highlight lies inside its snippet and slices out the query term.
+    #[test]
+    fn test_highlight_range_validation() {
+        let text = "This is a test document for validation";
+        let results = MockDatabase::new().generate_snippets("test", Some(text), None, 50);
+
+        assert!(!results.is_empty());
+
+        for snippet in results.iter() {
+            let snippet_len = snippet.text.len() as i32;
+
+            for range in snippet.highlight_ranges.iter() {
+                // Structural validity of the range.
+                assert!(range.start >= 0);
+                assert!(range.end > range.start);
+                assert!(range.end <= snippet_len);
+
+                // The covered text equals the query, ignoring case.
+                let (from, to) = (range.start as usize, range.end as usize);
+                assert_eq!(snippet.text[from..to].to_lowercase(), "test");
+            }
+        }
+    }
+
+    /// Documents the intended `SearchMode` → PostgreSQL tsquery function mapping.
+    ///
+    /// The mapping under test is defined inside this test; it does not call the
+    /// database layer.
+    #[test]
+    fn test_search_mode_query_function_mapping() {
+        // Mapping as the database layer would be expected to apply it.
+        let function_for = |mode: SearchMode| match mode {
+            SearchMode::Simple => "plainto_tsquery",
+            SearchMode::Phrase => "phraseto_tsquery",
+            SearchMode::Fuzzy => "plainto_tsquery",
+            SearchMode::Boolean => "to_tsquery",
+        };
+
+        let expectations = vec![
+            (SearchMode::Simple, "plainto_tsquery"),
+            (SearchMode::Phrase, "phraseto_tsquery"),
+            (SearchMode::Fuzzy, "plainto_tsquery"), // Same as simple for now
+            (SearchMode::Boolean, "to_tsquery"),
+        ];
+
+        for (mode, expected_function) in expectations {
+            assert_eq!(function_for(mode), expected_function);
+        }
+    }
+
+    /// A populated `EnhancedDocumentResponse` exposes its id, tags, snippets and rank.
+    ///
+    /// Despite the name, nothing is serialized here; only field access is checked.
+    #[test]
+    fn test_enhanced_document_response_serialization() {
+        let expected_id = Uuid::new_v4();
+        let created = Utc::now();
+
+        let highlighted = SearchSnippet {
+            text: "Test snippet".to_string(),
+            start_offset: 0,
+            end_offset: 12,
+            highlight_ranges: vec![HighlightRange { start: 0, end: 4 }],
+        };
+
+        let doc = EnhancedDocumentResponse {
+            id: expected_id,
+            filename: "test.pdf".to_string(),
+            original_filename: "test.pdf".to_string(),
+            file_size: 1024,
+            mime_type: "application/pdf".to_string(),
+            tags: vec!["test".to_string(), "document".to_string()],
+            created_at: created,
+            has_ocr_text: true,
+            search_rank: Some(0.85),
+            snippets: vec![highlighted],
+        };
+
+        assert_eq!(doc.id, expected_id);
+        assert_eq!(doc.tags.len(), 2);
+        assert_eq!(doc.snippets.len(), 1);
+        assert!(doc.search_rank.unwrap() > 0.8);
+    }
+
+    /// Odd inputs: a query longer than the text, whitespace-only text, and punctuation-heavy text.
+    #[test]
+    fn test_snippet_edge_cases() {
+        let db = MockDatabase::new();
+
+        // The query cannot fit inside the content, so nothing matches.
+        let too_long = db.generate_snippets("hello world", Some("hi"), None, 100);
+        assert!(too_long.is_empty());
+
+        // Content made only of whitespace has nothing to match.
+        let blank = db.generate_snippets("test", Some(" \t\n "), None, 100);
+        assert!(blank.is_empty());
+
+        // Punctuation directly after the term must not prevent matching.
+        let punctuated = "test@example.com, test-case, test/path, test(1)";
+        let found = db.generate_snippets("test", Some(punctuated), None, 100);
+        assert!(!found.is_empty());
+        assert!(found[0].highlight_ranges.len() >= 3); // Should find multiple "test" instances
+    }
+
+    // Integration tests that would work with actual database
+
+    /// End-to-end check of `Database::enhanced_search_documents` against PostgreSQL.
+    ///
+    /// Connects to `TEST_DATABASE_URL` (falling back to a local `readur_test`
+    /// database), runs migrations, creates one user and one document, then
+    /// searches for "test" and expects exactly that document back with
+    /// snippets and a positive rank. Ignored by default because it needs a
+    /// live database.
+    ///
+    /// NOTE(review): the user is created with a fixed username and email, so a
+    /// second run against the same database may fail if those are unique —
+    /// confirm against the schema.
+    #[tokio::test]
+    #[ignore = "Requires PostgreSQL database for integration testing"]
+    async fn test_enhanced_search_integration() {
+        let db_url = std::env::var("TEST_DATABASE_URL")
+            .unwrap_or_else(|_| "postgresql://postgres:postgres@localhost:5432/readur_test".to_string());
+
+        let db = Database::new(&db_url).await.expect("Failed to connect to test database");
+        db.migrate().await.expect("Failed to migrate test database");
+
+        // Create test user
+        let user_data = CreateUser {
+            username: "test_enhanced_search".to_string(),
+            email: "enhanced@test.com".to_string(),
+            password: "password123".to_string(),
+        };
+        let user = db.create_user(user_data).await.unwrap();
+
+        // Create test document with rich content
+        let document = Document {
+            id: Uuid::new_v4(),
+            filename: "enhanced_test.pdf".to_string(),
+            original_filename: "enhanced_test.pdf".to_string(),
+            file_path: "/path/to/enhanced_test.pdf".to_string(),
+            file_size: 2048,
+            mime_type: "application/pdf".to_string(),
+            content: Some("This is a comprehensive test document for enhanced search functionality testing".to_string()),
+            ocr_text: Some("OCR extracted content with additional test information for search validation".to_string()),
+            tags: vec!["enhanced".to_string(), "search".to_string(), "test".to_string()],
+            created_at: Utc::now(),
+            updated_at: Utc::now(),
+            user_id: user.id,
+        };
+
+        db.create_document(document).await.unwrap();
+
+        // Test enhanced search with snippets
+        let search_request = SearchRequest {
+            query: "test".to_string(),
+            tags: None,
+            mime_types: None,
+            limit: Some(10),
+            offset: Some(0),
+            include_snippets: Some(true),
+            snippet_length: Some(100),
+            search_mode: Some(SearchMode::Simple),
+        };
+
+        let result = db.enhanced_search_documents(user.id, search_request).await;
+        assert!(result.is_ok());
+
+        // The elapsed time is reported in whole milliseconds, so a query over a
+        // single row can legitimately report 0. No lower bound is asserted on it;
+        // requiring `> 0` made this test fail spuriously on fast machines.
+        let (documents, total, _query_time) = result.unwrap();
+        assert_eq!(total, 1);
+        assert_eq!(documents.len(), 1);
+
+        let doc = &documents[0];
+        assert!(!doc.snippets.is_empty());
+        assert!(doc.search_rank.is_some());
+        assert!(doc.search_rank.unwrap() > 0.0);
+    }
+}
\ No newline at end of file
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
index 0368f80..6780955 100644
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@@ -3,5 +3,6 @@ mod auth_tests;
mod db_tests;
mod file_service_tests;
mod ocr_tests;
+mod enhanced_search_tests;
mod settings_tests;
mod users_tests;
\ No newline at end of file