commit b88774272deb90b18b7df24a3a3c8dd1e3923459 Author: perf3ct Date: Wed Jun 11 23:04:21 2025 +0000 Initial commit to setup repo via Fish diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d832fee --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "readur" +version = "0.1.0" +edition = "2021" + +[dependencies] +tokio = { version = "1.0", features = ["full"] } +axum = "0.7" +tower = "0.4" +tower-http = { version = "0.5", features = ["cors", "fs"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sqlx = { version = "0.7", features = ["runtime-tokio-rustls", "postgres", "chrono", "uuid"] } +uuid = { version = "1.0", features = ["v4", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +bcrypt = "0.15" +jsonwebtoken = "9.0" +anyhow = "1.0" +tracing = "0.1" +tracing-subscriber = "0.3" +tokio-util = { version = "0.7", features = ["io"] } +futures-util = "0.3" +notify = "6.0" +mime_guess = "2.0" +tesseract = "0.14" +pdf-extract = "0.7" +reqwest = { version = "0.11", features = ["json", "multipart"] } +dotenvy = "0.15" + +[dev-dependencies] +tempfile = "3.0" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..333bc01 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# Build stage +FROM rust:1.75 as builder + +# Install system dependencies for OCR +RUN apt-get update && apt-get install -y \ + tesseract-ocr \ + tesseract-ocr-eng \ + libtesseract-dev \ + libleptonica-dev \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY Cargo.toml Cargo.lock ./ +COPY src ./src + +RUN cargo build --release + +# Runtime stage +FROM debian:bookworm-slim + +# Install runtime dependencies +RUN apt-get update && apt-get install -y \ + tesseract-ocr \ + tesseract-ocr-eng \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Copy the binary +COPY --from=builder /app/target/release/readur /app/readur + +# Create necessary directories +RUN mkdir -p /app/uploads /app/watch /app/frontend + +# Copy frontend files (will be built separately) +COPY frontend/dist /app/frontend + +EXPOSE 8000 + +CMD ["./readur"] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..e6df559 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,35 @@ +version: '3.8' + +services: + readur: + build: . + ports: + - "8000:8000" + environment: + - DATABASE_URL=postgresql://readur:readur_password@postgres:5432/readur + - JWT_SECRET=your-super-secret-jwt-key-change-this-in-production + - UPLOAD_PATH=/app/uploads + - WATCH_FOLDER=/app/watch + volumes: + - uploads:/app/uploads + - watch:/app/watch + depends_on: + - postgres + restart: unless-stopped + + postgres: + image: postgres:15 + environment: + - POSTGRES_USER=readur + - POSTGRES_PASSWORD=readur_password + - POSTGRES_DB=readur + volumes: + - postgres_data:/var/lib/postgresql/data + ports: + - "5432:5432" + restart: unless-stopped + +volumes: + uploads: + watch: + postgres_data: \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..58d8291 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + + Readur - Document Management + + +
+ + + \ No newline at end of file diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..aa90400 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,33 @@ +{ + "name": "readur-frontend", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview", + "test": "vitest" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "react-router-dom": "^6.8.0", + "axios": "^1.3.0", + "react-hook-form": "^7.43.0", + "@heroicons/react": "^2.0.16", + "react-dropzone": "^14.2.3" + }, + "devDependencies": { + "@types/react": "^18.0.28", + "@types/react-dom": "^18.0.11", + "@vitejs/plugin-react": "^3.1.0", + "vite": "^4.1.0", + "vitest": "^0.28.0", + "@testing-library/react": "^14.0.0", + "@testing-library/jest-dom": "^5.16.5", + "tailwindcss": "^3.2.7", + "autoprefixer": "^10.4.14", + "postcss": "^8.4.21" + } +} \ No newline at end of file diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx new file mode 100644 index 0000000..80baa02 --- /dev/null +++ b/frontend/src/App.tsx @@ -0,0 +1,42 @@ +import React from 'react' +import { Routes, Route, Navigate } from 'react-router-dom' +import { useAuth } from './contexts/AuthContext' +import Login from './components/Login' +import Register from './components/Register' +import Dashboard from './components/Dashboard' +import Layout from './components/Layout' + +function App() { + const { user, loading } = useAuth() + + if (loading) { + return ( +
+
+
+ ) + } + + return ( + + : } /> + : } /> + + + } /> + + + ) : ( + + ) + } + /> + + ) +} + +export default App \ No newline at end of file diff --git a/frontend/src/components/Dashboard.tsx b/frontend/src/components/Dashboard.tsx new file mode 100644 index 0000000..cf3d336 --- /dev/null +++ b/frontend/src/components/Dashboard.tsx @@ -0,0 +1,74 @@ +import React, { useState, useEffect } from 'react' +import FileUpload from './FileUpload' +import DocumentList from './DocumentList' +import SearchBar from './SearchBar' +import { Document, documentService } from '../services/api' + +function Dashboard() { + const [documents, setDocuments] = useState([]) + const [loading, setLoading] = useState(true) + const [searchResults, setSearchResults] = useState(null) + + useEffect(() => { + loadDocuments() + }, []) + + const loadDocuments = async () => { + try { + const response = await documentService.list() + setDocuments(response.data) + } catch (error) { + console.error('Failed to load documents:', error) + } finally { + setLoading(false) + } + } + + const handleUploadSuccess = (newDocument: Document) => { + setDocuments(prev => [newDocument, ...prev]) + } + + const handleSearch = async (query: string) => { + if (!query.trim()) { + setSearchResults(null) + return + } + + try { + const response = await documentService.search({ query }) + setSearchResults(response.data.documents) + } catch (error) { + console.error('Search failed:', error) + } + } + + const displayDocuments = searchResults || documents + + return ( +
+
+

Document Management

+ +
+ +
+ +
+ + {searchResults && ( +
+ +
+ )} + + +
+ ) +} + +export default Dashboard \ No newline at end of file diff --git a/frontend/src/components/DocumentList.tsx b/frontend/src/components/DocumentList.tsx new file mode 100644 index 0000000..aaa4029 --- /dev/null +++ b/frontend/src/components/DocumentList.tsx @@ -0,0 +1,102 @@ +import React from 'react' +import { + DocumentIcon, + PhotoIcon, + ArrowDownTrayIcon, +} from '@heroicons/react/24/outline' +import { Document, documentService } from '../services/api' + +interface DocumentListProps { + documents: Document[] + loading: boolean +} + +function DocumentList({ documents, loading }: DocumentListProps) { + const handleDownload = async (document: Document) => { + try { + const response = await documentService.download(document.id) + const blob = new Blob([response.data]) + const url = window.URL.createObjectURL(blob) + const link = window.document.createElement('a') + link.href = url + link.download = document.original_filename + link.click() + window.URL.revokeObjectURL(url) + } catch (error) { + console.error('Download failed:', error) + } + } + + const getFileIcon = (mimeType: string) => { + if (mimeType.startsWith('image/')) { + return + } + return + } + + const formatFileSize = (bytes: number) => { + if (bytes === 0) return '0 Bytes' + const k = 1024 + const sizes = ['Bytes', 'KB', 'MB', 'GB'] + const i = Math.floor(Math.log(bytes) / Math.log(k)) + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i] + } + + if (loading) { + return ( +
+
+

Loading documents...

+
+ ) + } + + if (documents.length === 0) { + return ( +
+ +

No documents found

+
+ ) + } + + return ( +
+
    + {documents.map((document) => ( +
  • +
    +
    + {getFileIcon(document.mime_type)} +
    +
    + {document.original_filename} +
    +
    + {formatFileSize(document.file_size)} • {document.mime_type} + {document.has_ocr_text && ( + + OCR + + )} +
    +
    + {new Date(document.created_at).toLocaleDateString()} +
    +
    +
    + +
    +
  • + ))} +
+
+ ) +} + +export default DocumentList \ No newline at end of file diff --git a/frontend/src/components/FileUpload.tsx b/frontend/src/components/FileUpload.tsx new file mode 100644 index 0000000..45cb3c1 --- /dev/null +++ b/frontend/src/components/FileUpload.tsx @@ -0,0 +1,74 @@ +import React, { useCallback, useState } from 'react' +import { useDropzone } from 'react-dropzone' +import { DocumentArrowUpIcon } from '@heroicons/react/24/outline' +import { Document, documentService } from '../services/api' + +interface FileUploadProps { + onUploadSuccess: (document: Document) => void +} + +function FileUpload({ onUploadSuccess }: FileUploadProps) { + const [uploading, setUploading] = useState(false) + const [error, setError] = useState(null) + + const onDrop = useCallback(async (acceptedFiles: File[]) => { + const file = acceptedFiles[0] + if (!file) return + + setUploading(true) + setError(null) + + try { + const response = await documentService.upload(file) + onUploadSuccess(response.data) + } catch (err: any) { + setError(err.response?.data?.message || 'Upload failed') + } finally { + setUploading(false) + } + }, [onUploadSuccess]) + + const { getRootProps, getInputProps, isDragActive } = useDropzone({ + onDrop, + multiple: false, + accept: { + 'application/pdf': ['.pdf'], + 'text/plain': ['.txt'], + 'image/*': ['.png', '.jpg', '.jpeg', '.tiff', '.bmp'], + 'application/msword': ['.doc'], + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], + }, + }) + + return ( +
+
+ + +

+ {isDragActive + ? 'Drop the file here...' + : 'Drag & drop a file here, or click to select'} +

+

+ Supported: PDF, TXT, DOC, DOCX, PNG, JPG, JPEG, TIFF, BMP +

+ {uploading && ( +

Uploading and processing...

+ )} +
+ {error && ( +
{error}
+ )} +
+ ) +} + +export default FileUpload \ No newline at end of file diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx new file mode 100644 index 0000000..0715a3c --- /dev/null +++ b/frontend/src/components/Layout.tsx @@ -0,0 +1,38 @@ +import React from 'react' +import { useAuth } from '../contexts/AuthContext' + +interface LayoutProps { + children: React.ReactNode +} + +function Layout({ children }: LayoutProps) { + const { user, logout } = useAuth() + + return ( +
+ +
+ {children} +
+
+ ) +} + +export default Layout \ No newline at end of file diff --git a/frontend/src/components/Login.tsx b/frontend/src/components/Login.tsx new file mode 100644 index 0000000..2084532 --- /dev/null +++ b/frontend/src/components/Login.tsx @@ -0,0 +1,90 @@ +import React, { useState } from 'react' +import { Link } from 'react-router-dom' +import { useAuth } from '../contexts/AuthContext' + +function Login() { + const [username, setUsername] = useState('') + const [password, setPassword] = useState('') + const [error, setError] = useState('') + const [loading, setLoading] = useState(false) + const { login } = useAuth() + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault() + setError('') + setLoading(true) + + try { + await login(username, password) + } catch (err: any) { + setError(err.response?.data?.message || 'Failed to login') + } finally { + setLoading(false) + } + } + + return ( +
+
+
+

+ Sign in to Readur +

+
+
+ {error && ( +
+ {error} +
+ )} +
+ + setUsername(e.target.value)} + /> +
+
+ + setPassword(e.target.value)} + /> +
+
+ +
+
+ + Don't have an account? Sign up + +
+
+
+
+ ) +} + +export default Login \ No newline at end of file diff --git a/frontend/src/components/Register.tsx b/frontend/src/components/Register.tsx new file mode 100644 index 0000000..c965826 --- /dev/null +++ b/frontend/src/components/Register.tsx @@ -0,0 +1,106 @@ +import React, { useState } from 'react' +import { Link } from 'react-router-dom' +import { useAuth } from '../contexts/AuthContext' + +function Register() { + const [username, setUsername] = useState('') + const [email, setEmail] = useState('') + const [password, setPassword] = useState('') + const [error, setError] = useState('') + const [loading, setLoading] = useState(false) + const { register } = useAuth() + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault() + setError('') + setLoading(true) + + try { + await register(username, email, password) + } catch (err: any) { + setError(err.response?.data?.message || 'Failed to register') + } finally { + setLoading(false) + } + } + + return ( +
+
+
+

+ Create your Readur account +

+
+
+ {error && ( +
+ {error} +
+ )} +
+ + setUsername(e.target.value)} + /> +
+
+ + setEmail(e.target.value)} + /> +
+
+ + setPassword(e.target.value)} + /> +
+
+ +
+
+ + Already have an account? Sign in + +
+
+
+
+ ) +} + +export default Register \ No newline at end of file diff --git a/frontend/src/components/SearchBar.tsx b/frontend/src/components/SearchBar.tsx new file mode 100644 index 0000000..3609dd9 --- /dev/null +++ b/frontend/src/components/SearchBar.tsx @@ -0,0 +1,38 @@ +import React, { useState } from 'react' +import { MagnifyingGlassIcon } from '@heroicons/react/24/outline' + +interface SearchBarProps { + onSearch: (query: string) => void +} + +function SearchBar({ onSearch }: SearchBarProps) { + const [query, setQuery] = useState('') + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault() + onSearch(query) + } + + return ( +
+
+ + setQuery(e.target.value)} + className="w-full pl-10 pr-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-transparent" + /> +
+ +
+ ) +} + +export default SearchBar \ No newline at end of file diff --git a/frontend/src/components/__tests__/Dashboard.test.tsx b/frontend/src/components/__tests__/Dashboard.test.tsx new file mode 100644 index 0000000..a81f961 --- /dev/null +++ b/frontend/src/components/__tests__/Dashboard.test.tsx @@ -0,0 +1,117 @@ +import { render, screen, waitFor } from '@testing-library/react' +import { vi } from 'vitest' +import Dashboard from '../Dashboard' +import { documentService } from '../../services/api' + +// Mock the API service +vi.mock('../../services/api', () => ({ + documentService: { + list: vi.fn(), + search: vi.fn(), + }, +})) + +// Mock child components +vi.mock('../FileUpload', () => ({ + default: ({ onUploadSuccess }: any) => ( +
File Upload Component
+ ), +})) + +vi.mock('../DocumentList', () => ({ + default: ({ documents, loading }: any) => ( +
+ {loading ? 'Loading...' : `${documents.length} documents`} +
+ ), +})) + +vi.mock('../SearchBar', () => ({ + default: ({ onSearch }: any) => ( + onSearch(e.target.value)} + /> + ), +})) + +const mockDocuments = [ + { + id: '1', + filename: 'test1.pdf', + original_filename: 'test1.pdf', + file_size: 1024, + mime_type: 'application/pdf', + tags: [], + created_at: '2023-01-01T00:00:00Z', + has_ocr_text: true, + }, + { + id: '2', + filename: 'test2.txt', + original_filename: 'test2.txt', + file_size: 512, + mime_type: 'text/plain', + tags: ['important'], + created_at: '2023-01-02T00:00:00Z', + has_ocr_text: false, + }, +] + +describe('Dashboard', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + test('renders dashboard with file upload and document list', async () => { + const mockList = vi.mocked(documentService.list) + mockList.mockResolvedValue({ data: mockDocuments }) + + render() + + expect(screen.getByText('Document Management')).toBeInTheDocument() + expect(screen.getByTestId('file-upload')).toBeInTheDocument() + expect(screen.getByTestId('search-bar')).toBeInTheDocument() + + await waitFor(() => { + expect(screen.getByTestId('document-list')).toBeInTheDocument() + expect(screen.getByText('2 documents')).toBeInTheDocument() + }) + }) + + test('handles loading state', () => { + const mockList = vi.mocked(documentService.list) + mockList.mockImplementation(() => new Promise(() => {})) // Never resolves + + render() + + expect(screen.getByText('Loading...')).toBeInTheDocument() + }) + + test('handles search functionality', async () => { + const mockList = vi.mocked(documentService.list) + const mockSearch = vi.mocked(documentService.search) + + mockList.mockResolvedValue({ data: mockDocuments }) + mockSearch.mockResolvedValue({ + data: { + documents: [mockDocuments[0]], + total: 1, + }, + }) + + render() + + await waitFor(() => { + expect(screen.getByText('2 documents')).toBeInTheDocument() + }) + + const searchBar = screen.getByTestId('search-bar') + searchBar.dispatchEvent(new Event('change', { bubbles: true })) + + await waitFor(() => { + expect(mockSearch).toHaveBeenCalled() + }) + }) +}) \ No newline at end of file diff --git a/frontend/src/contexts/AuthContext.tsx b/frontend/src/contexts/AuthContext.tsx new file mode 100644 index 0000000..9d671f8 --- /dev/null +++ b/frontend/src/contexts/AuthContext.tsx @@ -0,0 +1,83 @@ +import React, { createContext, useContext, useEffect, useState } from 'react' +import { api } from '../services/api' + +interface User { + id: string + username: string + email: string +} + +interface AuthContextType { + user: User | null + loading: boolean + login: (username: string, password: string) => Promise + register: (username: string, email: string, password: string) => Promise + logout: () => void +} + +const AuthContext = createContext(undefined) + +export function AuthProvider({ children }: { children: React.ReactNode }) { + const [user, setUser] = useState(null) + const [loading, setLoading] = useState(true) + + useEffect(() => { + const token = localStorage.getItem('token') + if (token) { + api.defaults.headers.common['Authorization'] = `Bearer ${token}` + fetchUser() + } else { + setLoading(false) + } + }, []) + + const fetchUser = async () => { + try { + const response = await api.get('/auth/me') + setUser(response.data) + } catch (error) { + localStorage.removeItem('token') + delete api.defaults.headers.common['Authorization'] + } finally { + setLoading(false) + } + } + + const login = async (username: string, password: string) => { + const response = await api.post('/auth/login', { username, password }) + const { token, user: userData } = response.data + + localStorage.setItem('token', token) + api.defaults.headers.common['Authorization'] = `Bearer ${token}` + setUser(userData) + } + + const register = async (username: string, email: string, password: string) => { + await api.post('/auth/register', { username, email, password }) + await login(username, password) + } + + const logout = () => { + localStorage.removeItem('token') + delete api.defaults.headers.common['Authorization'] + setUser(null) + } + + const value = { + user, + loading, + login, + register, + logout, + } + + return {children} +} + +export function useAuth() { + const context = useContext(AuthContext) + if (context === undefined) { + throw new Error('useAuth must be used within an AuthProvider') + } + return context +} \ No newline at end of file diff --git a/frontend/src/index.css b/frontend/src/index.css new file mode 100644 index 0000000..bd6213e --- /dev/null +++ b/frontend/src/index.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; \ No newline at end of file diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx new file mode 100644 index 0000000..13f1c7c --- /dev/null +++ b/frontend/src/main.tsx @@ -0,0 +1,16 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' +import { BrowserRouter } from 'react-router-dom' +import App from './App' +import './index.css' +import { AuthProvider } from './contexts/AuthContext' + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + + + + + , +) \ No newline at end of file diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts new file mode 100644 index 0000000..097ac7e --- /dev/null +++ b/frontend/src/services/api.ts @@ -0,0 +1,62 @@ +import axios from 'axios' + +export const api = axios.create({ + baseURL: '/api', + headers: { + 'Content-Type': 'application/json', + }, +}) + +export interface Document { + id: string + filename: string + original_filename: string + file_size: number + mime_type: string + tags: string[] + created_at: string + has_ocr_text: boolean +} + +export interface SearchRequest { + query: string + tags?: string[] + mime_types?: string[] + limit?: number + offset?: number +} + +export interface SearchResponse { + documents: Document[] + total: number +} + +export const documentService = { + upload: (file: File) => { + const formData = new FormData() + formData.append('file', file) + return api.post('/documents', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + }) + }, + + list: (limit = 50, offset = 0) => { + return api.get('/documents', { + params: { limit, offset }, + }) + }, + + download: (id: string) => { + return api.get(`/documents/${id}/download`, { + responseType: 'blob', + }) + }, + + search: (searchRequest: SearchRequest) => { + return api.get('/search', { + params: searchRequest, + }) + }, +} \ No newline at end of file diff --git a/frontend/tailwind.config.js b/frontend/tailwind.config.js new file mode 100644 index 0000000..89a305e --- /dev/null +++ b/frontend/tailwind.config.js @@ -0,0 +1,11 @@ +/** @type {import('tailwindcss').Config} */ +export default { + content: [ + "./index.html", + "./src/**/*.{js,ts,jsx,tsx}", + ], + theme: { + extend: {}, + }, + plugins: [], +} \ No newline at end of file diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts new file mode 100644 index 0000000..7759654 --- /dev/null +++ b/frontend/vite.config.ts @@ -0,0 +1,18 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + server: { + proxy: { + '/api': { + target: 'http://localhost:8000', + changeOrigin: true, + }, + }, + }, + build: { + outDir: 'dist', + assetsDir: 'assets', + }, +}) \ No newline at end of file diff --git a/src/auth.rs b/src/auth.rs new file mode 100644 index 0000000..dd55661 --- /dev/null +++ b/src/auth.rs @@ -0,0 +1,94 @@ +use anyhow::Result; +use axum::{ + async_trait, + extract::{FromRequestParts, State}, + http::{request::Parts, HeaderMap, StatusCode}, + response::{IntoResponse, Response}, + Json, +}; +use chrono::{Duration, Utc}; +use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use uuid::Uuid; + +use crate::{models::User, AppState}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Claims { + pub sub: Uuid, + pub username: String, + pub exp: usize, +} + +pub struct AuthUser { + pub user: User, +} + +#[async_trait] +impl FromRequestParts> for AuthUser { + type Rejection = Response; + + async fn from_request_parts( + parts: &mut Parts, + state: &Arc, + ) -> Result { + let headers = &parts.headers; + let token = extract_token_from_headers(headers) + .ok_or_else(|| (StatusCode::UNAUTHORIZED, "Missing authorization header").into_response())?; + + let claims = verify_jwt(&token, &state.config.jwt_secret) + .map_err(|_| (StatusCode::UNAUTHORIZED, "Invalid token").into_response())?; + + let user = state + .db + .get_user_by_id(claims.sub) + .await + .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "Database error").into_response())? + .ok_or_else(|| (StatusCode::UNAUTHORIZED, "User not found").into_response())?; + + Ok(AuthUser { user }) + } +} + +pub fn create_jwt(user: &User, secret: &str) -> Result { + let expiration = Utc::now() + .checked_add_signed(Duration::hours(24)) + .expect("valid timestamp") + .timestamp(); + + let claims = Claims { + sub: user.id, + username: user.username.clone(), + exp: expiration as usize, + }; + + let token = encode( + &Header::default(), + &claims, + &EncodingKey::from_secret(secret.as_bytes()), + )?; + + Ok(token) +} + +pub fn verify_jwt(token: &str, secret: &str) -> Result { + let token_data = decode::( + token, + &DecodingKey::from_secret(secret.as_bytes()), + &Validation::default(), + )?; + + Ok(token_data.claims) +} + +fn extract_token_from_headers(headers: &HeaderMap) -> Option { + let auth_header = headers.get("authorization")?; + let auth_str = auth_header.to_str().ok()?; + + if auth_str.starts_with("Bearer ") { + Some(auth_str.trim_start_matches("Bearer ").to_string()) + } else { + None + } +} \ No newline at end of file diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..6391a4a --- /dev/null +++ b/src/config.rs @@ -0,0 +1,36 @@ +use anyhow::Result; +use std::env; + +#[derive(Clone, Debug)] +pub struct Config { + pub database_url: String, + pub server_address: String, + pub jwt_secret: String, + pub upload_path: String, + pub watch_folder: String, + pub allowed_file_types: Vec, +} + +impl Config { + pub fn from_env() -> Result { + dotenvy::dotenv().ok(); + + Ok(Config { + database_url: env::var("DATABASE_URL") + .unwrap_or_else(|_| "postgresql://readur:readur@localhost/readur".to_string()), + server_address: env::var("SERVER_ADDRESS") + .unwrap_or_else(|_| "0.0.0.0:8000".to_string()), + jwt_secret: env::var("JWT_SECRET") + .unwrap_or_else(|_| "your-secret-key".to_string()), + upload_path: env::var("UPLOAD_PATH") + .unwrap_or_else(|_| "./uploads".to_string()), + watch_folder: env::var("WATCH_FOLDER") + .unwrap_or_else(|_| "./watch".to_string()), + allowed_file_types: env::var("ALLOWED_FILE_TYPES") + .unwrap_or_else(|_| "pdf,txt,doc,docx,png,jpg,jpeg".to_string()) + .split(',') + .map(|s| s.trim().to_lowercase()) + .collect(), + }) + } +} \ No newline at end of file diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..6cd7c67 --- /dev/null +++ b/src/db.rs @@ -0,0 +1,296 @@ +use anyhow::Result; +use chrono::Utc; +use sqlx::{PgPool, Row}; +use uuid::Uuid; + +use crate::models::{CreateUser, Document, SearchRequest, User}; + +#[derive(Clone)] +pub struct Database { + pool: PgPool, +} + +impl Database { + pub async fn new(database_url: &str) -> Result { + let pool = PgPool::connect(database_url).await?; + Ok(Self { pool }) + } + + pub async fn migrate(&self) -> Result<()> { + sqlx::query( + r#" + CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; + CREATE EXTENSION IF NOT EXISTS "pg_trgm"; + + CREATE TABLE IF NOT EXISTS users ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + username VARCHAR(255) UNIQUE NOT NULL, + email VARCHAR(255) UNIQUE NOT NULL, + password_hash VARCHAR(255) NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() + ); + + CREATE TABLE IF NOT EXISTS documents ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + filename VARCHAR(255) NOT NULL, + original_filename VARCHAR(255) NOT NULL, + file_path VARCHAR(500) NOT NULL, + file_size BIGINT NOT NULL, + mime_type VARCHAR(100) NOT NULL, + content TEXT, + ocr_text TEXT, + tags TEXT[] DEFAULT '{}', + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + user_id UUID REFERENCES users(id) ON DELETE CASCADE + ); + + CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id); + CREATE INDEX IF NOT EXISTS idx_documents_filename ON documents(filename); + CREATE INDEX IF NOT EXISTS idx_documents_mime_type ON documents(mime_type); + CREATE INDEX IF NOT EXISTS idx_documents_tags ON documents USING GIN(tags); + CREATE INDEX IF NOT EXISTS idx_documents_content_search ON documents USING GIN(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, ''))); + "# + ) + .execute(&self.pool) + .await?; + + Ok(()) + } + + pub async fn create_user(&self, user: CreateUser) -> Result { + let password_hash = bcrypt::hash(&user.password, 12)?; + let now = Utc::now(); + + let row = sqlx::query( + r#" + INSERT INTO users (username, email, password_hash, created_at, updated_at) + VALUES ($1, $2, $3, $4, $5) + RETURNING id, username, email, password_hash, created_at, updated_at + "# + ) + .bind(&user.username) + .bind(&user.email) + .bind(&password_hash) + .bind(now) + .bind(now) + .fetch_one(&self.pool) + .await?; + + Ok(User { + id: row.get("id"), + username: row.get("username"), + email: row.get("email"), + password_hash: row.get("password_hash"), + created_at: row.get("created_at"), + updated_at: row.get("updated_at"), + }) + } + + pub async fn get_user_by_username(&self, username: &str) -> Result> { + let row = sqlx::query( + "SELECT id, username, email, password_hash, created_at, updated_at FROM users WHERE username = $1" + ) + .bind(username) + .fetch_optional(&self.pool) + .await?; + + match row { + Some(row) => Ok(Some(User { + id: row.get("id"), + username: row.get("username"), + email: row.get("email"), + password_hash: row.get("password_hash"), + created_at: row.get("created_at"), + updated_at: row.get("updated_at"), + })), + None => Ok(None), + } + } + + pub async fn get_user_by_id(&self, id: Uuid) -> Result> { + let row = sqlx::query( + "SELECT id, username, email, password_hash, created_at, updated_at FROM users WHERE id = $1" + ) + .bind(id) + .fetch_optional(&self.pool) + .await?; + + match row { + Some(row) => Ok(Some(User { + id: row.get("id"), + username: row.get("username"), + email: row.get("email"), + password_hash: row.get("password_hash"), + created_at: row.get("created_at"), + updated_at: row.get("updated_at"), + })), + None => Ok(None), + } + } + + pub async fn create_document(&self, document: Document) -> Result { + let row = sqlx::query( + r#" + INSERT INTO documents (id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) + RETURNING id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id + "# + ) + .bind(document.id) + .bind(&document.filename) + .bind(&document.original_filename) + .bind(&document.file_path) + .bind(document.file_size) + .bind(&document.mime_type) + .bind(&document.content) + .bind(&document.ocr_text) + .bind(&document.tags) + .bind(document.created_at) + .bind(document.updated_at) + .bind(document.user_id) + .fetch_one(&self.pool) + .await?; + + Ok(Document { + id: row.get("id"), + filename: row.get("filename"), + original_filename: row.get("original_filename"), + file_path: row.get("file_path"), + file_size: row.get("file_size"), + mime_type: row.get("mime_type"), + content: row.get("content"), + ocr_text: row.get("ocr_text"), + tags: row.get("tags"), + created_at: row.get("created_at"), + updated_at: row.get("updated_at"), + user_id: row.get("user_id"), + }) + } + + pub async fn get_documents_by_user(&self, user_id: Uuid, limit: i64, offset: i64) -> Result> { + let rows = sqlx::query( + r#" + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id + FROM documents + WHERE user_id = $1 + ORDER BY created_at DESC + LIMIT $2 OFFSET $3 + "# + ) + .bind(user_id) + .bind(limit) + .bind(offset) + .fetch_all(&self.pool) + .await?; + + let documents = rows + .into_iter() + .map(|row| Document { + id: row.get("id"), + filename: row.get("filename"), + original_filename: row.get("original_filename"), + file_path: row.get("file_path"), + file_size: row.get("file_size"), + mime_type: row.get("mime_type"), + content: row.get("content"), + ocr_text: row.get("ocr_text"), + tags: row.get("tags"), + created_at: row.get("created_at"), + updated_at: row.get("updated_at"), + user_id: row.get("user_id"), + }) + .collect(); + + Ok(documents) + } + + pub async fn search_documents(&self, user_id: Uuid, search: SearchRequest) -> Result<(Vec, i64)> { + let mut query_builder = sqlx::QueryBuilder::new( + r#" + SELECT id, filename, original_filename, file_path, file_size, mime_type, content, ocr_text, tags, created_at, updated_at, user_id, + ts_rank(to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')), plainto_tsquery('english', "# + ); + + query_builder.push_bind(&search.query); + query_builder.push(")) as rank FROM documents WHERE user_id = "); + query_builder.push_bind(user_id); + query_builder.push(" AND to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) @@ plainto_tsquery('english', "); + query_builder.push_bind(&search.query); + query_builder.push(")"); + + if let Some(tags) = &search.tags { + if !tags.is_empty() { + query_builder.push(" AND tags && "); + query_builder.push_bind(tags); + } + } + + if let Some(mime_types) = &search.mime_types { + if !mime_types.is_empty() { + query_builder.push(" AND mime_type = ANY("); + query_builder.push_bind(mime_types); + query_builder.push(")"); + } + } + + query_builder.push(" ORDER BY rank DESC, created_at DESC"); + + if let Some(limit) = search.limit { + query_builder.push(" LIMIT "); + query_builder.push_bind(limit); + } + + if let Some(offset) = search.offset { + query_builder.push(" OFFSET "); + query_builder.push_bind(offset); + } + + let rows = query_builder.build().fetch_all(&self.pool).await?; + + let documents = rows + .into_iter() + .map(|row| Document { + id: row.get("id"), + filename: row.get("filename"), + original_filename: row.get("original_filename"), + file_path: row.get("file_path"), + file_size: row.get("file_size"), + mime_type: row.get("mime_type"), + content: row.get("content"), + ocr_text: row.get("ocr_text"), + tags: row.get("tags"), + created_at: row.get("created_at"), + updated_at: row.get("updated_at"), + user_id: row.get("user_id"), + }) + .collect(); + + let total_row = sqlx::query( + r#" + SELECT COUNT(*) as total FROM documents + WHERE user_id = $1 + AND to_tsvector('english', COALESCE(content, '') || ' ' || COALESCE(ocr_text, '')) @@ plainto_tsquery('english', $2) + "# + ) + .bind(user_id) + .bind(&search.query) + .fetch_one(&self.pool) + .await?; + + let total: i64 = total_row.get("total"); + + Ok((documents, total)) + } + + pub async fn update_document_ocr(&self, id: Uuid, ocr_text: &str) -> Result<()> { + sqlx::query("UPDATE documents SET ocr_text = $1, updated_at = NOW() WHERE id = $2") + .bind(ocr_text) + .bind(id) + .execute(&self.pool) + .await?; + + Ok(()) + } +} \ No newline at end of file diff --git a/src/file_service.rs b/src/file_service.rs new file mode 100644 index 0000000..28e4a54 --- /dev/null +++ b/src/file_service.rs @@ -0,0 +1,83 @@ +use anyhow::Result; +use chrono::Utc; +use std::path::Path; +use tokio::fs; +use uuid::Uuid; + +use crate::models::Document; + +pub struct FileService { + upload_path: String, +} + +impl FileService { + pub fn new(upload_path: String) -> Self { + Self { upload_path } + } + + pub async fn save_file(&self, filename: &str, data: &[u8]) -> Result { + let file_id = Uuid::new_v4(); + let extension = Path::new(filename) + .extension() + .and_then(|ext| ext.to_str()) + .unwrap_or(""); + + let saved_filename = if extension.is_empty() { + file_id.to_string() + } else { + format!("{}.{}", file_id, extension) + }; + + let file_path = Path::new(&self.upload_path).join(&saved_filename); + + if let Some(parent) = file_path.parent() { + fs::create_dir_all(parent).await?; + } + + fs::write(&file_path, data).await?; + + Ok(file_path.to_string_lossy().to_string()) + } + + pub async fn create_document( + &self, + filename: &str, + original_filename: &str, + file_path: &str, + file_size: i64, + mime_type: &str, + user_id: Uuid, + ) -> Document { + Document { + id: Uuid::new_v4(), + filename: filename.to_string(), + original_filename: original_filename.to_string(), + file_path: file_path.to_string(), + file_size, + mime_type: mime_type.to_string(), + content: None, + ocr_text: None, + tags: Vec::new(), + created_at: Utc::now(), + updated_at: Utc::now(), + user_id, + } + } + + pub fn is_allowed_file_type(&self, filename: &str, allowed_types: &[String]) -> bool { + if let Some(extension) = Path::new(filename) + .extension() + .and_then(|ext| ext.to_str()) + { + let ext_lower = extension.to_lowercase(); + allowed_types.contains(&ext_lower) + } else { + false + } + } + + pub async fn read_file(&self, file_path: &str) -> Result> { + let data = fs::read(file_path).await?; + Ok(data) + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..1f39656 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,69 @@ +use axum::{ + extract::State, + http::StatusCode, + response::Json, + routing::{get, post}, + Router, +}; +use std::sync::Arc; +use tower_http::cors::CorsLayer; +use tracing::{info, error}; + +mod auth; +mod config; +mod db; +mod file_service; +mod models; +mod ocr; +mod routes; +mod watcher; + +#[cfg(test)] +mod tests; + +use config::Config; +use db::Database; + +#[derive(Clone)] +pub struct AppState { + pub db: Database, + pub config: Config, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + tracing_subscriber::init(); + + let config = Config::from_env()?; + let db = Database::new(&config.database_url).await?; + + db.migrate().await?; + + let state = AppState { db, config: config.clone() }; + + let app = Router::new() + .route("/api/health", get(health_check)) + .nest("/api/auth", routes::auth::router()) + .nest("/api/documents", routes::documents::router()) + .nest("/api/search", routes::search::router()) + .layer(CorsLayer::permissive()) + .with_state(Arc::new(state)); + + let watcher_config = config.clone(); + tokio::spawn(async move { + if let Err(e) = watcher::start_folder_watcher(watcher_config).await { + error!("Folder watcher error: {}", e); + } + }); + + let listener = tokio::net::TcpListener::bind(&config.server_address).await?; + info!("Server starting on {}", config.server_address); + + axum::serve(listener, app).await?; + + Ok(()) +} + +async fn health_check() -> Result, StatusCode> { + Ok(Json(serde_json::json!({"status": "ok"}))) +} \ No newline at end of file diff --git a/src/models.rs b/src/models.rs new file mode 100644 index 0000000..255ac1c --- /dev/null +++ b/src/models.rs @@ -0,0 +1,108 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use sqlx::FromRow; +use uuid::Uuid; + +#[derive(Debug, Serialize, Deserialize, FromRow)] +pub struct User { + pub id: Uuid, + pub username: String, + pub email: String, + pub password_hash: String, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct CreateUser { + pub username: String, + pub email: String, + pub password: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct LoginRequest { + pub username: String, + pub password: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct LoginResponse { + pub token: String, + pub user: UserResponse, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct UserResponse { + pub id: Uuid, + pub username: String, + pub email: String, +} + +#[derive(Debug, Serialize, Deserialize, FromRow)] +pub struct Document { + pub id: Uuid, + pub filename: String, + pub original_filename: String, + pub file_path: String, + pub file_size: i64, + pub mime_type: String, + pub content: Option, + pub ocr_text: Option, + pub tags: Vec, + pub created_at: DateTime, + pub updated_at: DateTime, + pub user_id: Uuid, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct DocumentResponse { + pub id: Uuid, + pub filename: String, + pub original_filename: String, + pub file_size: i64, + pub mime_type: String, + pub tags: Vec, + pub created_at: DateTime, + pub has_ocr_text: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct SearchRequest { + pub query: String, + pub tags: Option>, + pub mime_types: Option>, + pub limit: Option, + pub offset: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct SearchResponse { + pub documents: Vec, + pub total: i64, +} + +impl From for DocumentResponse { + fn from(doc: Document) -> Self { + Self { + id: doc.id, + filename: doc.filename, + original_filename: doc.original_filename, + file_size: doc.file_size, + mime_type: doc.mime_type, + tags: doc.tags, + created_at: doc.created_at, + has_ocr_text: doc.ocr_text.is_some(), + } + } +} + +impl From for UserResponse { + fn from(user: User) -> Self { + Self { + id: user.id, + username: user.username, + email: user.email, + } + } +} \ No newline at end of file diff --git a/src/ocr.rs b/src/ocr.rs new file mode 100644 index 0000000..8303ada --- /dev/null +++ b/src/ocr.rs @@ -0,0 +1,61 @@ +use anyhow::{anyhow, Result}; +use std::path::Path; +use tesseract::Tesseract; + +pub struct OcrService; + +impl OcrService { + pub fn new() -> Self { + Self + } + + pub async fn extract_text_from_image(&self, file_path: &str) -> Result { + let mut tesseract = Tesseract::new(None, Some("eng"))?; + + tesseract.set_image(file_path)?; + + let text = tesseract.get_text()?; + + Ok(text.trim().to_string()) + } + + pub async fn extract_text_from_pdf(&self, file_path: &str) -> Result { + let bytes = std::fs::read(file_path)?; + let text = pdf_extract::extract_text_from_mem(&bytes) + .map_err(|e| anyhow!("Failed to extract text from PDF: {}", e))?; + + Ok(text.trim().to_string()) + } + + pub async fn extract_text(&self, file_path: &str, mime_type: &str) -> Result { + match mime_type { + "application/pdf" => self.extract_text_from_pdf(file_path).await, + "image/png" | "image/jpeg" | "image/jpg" | "image/tiff" | "image/bmp" => { + self.extract_text_from_image(file_path).await + } + "text/plain" => { + let text = tokio::fs::read_to_string(file_path).await?; + Ok(text) + } + _ => { + if self.is_image_file(file_path) { + self.extract_text_from_image(file_path).await + } else { + Err(anyhow!("Unsupported file type for OCR: {}", mime_type)) + } + } + } + } + + fn is_image_file(&self, file_path: &str) -> bool { + if let Some(extension) = Path::new(file_path) + .extension() + .and_then(|ext| ext.to_str()) + { + let ext_lower = extension.to_lowercase(); + matches!(ext_lower.as_str(), "png" | "jpg" | "jpeg" | "tiff" | "bmp" | "gif") + } else { + false + } + } +} \ No newline at end of file diff --git a/src/routes/auth.rs b/src/routes/auth.rs new file mode 100644 index 0000000..b499f90 --- /dev/null +++ b/src/routes/auth.rs @@ -0,0 +1,65 @@ +use axum::{ + extract::State, + http::StatusCode, + response::Json, + routing::{get, post}, + Router, +}; +use std::sync::Arc; + +use crate::{ + auth::{create_jwt, AuthUser}, + models::{CreateUser, LoginRequest, LoginResponse, UserResponse}, + AppState, +}; + +pub fn router() -> Router> { + Router::new() + .route("/register", post(register)) + .route("/login", post(login)) + .route("/me", get(me)) +} + +async fn register( + State(state): State>, + Json(user_data): Json, +) -> Result, StatusCode> { + let user = state + .db + .create_user(user_data) + .await + .map_err(|_| StatusCode::BAD_REQUEST)?; + + Ok(Json(user.into())) +} + +async fn login( + State(state): State>, + Json(login_data): Json, +) -> Result, StatusCode> { + let user = state + .db + .get_user_by_username(&login_data.username) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? + .ok_or(StatusCode::UNAUTHORIZED)?; + + let is_valid = bcrypt::verify(&login_data.password, &user.password_hash) + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + if !is_valid { + return Err(StatusCode::UNAUTHORIZED); + } + + let token = create_jwt(&user, &state.config.jwt_secret) + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(Json(LoginResponse { + token, + user: user.into(), + })) +} + +async fn me(auth_user: AuthUser) -> Json { + Json(auth_user.user.into()) +} \ No newline at end of file diff --git a/src/routes/documents.rs b/src/routes/documents.rs new file mode 100644 index 0000000..defa561 --- /dev/null +++ b/src/routes/documents.rs @@ -0,0 +1,143 @@ +use axum::{ + extract::{Multipart, Path, Query, State}, + http::StatusCode, + response::Json, + routing::{get, post}, + Router, +}; +use serde::Deserialize; +use std::sync::Arc; +use tokio::spawn; + +use crate::{ + auth::AuthUser, + file_service::FileService, + models::{DocumentResponse, SearchRequest, SearchResponse}, + ocr::OcrService, + AppState, +}; + +#[derive(Deserialize)] +struct PaginationQuery { + limit: Option, + offset: Option, +} + +pub fn router() -> Router> { + Router::new() + .route("/", post(upload_document)) + .route("/", get(list_documents)) + .route("/:id/download", get(download_document)) +} + +async fn upload_document( + State(state): State>, + auth_user: AuthUser, + mut multipart: Multipart, +) -> Result, StatusCode> { + let file_service = FileService::new(state.config.upload_path.clone()); + + while let Some(field) = multipart.next_field().await.map_err(|_| StatusCode::BAD_REQUEST)? { + let name = field.name().unwrap_or("").to_string(); + + if name == "file" { + let filename = field + .file_name() + .ok_or(StatusCode::BAD_REQUEST)? + .to_string(); + + if !file_service.is_allowed_file_type(&filename, &state.config.allowed_file_types) { + return Err(StatusCode::BAD_REQUEST); + } + + let data = field.bytes().await.map_err(|_| StatusCode::BAD_REQUEST)?; + let file_size = data.len() as i64; + + let mime_type = mime_guess::from_path(&filename) + .first_or_octet_stream() + .to_string(); + + let file_path = file_service + .save_file(&filename, &data) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let document = file_service.create_document( + &filename, + &filename, + &file_path, + file_size, + &mime_type, + auth_user.user.id, + ); + + let saved_document = state + .db + .create_document(document) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let document_id = saved_document.id; + let db_clone = state.db.clone(); + let file_path_clone = file_path.clone(); + let mime_type_clone = mime_type.clone(); + + spawn(async move { + let ocr_service = OcrService::new(); + if let Ok(text) = ocr_service.extract_text(&file_path_clone, &mime_type_clone).await { + if !text.is_empty() { + let _ = db_clone.update_document_ocr(document_id, &text).await; + } + } + }); + + return Ok(Json(saved_document.into())); + } + } + + Err(StatusCode::BAD_REQUEST) +} + +async fn list_documents( + State(state): State>, + auth_user: AuthUser, + Query(pagination): Query, +) -> Result>, StatusCode> { + let limit = pagination.limit.unwrap_or(50); + let offset = pagination.offset.unwrap_or(0); + + let documents = state + .db + .get_documents_by_user(auth_user.user.id, limit, offset) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let response: Vec = documents.into_iter().map(|doc| doc.into()).collect(); + + Ok(Json(response)) +} + +async fn download_document( + State(state): State>, + auth_user: AuthUser, + Path(document_id): Path, +) -> Result, StatusCode> { + let documents = state + .db + .get_documents_by_user(auth_user.user.id, 1000, 0) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let document = documents + .into_iter() + .find(|doc| doc.id == document_id) + .ok_or(StatusCode::NOT_FOUND)?; + + let file_service = FileService::new(state.config.upload_path.clone()); + let file_data = file_service + .read_file(&document.file_path) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + Ok(file_data) +} \ No newline at end of file diff --git a/src/routes/mod.rs b/src/routes/mod.rs new file mode 100644 index 0000000..c3e50b4 --- /dev/null +++ b/src/routes/mod.rs @@ -0,0 +1,3 @@ +pub mod auth; +pub mod documents; +pub mod search; \ No newline at end of file diff --git a/src/routes/search.rs b/src/routes/search.rs new file mode 100644 index 0000000..2fd0039 --- /dev/null +++ b/src/routes/search.rs @@ -0,0 +1,37 @@ +use axum::{ + extract::{Query, State}, + http::StatusCode, + response::Json, + routing::get, + Router, +}; +use std::sync::Arc; + +use crate::{ + auth::AuthUser, + models::{SearchRequest, SearchResponse}, + AppState, +}; + +pub fn router() -> Router> { + Router::new().route("/", get(search_documents)) +} + +async fn search_documents( + State(state): State>, + auth_user: AuthUser, + Query(search_request): Query, +) -> Result, StatusCode> { + let (documents, total) = state + .db + .search_documents(auth_user.user.id, search_request) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + let response = SearchResponse { + documents: documents.into_iter().map(|doc| doc.into()).collect(), + total, + }; + + Ok(Json(response)) +} \ No newline at end of file diff --git a/src/tests/auth_tests.rs b/src/tests/auth_tests.rs new file mode 100644 index 0000000..f9aedda --- /dev/null +++ b/src/tests/auth_tests.rs @@ -0,0 +1,75 @@ +#[cfg(test)] +mod tests { + use super::super::auth::{create_jwt, verify_jwt}; + use super::super::models::User; + use chrono::Utc; + use uuid::Uuid; + + fn create_test_user() -> User { + User { + id: Uuid::new_v4(), + username: "testuser".to_string(), + email: "test@example.com".to_string(), + password_hash: "hashed_password".to_string(), + created_at: Utc::now(), + updated_at: Utc::now(), + } + } + + #[test] + fn test_create_jwt() { + let user = create_test_user(); + let secret = "test_secret"; + + let result = create_jwt(&user, secret); + assert!(result.is_ok()); + + let token = result.unwrap(); + assert!(!token.is_empty()); + } + + #[test] + fn test_verify_jwt_valid() { + let user = create_test_user(); + let secret = "test_secret"; + + let token = create_jwt(&user, secret).unwrap(); + let result = verify_jwt(&token, secret); + + assert!(result.is_ok()); + + let claims = result.unwrap(); + assert_eq!(claims.sub, user.id); + assert_eq!(claims.username, user.username); + } + + #[test] + fn test_verify_jwt_invalid_secret() { + let user = create_test_user(); + let secret = "test_secret"; + let wrong_secret = "wrong_secret"; + + let token = create_jwt(&user, secret).unwrap(); + let result = verify_jwt(&token, wrong_secret); + + assert!(result.is_err()); + } + + #[test] + fn test_verify_jwt_malformed_token() { + let secret = "test_secret"; + let malformed_token = "invalid.token.here"; + + let result = verify_jwt(malformed_token, secret); + assert!(result.is_err()); + } + + #[test] + fn test_verify_jwt_empty_token() { + let secret = "test_secret"; + let empty_token = ""; + + let result = verify_jwt(empty_token, secret); + assert!(result.is_err()); + } +} \ No newline at end of file diff --git a/src/tests/db_tests.rs b/src/tests/db_tests.rs new file mode 100644 index 0000000..20b127c --- /dev/null +++ b/src/tests/db_tests.rs @@ -0,0 +1,168 @@ +#[cfg(test)] +mod tests { + use super::super::db::Database; + use super::super::models::{CreateUser, Document, SearchRequest}; + use chrono::Utc; + use tempfile::NamedTempFile; + use uuid::Uuid; + + async fn create_test_db() -> Database { + let temp_file = NamedTempFile::new().unwrap(); + let db_url = format!("sqlite://{}", temp_file.path().display()); + + let db = Database::new(&db_url).await.unwrap(); + db.migrate().await.unwrap(); + db + } + + fn create_test_user_data() -> CreateUser { + CreateUser { + username: "testuser".to_string(), + email: "test@example.com".to_string(), + password: "password123".to_string(), + } + } + + fn create_test_document(user_id: Uuid) -> Document { + Document { + id: Uuid::new_v4(), + filename: "test.pdf".to_string(), + original_filename: "test.pdf".to_string(), + file_path: "/path/to/test.pdf".to_string(), + file_size: 1024, + mime_type: "application/pdf".to_string(), + content: Some("Test content".to_string()), + ocr_text: Some("OCR extracted text".to_string()), + tags: vec!["test".to_string(), "document".to_string()], + created_at: Utc::now(), + updated_at: Utc::now(), + user_id, + } + } + + #[tokio::test] + async fn test_create_user() { + let db = create_test_db().await; + let user_data = create_test_user_data(); + + let result = db.create_user(user_data).await; + assert!(result.is_ok()); + + let user = result.unwrap(); + assert_eq!(user.username, "testuser"); + assert_eq!(user.email, "test@example.com"); + assert!(!user.password_hash.is_empty()); + assert_ne!(user.password_hash, "password123"); // Should be hashed + } + + #[tokio::test] + async fn test_get_user_by_username() { + let db = create_test_db().await; + let user_data = create_test_user_data(); + + let created_user = db.create_user(user_data).await.unwrap(); + + let result = db.get_user_by_username("testuser").await; + assert!(result.is_ok()); + + let found_user = result.unwrap(); + assert!(found_user.is_some()); + + let user = found_user.unwrap(); + assert_eq!(user.id, created_user.id); + assert_eq!(user.username, "testuser"); + } + + #[tokio::test] + async fn test_get_user_by_username_not_found() { + let db = create_test_db().await; + + let result = db.get_user_by_username("nonexistent").await; + assert!(result.is_ok()); + + let found_user = result.unwrap(); + assert!(found_user.is_none()); + } + + #[tokio::test] + async fn test_create_document() { + let db = create_test_db().await; + let user_data = create_test_user_data(); + let user = db.create_user(user_data).await.unwrap(); + + let document = create_test_document(user.id); + + let result = db.create_document(document.clone()).await; + assert!(result.is_ok()); + + let created_doc = result.unwrap(); + assert_eq!(created_doc.filename, document.filename); + assert_eq!(created_doc.user_id, user.id); + } + + #[tokio::test] + async fn test_get_documents_by_user() { + let db = create_test_db().await; + let user_data = create_test_user_data(); + let user = db.create_user(user_data).await.unwrap(); + + let document1 = create_test_document(user.id); + let document2 = create_test_document(user.id); + + db.create_document(document1).await.unwrap(); + db.create_document(document2).await.unwrap(); + + let result = db.get_documents_by_user(user.id, 10, 0).await; + assert!(result.is_ok()); + + let documents = result.unwrap(); + assert_eq!(documents.len(), 2); + } + + #[tokio::test] + async fn test_search_documents() { + let db = create_test_db().await; + let user_data = create_test_user_data(); + let user = db.create_user(user_data).await.unwrap(); + + let mut document = create_test_document(user.id); + document.content = Some("This is a searchable document".to_string()); + document.ocr_text = Some("OCR searchable text".to_string()); + + db.create_document(document).await.unwrap(); + + let search_request = SearchRequest { + query: "searchable".to_string(), + tags: None, + mime_types: None, + limit: Some(10), + offset: Some(0), + }; + + let result = db.search_documents(user.id, search_request).await; + assert!(result.is_ok()); + + let (documents, total) = result.unwrap(); + assert_eq!(documents.len(), 1); + assert_eq!(total, 1); + } + + #[tokio::test] + async fn test_update_document_ocr() { + let db = create_test_db().await; + let user_data = create_test_user_data(); + let user = db.create_user(user_data).await.unwrap(); + + let document = create_test_document(user.id); + let created_doc = db.create_document(document).await.unwrap(); + + let new_ocr_text = "Updated OCR text"; + let result = db.update_document_ocr(created_doc.id, new_ocr_text).await; + assert!(result.is_ok()); + + // Verify the update by searching + let documents = db.get_documents_by_user(user.id, 10, 0).await.unwrap(); + let updated_doc = documents.iter().find(|d| d.id == created_doc.id).unwrap(); + assert_eq!(updated_doc.ocr_text.as_ref().unwrap(), new_ocr_text); + } +} \ No newline at end of file diff --git a/src/tests/file_service_tests.rs b/src/tests/file_service_tests.rs new file mode 100644 index 0000000..90f0096 --- /dev/null +++ b/src/tests/file_service_tests.rs @@ -0,0 +1,126 @@ +#[cfg(test)] +mod tests { + use super::super::file_service::FileService; + use std::fs; + use tempfile::TempDir; + use uuid::Uuid; + + fn create_test_file_service() -> (FileService, TempDir) { + let temp_dir = TempDir::new().unwrap(); + let upload_path = temp_dir.path().to_string_lossy().to_string(); + let service = FileService::new(upload_path); + (service, temp_dir) + } + + #[tokio::test] + async fn test_save_file() { + let (service, _temp_dir) = create_test_file_service(); + let filename = "test.txt"; + let data = b"Hello, World!"; + + let result = service.save_file(filename, data).await; + assert!(result.is_ok()); + + let file_path = result.unwrap(); + assert!(fs::metadata(&file_path).is_ok()); + + let saved_content = fs::read(&file_path).unwrap(); + assert_eq!(saved_content, data); + } + + #[tokio::test] + async fn test_save_file_with_extension() { + let (service, _temp_dir) = create_test_file_service(); + let filename = "document.pdf"; + let data = b"PDF content"; + + let result = service.save_file(filename, data).await; + assert!(result.is_ok()); + + let file_path = result.unwrap(); + assert!(file_path.ends_with(".pdf")); + } + + #[tokio::test] + async fn test_save_file_without_extension() { + let (service, _temp_dir) = create_test_file_service(); + let filename = "document"; + let data = b"Some content"; + + let result = service.save_file(filename, data).await; + assert!(result.is_ok()); + + let file_path = result.unwrap(); + // Should not have an extension + assert!(!file_path.contains('.')); + } + + #[test] + fn test_create_document() { + let (service, _temp_dir) = create_test_file_service(); + let user_id = Uuid::new_v4(); + + let document = service.create_document( + "saved_file.pdf", + "original_file.pdf", + "/path/to/saved_file.pdf", + 1024, + "application/pdf", + user_id, + ); + + assert_eq!(document.filename, "saved_file.pdf"); + assert_eq!(document.original_filename, "original_file.pdf"); + assert_eq!(document.file_path, "/path/to/saved_file.pdf"); + assert_eq!(document.file_size, 1024); + assert_eq!(document.mime_type, "application/pdf"); + assert_eq!(document.user_id, user_id); + assert!(document.content.is_none()); + assert!(document.ocr_text.is_none()); + assert!(document.tags.is_empty()); + } + + #[test] + fn test_is_allowed_file_type() { + let (service, _temp_dir) = create_test_file_service(); + let allowed_types = vec![ + "pdf".to_string(), + "txt".to_string(), + "png".to_string(), + "jpg".to_string(), + ]; + + assert!(service.is_allowed_file_type("document.pdf", &allowed_types)); + assert!(service.is_allowed_file_type("text.txt", &allowed_types)); + assert!(service.is_allowed_file_type("image.PNG", &allowed_types)); // Case insensitive + assert!(service.is_allowed_file_type("photo.JPG", &allowed_types)); // Case insensitive + + assert!(!service.is_allowed_file_type("document.doc", &allowed_types)); + assert!(!service.is_allowed_file_type("archive.zip", &allowed_types)); + assert!(!service.is_allowed_file_type("noextension", &allowed_types)); + } + + #[tokio::test] + async fn test_read_file() { + let (service, _temp_dir) = create_test_file_service(); + let filename = "test.txt"; + let original_data = b"Hello, World!"; + + let file_path = service.save_file(filename, original_data).await.unwrap(); + + let result = service.read_file(&file_path).await; + assert!(result.is_ok()); + + let read_data = result.unwrap(); + assert_eq!(read_data, original_data); + } + + #[tokio::test] + async fn test_read_nonexistent_file() { + let (service, _temp_dir) = create_test_file_service(); + let nonexistent_path = "/path/to/nonexistent/file.txt"; + + let result = service.read_file(nonexistent_path).await; + assert!(result.is_err()); + } +} \ No newline at end of file diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..2a476a8 --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1,4 @@ +mod auth_tests; +mod db_tests; +mod file_service_tests; +mod ocr_tests; \ No newline at end of file diff --git a/src/tests/ocr_tests.rs b/src/tests/ocr_tests.rs new file mode 100644 index 0000000..589375f --- /dev/null +++ b/src/tests/ocr_tests.rs @@ -0,0 +1,100 @@ +#[cfg(test)] +mod tests { + use super::super::ocr::OcrService; + use std::fs; + use tempfile::NamedTempFile; + + #[test] + fn test_is_image_file() { + let ocr_service = OcrService::new(); + + assert!(ocr_service.is_image_file("image.png")); + assert!(ocr_service.is_image_file("photo.jpg")); + assert!(ocr_service.is_image_file("picture.JPEG")); + assert!(ocr_service.is_image_file("scan.tiff")); + assert!(ocr_service.is_image_file("bitmap.bmp")); + assert!(ocr_service.is_image_file("animation.gif")); + + assert!(!ocr_service.is_image_file("document.pdf")); + assert!(!ocr_service.is_image_file("text.txt")); + assert!(!ocr_service.is_image_file("archive.zip")); + assert!(!ocr_service.is_image_file("noextension")); + } + + #[tokio::test] + async fn test_extract_text_from_plain_text() { + let ocr_service = OcrService::new(); + + let mut temp_file = NamedTempFile::new().unwrap(); + let test_content = "This is a test text file.\nWith multiple lines."; + fs::write(temp_file.path(), test_content).unwrap(); + + let result = ocr_service + .extract_text(temp_file.path().to_str().unwrap(), "text/plain") + .await; + + assert!(result.is_ok()); + let extracted_text = result.unwrap(); + assert_eq!(extracted_text, test_content); + } + + #[tokio::test] + async fn test_extract_text_unsupported_type() { + let ocr_service = OcrService::new(); + + let mut temp_file = NamedTempFile::new().unwrap(); + fs::write(temp_file.path(), "some content").unwrap(); + + let result = ocr_service + .extract_text(temp_file.path().to_str().unwrap(), "application/zip") + .await; + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Unsupported file type")); + } + + #[tokio::test] + async fn test_extract_text_from_nonexistent_file() { + let ocr_service = OcrService::new(); + + let result = ocr_service + .extract_text("/path/to/nonexistent/file.txt", "text/plain") + .await; + + assert!(result.is_err()); + } + + // Note: These tests would require actual PDF and image files to test fully + // For now, we're testing the error handling and basic functionality + + #[tokio::test] + async fn test_extract_text_from_pdf_empty_file() { + let ocr_service = OcrService::new(); + + let mut temp_file = NamedTempFile::new().unwrap(); + fs::write(temp_file.path(), "").unwrap(); // Empty file, not a valid PDF + + let result = ocr_service + .extract_text_from_pdf(temp_file.path().to_str().unwrap()) + .await; + + // Should fail because it's not a valid PDF + assert!(result.is_err()); + } + + #[tokio::test] + async fn test_extract_text_with_image_extension_fallback() { + let ocr_service = OcrService::new(); + + let mut temp_file = NamedTempFile::with_suffix(".png").unwrap(); + fs::write(temp_file.path(), "fake image data").unwrap(); + + let result = ocr_service + .extract_text(temp_file.path().to_str().unwrap(), "unknown/type") + .await; + + // This should try to process as image due to extension, but fail due to invalid data + // The important thing is that it attempts image processing + assert!(result.is_err()); + } +} \ No newline at end of file diff --git a/src/watcher.rs b/src/watcher.rs new file mode 100644 index 0000000..cb92632 --- /dev/null +++ b/src/watcher.rs @@ -0,0 +1,99 @@ +use anyhow::Result; +use notify::{RecommendedWatcher, RecursiveMode, Watcher}; +use std::path::Path; +use tokio::sync::mpsc; +use tracing::{error, info}; + +use crate::{config::Config, db::Database, file_service::FileService, ocr::OcrService}; + +pub async fn start_folder_watcher(config: Config) -> Result<()> { + let (tx, mut rx) = mpsc::channel(100); + + let mut watcher = RecommendedWatcher::new( + move |res| { + if let Err(e) = tx.blocking_send(res) { + error!("Failed to send file event: {}", e); + } + }, + notify::Config::default(), + )?; + + watcher.watch(Path::new(&config.watch_folder), RecursiveMode::Recursive)?; + + info!("Starting folder watcher on: {}", config.watch_folder); + + let db = Database::new(&config.database_url).await?; + let file_service = FileService::new(config.upload_path.clone()); + let ocr_service = OcrService::new(); + + while let Some(res) = rx.recv().await { + match res { + Ok(event) => { + for path in event.paths { + if let Err(e) = process_file(&path, &db, &file_service, &ocr_service, &config).await { + error!("Failed to process file {:?}: {}", path, e); + } + } + } + Err(e) => error!("Watch error: {:?}", e), + } + } + + Ok(()) +} + +async fn process_file( + path: &std::path::Path, + db: &Database, + file_service: &FileService, + ocr_service: &OcrService, + config: &Config, +) -> Result<()> { + if !path.is_file() { + return Ok(()); + } + + let filename = path + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("") + .to_string(); + + if !file_service.is_allowed_file_type(&filename, &config.allowed_file_types) { + return Ok(()); + } + + info!("Processing new file: {:?}", path); + + let file_data = tokio::fs::read(path).await?; + let file_size = file_data.len() as i64; + + let mime_type = mime_guess::from_path(&filename) + .first_or_octet_stream() + .to_string(); + + let file_path = file_service.save_file(&filename, &file_data).await?; + + let system_user_id = uuid::Uuid::parse_str("00000000-0000-0000-0000-000000000000")?; + + let mut document = file_service.create_document( + &filename, + &filename, + &file_path, + file_size, + &mime_type, + system_user_id, + ); + + if let Ok(text) = ocr_service.extract_text(&file_path, &mime_type).await { + if !text.is_empty() { + document.ocr_text = Some(text); + } + } + + db.create_document(document).await?; + + info!("Successfully processed file: {}", filename); + + Ok(()) +} \ No newline at end of file