diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
new file mode 100644
index 0000000..28ac08d
--- /dev/null
+++ b/.github/workflows/deploy-docs.yml
@@ -0,0 +1,171 @@
+# GitHub Actions workflow for deploying MkDocs documentation to Cloudflare Pages
+# This workflow builds and deploys your MkDocs site when changes are pushed to main
+name: Deploy MkDocs Documentation
+
+on:
+  # Trigger on push to main branch
+  push:
+    branches:
+      - main
+    # Only run when docs files change
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+      - 'requirements.txt'
+      - '.github/workflows/deploy-docs.yml'
+
+  # Allow manual triggering from Actions tab
+  workflow_dispatch:
+
+  # Run on pull requests for preview deployments
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+      - 'requirements.txt'
+      - '.github/workflows/deploy-docs.yml'
+
+# Serialize runs per ref so an older run cannot overwrite a newer deployment.
+# Superseded PR preview runs are cancelled; production deploys always finish.
+concurrency:
+  group: deploy-docs-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+  build-and-deploy:
+    name: Build and Deploy MkDocs
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    # Least-privilege permissions for deployment (no OIDC step, so no id-token)
+    permissions:
+      contents: read
+      deployments: write
+      pull-requests: write # For PR preview comments
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Fetch all history for git info and mkdocs-git-revision-date plugin
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: 'requirements.txt'
+
+      - name: Install MkDocs and Dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -r requirements.txt
+        env:
+          PIP_DISABLE_PIP_VERSION_CHECK: '1'
+
+      - name: Build MkDocs Site
+        run: |
+          # Build with strict mode but ignore the expected README.md warning
+          # MkDocs always warns when README.md exists alongside index.md
+          # The strict build runs once; its captured log drives the decision below.
+          set -o pipefail
+          log="${RUNNER_TEMP}/mkdocs-strict.log"
+          status=0
+          mkdocs build --strict --verbose 2>&1 | tee "$log" || status=$?
+          if [ "$status" -eq 0 ]; then
+            exit 0
+          fi
+
+          # grep -c exits non-zero on zero matches; "|| true" keeps the printed count
+          warnings=$(grep -c '^WARNING' "$log" || true)
+          if [ "$warnings" -eq 1 ] &&
+            grep -q "WARNING.*README.md.*conflicts with.*index.md" "$log"; then
+            echo "✅ Build succeeded with expected README.md warning"
+            mkdocs build --verbose
+          else
+            echo "❌ Build failed with unexpected errors"
+            exit "$status"
+          fi
+
+      - name: Validate Built Site
+        run: |
+          # Basic validation that important files exist
+          test -f site/index.html || (echo "ERROR: site/index.html not found" && exit 1)
+          test -f site/sitemap.xml || (echo "ERROR: site/sitemap.xml not found" && exit 1)
+          test -d site/assets || (echo "ERROR: site/assets directory not found" && exit 1)
+          echo "✅ Site validation passed"
+
+      # Deploy using Wrangler (recommended by Cloudflare)
+      - name: Deploy to Cloudflare Pages
+        id: deploy
+        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+        env:
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
+          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
+        uses: cloudflare/wrangler-action@v3
+        with:
+          command: pages deploy site --project-name=readur-docs --branch=${{ github.ref_name }}
+
+      # Deploy preview for PRs. Fork PRs are skipped: secrets are not exposed to them.
+      - name: Deploy Preview to Cloudflare Pages
+        id: preview-deployment
+        if: >-
+          github.event_name == 'pull_request' &&
+          github.event.pull_request.head.repo.full_name == github.repository
+        env:
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
+          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
+        uses: cloudflare/wrangler-action@v3
+        with:
+          command: pages deploy site --project-name=readur-docs --branch=pr-${{ github.event.pull_request.number }}
+
+      # Post deployment URL as PR comment (only when a preview was deployed)
+      - name: Comment PR with Preview URL
+        if: >-
+          github.event_name == 'pull_request' &&
+          github.event.pull_request.head.repo.full_name == github.repository
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const prNumber = context.issue.number;
+            // Construct preview URL based on Cloudflare Pages pattern
+            // Note: Actual URL may vary based on Cloudflare configuration
+            const previewUrl = `https://pr-${prNumber}.readur-docs.pages.dev`;
+            const mainUrl = 'https://readur.app';
+
+            // Check if we already commented. Paginate so the bot comment is
+            // still found on PRs with more than one page of comments.
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              per_page: 100
+            });
+
+            const botComment = comments.find(comment =>
+              comment.user?.type === 'Bot' &&
+              comment.body?.includes('Documentation preview is ready')
+            );
+
+            const commentBody = `📚 Documentation preview is ready!\n\n🔗 Preview URL: ${previewUrl}\n🌐 Production URL: ${mainUrl}\n\n✅ All checks passed\n\n_This preview will be updated automatically with new commits._`;
+
+            if (botComment) {
+              // Update existing comment
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: botComment.id,
+                body: commentBody
+              });
+            } else {
+              // Create new comment
+              await github.rest.issues.createComment({
+                issue_number: prNumber,
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                body: commentBody
+              });
+            }
diff --git a/.gitignore b/.gitignore
index ef18b05..38550a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,22 @@
+# Rust
 target/
+
+# Node
 client/node_modules/
 node_modules/
 .env
 assets/
 frontend/dist/
-.claude/settings.local.json # This file is used to store the local Claude settings.
+
+# Python
+venv/
+site/
+
+# Testing
 readur_uploads/
 readur_watch/
 test-results/
 uploads/
+
+# Misc.
.claude/settings.local.json diff --git a/README.md b/README.md index 5b29428..35c2069 100644 --- a/README.md +++ b/README.md @@ -140,3 +140,5 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file **Made with ❤️ and ☕ by the Readur team** + + diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..8e29c7e --- /dev/null +++ b/docs/README.md @@ -0,0 +1,106 @@ +# Readur Documentation + +This directory contains the source files for the Readur documentation site, built with MkDocs and Material for MkDocs. + +## Local Development + +### Prerequisites + +- Python 3.8+ +- pip + +### Setup + +1. Install dependencies: +```bash +pip install -r ../requirements.txt +``` + +2. Start the development server: +```bash +mkdocs serve +``` + +The documentation will be available at `http://localhost:8000`. + +### Building + +To build the static site: +```bash +mkdocs build +``` + +The built site will be in the `site/` directory. + +## Deployment + +The documentation is automatically deployed to [readur.app](https://readur.app) via GitHub Actions when changes are pushed to the main branch. + +### Manual Deployment + +If you need to deploy manually: + +1. Build the site: +```bash +mkdocs build +``` + +2. Deploy to Cloudflare Pages: +```bash +wrangler pages deploy site --project-name=readur-docs +``` + +## Structure + +- `docs/` - Documentation source files (Markdown) +- `mkdocs.yml` - MkDocs configuration +- `requirements.txt` - Python dependencies +- `overrides/` - Theme customizations +- `stylesheets/` - Custom CSS +- `javascripts/` - Custom JavaScript + +## Writing Documentation + +### Adding New Pages + +1. Create a new `.md` file in the appropriate directory +2. Add the page to the navigation in `mkdocs.yml` +3. 
Use Material for MkDocs features for rich content + +### Markdown Extensions + +We use several markdown extensions for enhanced functionality: + +- **Admonitions** - For notes, warnings, tips +- **Code blocks** - With syntax highlighting +- **Tabs** - For grouped content +- **Tables** - For structured data +- **Emoji** - For visual elements + +Example: +```markdown +!!! note "Important" + This is an important note. + +=== "Tab 1" + Content for tab 1 + +=== "Tab 2" + Content for tab 2 +``` + +## Contributing + +Please follow these guidelines when contributing to the documentation: + +1. Use clear, concise language +2. Include code examples where appropriate +3. Test all links and code samples +4. Run `mkdocs build --strict` before submitting +5. Update the navigation in `mkdocs.yml` for new pages + +## Resources + +- [MkDocs Documentation](https://www.mkdocs.org/) +- [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) +- [Markdown Guide](https://www.markdownguide.org/) \ No newline at end of file diff --git a/docs/dev/ERROR_SYSTEM.md b/docs/dev/ERROR_SYSTEM.md index 687dffd..ba45a7d 100644 --- a/docs/dev/ERROR_SYSTEM.md +++ b/docs/dev/ERROR_SYSTEM.md @@ -612,7 +612,4 @@ Planned improvements to the error system: ## References -- [Error Management Documentation](./ERROR_MANAGEMENT.md) -- [API Error Response Standards](../api-reference.md#error-responses) -- [Frontend Error Handling Guide](../../frontend/ERROR_HANDLING.md) -- [Monitoring and Observability](./MONITORING.md) \ No newline at end of file +- [API Reference](../api-reference.md) \ No newline at end of file diff --git a/docs/dev/README.md b/docs/dev/README.md index 911cd16..9ebd3cc 100644 --- a/docs/dev/README.md +++ b/docs/dev/README.md @@ -34,7 +34,6 @@ This directory contains technical documentation for developers working on Readur - [Configuration Reference](../configuration-reference.md) - Complete configuration options - [User Guide](../user-guide.md) - How to use Readur features - [API 
Reference](../api-reference.md) - REST API documentation -- [New Features in 2.5.4](../new-features-2.5.4.md) - Latest features and improvements ## 🤝 Contributing diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md index 9a3f644..ea6a280 100644 --- a/docs/dev/architecture.md +++ b/docs/dev/architecture.md @@ -344,7 +344,7 @@ Potential service boundaries: ## Next Steps -- Review [deployment options](deployment.md) +- Review [deployment options](../deployment.md) - Explore [performance tuning](OCR_OPTIMIZATION_GUIDE.md) - Understand [database design](DATABASE_GUARDRAILS.md) - Learn about [testing strategy](TESTING.md) \ No newline at end of file diff --git a/docs/dev/development.md b/docs/dev/development.md index 5f7ff1a..f3c986b 100644 --- a/docs/dev/development.md +++ b/docs/dev/development.md @@ -268,7 +268,7 @@ Style preferences: ## Contributing -We welcome contributions! Please see our [Contributing Guide](../CONTRIBUTING.md) for details. +We welcome contributions! ### Getting Started diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..29a8b38 --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,506 @@ +# Configuration Guide + +Configure Readur for your specific needs and optimize for your workload. + +## Configuration Overview + +Readur uses environment variables for configuration, making it easy to deploy in containerized environments. Configuration can be set through: + +1. **Environment variables** - Direct system environment +2. **`.env` file** - Docker Compose automatically loads this +3. **`docker-compose.yml`** - Directly in the compose file +4. 
**Kubernetes ConfigMaps** - For K8s deployments + +## Essential Configuration + +### Security Settings + +These MUST be changed from defaults in production: + +```bash +# Generate secure secrets +JWT_SECRET=$(openssl rand -base64 32) +DB_PASSWORD=$(openssl rand -base64 32) + +# Set admin password +ADMIN_PASSWORD=your_secure_password_here + +# Enable HTTPS (reverse proxy recommended) +FORCE_HTTPS=true +SECURE_COOKIES=true +``` + +### Database Configuration + +```bash +# PostgreSQL connection +DATABASE_URL=postgresql://readur:${DB_PASSWORD}@postgres:5432/readur + +# Connection pool settings +DB_POOL_SIZE=20 +DB_MAX_OVERFLOW=40 +DB_POOL_TIMEOUT=30 + +# PostgreSQL specific optimizations +POSTGRES_SHARED_BUFFERS=256MB +POSTGRES_EFFECTIVE_CACHE_SIZE=1GB +``` + +### Storage Configuration + +#### Local Storage (Default) + +```bash +# File storage paths +UPLOAD_PATH=/app/uploads +TEMP_PATH=/app/temp + +# Size limits +MAX_FILE_SIZE_MB=50 +TOTAL_STORAGE_LIMIT_GB=100 + +# File types +ALLOWED_FILE_TYPES=pdf,png,jpg,jpeg,tiff,bmp,gif,txt,rtf,doc,docx +``` + +#### S3 Storage (Scalable) + +```bash +# Enable S3 backend +STORAGE_BACKEND=s3 +S3_ENABLED=true + +# AWS S3 +S3_BUCKET_NAME=readur-documents +S3_REGION=us-east-1 +AWS_ACCESS_KEY_ID=your_access_key +AWS_SECRET_ACCESS_KEY=your_secret_key + +# Or S3-compatible (MinIO, Wasabi, etc.) 
+S3_ENDPOINT=https://s3.example.com +S3_PATH_STYLE=true # For MinIO +``` + +## OCR Configuration + +### Language Settings + +```bash +# Single language (fastest) +OCR_LANGUAGE=eng + +# Multiple languages +OCR_LANGUAGE=eng+deu+fra+spa + +# Available languages (partial list): +# eng - English +# deu - German (Deutsch) +# fra - French (Français) +# spa - Spanish (Español) +# ita - Italian (Italiano) +# por - Portuguese +# rus - Russian +# chi_sim - Chinese Simplified +# jpn - Japanese +# ara - Arabic +``` + +### Performance Tuning + +```bash +# Concurrent processing +CONCURRENT_OCR_JOBS=4 # Match CPU cores +OCR_WORKER_THREADS=2 # Threads per job + +# Timeouts and limits +OCR_TIMEOUT_SECONDS=300 +OCR_MAX_PAGES=500 +MAX_FILE_SIZE_MB=100 + +# Memory management +OCR_MEMORY_LIMIT_MB=512 # Per job +ENABLE_MEMORY_PROFILING=false + +# Processing options +OCR_DPI=300 # Higher = better quality, slower +ENABLE_PREPROCESSING=true +ENABLE_AUTO_ROTATION=true +ENABLE_DESKEW=true +``` + +### Quality vs Speed + +#### High Quality (Slow) +```bash +OCR_QUALITY_PRESET=high +OCR_DPI=300 +ENABLE_PREPROCESSING=true +ENABLE_DESKEW=true +ENABLE_AUTO_ROTATION=true +OCR_ENGINE_MODE=3 # LSTM only +``` + +#### Balanced (Default) +```bash +OCR_QUALITY_PRESET=balanced +OCR_DPI=200 +ENABLE_PREPROCESSING=true +ENABLE_DESKEW=false +ENABLE_AUTO_ROTATION=true +OCR_ENGINE_MODE=2 # LSTM + Legacy +``` + +#### Fast (Lower Quality) +```bash +OCR_QUALITY_PRESET=fast +OCR_DPI=150 +ENABLE_PREPROCESSING=false +ENABLE_DESKEW=false +ENABLE_AUTO_ROTATION=false +OCR_ENGINE_MODE=0 # Legacy only +``` + +## Source Synchronization + +### Watch Folders + +```bash +# Global watch folder +WATCH_FOLDER=/app/watch +WATCH_INTERVAL_SECONDS=60 +FILE_STABILITY_CHECK_MS=2000 + +# Per-user watch folders +ENABLE_PER_USER_WATCH=true +USER_WATCH_BASE_DIR=/app/user_watch + +# Processing rules +WATCH_PROCESS_HIDDEN_FILES=false +WATCH_RECURSIVE=true +WATCH_MAX_DEPTH=5 +DELETE_AFTER_IMPORT=false +``` + +### WebDAV Sources + +```bash +# 
Default WebDAV settings +WEBDAV_TIMEOUT_SECONDS=30 +WEBDAV_MAX_RETRIES=3 +WEBDAV_CHUNK_SIZE_MB=10 +WEBDAV_VERIFY_SSL=true +``` + +### S3 Sources + +```bash +# S3 sync settings +S3_SYNC_INTERVAL_MINUTES=30 +S3_BATCH_SIZE=100 +S3_MULTIPART_THRESHOLD_MB=100 +S3_CONCURRENT_DOWNLOADS=4 +``` + +## Authentication & Security + +### Local Authentication + +```bash +# Password policy +PASSWORD_MIN_LENGTH=12 +PASSWORD_REQUIRE_UPPERCASE=true +PASSWORD_REQUIRE_NUMBERS=true +PASSWORD_REQUIRE_SPECIAL=true + +# Session management +SESSION_TIMEOUT_MINUTES=60 +REMEMBER_ME_DURATION_DAYS=30 +MAX_LOGIN_ATTEMPTS=5 +LOCKOUT_DURATION_MINUTES=15 +``` + +### OIDC/SSO Configuration + +```bash +# Enable OIDC +OIDC_ENABLED=true + +# Provider configuration +OIDC_ISSUER=https://login.microsoftonline.com/tenant-id/v2.0 +OIDC_CLIENT_ID=your-client-id +OIDC_CLIENT_SECRET=your-client-secret +OIDC_REDIRECT_URI=https://readur.example.com/auth/callback + +# Optional settings +OIDC_SCOPE=openid profile email +OIDC_USER_CLAIM=email +OIDC_GROUPS_CLAIM=groups +OIDC_ADMIN_GROUP=readur-admins + +# Auto-provisioning +OIDC_AUTO_CREATE_USERS=true +OIDC_DEFAULT_ROLE=user +``` + +## Search Configuration + +### Search Engine + +```bash +# PostgreSQL Full-Text Search settings +SEARCH_LANGUAGE=english +SEARCH_RANKING_NORMALIZATION=32 +ENABLE_PHRASE_SEARCH=true +ENABLE_FUZZY_SEARCH=true +FUZZY_SEARCH_DISTANCE=2 + +# Search results +SEARCH_RESULTS_PER_PAGE=20 +SEARCH_SNIPPET_LENGTH=200 +SEARCH_HIGHLIGHT_TAG=mark +``` + +### Search Performance + +```bash +# Index management +AUTO_REINDEX=true +REINDEX_SCHEDULE=0 3 * * * # 3 AM daily +SEARCH_CACHE_TTL_SECONDS=300 +SEARCH_CACHE_SIZE_MB=100 + +# Query optimization +MAX_SEARCH_TERMS=10 +ENABLE_SEARCH_SUGGESTIONS=true +SUGGESTION_MIN_LENGTH=3 +``` + +## Monitoring & Logging + +### Logging Configuration + +```bash +# Log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL=INFO +LOG_FORMAT=json # or text + +# Log outputs +LOG_TO_FILE=true 
+LOG_FILE_PATH=/app/logs/readur.log +LOG_FILE_MAX_SIZE_MB=100 +LOG_FILE_BACKUP_COUNT=10 + +# Detailed logging +LOG_SQL_QUERIES=false +LOG_HTTP_REQUESTS=true +LOG_OCR_DETAILS=false +``` + +### Health Monitoring + +```bash +# Health check endpoints +HEALTH_CHECK_ENABLED=true +HEALTH_CHECK_PATH=/health +METRICS_ENABLED=true +METRICS_PATH=/metrics + +# Alerting thresholds +ALERT_QUEUE_SIZE=100 +ALERT_OCR_FAILURE_RATE=0.1 +ALERT_DISK_USAGE_PERCENT=80 +ALERT_MEMORY_USAGE_PERCENT=90 +``` + +## Performance Optimization + +### System Resources + +```bash +# Memory limits +MEMORY_LIMIT_MB=2048 +MEMORY_SOFT_LIMIT_MB=1536 + +# CPU settings +CPU_CORES=4 +WORKER_PROCESSES=auto # or specific number +WORKER_THREADS=2 + +# Connection limits +MAX_CONNECTIONS=100 +CONNECTION_TIMEOUT=30 +``` + +### Caching + +```bash +# Enable caching layers +ENABLE_CACHE=true +CACHE_TYPE=redis # or memory + +# Redis cache (if used) +REDIS_URL=redis://redis:6379/0 +REDIS_MAX_CONNECTIONS=50 + +# Cache TTLs +DOCUMENT_CACHE_TTL=3600 +SEARCH_CACHE_TTL=300 +USER_CACHE_TTL=1800 +``` + +### Queue Management + +```bash +# Background job processing +QUEUE_TYPE=database # or redis +MAX_QUEUE_SIZE=1000 +QUEUE_POLL_INTERVAL=5 + +# Job priorities +OCR_JOB_PRIORITY=5 +SYNC_JOB_PRIORITY=3 +CLEANUP_JOB_PRIORITY=1 + +# Retry configuration +MAX_JOB_RETRIES=3 +RETRY_DELAY_SECONDS=60 +EXPONENTIAL_BACKOFF=true +``` + +## Environment-Specific Configurations + +### Development + +```bash +# .env.development +DEBUG=true +LOG_LEVEL=DEBUG +RELOAD_ON_CHANGE=true +CONCURRENT_OCR_JOBS=1 +DISABLE_RATE_LIMITING=true +``` + +### Staging + +```bash +# .env.staging +DEBUG=false +LOG_LEVEL=INFO +CONCURRENT_OCR_JOBS=2 +ENABLE_PROFILING=true +MOCK_EXTERNAL_SERVICES=true +``` + +### Production + +```bash +# .env.production +DEBUG=false +LOG_LEVEL=WARNING +CONCURRENT_OCR_JOBS=8 +ENABLE_RATE_LIMITING=true +SECURE_COOKIES=true +FORCE_HTTPS=true +``` + +## Configuration Validation + +### Check Configuration + +```bash +# Validate current 
configuration +docker exec readur python validate_config.py + +# Test specific settings +docker exec readur python -c " +from config import settings +print(f'OCR Languages: {settings.OCR_LANGUAGE}') +print(f'Storage Backend: {settings.STORAGE_BACKEND}') +print(f'Max File Size: {settings.MAX_FILE_SIZE_MB}MB') +" +``` + +### Common Validation Errors + +```bash +# Missing required S3 credentials +ERROR: S3_ENABLED=true but S3_BUCKET_NAME not set + +# Invalid language code +ERROR: OCR_LANGUAGE 'xyz' not supported + +# Insufficient resources +WARNING: CONCURRENT_OCR_JOBS=8 but only 2 CPU cores available +``` + +## Configuration Best Practices + +### Security + +1. **Never commit secrets** - Use `.env` files and add to `.gitignore` +2. **Rotate secrets regularly** - Especially JWT_SECRET +3. **Use strong passwords** - Minimum 16 characters for admin +4. **Enable HTTPS** - Always in production +5. **Restrict file types** - Only allow necessary formats + +### Performance + +1. **Match workers to cores** - CONCURRENT_OCR_JOBS ≤ CPU cores +2. **Monitor memory usage** - Adjust limits based on usage +3. **Use S3 for scale** - Local storage limited by disk +4. **Enable caching** - Reduces database load +5. **Tune PostgreSQL** - Adjust shared_buffers and work_mem + +### Reliability + +1. **Set reasonable timeouts** - Prevent hanging jobs +2. **Configure retries** - Handle transient failures +3. **Enable health checks** - For load balancer integration +4. **Set up logging** - Essential for troubleshooting +5. 
**Regular backups** - Automate database backups + +## Configuration Examples + +### Small Office (5-10 users) + +```bash +# Minimal resources, local storage +CONCURRENT_OCR_JOBS=2 +MEMORY_LIMIT_MB=1024 +STORAGE_BACKEND=local +MAX_FILE_SIZE_MB=20 +SEARCH_CACHE_TTL=600 +``` + +### Medium Business (50-100 users) + +```bash +# Balanced performance, S3 storage +CONCURRENT_OCR_JOBS=4 +MEMORY_LIMIT_MB=4096 +STORAGE_BACKEND=s3 +MAX_FILE_SIZE_MB=50 +ENABLE_CACHE=true +CACHE_TYPE=redis +``` + +### Enterprise (500+ users) + +```bash +# High performance, full features +CONCURRENT_OCR_JOBS=16 +MEMORY_LIMIT_MB=16384 +STORAGE_BACKEND=s3 +MAX_FILE_SIZE_MB=100 +ENABLE_CACHE=true +CACHE_TYPE=redis +QUEUE_TYPE=redis +OIDC_ENABLED=true +``` + +## Next Steps + +- [Installation Guide](installation.md) - Deploy Readur +- [User Guide](../user-guide.md) - Learn the interface +- [API Reference](../api-reference.md) - Integrate with Readur +- [Deployment Guide](../deployment.md) - Production setup \ No newline at end of file diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..095c64e --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,500 @@ +# Installation Guide + +Deploy Readur document management system with OCR capabilities using Docker. 
+ +## Prerequisites + +### System Requirements + +#### Minimum Requirements +- **CPU**: 2 cores (x86_64 or ARM64) +- **RAM**: 4GB (system) + 1GB per concurrent OCR job +- **Storage**: 10GB for application + space for documents +- **OS**: Linux, macOS, or Windows with Docker support + +#### Recommended for Production +- **CPU**: 4+ cores for parallel OCR processing +- **RAM**: 8GB minimum, 16GB for heavy workloads +- **Storage**: SSD for database, adequate space for document growth +- **Network**: Stable connection for source synchronization + +### Software Requirements + +```bash +# Check Docker version (20.10+ required) +docker --version + +# Check Docker Compose version (2.0+ required) +docker-compose --version + +# Verify Docker is running +docker ps +``` + +## Installation Methods + +### Quick Start with Docker Compose (Recommended) + +#### 1. Clone the Repository + +```bash +# Clone the repository +git clone https://github.com/readur/readur.git +cd readur + +# Review the configuration +cat docker-compose.yml +``` + +#### 2. Configure Environment + +Create a `.env` file with your settings: + +```bash +# Security - CHANGE THESE! (.env files are not shell-evaluated: paste the output of `openssl rand -hex 32`) +JWT_SECRET=paste_generated_secret_here +DB_PASSWORD=paste_generated_password_here +ADMIN_PASSWORD=your_secure_password_here + +# OCR Configuration +OCR_LANGUAGE=eng # or: deu, fra, spa, etc. +CONCURRENT_OCR_JOBS=2 + +# Storage Paths (create these directories) +UPLOAD_PATH=./data/uploads +WATCH_FOLDER=./data/watch + +# Optional: S3 Storage (instead of local) +# STORAGE_BACKEND=s3 +# S3_BUCKET_NAME=readur-documents +# S3_REGION=us-east-1 +# AWS_ACCESS_KEY_ID=your_key +# AWS_SECRET_ACCESS_KEY=your_secret +``` + +#### 3. Create Required Directories + +```bash +# Create data directories +mkdir -p data/{uploads,watch,postgres} + +# Set appropriate permissions +chmod 755 data/uploads data/watch +``` + +#### 4. 
Start the Application + +```bash +# Start all services +docker-compose up -d + +# Monitor startup logs +docker-compose logs -f + +# Wait for "Server started on 0.0.0.0:8000" +``` + +#### 5. Verify Installation + +```bash +# Check service health +docker-compose ps + +# Test the API endpoint +curl http://localhost:8000/health + +# Expected response: +# {"status":"healthy","database":"connected","ocr":"ready"} +``` + +### Production Deployment with Custom Configuration + +#### 1. Create Production Compose File + +Create `docker-compose.prod.yml`: + +```yaml +services: + readur: + image: readur:latest + ports: + - "8000:8000" + environment: + - DATABASE_URL=postgresql://readur:${DB_PASSWORD}@postgres:5432/readur + - JWT_SECRET=${JWT_SECRET} + - SERVER_ADDRESS=0.0.0.0:8000 + - UPLOAD_PATH=/app/uploads + - CONCURRENT_OCR_JOBS=4 + - MAX_FILE_SIZE_MB=100 + volumes: + - ./data/uploads:/app/uploads + - /mnt/shared/documents:/app/watch:ro + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + deploy: + resources: + limits: + memory: 2G + cpus: '2.0' + + postgres: + image: postgres:15-alpine + environment: + - POSTGRES_USER=readur + - POSTGRES_PASSWORD=${DB_PASSWORD} + - POSTGRES_DB=readur + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U readur"] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + +volumes: + postgres_data: +``` + +#### 2. 
Deploy with Production Settings + +```bash +# Use production configuration +docker-compose -f docker-compose.prod.yml up -d + +# Enable automatic startup +sudo systemctl enable docker +``` + +### Kubernetes Deployment + +#### Using Helm Chart + +```bash +# Add Readur Helm repository +helm repo add readur https://charts.readur.app +helm repo update + +# Install with custom values +helm install readur readur/readur \ + --set image.tag=latest \ + --set postgresql.auth.password=$DB_PASSWORD \ + --set auth.jwtSecret=$JWT_SECRET \ + --set persistence.size=50Gi \ + --set ingress.enabled=true \ + --set ingress.hostname=readur.example.com +``` + +#### Using Raw Manifests + +```bash +# Apply Kubernetes manifests +kubectl create namespace readur +kubectl apply -f https://raw.githubusercontent.com/readur/readur/main/k8s/ + +# Check deployment status +kubectl -n readur get pods +kubectl -n readur get svc +``` + +### Docker Run (Development Only) + +For quick testing without persistence: + +```bash +# Run with in-memory database (data lost on restart) +docker run -d \ + --name readur \ + -p 8000:8000 \ + -e DATABASE_URL=sqlite:///tmp/readur.db \ + -e JWT_SECRET=dev-only-secret \ + readur:latest + +# Access logs +docker logs -f readur +``` + +## Post-Installation Setup + +### Initial Login + +1. **Access the Web Interface** + ``` + http://localhost:8000 + ``` + +2. **Login with Default Credentials** + - Username: `admin` + - Password: `readur2024` + + ⚠️ **Security**: Change the admin password immediately after first login + +3. **Change Admin Password** + - Navigate to Settings → User Management + - Click on admin user + - Set a strong password + - Save changes + +### Essential Configuration + +#### 1. 
Configure OCR Languages + +```bash +# Check available languages +docker exec readur tesseract --list-langs + +# Add additional language packs if needed +docker exec readur apt-get update +docker exec readur apt-get install -y tesseract-ocr-deu # German +docker exec readur apt-get install -y tesseract-ocr-fra # French +docker exec readur apt-get install -y tesseract-ocr-spa # Spanish +``` + +#### 2. Set Up Document Sources + +1. Navigate to Settings → Sources +2. Add your document sources: + - **Local Folders**: Mount volumes in docker-compose.yml + - **WebDAV**: Configure Nextcloud/ownCloud connections + - **S3 Buckets**: Add AWS S3 or compatible storage + +#### 3. Configure User Authentication + +**For Local Users:** +- Settings → User Management → Create User +- Assign appropriate roles (User or Admin) + +**For SSO/OIDC:** +```bash +# Add to your .env file +OIDC_ENABLED=true +OIDC_ISSUER=https://auth.example.com +OIDC_CLIENT_ID=readur-client +OIDC_CLIENT_SECRET=your-secret +``` + +#### 4. 
Adjust Performance Settings + +```bash +# Edit .env for your workload +CONCURRENT_OCR_JOBS=4 # Increase for faster processing +OCR_TIMEOUT_SECONDS=300 # Increase for large documents +MAX_FILE_SIZE_MB=100 # Adjust based on your documents +MEMORY_LIMIT_MB=2048 # Increase for better performance +``` + +## Verification & Health Checks + +### Service Health + +```bash +# Check all services are running +docker-compose ps + +# Expected output: +NAME STATUS PORTS +readur running (healthy) 0.0.0.0:8000->8000/tcp +postgres running (healthy) 5432/tcp +``` + +### API Health Check + +```bash +# Test the health endpoint +curl -s http://localhost:8000/health | jq + +# Expected response: +{ + "status": "healthy", + "version": "2.5.4", + "database": "connected", + "ocr_service": "ready", + "storage": "available", + "queue_size": 0 +} +``` + +### Database Connectivity + +```bash +# Test database connection +docker exec readur-postgres psql -U readur -c "SELECT version();" + +# Check tables were created +docker exec readur-postgres psql -U readur -d readur -c "\dt" +``` + +### OCR Functionality + +```bash +# Test OCR engine +docker exec readur tesseract --version + +# Upload a test document +curl -X POST http://localhost:8000/api/upload \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -F "file=@test.pdf" +``` + +## Troubleshooting Installation + +### Common Issues and Solutions + +#### Port Already in Use + +```bash +# Check what's using port 8000 +sudo lsof -i :8000 + +# Solution 1: Stop the conflicting service +sudo systemctl stop conflicting-service + +# Solution 2: Use a different port +# Edit docker-compose.yml: +ports: + - "8080:8000" # Change 8080 to your preferred port +``` + +#### Database Connection Failed + +```bash +# Check PostgreSQL logs +docker-compose logs postgres + +# Common fixes: +# 1. Ensure PostgreSQL is fully started +docker-compose restart postgres +sleep 10 +docker-compose restart readur + +# 2. 
Reset database (WARNING: Deletes all data) +docker-compose down -v +docker-compose up -d +``` + +#### OCR Processing Stuck + +```bash +# Check OCR queue status +curl http://localhost:8000/api/admin/queue/status + +# Restart OCR workers +docker-compose restart readur + +# Increase timeout for large files +# Add to .env: +OCR_TIMEOUT_SECONDS=600 +``` + +#### Docker Permission Denied + +```bash +# Linux: Add user to docker group +sudo usermod -aG docker $USER +newgrp docker + +# Verify docker access +docker ps +``` + +#### Insufficient Memory + +```bash +# Check container memory usage +docker stats readur + +# Increase memory limits in docker-compose.yml: +deploy: + resources: + limits: + memory: 4G # Increase as needed +``` + +### Getting Help + +1. **Check Logs** + ```bash + # Application logs + docker-compose logs -f readur + + # Database logs + docker-compose logs -f postgres + ``` + +2. **Enable Debug Mode** + ```bash + # Add to .env + LOG_LEVEL=DEBUG + + # Restart services + docker-compose restart + ``` + +3. **Community Support** + - [GitHub Issues](https://github.com/readur/readur/issues) + - [Documentation](https://docs.readur.app) + - [Discord Community](https://discord.gg/readur) + +## Next Steps + +### Essential Reading + +1. **[User Guide](../user-guide.md)** + - Upload and manage documents + - Configure OCR processing + - Master search features + - Organize with labels + +2. **[Configuration Reference](../configuration-reference.md)** + - Complete environment variable list + - Performance tuning + - Storage configuration + - Security settings + +3. **[Deployment Guide](../deployment.md)** + - SSL/TLS setup with reverse proxy + - Backup and restore procedures + - Monitoring and alerts + - Scaling strategies + +### Advanced Setup + +4. **[Sources Guide](../sources-guide.md)** + - WebDAV integration + - S3 bucket synchronization + - Watch folder configuration + - Automated imports + +5. 
**[OIDC Setup](../oidc-setup.md)** + - Enterprise SSO integration + - Azure AD configuration + - Google Workspace setup + - Keycloak integration + +6. **[API Reference](../api-reference.md)** + - REST API endpoints + - Authentication + - Automation examples + - Webhook integration + +### Quick Test + +Upload your first document: + +```bash +# 1. Login to get token +TOKEN=$(curl -s -X POST http://localhost:8000/api/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"readur2024"}' | jq -r .token) + +# 2. Upload a PDF +curl -X POST http://localhost:8000/api/documents/upload \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@sample.pdf" + +# 3. Check OCR status +curl -H "Authorization: Bearer $TOKEN" \ + http://localhost:8000/api/documents +``` \ No newline at end of file diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 0000000..39be212 --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,252 @@ +# Quick Start Guide + +Get Readur running and process your first documents in under 5 minutes. + +## Prerequisites + +Ensure you have Docker and Docker Compose installed: +```bash +docker --version # Should be 20.10+ +docker-compose --version # Should be 2.0+ +``` + +## 5-Minute Setup + +### Step 1: Get Readur + +```bash +# Clone and enter the repository +git clone https://github.com/readur/readur.git +cd readur +``` + +### Step 2: Start Services + +```bash +# Start with default configuration +docker-compose up -d + +# Watch the logs (optional) +docker-compose logs -f +``` + +Wait about 30 seconds for services to initialize. + +### Step 3: Access the Interface + +Open your browser and navigate to: +``` +http://localhost:8000 +``` + +Login with default credentials: +- **Username**: `admin` +- **Password**: `readur2024` + +### Step 4: Upload Your First Document + +#### Via Web Interface + +1. Click the **Upload** button in the top navigation +2. 
Drag and drop a PDF or image file +3. Click **Upload** to start processing +4. Wait for the OCR indicator to turn green + +#### Via API (Optional) + +```bash +# Get authentication token +TOKEN=$(curl -s -X POST http://localhost:8000/api/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"readur2024"}' | jq -r .token) + +# Upload a document +curl -X POST http://localhost:8000/api/documents/upload \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@your-document.pdf" +``` + +### Step 5: Search Your Documents + +Once OCR processing completes (green indicator): + +1. Use the **Search** bar at the top +2. Enter any text from your document +3. Press Enter to see results +4. Click on a result to view the document + +## Common First Tasks + +### Change Admin Password + +**Important**: Do this immediately after installation. + +1. Navigate to **Settings** → **User Management** +2. Click on the admin user +3. Enter a new secure password +4. Click **Save** + +### Add Your First Source + +Automatically import documents from external storage: + +1. Go to **Settings** → **Sources** +2. Click **Add Source** +3. Choose your source type: + - **Local Folder**: For directories on the server + - **WebDAV**: For Nextcloud/ownCloud + - **S3**: For cloud storage +4. Configure connection details +5. Test and save + +### Create Document Labels + +Organize your documents with labels: + +1. Navigate to **Settings** → **Labels** +2. Click **Create Label** +3. Enter a name and choose a color +4. Save the label +5. Apply to documents via: + - Document details page + - Bulk selection + - During upload + +### Set Up Watch Folder + +Monitor a directory for automatic document import: + +```bash +# Create a watch directory +mkdir -p ./data/watch + +# Add to docker-compose.yml volumes: +volumes: + - ./data/watch:/app/watch + +# Restart Readur +docker-compose restart readur +``` + +Drop files into `./data/watch` - they'll be automatically imported. 
+ +## Essential Keyboard Shortcuts + +| Shortcut | Action | +|----------|--------| +| `/` or `Ctrl+K` | Focus search bar | +| `Ctrl+U` | Open upload dialog | +| `Esc` | Close dialogs | +| `G then D` | Go to documents | +| `G then S` | Go to settings | + +## Sample Workflow + +### Legal Document Management + +```bash +# 1. Create label structure +Labels: "Contracts", "Invoices", "Legal", "2024" + +# 2. Set up source folder +Source: /shared/legal-docs (WebDAV) +Sync: Every 30 minutes + +# 3. Configure OCR +Language: English +Quality: High +Concurrent Jobs: 4 + +# 4. Upload initial batch +Select all PDFs → Upload → Apply "2024" label + +# 5. Create saved search +Search: label:Contracts AND date:2024 +Save as: "2024 Contracts" +``` + +### Research Paper Archive + +```bash +# 1. Configure for academic documents +OCR Language: Multiple (eng+deu+fra) +Max File Size: 100MB + +# 2. Create categories +Labels: "Published", "Draft", "Review", "Citations" + +# 3. Set up automated import +Watch Folder: /research/papers +Process: Auto-OCR and label by folder + +# 4. 
Advanced search setup +Boolean search: enabled +Fuzzy matching: 2 (for OCR errors) +``` + +## Performance Tips + +### For Faster OCR Processing + +```bash +# Increase concurrent jobs (if you have CPU cores) +CONCURRENT_OCR_JOBS=8 + +# Optimize for your document types +OCR_LANGUAGE=eng # Single language is faster +ENABLE_PREPROCESSING=false # Skip if documents are clean +``` + +### For Large Document Collections + +```bash +# Use S3 storage instead of local +S3_ENABLED=true +S3_BUCKET_NAME=readur-docs + +# Increase memory limits +MEMORY_LIMIT_MB=4096 + +# Enable compression +ENABLE_COMPRESSION=true +``` + +## Troubleshooting Quick Fixes + +### OCR Not Starting +```bash +# Check the queue +curl http://localhost:8000/api/admin/queue/status + +# Restart OCR workers +docker-compose restart readur +``` + +### Can't Login +```bash +# Reset to default password +docker exec readur python reset_admin_password.py +``` + +### Slow Search +```bash +# Rebuild search index +docker exec readur python rebuild_index.py +``` + +## Next Steps + +Now that you have Readur running: + +1. **[Configure OCR](../multi-language-ocr-guide.md)** for your language +2. **[Set up Sources](../sources-guide.md)** for automated import +3. **[Create Labels](../labels-and-organization.md)** for organization +4. **[Learn Advanced Search](../advanced-search.md)** techniques +5. 
**[Configure Backups](../deployment.md#backup-strategy)** for data safety + +## Getting Help + +- **Documentation**: [Full User Guide](../user-guide.md) +- **API Reference**: [REST API Docs](../api-reference.md) +- **Community**: [GitHub Discussions](https://github.com/readur/readur/discussions) +- **Issues**: [Report Bugs](https://github.com/readur/readur/issues) \ No newline at end of file diff --git a/docs/guide/overview.md b/docs/guide/overview.md new file mode 100644 index 0000000..28b0bd1 --- /dev/null +++ b/docs/guide/overview.md @@ -0,0 +1,192 @@ +# Readur User Guide Overview + +Welcome to the comprehensive guide for using Readur's document management system. This guide covers everything from basic operations to advanced features. + +## Guide Structure + +### Getting Started +- **[Installation](../getting-started/installation.md)** - Deploy Readur with Docker +- **[Quick Start](../getting-started/quickstart.md)** - 5-minute setup guide +- **[Configuration](../getting-started/configuration.md)** - Customize your deployment + +### Core Features +- **[Document Management](../user-guide.md#document-management)** - Upload, organize, and manage documents +- **[OCR Processing](../user-guide.md#ocr-processing)** - Extract text from scanned documents +- **[Search & Discovery](../user-guide.md#search-features)** - Find information quickly +- **[Labels & Organization](../labels-and-organization.md)** - Categorize and structure content + +### Advanced Features +- **[Sources & Sync](../sources-guide.md)** - Automated document import +- **[Advanced Search](../advanced-search.md)** - Complex queries and filters +- **[User Management](../user-management-guide.md)** - Roles and permissions +- **[API Integration](../api-reference.md)** - Programmatic access + +### Administration +- **[Deployment](../deployment.md)** - Production setup and scaling +- **[Monitoring](../health-monitoring-guide.md)** - System health and metrics +- **[Backup & 
Recovery](../deployment.md#backup-strategy)** - Data protection +- **[Migration](../migration-guide.md)** - Upgrades and data migration + +## Quick Navigation + +### By User Type + +#### Document Users +Start here if you need to: +- Upload and organize documents +- Search for specific content +- Export and share documents + +**Key Guides:** +1. [User Guide](../user-guide.md) +2. [Search Features](../advanced-search.md) +3. [Labels Guide](../labels-and-organization.md) + +#### System Administrators +Start here if you need to: +- Deploy and configure Readur +- Manage users and permissions +- Monitor system health +- Set up integrations + +**Key Guides:** +1. [Installation](../getting-started/installation.md) +2. [Configuration](../configuration-reference.md) +3. [User Management](../user-management-guide.md) +4. [Deployment](../deployment.md) + +#### Developers +Start here if you need to: +- Integrate with the API +- Customize Readur +- Contribute to development + +**Key Guides:** +1. [API Reference](../api-reference.md) +2. [Development Setup](../dev/development.md) +3. [Architecture](../dev/architecture.md) + +### By Task + +#### Initial Setup +1. [Install Readur](../getting-started/installation.md) +2. [Configure OCR languages](../multi-language-ocr-guide.md) +3. [Set up authentication](../oidc-setup.md) +4. [Create users](../user-management-guide.md) + +#### Document Processing +1. [Upload documents](../file-upload-guide.md) +2. [Configure OCR](../user-guide.md#ocr-processing) +3. [Monitor processing](../user-guide.md#ocr-status-indicators) +4. [Troubleshoot OCR](../dev/OCR_OPTIMIZATION_GUIDE.md) + +#### Search & Organization +1. [Basic search](../user-guide.md#search-features) +2. [Advanced search syntax](../advanced-search.md) +3. [Create labels](../labels-and-organization.md) +4. [Save searches](../user-guide.md#smart-collections) + +#### Integration & Automation +1. [Set up sources](../sources-guide.md) +2. [Configure watch folders](../WATCH_FOLDER.md) +3. 
[Use the API](../api-reference.md) +4. [Automate workflows](../api-reference.md#automation-examples) + +## Feature Highlights + +### Document Intelligence +- **OCR in 100+ Languages**: Process documents in virtually any language +- **Format Support**: PDF, images, Office documents, and text files +- **Batch Processing**: Handle thousands of documents efficiently +- **Quality Enhancement**: Automatic rotation, deskewing, and preprocessing + +### Search Capabilities +- **Full-Text Search**: Search within document content +- **Boolean Logic**: Complex queries with AND, OR, NOT +- **Fuzzy Matching**: Handle OCR errors and typos +- **Filters**: By date, type, size, labels, and more + +### Organization Tools +- **Flexible Labels**: Create custom categorization systems +- **Bulk Operations**: Apply changes to multiple documents +- **Smart Collections**: Saved searches that update automatically +- **Multiple Views**: List and grid layouts + +### Integration Options +- **REST API**: Complete programmatic access +- **Source Sync**: WebDAV, S3, local folders +- **SSO/OIDC**: Enterprise authentication +- **Webhooks**: Event-driven automation + +## Best Practices + +### Document Organization +1. **Consistent Naming**: Use descriptive, standardized file names +2. **Label Strategy**: Create a hierarchical label structure +3. **Regular Cleanup**: Archive or remove outdated documents +4. **Folder Structure**: Organize source folders logically + +### Performance Optimization +1. **OCR Settings**: Balance quality vs. speed for your needs +2. **Concurrent Jobs**: Match to available CPU cores +3. **Storage Backend**: Use S3 for large collections +4. **Search Indexing**: Schedule reindexing during off-hours + +### Security +1. **Change Defaults**: Always change default passwords +2. **Enable HTTPS**: Use SSL/TLS in production +3. **Regular Backups**: Automate database backups +4. **Access Control**: Use roles and permissions appropriately + +### Workflow Efficiency +1. 
**Bulk Upload**: Process similar documents together +2. **Automation**: Set up sources for automatic import +3. **Saved Searches**: Create shortcuts for common queries +4. **Keyboard Shortcuts**: Learn shortcuts for faster navigation + +## Troubleshooting Resources + +### Common Issues +- [OCR not starting](../user-guide.md#common-issues) +- [Search not finding documents](../advanced-search.md#troubleshooting) +- [Slow performance](../dev/OCR_OPTIMIZATION_GUIDE.md) +- [Upload failures](../file-upload-guide.md#troubleshooting) + +### Getting Help +- **Documentation Search**: Use the search bar above +- **GitHub Issues**: [Report bugs](https://github.com/readur/readur/issues) +- **Community Forum**: [Ask questions](https://github.com/readur/readur/discussions) +- **System Logs**: Check logs for detailed error information + +## Version Information + +This documentation covers Readur version 2.5.4 and later. Key features in recent versions: + +### Version 2.5.4 +- S3 storage backend support +- Enhanced source synchronization +- Per-user watch directories +- Improved health monitoring + +### Version 2.5.0 +- OIDC/SSO authentication +- Advanced search operators +- Bulk operations +- Performance improvements + +## Next Steps + +### New Users +1. Start with the [Quick Start Guide](../getting-started/quickstart.md) +2. Read the [User Guide](../user-guide.md) +3. Learn about [Search Features](../advanced-search.md) + +### Administrators +1. Review [Configuration Options](../configuration-reference.md) +2. Set up [Monitoring](../health-monitoring-guide.md) +3. Plan [Backup Strategy](../deployment.md#backup-strategy) + +### Advanced Users +1. Explore [API Integration](../api-reference.md) +2. Configure [Sources](../sources-guide.md) +3. 
Optimize [OCR Performance](../dev/OCR_OPTIMIZATION_GUIDE.md) \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..e433bb0 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,136 @@ +# Readur Documentation + +## Intelligent Document Management with OCR + +Readur is a powerful document management system that transforms your unstructured documents into a searchable, organized knowledge base. Built for teams and individuals who need to efficiently manage, search, and access large document collections. + +## Key Capabilities + +