From ab962bfd046d2873b932fdbf7de96ba794bde9ef Mon Sep 17 00:00:00 2001 From: perf3ct Date: Sat, 16 Aug 2025 04:53:48 +0000 Subject: [PATCH] feat(docs): add mkdocs requirements feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again feat(mkdocs): try mkdocs again --- .github/workflows/deploy-docs.yml | 153 ++++++++ .gitignore | 12 +- README.md | 2 + docs/README.md | 106 ++++++ docs/dev/ERROR_SYSTEM.md | 5 +- docs/dev/README.md | 1 - docs/dev/architecture.md | 2 +- docs/dev/development.md | 2 +- docs/getting-started/configuration.md | 506 ++++++++++++++++++++++++++ docs/getting-started/installation.md | 500 +++++++++++++++++++++++++ docs/getting-started/quickstart.md | 252 +++++++++++++ docs/guide/overview.md | 192 ++++++++++ docs/index.md | 136 +++++++ docs/javascripts/extra.js | 97 +++++ docs/sources-guide.md | 2 +- docs/stylesheets/extra.css | 119 ++++++ mkdocs.yml | 250 +++++++++++++ requirements.txt | 20 + 18 files changed, 2348 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/deploy-docs.yml create mode 100644 docs/README.md create mode 100644 docs/getting-started/configuration.md create mode 100644 docs/getting-started/installation.md create mode 100644 docs/getting-started/quickstart.md create mode 100644 docs/guide/overview.md create mode 100644 docs/index.md create mode 100644 docs/javascripts/extra.js create mode 100644 docs/stylesheets/extra.css create mode 100644 mkdocs.yml create mode 100644 requirements.txt diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 0000000..28ac08d --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,153 @@ +# GitHub Actions workflow for deploying MkDocs documentation to Cloudflare Pages +# This workflow builds and deploys your MkDocs site when changes are pushed to main +name: Deploy MkDocs Documentation + +on: + # Trigger on push to main branch + push: + branches: + - main + # Only run when docs files change + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'requirements.txt' + - '.github/workflows/deploy-docs.yml' + + # Allow manual triggering from Actions tab + workflow_dispatch: + + # Run on pull requests for preview deployments + pull_request: + branches: + - main + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'requirements.txt' + - '.github/workflows/deploy-docs.yml' + +jobs: + build-and-deploy: + name: Build and Deploy MkDocs + runs-on: ubuntu-latest + timeout-minutes: 10 + + # Required permissions for deployment + permissions: + contents: read + deployments: write + pull-requests: write # For PR preview comments + id-token: write # For OIDC authentication (if needed) + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for git info and mkdocs-git-revision-date plugin + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + cache-dependency-path: 'requirements.txt' + + - name: Install MkDocs and Dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + env: + PIP_DISABLE_PIP_VERSION_CHECK: 1 + + - name: Build MkDocs Site + run: | + # Build with strict mode but ignore the expected README.md warning + # MkDocs always warns when README.md exists alongside index.md + mkdocs build --strict --verbose || { + EXIT_CODE=$? + # Check if the only issue is the README.md conflict + if mkdocs build --strict 2>&1 | grep -q "WARNING.*README.md.*conflicts with.*index.md" && \ + [ $(mkdocs build --strict 2>&1 | grep -c "WARNING") -eq 1 ]; then + echo "βœ… Build succeeded with expected README.md warning" + mkdocs build --verbose + else + echo "❌ Build failed with unexpected errors" + exit $EXIT_CODE + fi + } + + - name: Validate Built Site + run: | + # Basic validation that important files exist + test -f site/index.html || (echo "ERROR: site/index.html not found" && exit 1) + test -f site/sitemap.xml || (echo "ERROR: site/sitemap.xml not found" && exit 1) + test -d site/assets || (echo "ERROR: site/assets directory not found" && exit 1) + echo "βœ… Site validation passed" + + # Deploy using Wrangler (recommended by Cloudflare) + - name: Deploy to Cloudflare Pages + id: deploy + if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' + env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + uses: cloudflare/wrangler-action@v3 + with: + command: pages deploy site --project-name=readur-docs --branch=${{ github.ref_name }} + + # Deploy preview for PRs + - name: Deploy Preview to Cloudflare Pages + id: preview-deployment + if: github.event_name == 'pull_request' + env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + uses: cloudflare/wrangler-action@v3 + with: + command: pages deploy site --project-name=readur-docs --branch=pr-${{ github.event.pull_request.number }} + + # Post deployment URL as PR comment + - name: Comment PR with Preview URL + if: github.event_name == 'pull_request' + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const prNumber = context.issue.number; + // Construct preview URL based on Cloudflare Pages pattern + // Note: Actual URL may vary based on Cloudflare configuration + const previewUrl = `https://pr-${prNumber}.readur-docs.pages.dev`; + const mainUrl = 'https://readur.app'; + + // Check if we already commented + const comments = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber + }); + + const botComment = comments.data.find(comment => + comment.user.type === 'Bot' && + comment.body.includes('Documentation preview is ready') + ); + + const commentBody = `πŸ“š Documentation preview is ready!\n\nπŸ”— Preview URL: ${previewUrl}\nπŸ“– Production URL: ${mainUrl}\n\nβœ… All checks passed\n\n_This preview will be updated automatically with new commits._`; + + if (botComment) { + // Update existing comment + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: commentBody + }); + } else { + // Create new comment + await github.rest.issues.createComment({ + issue_number: prNumber, + owner: context.repo.owner, + repo: context.repo.repo, + body: commentBody + }); + } \ No newline at end of file diff --git a/.gitignore b/.gitignore index ef18b05..38550a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,22 @@ +# Rust target/ + +# Node client/node_modules/ node_modules/ .env assets/ frontend/dist/ -.claude/settings.local.json # This file is used to store the local Claude settings. + +# Python +venv/ +site/ + +# Testing readur_uploads/ readur_watch/ test-results/ uploads/ + +# Misc. .claude/settings.local.json diff --git a/README.md b/README.md index 5b29428..35c2069 100644 --- a/README.md +++ b/README.md @@ -140,3 +140,5 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file **Made with ❀️ and β˜• by the Readur team** + + diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..8e29c7e --- /dev/null +++ b/docs/README.md @@ -0,0 +1,106 @@ +# Readur Documentation + +This directory contains the source files for the Readur documentation site, built with MkDocs and Material for MkDocs. + +## Local Development + +### Prerequisites + +- Python 3.8+ +- pip + +### Setup + +1. Install dependencies: +```bash +pip install -r ../requirements.txt +``` + +2. Start the development server: +```bash +mkdocs serve +``` + +The documentation will be available at `http://localhost:8000`. + +### Building + +To build the static site: +```bash +mkdocs build +``` + +The built site will be in the `site/` directory. + +## Deployment + +The documentation is automatically deployed to [readur.app](https://readur.app) via GitHub Actions when changes are pushed to the main branch. + +### Manual Deployment + +If you need to deploy manually: + +1. Build the site: +```bash +mkdocs build +``` + +2. Deploy to Cloudflare Pages: +```bash +wrangler pages deploy site --project-name=readur-docs +``` + +## Structure + +- `docs/` - Documentation source files (Markdown) +- `mkdocs.yml` - MkDocs configuration +- `requirements.txt` - Python dependencies +- `overrides/` - Theme customizations +- `stylesheets/` - Custom CSS +- `javascripts/` - Custom JavaScript + +## Writing Documentation + +### Adding New Pages + +1. Create a new `.md` file in the appropriate directory +2. Add the page to the navigation in `mkdocs.yml` +3. Use Material for MkDocs features for rich content + +### Markdown Extensions + +We use several markdown extensions for enhanced functionality: + +- **Admonitions** - For notes, warnings, tips +- **Code blocks** - With syntax highlighting +- **Tabs** - For grouped content +- **Tables** - For structured data +- **Emoji** - For visual elements + +Example: +```markdown +!!! note "Important" + This is an important note. + +=== "Tab 1" + Content for tab 1 + +=== "Tab 2" + Content for tab 2 +``` + +## Contributing + +Please follow these guidelines when contributing to the documentation: + +1. Use clear, concise language +2. Include code examples where appropriate +3. Test all links and code samples +4. Run `mkdocs build --strict` before submitting +5. Update the navigation in `mkdocs.yml` for new pages + +## Resources + +- [MkDocs Documentation](https://www.mkdocs.org/) +- [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) +- [Markdown Guide](https://www.markdownguide.org/) \ No newline at end of file diff --git a/docs/dev/ERROR_SYSTEM.md b/docs/dev/ERROR_SYSTEM.md index 687dffd..ba45a7d 100644 --- a/docs/dev/ERROR_SYSTEM.md +++ b/docs/dev/ERROR_SYSTEM.md @@ -612,7 +612,4 @@ Planned improvements to the error system: ## References -- [Error Management Documentation](./ERROR_MANAGEMENT.md) -- [API Error Response Standards](../api-reference.md#error-responses) -- [Frontend Error Handling Guide](../../frontend/ERROR_HANDLING.md) -- [Monitoring and Observability](./MONITORING.md) \ No newline at end of file +- [API Reference](../api-reference.md) \ No newline at end of file diff --git a/docs/dev/README.md b/docs/dev/README.md index 911cd16..9ebd3cc 100644 --- a/docs/dev/README.md +++ b/docs/dev/README.md @@ -34,7 +34,6 @@ This directory contains technical documentation for developers working on Readur - [Configuration Reference](../configuration-reference.md) - Complete configuration options - [User Guide](../user-guide.md) - How to use Readur features - [API Reference](../api-reference.md) - REST API documentation -- [New Features in 2.5.4](../new-features-2.5.4.md) - Latest features and improvements ## 🀝 Contributing diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md index 9a3f644..ea6a280 100644 --- a/docs/dev/architecture.md +++ b/docs/dev/architecture.md @@ -344,7 +344,7 @@ Potential service boundaries: ## Next Steps -- Review [deployment options](deployment.md) +- Review [deployment options](../deployment.md) - Explore [performance tuning](OCR_OPTIMIZATION_GUIDE.md) - Understand [database design](DATABASE_GUARDRAILS.md) - Learn about [testing strategy](TESTING.md) \ No newline at end of file diff --git a/docs/dev/development.md b/docs/dev/development.md index 5f7ff1a..f3c986b 100644 --- a/docs/dev/development.md +++ b/docs/dev/development.md @@ -268,7 +268,7 @@ Style preferences: ## Contributing -We welcome contributions! Please see our [Contributing Guide](../CONTRIBUTING.md) for details. +We welcome contributions! ### Getting Started diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..29a8b38 --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,506 @@ +# Configuration Guide + +Configure Readur for your specific needs and optimize for your workload. + +## Configuration Overview + +Readur uses environment variables for configuration, making it easy to deploy in containerized environments. Configuration can be set through: + +1. **Environment variables** - Direct system environment +2. **`.env` file** - Docker Compose automatically loads this +3. **`docker-compose.yml`** - Directly in the compose file +4. **Kubernetes ConfigMaps** - For K8s deployments + +## Essential Configuration + +### Security Settings + +These MUST be changed from defaults in production: + +```bash +# Generate secure secrets +JWT_SECRET=$(openssl rand -base64 32) +DB_PASSWORD=$(openssl rand -base64 32) + +# Set admin password +ADMIN_PASSWORD=your_secure_password_here + +# Enable HTTPS (reverse proxy recommended) +FORCE_HTTPS=true +SECURE_COOKIES=true +``` + +### Database Configuration + +```bash +# PostgreSQL connection +DATABASE_URL=postgresql://readur:${DB_PASSWORD}@postgres:5432/readur + +# Connection pool settings +DB_POOL_SIZE=20 +DB_MAX_OVERFLOW=40 +DB_POOL_TIMEOUT=30 + +# PostgreSQL specific optimizations +POSTGRES_SHARED_BUFFERS=256MB +POSTGRES_EFFECTIVE_CACHE_SIZE=1GB +``` + +### Storage Configuration + +#### Local Storage (Default) + +```bash +# File storage paths +UPLOAD_PATH=/app/uploads +TEMP_PATH=/app/temp + +# Size limits +MAX_FILE_SIZE_MB=50 +TOTAL_STORAGE_LIMIT_GB=100 + +# File types +ALLOWED_FILE_TYPES=pdf,png,jpg,jpeg,tiff,bmp,gif,txt,rtf,doc,docx +``` + +#### S3 Storage (Scalable) + +```bash +# Enable S3 backend +STORAGE_BACKEND=s3 +S3_ENABLED=true + +# AWS S3 +S3_BUCKET_NAME=readur-documents +S3_REGION=us-east-1 +AWS_ACCESS_KEY_ID=your_access_key +AWS_SECRET_ACCESS_KEY=your_secret_key + +# Or S3-compatible (MinIO, Wasabi, etc.) +S3_ENDPOINT=https://s3.example.com +S3_PATH_STYLE=true # For MinIO +``` + +## OCR Configuration + +### Language Settings + +```bash +# Single language (fastest) +OCR_LANGUAGE=eng + +# Multiple languages +OCR_LANGUAGE=eng+deu+fra+spa + +# Available languages (partial list): +# eng - English +# deu - German (Deutsch) +# fra - French (FranΓ§ais) +# spa - Spanish (EspaΓ±ol) +# ita - Italian (Italiano) +# por - Portuguese +# rus - Russian +# chi_sim - Chinese Simplified +# jpn - Japanese +# ara - Arabic +``` + +### Performance Tuning + +```bash +# Concurrent processing +CONCURRENT_OCR_JOBS=4 # Match CPU cores +OCR_WORKER_THREADS=2 # Threads per job + +# Timeouts and limits +OCR_TIMEOUT_SECONDS=300 +OCR_MAX_PAGES=500 +MAX_FILE_SIZE_MB=100 + +# Memory management +OCR_MEMORY_LIMIT_MB=512 # Per job +ENABLE_MEMORY_PROFILING=false + +# Processing options +OCR_DPI=300 # Higher = better quality, slower +ENABLE_PREPROCESSING=true +ENABLE_AUTO_ROTATION=true +ENABLE_DESKEW=true +``` + +### Quality vs Speed + +#### High Quality (Slow) +```bash +OCR_QUALITY_PRESET=high +OCR_DPI=300 +ENABLE_PREPROCESSING=true +ENABLE_DESKEW=true +ENABLE_AUTO_ROTATION=true +OCR_ENGINE_MODE=3 # LSTM only +``` + +#### Balanced (Default) +```bash +OCR_QUALITY_PRESET=balanced +OCR_DPI=200 +ENABLE_PREPROCESSING=true +ENABLE_DESKEW=false +ENABLE_AUTO_ROTATION=true +OCR_ENGINE_MODE=2 # LSTM + Legacy +``` + +#### Fast (Lower Quality) +```bash +OCR_QUALITY_PRESET=fast +OCR_DPI=150 +ENABLE_PREPROCESSING=false +ENABLE_DESKEW=false +ENABLE_AUTO_ROTATION=false +OCR_ENGINE_MODE=0 # Legacy only +``` + +## Source Synchronization + +### Watch Folders + +```bash +# Global watch folder +WATCH_FOLDER=/app/watch +WATCH_INTERVAL_SECONDS=60 +FILE_STABILITY_CHECK_MS=2000 + +# Per-user watch folders +ENABLE_PER_USER_WATCH=true +USER_WATCH_BASE_DIR=/app/user_watch + +# Processing rules +WATCH_PROCESS_HIDDEN_FILES=false +WATCH_RECURSIVE=true +WATCH_MAX_DEPTH=5 +DELETE_AFTER_IMPORT=false +``` + +### WebDAV Sources + +```bash +# Default WebDAV settings +WEBDAV_TIMEOUT_SECONDS=30 +WEBDAV_MAX_RETRIES=3 +WEBDAV_CHUNK_SIZE_MB=10 +WEBDAV_VERIFY_SSL=true +``` + +### S3 Sources + +```bash +# S3 sync settings +S3_SYNC_INTERVAL_MINUTES=30 +S3_BATCH_SIZE=100 +S3_MULTIPART_THRESHOLD_MB=100 +S3_CONCURRENT_DOWNLOADS=4 +``` + +## Authentication & Security + +### Local Authentication + +```bash +# Password policy +PASSWORD_MIN_LENGTH=12 +PASSWORD_REQUIRE_UPPERCASE=true +PASSWORD_REQUIRE_NUMBERS=true +PASSWORD_REQUIRE_SPECIAL=true + +# Session management +SESSION_TIMEOUT_MINUTES=60 +REMEMBER_ME_DURATION_DAYS=30 +MAX_LOGIN_ATTEMPTS=5 +LOCKOUT_DURATION_MINUTES=15 +``` + +### OIDC/SSO Configuration + +```bash +# Enable OIDC +OIDC_ENABLED=true + +# Provider configuration +OIDC_ISSUER=https://login.microsoftonline.com/tenant-id/v2.0 +OIDC_CLIENT_ID=your-client-id +OIDC_CLIENT_SECRET=your-client-secret +OIDC_REDIRECT_URI=https://readur.example.com/auth/callback + +# Optional settings +OIDC_SCOPE=openid profile email +OIDC_USER_CLAIM=email +OIDC_GROUPS_CLAIM=groups +OIDC_ADMIN_GROUP=readur-admins + +# Auto-provisioning +OIDC_AUTO_CREATE_USERS=true +OIDC_DEFAULT_ROLE=user +``` + +## Search Configuration + +### Search Engine + +```bash +# PostgreSQL Full-Text Search settings +SEARCH_LANGUAGE=english +SEARCH_RANKING_NORMALIZATION=32 +ENABLE_PHRASE_SEARCH=true +ENABLE_FUZZY_SEARCH=true +FUZZY_SEARCH_DISTANCE=2 + +# Search results +SEARCH_RESULTS_PER_PAGE=20 +SEARCH_SNIPPET_LENGTH=200 +SEARCH_HIGHLIGHT_TAG=mark +``` + +### Search Performance + +```bash +# Index management +AUTO_REINDEX=true +REINDEX_SCHEDULE=0 3 * * * # 3 AM daily +SEARCH_CACHE_TTL_SECONDS=300 +SEARCH_CACHE_SIZE_MB=100 + +# Query optimization +MAX_SEARCH_TERMS=10 +ENABLE_SEARCH_SUGGESTIONS=true +SUGGESTION_MIN_LENGTH=3 +``` + +## Monitoring & Logging + +### Logging Configuration + +```bash +# Log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL=INFO +LOG_FORMAT=json # or text + +# Log outputs +LOG_TO_FILE=true +LOG_FILE_PATH=/app/logs/readur.log +LOG_FILE_MAX_SIZE_MB=100 +LOG_FILE_BACKUP_COUNT=10 + +# Detailed logging +LOG_SQL_QUERIES=false +LOG_HTTP_REQUESTS=true +LOG_OCR_DETAILS=false +``` + +### Health Monitoring + +```bash +# Health check endpoints +HEALTH_CHECK_ENABLED=true +HEALTH_CHECK_PATH=/health +METRICS_ENABLED=true +METRICS_PATH=/metrics + +# Alerting thresholds +ALERT_QUEUE_SIZE=100 +ALERT_OCR_FAILURE_RATE=0.1 +ALERT_DISK_USAGE_PERCENT=80 +ALERT_MEMORY_USAGE_PERCENT=90 +``` + +## Performance Optimization + +### System Resources + +```bash +# Memory limits +MEMORY_LIMIT_MB=2048 +MEMORY_SOFT_LIMIT_MB=1536 + +# CPU settings +CPU_CORES=4 +WORKER_PROCESSES=auto # or specific number +WORKER_THREADS=2 + +# Connection limits +MAX_CONNECTIONS=100 +CONNECTION_TIMEOUT=30 +``` + +### Caching + +```bash +# Enable caching layers +ENABLE_CACHE=true +CACHE_TYPE=redis # or memory + +# Redis cache (if used) +REDIS_URL=redis://redis:6379/0 +REDIS_MAX_CONNECTIONS=50 + +# Cache TTLs +DOCUMENT_CACHE_TTL=3600 +SEARCH_CACHE_TTL=300 +USER_CACHE_TTL=1800 +``` + +### Queue Management + +```bash +# Background job processing +QUEUE_TYPE=database # or redis +MAX_QUEUE_SIZE=1000 +QUEUE_POLL_INTERVAL=5 + +# Job priorities +OCR_JOB_PRIORITY=5 +SYNC_JOB_PRIORITY=3 +CLEANUP_JOB_PRIORITY=1 + +# Retry configuration +MAX_JOB_RETRIES=3 +RETRY_DELAY_SECONDS=60 +EXPONENTIAL_BACKOFF=true +``` + +## Environment-Specific Configurations + +### Development + +```bash +# .env.development +DEBUG=true +LOG_LEVEL=DEBUG +RELOAD_ON_CHANGE=true +CONCURRENT_OCR_JOBS=1 +DISABLE_RATE_LIMITING=true +``` + +### Staging + +```bash +# .env.staging +DEBUG=false +LOG_LEVEL=INFO +CONCURRENT_OCR_JOBS=2 +ENABLE_PROFILING=true +MOCK_EXTERNAL_SERVICES=true +``` + +### Production + +```bash +# .env.production +DEBUG=false +LOG_LEVEL=WARNING +CONCURRENT_OCR_JOBS=8 +ENABLE_RATE_LIMITING=true +SECURE_COOKIES=true +FORCE_HTTPS=true +``` + +## Configuration Validation + +### Check Configuration + +```bash +# Validate current configuration +docker exec readur python validate_config.py + +# Test specific settings +docker exec readur python -c " +from config import settings +print(f'OCR Languages: {settings.OCR_LANGUAGE}') +print(f'Storage Backend: {settings.STORAGE_BACKEND}') +print(f'Max File Size: {settings.MAX_FILE_SIZE_MB}MB') +" +``` + +### Common Validation Errors + +```bash +# Missing required S3 credentials +ERROR: S3_ENABLED=true but S3_BUCKET_NAME not set + +# Invalid language code +ERROR: OCR_LANGUAGE 'xyz' not supported + +# Insufficient resources +WARNING: CONCURRENT_OCR_JOBS=8 but only 2 CPU cores available +``` + +## Configuration Best Practices + +### Security + +1. **Never commit secrets** - Use `.env` files and add to `.gitignore` +2. **Rotate secrets regularly** - Especially JWT_SECRET +3. **Use strong passwords** - Minimum 16 characters for admin +4. **Enable HTTPS** - Always in production +5. **Restrict file types** - Only allow necessary formats + +### Performance + +1. **Match workers to cores** - CONCURRENT_OCR_JOBS ≀ CPU cores +2. **Monitor memory usage** - Adjust limits based on usage +3. **Use S3 for scale** - Local storage limited by disk +4. **Enable caching** - Reduces database load +5. **Tune PostgreSQL** - Adjust shared_buffers and work_mem + +### Reliability + +1. **Set reasonable timeouts** - Prevent hanging jobs +2. **Configure retries** - Handle transient failures +3. **Enable health checks** - For load balancer integration +4. **Set up logging** - Essential for troubleshooting +5. **Regular backups** - Automate database backups + +## Configuration Examples + +### Small Office (5-10 users) + +```bash +# Minimal resources, local storage +CONCURRENT_OCR_JOBS=2 +MEMORY_LIMIT_MB=1024 +STORAGE_BACKEND=local +MAX_FILE_SIZE_MB=20 +SEARCH_CACHE_TTL=600 +``` + +### Medium Business (50-100 users) + +```bash +# Balanced performance, S3 storage +CONCURRENT_OCR_JOBS=4 +MEMORY_LIMIT_MB=4096 +STORAGE_BACKEND=s3 +MAX_FILE_SIZE_MB=50 +ENABLE_CACHE=true +CACHE_TYPE=redis +``` + +### Enterprise (500+ users) + +```bash +# High performance, full features +CONCURRENT_OCR_JOBS=16 +MEMORY_LIMIT_MB=16384 +STORAGE_BACKEND=s3 +MAX_FILE_SIZE_MB=100 +ENABLE_CACHE=true +CACHE_TYPE=redis +QUEUE_TYPE=redis +OIDC_ENABLED=true +``` + +## Next Steps + +- [Installation Guide](installation.md) - Deploy Readur +- [User Guide](../user-guide.md) - Learn the interface +- [API Reference](../api-reference.md) - Integrate with Readur +- [Deployment Guide](../deployment.md) - Production setup \ No newline at end of file diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..095c64e --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,500 @@ +# Installation Guide + +Deploy Readur document management system with OCR capabilities using Docker. + +## Prerequisites + +### System Requirements + +#### Minimum Requirements +- **CPU**: 2 cores (x86_64 or ARM64) +- **RAM**: 4GB (system) + 1GB per concurrent OCR job +- **Storage**: 10GB for application + space for documents +- **OS**: Linux, macOS, or Windows with Docker support + +#### Recommended for Production +- **CPU**: 4+ cores for parallel OCR processing +- **RAM**: 8GB minimum, 16GB for heavy workloads +- **Storage**: SSD for database, adequate space for document growth +- **Network**: Stable connection for source synchronization + +### Software Requirements + +```bash +# Check Docker version (20.10+ required) +docker --version + +# Check Docker Compose version (2.0+ required) +docker-compose --version + +# Verify Docker is running +docker ps +``` + +## Installation Methods + +### Quick Start with Docker Compose (Recommended) + +#### 1. Clone the Repository + +```bash +# Clone the repository +git clone https://github.com/readur/readur.git +cd readur + +# Review the configuration +cat docker-compose.yml +``` + +#### 2. Configure Environment + +Create a `.env` file with your settings: + +```bash +# Security - CHANGE THESE! +JWT_SECRET=$(openssl rand -base64 32) +DB_PASSWORD=$(openssl rand -base64 32) +ADMIN_PASSWORD=your_secure_password_here + +# OCR Configuration +OCR_LANGUAGE=eng # or: deu, fra, spa, etc. +CONCURRENT_OCR_JOBS=2 + +# Storage Paths (create these directories) +UPLOAD_PATH=./data/uploads +WATCH_FOLDER=./data/watch + +# Optional: S3 Storage (instead of local) +# STORAGE_BACKEND=s3 +# S3_BUCKET=readur-documents +# S3_REGION=us-east-1 +# AWS_ACCESS_KEY_ID=your_key +# AWS_SECRET_ACCESS_KEY=your_secret +``` + +#### 3. Create Required Directories + +```bash +# Create data directories +mkdir -p data/{uploads,watch,postgres} + +# Set appropriate permissions +chmod 755 data/uploads data/watch +``` + +#### 4. Start the Application + +```bash +# Start all services +docker-compose up -d + +# Monitor startup logs +docker-compose logs -f + +# Wait for "Server started on 0.0.0.0:8000" +``` + +#### 5. Verify Installation + +```bash +# Check service health +docker-compose ps + +# Test the API endpoint +curl http://localhost:8000/health + +# Expected response: +# {"status":"healthy","database":"connected","ocr":"ready"} +``` + +### Production Deployment with Custom Configuration + +#### 1. Create Production Compose File + +Create `docker-compose.prod.yml`: + +```yaml +services: + readur: + image: readur:latest + ports: + - "8000:8000" + environment: + - DATABASE_URL=postgresql://readur:${DB_PASSWORD}@postgres:5432/readur + - JWT_SECRET=${JWT_SECRET} + - SERVER_ADDRESS=0.0.0.0:8000 + - UPLOAD_PATH=/app/uploads + - CONCURRENT_OCR_JOBS=4 + - MAX_FILE_SIZE_MB=100 + volumes: + - ./data/uploads:/app/uploads + - /mnt/shared/documents:/app/watch:ro + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + deploy: + resources: + limits: + memory: 2G + cpus: '2.0' + + postgres: + image: postgres:15-alpine + environment: + - POSTGRES_USER=readur + - POSTGRES_PASSWORD=${DB_PASSWORD} + - POSTGRES_DB=readur + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U readur"] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + +volumes: + postgres_data: +``` + +#### 2. Deploy with Production Settings + +```bash +# Use production configuration +docker-compose -f docker-compose.prod.yml up -d + +# Enable automatic startup +sudo systemctl enable docker +``` + +### Kubernetes Deployment + +#### Using Helm Chart + +```bash +# Add Readur Helm repository +helm repo add readur https://charts.readur.app +helm repo update + +# Install with custom values +helm install readur readur/readur \ + --set image.tag=latest \ + --set postgresql.auth.password=$DB_PASSWORD \ + --set auth.jwtSecret=$JWT_SECRET \ + --set persistence.size=50Gi \ + --set ingress.enabled=true \ + --set ingress.hostname=readur.example.com +``` + +#### Using Raw Manifests + +```bash +# Apply Kubernetes manifests +kubectl create namespace readur +kubectl apply -f https://raw.githubusercontent.com/readur/readur/main/k8s/ + +# Check deployment status +kubectl -n readur get pods +kubectl -n readur get svc +``` + +### Docker Run (Development Only) + +For quick testing without persistence: + +```bash +# Run with in-memory database (data lost on restart) +docker run -d \ + --name readur \ + -p 8000:8000 \ + -e DATABASE_URL=sqlite:///tmp/readur.db \ + -e JWT_SECRET=dev-only-secret \ + readur:latest + +# Access logs +docker logs -f readur +``` + +## Post-Installation Setup + +### Initial Login + +1. **Access the Web Interface** + ``` + http://localhost:8000 + ``` + +2. **Login with Default Credentials** + - Username: `admin` + - Password: `readur2024` + + ⚠️ **Security**: Change the admin password immediately after first login + +3. **Change Admin Password** + - Navigate to Settings β†’ User Management + - Click on admin user + - Set a strong password + - Save changes + +### Essential Configuration + +#### 1. Configure OCR Languages + +```bash +# Check available languages +docker exec readur tesseract --list-langs + +# Add additional language packs if needed +docker exec readur apt-get update +docker exec readur apt-get install -y tesseract-ocr-deu # German +docker exec readur apt-get install -y tesseract-ocr-fra # French +docker exec readur apt-get install -y tesseract-ocr-spa # Spanish +``` + +#### 2. Set Up Document Sources + +1. Navigate to Settings β†’ Sources +2. Add your document sources: + - **Local Folders**: Mount volumes in docker-compose.yml + - **WebDAV**: Configure Nextcloud/ownCloud connections + - **S3 Buckets**: Add AWS S3 or compatible storage + +#### 3. Configure User Authentication + +**For Local Users:** +- Settings β†’ User Management β†’ Create User +- Assign appropriate roles (User or Admin) + +**For SSO/OIDC:** +```bash +# Add to your .env file +OIDC_ENABLED=true +OIDC_ISSUER=https://auth.example.com +OIDC_CLIENT_ID=readur-client +OIDC_CLIENT_SECRET=your-secret +``` + +#### 4. Adjust Performance Settings + +```bash +# Edit .env for your workload +CONCURRENT_OCR_JOBS=4 # Increase for faster processing +OCR_TIMEOUT_SECONDS=300 # Increase for large documents +MAX_FILE_SIZE_MB=100 # Adjust based on your documents +MEMORY_LIMIT_MB=2048 # Increase for better performance +``` + +## Verification & Health Checks + +### Service Health + +```bash +# Check all services are running +docker-compose ps + +# Expected output: +NAME STATUS PORTS +readur running (healthy) 0.0.0.0:8000->8000/tcp +postgres running (healthy) 5432/tcp +``` + +### API Health Check + +```bash +# Test the health endpoint +curl -s http://localhost:8000/health | jq + +# Expected response: +{ + "status": "healthy", + "version": "2.5.4", + "database": "connected", + "ocr_service": "ready", + "storage": "available", + "queue_size": 0 +} +``` + +### Database Connectivity + +```bash +# Test database connection +docker exec readur-postgres psql -U readur -c "SELECT version();" + +# Check tables were created +docker exec readur-postgres psql -U readur -d readur -c "\dt" +``` + +### OCR Functionality + +```bash +# Test OCR engine +docker exec readur tesseract --version + +# Upload a test document +curl -X POST http://localhost:8000/api/upload \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -F "file=@test.pdf" +``` + +## Troubleshooting Installation + +### Common Issues and Solutions + +#### Port Already in Use + +```bash +# Check what's using port 8000 +sudo lsof -i :8000 + +# Solution 1: Stop the conflicting service +sudo systemctl stop conflicting-service + +# Solution 2: Use a different port +# Edit docker-compose.yml: +ports: + - "8080:8000" # Change 8080 to your preferred port +``` + +#### Database Connection Failed + +```bash +# Check PostgreSQL logs +docker-compose logs postgres + +# Common fixes: +# 1. Ensure PostgreSQL is fully started +docker-compose restart postgres +sleep 10 +docker-compose restart readur + +# 2. Reset database (WARNING: Deletes all data) +docker-compose down -v +docker-compose up -d +``` + +#### OCR Processing Stuck + +```bash +# Check OCR queue status +curl http://localhost:8000/api/admin/queue/status + +# Restart OCR workers +docker-compose restart readur + +# Increase timeout for large files +# Add to .env: +OCR_TIMEOUT_SECONDS=600 +``` + +#### Docker Permission Denied + +```bash +# Linux: Add user to docker group +sudo usermod -aG docker $USER +newgrp docker + +# Verify docker access +docker ps +``` + +#### Insufficient Memory + +```bash +# Check container memory usage +docker stats readur + +# Increase memory limits in docker-compose.yml: +deploy: + resources: + limits: + memory: 4G # Increase as needed +``` + +### Getting Help + +1. **Check Logs** + ```bash + # Application logs + docker-compose logs -f readur + + # Database logs + docker-compose logs -f postgres + ``` + +2. **Enable Debug Mode** + ```bash + # Add to .env + LOG_LEVEL=DEBUG + + # Restart services + docker-compose restart + ``` + +3. **Community Support** + - [GitHub Issues](https://github.com/readur/readur/issues) + - [Documentation](https://docs.readur.app) + - [Discord Community](https://discord.gg/readur) + +## Next Steps + +### Essential Reading + +1. **[User Guide](../user-guide.md)** + - Upload and manage documents + - Configure OCR processing + - Master search features + - Organize with labels + +2. **[Configuration Reference](../configuration-reference.md)** + - Complete environment variable list + - Performance tuning + - Storage configuration + - Security settings + +3. **[Deployment Guide](../deployment.md)** + - SSL/TLS setup with reverse proxy + - Backup and restore procedures + - Monitoring and alerts + - Scaling strategies + +### Advanced Setup + +4. **[Sources Guide](../sources-guide.md)** + - WebDAV integration + - S3 bucket synchronization + - Watch folder configuration + - Automated imports + +5. **[OIDC Setup](../oidc-setup.md)** + - Enterprise SSO integration + - Azure AD configuration + - Google Workspace setup + - Keycloak integration + +6. **[API Reference](../api-reference.md)** + - REST API endpoints + - Authentication + - Automation examples + - Webhook integration + +### Quick Test + +Upload your first document: + +```bash +# 1. Login to get token +TOKEN=$(curl -s -X POST http://localhost:8000/api/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"readur2024"}' | jq -r .token) + +# 2. Upload a PDF +curl -X POST http://localhost:8000/api/documents/upload \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@sample.pdf" + +# 3. Check OCR status +curl -H "Authorization: Bearer $TOKEN" \ + http://localhost:8000/api/documents +``` \ No newline at end of file diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 0000000..39be212 --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,252 @@ +# Quick Start Guide + +Get Readur running and process your first documents in under 5 minutes. + +## Prerequisites + +Ensure you have Docker and Docker Compose installed: +```bash +docker --version # Should be 20.10+ +docker-compose --version # Should be 2.0+ +``` + +## 5-Minute Setup + +### Step 1: Get Readur + +```bash +# Clone and enter the repository +git clone https://github.com/readur/readur.git +cd readur +``` + +### Step 2: Start Services + +```bash +# Start with default configuration +docker-compose up -d + +# Watch the logs (optional) +docker-compose logs -f +``` + +Wait about 30 seconds for services to initialize. + +### Step 3: Access the Interface + +Open your browser and navigate to: +``` +http://localhost:8000 +``` + +Login with default credentials: +- **Username**: `admin` +- **Password**: `readur2024` + +### Step 4: Upload Your First Document + +#### Via Web Interface + +1. Click the **Upload** button in the top navigation +2. Drag and drop a PDF or image file +3. Click **Upload** to start processing +4. Wait for the OCR indicator to turn green + +#### Via API (Optional) + +```bash +# Get authentication token +TOKEN=$(curl -s -X POST http://localhost:8000/api/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"readur2024"}' | jq -r .token) + +# Upload a document +curl -X POST http://localhost:8000/api/documents/upload \ + -H "Authorization: Bearer $TOKEN" \ + -F "file=@your-document.pdf" +``` + +### Step 5: Search Your Documents + +Once OCR processing completes (green indicator): + +1. Use the **Search** bar at the top +2. Enter any text from your document +3. Press Enter to see results +4. Click on a result to view the document + +## Common First Tasks + +### Change Admin Password + +**Important**: Do this immediately after installation. + +1. Navigate to **Settings** β†’ **User Management** +2. Click on the admin user +3. Enter a new secure password +4. Click **Save** + +### Add Your First Source + +Automatically import documents from external storage: + +1. Go to **Settings** β†’ **Sources** +2. Click **Add Source** +3. Choose your source type: + - **Local Folder**: For directories on the server + - **WebDAV**: For Nextcloud/ownCloud + - **S3**: For cloud storage +4. Configure connection details +5. Test and save + +### Create Document Labels + +Organize your documents with labels: + +1. Navigate to **Settings** β†’ **Labels** +2. Click **Create Label** +3. Enter a name and choose a color +4. Save the label +5. Apply to documents via: + - Document details page + - Bulk selection + - During upload + +### Set Up Watch Folder + +Monitor a directory for automatic document import: + +```bash +# Create a watch directory +mkdir -p ./data/watch + +# Add to docker-compose.yml volumes: +volumes: + - ./data/watch:/app/watch + +# Restart Readur +docker-compose restart readur +``` + +Drop files into `./data/watch` - they'll be automatically imported. + +## Essential Keyboard Shortcuts + +| Shortcut | Action | +|----------|--------| +| `/` or `Ctrl+K` | Focus search bar | +| `Ctrl+U` | Open upload dialog | +| `Esc` | Close dialogs | +| `G then D` | Go to documents | +| `G then S` | Go to settings | + +## Sample Workflow + +### Legal Document Management + +```bash +# 1. Create label structure +Labels: "Contracts", "Invoices", "Legal", "2024" + +# 2. Set up source folder +Source: /shared/legal-docs (WebDAV) +Sync: Every 30 minutes + +# 3. Configure OCR +Language: English +Quality: High +Concurrent Jobs: 4 + +# 4. Upload initial batch +Select all PDFs β†’ Upload β†’ Apply "2024" label + +# 5. Create saved search +Search: label:Contracts AND date:2024 +Save as: "2024 Contracts" +``` + +### Research Paper Archive + +```bash +# 1. Configure for academic documents +OCR Language: Multiple (eng+deu+fra) +Max File Size: 100MB + +# 2. Create categories +Labels: "Published", "Draft", "Review", "Citations" + +# 3. Set up automated import +Watch Folder: /research/papers +Process: Auto-OCR and label by folder + +# 4. Advanced search setup +Boolean search: enabled +Fuzzy matching: 2 (for OCR errors) +``` + +## Performance Tips + +### For Faster OCR Processing + +```bash +# Increase concurrent jobs (if you have CPU cores) +CONCURRENT_OCR_JOBS=8 + +# Optimize for your document types +OCR_LANGUAGE=eng # Single language is faster +ENABLE_PREPROCESSING=false # Skip if documents are clean +``` + +### For Large Document Collections + +```bash +# Use S3 storage instead of local +S3_ENABLED=true +S3_BUCKET_NAME=readur-docs + +# Increase memory limits +MEMORY_LIMIT_MB=4096 + +# Enable compression +ENABLE_COMPRESSION=true +``` + +## Troubleshooting Quick Fixes + +### OCR Not Starting +```bash +# Check the queue +curl http://localhost:8000/api/admin/queue/status + +# Restart OCR workers +docker-compose restart readur +``` + +### Can't Login +```bash +# Reset to default password +docker exec readur python reset_admin_password.py +``` + +### Slow Search +```bash +# Rebuild search index +docker exec readur python rebuild_index.py +``` + +## Next Steps + +Now that you have Readur running: + +1. **[Configure OCR](../multi-language-ocr-guide.md)** for your language +2. **[Set up Sources](../sources-guide.md)** for automated import +3. **[Create Labels](../labels-and-organization.md)** for organization +4. **[Learn Advanced Search](../advanced-search.md)** techniques +5. **[Configure Backups](../deployment.md#backup-strategy)** for data safety + +## Getting Help + +- **Documentation**: [Full User Guide](../user-guide.md) +- **API Reference**: [REST API Docs](../api-reference.md) +- **Community**: [GitHub Discussions](https://github.com/readur/readur/discussions) +- **Issues**: [Report Bugs](https://github.com/readur/readur/issues) \ No newline at end of file diff --git a/docs/guide/overview.md b/docs/guide/overview.md new file mode 100644 index 0000000..28b0bd1 --- /dev/null +++ b/docs/guide/overview.md @@ -0,0 +1,192 @@ +# Readur User Guide Overview + +Welcome to the comprehensive guide for using Readur's document management system. This guide covers everything from basic operations to advanced features. + +## Guide Structure + +### Getting Started +- **[Installation](../getting-started/installation.md)** - Deploy Readur with Docker +- **[Quick Start](../getting-started/quickstart.md)** - 5-minute setup guide +- **[Configuration](../getting-started/configuration.md)** - Customize your deployment + +### Core Features +- **[Document Management](../user-guide.md#document-management)** - Upload, organize, and manage documents +- **[OCR Processing](../user-guide.md#ocr-processing)** - Extract text from scanned documents +- **[Search & Discovery](../user-guide.md#search-features)** - Find information quickly +- **[Labels & Organization](../labels-and-organization.md)** - Categorize and structure content + +### Advanced Features +- **[Sources & Sync](../sources-guide.md)** - Automated document import +- **[Advanced Search](../advanced-search.md)** - Complex queries and filters +- **[User Management](../user-management-guide.md)** - Roles and permissions +- **[API Integration](../api-reference.md)** - Programmatic access + +### Administration +- **[Deployment](../deployment.md)** - Production setup and scaling +- **[Monitoring](../health-monitoring-guide.md)** - System health and metrics +- **[Backup & Recovery](../deployment.md#backup-strategy)** - Data protection +- **[Migration](../migration-guide.md)** - Upgrades and data migration + +## Quick Navigation + +### By User Type + +#### Document Users +Start here if you need to: +- Upload and organize documents +- Search for specific content +- Export and share documents + +**Key Guides:** +1. [User Guide](../user-guide.md) +2. [Search Features](../advanced-search.md) +3. [Labels Guide](../labels-and-organization.md) + +#### System Administrators +Start here if you need to: +- Deploy and configure Readur +- Manage users and permissions +- Monitor system health +- Set up integrations + +**Key Guides:** +1. [Installation](../getting-started/installation.md) +2. [Configuration](../configuration-reference.md) +3. [User Management](../user-management-guide.md) +4. [Deployment](../deployment.md) + +#### Developers +Start here if you need to: +- Integrate with the API +- Customize Readur +- Contribute to development + +**Key Guides:** +1. [API Reference](../api-reference.md) +2. [Development Setup](../dev/development.md) +3. [Architecture](../dev/architecture.md) + +### By Task + +#### Initial Setup +1. [Install Readur](../getting-started/installation.md) +2. [Configure OCR languages](../multi-language-ocr-guide.md) +3. [Set up authentication](../oidc-setup.md) +4. [Create users](../user-management-guide.md) + +#### Document Processing +1. [Upload documents](../file-upload-guide.md) +2. [Configure OCR](../user-guide.md#ocr-processing) +3. [Monitor processing](../user-guide.md#ocr-status-indicators) +4. [Troubleshoot OCR](../dev/OCR_OPTIMIZATION_GUIDE.md) + +#### Search & Organization +1. [Basic search](../user-guide.md#search-features) +2. [Advanced search syntax](../advanced-search.md) +3. [Create labels](../labels-and-organization.md) +4. [Save searches](../user-guide.md#smart-collections) + +#### Integration & Automation +1. [Set up sources](../sources-guide.md) +2. [Configure watch folders](../WATCH_FOLDER.md) +3. [Use the API](../api-reference.md) +4. [Automate workflows](../api-reference.md#automation-examples) + +## Feature Highlights + +### Document Intelligence +- **OCR in 100+ Languages**: Process documents in virtually any language +- **Format Support**: PDF, images, Office documents, and text files +- **Batch Processing**: Handle thousands of documents efficiently +- **Quality Enhancement**: Automatic rotation, deskewing, and preprocessing + +### Search Capabilities +- **Full-Text Search**: Search within document content +- **Boolean Logic**: Complex queries with AND, OR, NOT +- **Fuzzy Matching**: Handle OCR errors and typos +- **Filters**: By date, type, size, labels, and more + +### Organization Tools +- **Flexible Labels**: Create custom categorization systems +- **Bulk Operations**: Apply changes to multiple documents +- **Smart Collections**: Saved searches that update automatically +- **Multiple Views**: List and grid layouts + +### Integration Options +- **REST API**: Complete programmatic access +- **Source Sync**: WebDAV, S3, local folders +- **SSO/OIDC**: Enterprise authentication +- **Webhooks**: Event-driven automation + +## Best Practices + +### Document Organization +1. **Consistent Naming**: Use descriptive, standardized file names +2. **Label Strategy**: Create a hierarchical label structure +3. **Regular Cleanup**: Archive or remove outdated documents +4. **Folder Structure**: Organize source folders logically + +### Performance Optimization +1. **OCR Settings**: Balance quality vs. speed for your needs +2. **Concurrent Jobs**: Match to available CPU cores +3. **Storage Backend**: Use S3 for large collections +4. **Search Indexing**: Schedule reindexing during off-hours + +### Security +1. **Change Defaults**: Always change default passwords +2. **Enable HTTPS**: Use SSL/TLS in production +3. **Regular Backups**: Automate database backups +4. **Access Control**: Use roles and permissions appropriately + +### Workflow Efficiency +1. **Bulk Upload**: Process similar documents together +2. **Automation**: Set up sources for automatic import +3. **Saved Searches**: Create shortcuts for common queries +4. **Keyboard Shortcuts**: Learn shortcuts for faster navigation + +## Troubleshooting Resources + +### Common Issues +- [OCR not starting](../user-guide.md#common-issues) +- [Search not finding documents](../advanced-search.md#troubleshooting) +- [Slow performance](../dev/OCR_OPTIMIZATION_GUIDE.md) +- [Upload failures](../file-upload-guide.md#troubleshooting) + +### Getting Help +- **Documentation Search**: Use the search bar above +- **GitHub Issues**: [Report bugs](https://github.com/readur/readur/issues) +- **Community Forum**: [Ask questions](https://github.com/readur/readur/discussions) +- **System Logs**: Check logs for detailed error information + +## Version Information + +This documentation covers Readur version 2.5.4 and later. Key features in recent versions: + +### Version 2.5.4 +- S3 storage backend support +- Enhanced source synchronization +- Per-user watch directories +- Improved health monitoring + +### Version 2.5.0 +- OIDC/SSO authentication +- Advanced search operators +- Bulk operations +- Performance improvements + +## Next Steps + +### New Users +1. Start with the [Quick Start Guide](../getting-started/quickstart.md) +2. Read the [User Guide](../user-guide.md) +3. Learn about [Search Features](../advanced-search.md) + +### Administrators +1. Review [Configuration Options](../configuration-reference.md) +2. Set up [Monitoring](../health-monitoring-guide.md) +3. Plan [Backup Strategy](../deployment.md#backup-strategy) + +### Advanced Users +1. Explore [API Integration](../api-reference.md) +2. Configure [Sources](../sources-guide.md) +3. Optimize [OCR Performance](../dev/OCR_OPTIMIZATION_GUIDE.md) \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..e433bb0 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,136 @@ +# Readur Documentation + +## Intelligent Document Management with OCR + +Readur is a powerful document management system that transforms your unstructured documents into a searchable, organized knowledge base. Built for teams and individuals who need to efficiently manage, search, and access large document collections. + +## Key Capabilities + +
+ +- **[Getting Started](getting-started/installation.md)** + Deploy Readur with Docker in minutes - single command setup + +- **[User Guide](user-guide.md)** + Master document management, OCR processing, and search features + +- **[API Reference](api-reference.md)** + Automate workflows with REST API integration + +- **[Deployment Guide](deployment.md)** + Production deployment with SSL, monitoring, and scaling + +
+ +## Core Features + +### Document Processing +- **Automatic OCR**: Extract text from scanned PDFs and images in 100+ languages +- **Multiple Formats**: Support for PDF, PNG, JPG, TIFF, TXT, and Office documents +- **Batch Processing**: Upload and process hundreds of documents simultaneously +- **Smart Queue**: Priority-based OCR processing with configurable concurrency + +### Search & Discovery +- **Full-Text Search**: Find content within documents instantly +- **Advanced Filtering**: Search by date, type, size, labels, and OCR status +- **Boolean Operators**: Complex queries with AND, OR, NOT logic +- **Fuzzy Matching**: Handle typos and OCR errors automatically + +### Organization +- **Flexible Labels**: Create custom taxonomies for document categorization +- **Bulk Operations**: Apply changes to multiple documents at once +- **Smart Collections**: Saved searches that update automatically +- **Multiple Views**: List and grid layouts with customizable sorting + +### Integration & Automation +- **Source Synchronization**: Auto-import from WebDAV, S3, and local folders +- **REST API**: Complete programmatic access for automation +- **Watch Folders**: Monitor directories for automatic document ingestion +- **SSO Support**: Enterprise authentication with OIDC/OAuth2 + +## Quick Start Example + +```bash +# 1. Clone the repository +git clone https://github.com/readur/readur.git +cd readur + +# 2. Start with Docker Compose +docker-compose up -d + +# 3. Access the interface +open http://localhost:8000 + +# Default credentials: admin / readur2024 +``` + +## Common Use Cases + +### Digital Archives +Convert paper documents into searchable digital archives. Perfect for: +- Legal firms managing contracts and case files +- Medical practices digitizing patient records +- Government agencies preserving historical documents +- Libraries and research institutions + +### Business Document Management +Streamline document workflows and compliance: +- Invoice and receipt processing +- Contract management and search +- Policy and procedure documentation +- Compliance document tracking + +### Personal Knowledge Base +Organize personal documents and research: +- Academic papers and research notes +- Tax documents and financial records +- Technical documentation and manuals +- Personal correspondence and archives + +## System Architecture + +```mermaid +graph LR + A[Document Upload] --> B[OCR Queue] + B --> C[Text Extraction] + C --> D[PostgreSQL Database] + D --> E[Full-Text Search] + F[Source Sync] --> B + G[Watch Folders] --> B + H[API] --> D +``` + +## Performance & Scalability + +- **Concurrent Processing**: Handle multiple OCR jobs in parallel +- **Efficient Storage**: S3-compatible backend for unlimited scaling +- **PostgreSQL**: Enterprise-grade database with full-text search +- **Docker Native**: Container-based architecture for easy deployment +- **Resource Management**: Configurable memory and CPU limits + +## Getting Help + +### Documentation +- [User Guide](user-guide.md) - Complete feature documentation +- [Configuration Reference](configuration-reference.md) - All environment variables +- [Troubleshooting](s3-troubleshooting.md) - Common issues and solutions +- [Migration Guide](migration-guide.md) - Upgrade and migration procedures + +### Community & Support +- [GitHub Issues](https://github.com/readur/readur/issues) - Bug reports and feature requests +- [GitHub Discussions](https://github.com/readur/readur/discussions) - Community help +- [Developer Documentation](dev/) - Architecture and development setup + +## Latest Updates + +### Version 2.5.4 +- S3 storage backend support for unlimited scaling +- Enhanced source synchronization with health monitoring +- Improved OCR performance and accuracy +- OIDC/SSO authentication support +- Advanced search with boolean operators + +--- + +!!! tip "Production Ready" + Readur is actively used in production environments processing millions of documents. See our [Deployment Guide](deployment.md) for best practices. \ No newline at end of file diff --git a/docs/javascripts/extra.js b/docs/javascripts/extra.js new file mode 100644 index 0000000..b549285 --- /dev/null +++ b/docs/javascripts/extra.js @@ -0,0 +1,97 @@ +// Custom JavaScript for Readur documentation + +// Add copy button to code blocks +document.addEventListener('DOMContentLoaded', function() { + // Initialize copy buttons for code blocks (if not already handled by theme) + const codeBlocks = document.querySelectorAll('pre > code'); + + codeBlocks.forEach(function(codeBlock) { + // Check if copy button already exists + if (codeBlock.parentElement.querySelector('.copy-button')) { + return; + } + + const button = document.createElement('button'); + button.className = 'copy-button'; + button.textContent = 'Copy'; + button.setAttribute('aria-label', 'Copy code to clipboard'); + + button.addEventListener('click', function() { + const code = codeBlock.textContent; + navigator.clipboard.writeText(code).then(function() { + button.textContent = 'Copied!'; + setTimeout(function() { + button.textContent = 'Copy'; + }, 2000); + }).catch(function(err) { + console.error('Failed to copy code: ', err); + }); + }); + + codeBlock.parentElement.style.position = 'relative'; + codeBlock.parentElement.appendChild(button); + }); + + // Smooth scroll for anchor links + document.querySelectorAll('a[href^="#"]').forEach(anchor => { + anchor.addEventListener('click', function(e) { + const href = this.getAttribute('href'); + if (href !== '#' && href !== '#!') { + e.preventDefault(); + const target = document.querySelector(href); + if (target) { + target.scrollIntoView({ + behavior: 'smooth', + block: 'start' + }); + } + } + }); + }); + + // Add external link indicators + const externalLinks = document.querySelectorAll('a[href^="http"]:not([href*="readur.app"])'); + externalLinks.forEach(link => { + link.setAttribute('target', '_blank'); + link.setAttribute('rel', 'noopener noreferrer'); + link.classList.add('external-link'); + }); + + // Track documentation page views (if analytics enabled) + if (typeof gtag !== 'undefined') { + gtag('event', 'page_view', { + page_title: document.title, + page_location: window.location.href, + page_path: window.location.pathname + }); + } +}); + +// Add keyboard shortcuts +document.addEventListener('keydown', function(e) { + // Ctrl/Cmd + K for search + if ((e.ctrlKey || e.metaKey) && e.key === 'k') { + e.preventDefault(); + const searchInput = document.querySelector('.md-search__input'); + if (searchInput) { + searchInput.focus(); + } + } + + // Escape to close search + if (e.key === 'Escape') { + const searchInput = document.querySelector('.md-search__input'); + if (searchInput && document.activeElement === searchInput) { + searchInput.blur(); + } + } +}); + +// Custom console message +console.log( + '%c Welcome to Readur Documentation! ', + 'background: #4051b5; color: white; padding: 5px 10px; border-radius: 3px;' +); +console.log( + 'Found an issue? Report it at https://github.com/readur/readur/issues' +); \ No newline at end of file diff --git a/docs/sources-guide.md b/docs/sources-guide.md index 92451a5..4eb632a 100644 --- a/docs/sources-guide.md +++ b/docs/sources-guide.md @@ -573,7 +573,7 @@ Sources are continuously monitored and assigned health scores (0-100): ## Next Steps -- Configure [notifications](notifications.md) for sync events +- Configure [notifications](notifications-guide.md) for sync events - Set up [advanced search](advanced-search.md) to find synced documents - Review [OCR optimization](dev/OCR_OPTIMIZATION_GUIDE.md) for processing improvements - Explore [labels and organization](labels-and-organization.md) for document management \ No newline at end of file diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..c9603fb --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,119 @@ +/* Custom styles for Readur documentation */ + +/* Brand colors */ +:root { + --readur-primary: #4051b5; + --readur-accent: #526cfe; +} + +/* Grid cards for homepage */ +.grid.cards { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); + gap: 1rem; + margin: 2rem 0; +} + +.grid.cards > * { + padding: 1rem; + border: 1px solid var(--md-default-fg-color--lightest); + border-radius: 0.5rem; + transition: all 0.3s ease; +} + +.grid.cards > *:hover { + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); + transform: translateY(-2px); +} + +/* Custom admonitions */ +.md-typeset .admonition.example, +.md-typeset details.example { + border-color: rgb(43, 155, 70); +} + +.md-typeset .example > .admonition-title, +.md-typeset .example > summary { + background-color: rgba(43, 155, 70, 0.1); + border-color: rgb(43, 155, 70); +} + +.md-typeset .example > .admonition-title::before, +.md-typeset .example > summary::before { + background-color: rgb(43, 155, 70); + -webkit-mask-image: var(--md-admonition-icon--example); + mask-image: var(--md-admonition-icon--example); +} + +/* Code block enhancements */ +.md-typeset pre > code { + font-size: 0.85rem; +} + +/* Better table styling */ +.md-typeset table:not([class]) { + font-size: 0.9rem; +} + +.md-typeset table:not([class]) th { + background-color: var(--md-primary-fg-color); + color: var(--md-primary-bg-color); +} + +/* Responsive images */ +.md-typeset img { + max-width: 100%; + height: auto; +} + +/* Custom badges */ +.badge { + display: inline-block; + padding: 0.25em 0.5em; + font-size: 0.75rem; + font-weight: 600; + line-height: 1; + text-align: center; + white-space: nowrap; + vertical-align: baseline; + border-radius: 0.25rem; +} + +.badge-new { + background-color: #28a745; + color: white; +} + +.badge-beta { + background-color: #ffc107; + color: #333; +} + +.badge-deprecated { + background-color: #dc3545; + color: white; +} + +/* Improve search results */ +.md-search-result__meta { + color: var(--md-default-fg-color--light); + font-size: 0.75rem; +} + +/* Custom footer */ +.md-footer-meta__inner { + display: flex; + justify-content: space-between; + align-items: center; +} + +/* Announcement bar */ +.md-banner { + background-color: var(--readur-accent); + color: white; +} + +.md-banner a { + color: white; + text-decoration: underline; +} \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..8759c9b --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,250 @@ +# MkDocs configuration for readur.app documentation +site_name: Readur Documentation +site_url: https://readur.app +site_description: Intelligent Document Management System with OCR - Transform your documents into a searchable knowledge base +site_author: Readur Team + +# Repository information (optional, adds edit links) +repo_name: readur/readur +repo_url: https://github.com/readur/readur +edit_uri: edit/main/docs/ + +# Copyright +copyright: Copyright © 2025 Readur + +# Theme configuration +theme: + name: material + + # Color scheme + palette: + # Light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + + # Font configuration + font: + text: Roboto + code: Roboto Mono + + # Features + features: + - announce.dismiss + - content.action.edit + - content.action.view + - content.code.annotate + - content.code.copy + - content.tooltips + - navigation.footer + - navigation.indexes + - navigation.instant + - navigation.instant.prefetch + - navigation.instant.progress + - navigation.path + - navigation.prune + - navigation.sections + - navigation.tabs + - navigation.tabs.sticky + - navigation.top + - navigation.tracking + - search.highlight + - search.share + - search.suggest + - toc.follow + - toc.integrate + + # Icons + icon: + logo: material/book-open-page-variant + repo: fontawesome/brands/github + +# Plugins +plugins: + - search: + separator: '[\s\-,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])' + lang: + - en + - minify: + minify_html: true + minify_js: true + minify_css: true + htmlmin_opts: + remove_comments: true + - git-revision-date-localized: + enable_creation_date: true + type: iso_datetime + fallback_to_build_date: true + +# Extensions +markdown_extensions: + # Python Markdown + - abbr + - admonition + - attr_list + - def_list + - footnotes + - md_in_html + - toc: + permalink: true + permalink_title: Anchor link to this section for reference + + # Python Markdown Extensions + - pymdownx.arithmatex: + generic: true + - pymdownx.betterem: + smart_enable: all + - pymdownx.caret + - pymdownx.details + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.keys + - pymdownx.mark + - pymdownx.smartsymbols + - pymdownx.snippets + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + combine_header_slug: true + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tilde + +# Extra CSS and JavaScript +extra_css: + - stylesheets/extra.css + +extra_javascript: + - javascripts/extra.js + # MathJax for mathematical notation (optional) + - javascripts/mathjax.js + - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js + +# Extra configuration +extra: + # Social links + social: + - icon: fontawesome/brands/github + link: https://github.com/readur/readur + - icon: fontawesome/brands/twitter + link: https://twitter.com/readur + - icon: fontawesome/brands/discord + link: https://discord.gg/readur + + # Analytics (optional) + analytics: + provider: google + property: G-XXXXXXXXXX # Replace with your Google Analytics ID + feedback: + title: Was this page helpful? + ratings: + - icon: material/emoticon-happy-outline + name: This page was helpful + data: 1 + note: >- + Thanks for your feedback! + - icon: material/emoticon-sad-outline + name: This page could be improved + data: 0 + note: >- + Thanks for your feedback! Help us improve this page by + opening an issue. + + # Versioning (optional) + version: + provider: mike + default: stable + + # Cookie consent (optional) + consent: + title: Cookie consent + description: >- + We use cookies to recognize your repeated visits and preferences, as well + as to measure the effectiveness of our documentation and whether users + find what they're searching for. With your consent, you're helping us to + make our documentation better. + +# Navigation structure +nav: + - Home: index.md + + - Getting Started: + - Installation Guide: getting-started/installation.md + - Quick Start (5 min): getting-started/quickstart.md + - Configuration: getting-started/configuration.md + + - User Guide: + - Overview: guide/overview.md + - Complete Guide: user-guide.md + - Labels & Organization: labels-and-organization.md + + - Features: + - Advanced Search: advanced-search.md + - Sources & Sync: sources-guide.md + - File Upload: file-upload-guide.md + - Multi-Language OCR: multi-language-ocr-guide.md + - Analytics Dashboard: analytics-dashboard-guide.md + - Notifications: notifications-guide.md + + - Administration: + - Deployment: deployment.md + - User Management: user-management-guide.md + - OIDC/SSO Setup: oidc-setup.md + - Health Monitoring: health-monitoring-guide.md + - Storage Migration: administration/storage-migration.md + - CLI Tools: administration/cli-tools.md + + - Storage & Backend: + - S3 Storage Guide: s3-storage-guide.md + - S3 Troubleshooting: s3-troubleshooting.md + - Watch Folders: WATCH_FOLDER.md + - Per-User Directories: per-user-watch-directories.md + - Migration Guide: migration-guide.md + + - API & Integration: + - API Reference: api-reference.md + - Swagger UI: swagger-ui-guide.md + - Reverse Proxy: REVERSE_PROXY.md + - Configuration Reference: configuration-reference.md + + - Development: + - Developer Guide: dev/README.md + - Architecture: dev/architecture.md + - Development Setup: dev/development.md + - Testing: dev/TESTING.md + - E2E Testing: dev/README-E2E.md + - Test Infrastructure: dev/test-infrastructure.md + + - Optimization: + - OCR Optimization: dev/OCR_OPTIMIZATION_GUIDE.md + - Queue Architecture: dev/QUEUE_IMPROVEMENTS.md + - Database Guardrails: dev/DATABASE_GUARDRAILS.md + - Error System: dev/ERROR_SYSTEM.md + - Label Testing: dev/LABEL_TESTING.md + + - Troubleshooting: + - Migration Issues: administration/migration-troubleshooting.md + - S3 Issues: s3-troubleshooting.md \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e06ee06 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +# MkDocs and Material theme requirements +mkdocs>=1.6.0 +mkdocs-material>=9.5.0 +mkdocs-material-extensions>=1.3.0 + +# Essential plugins +mkdocs-minify-plugin>=0.8.0 +mkdocs-git-revision-date-localized-plugin>=1.2.0 + +# Optional but recommended plugins +mkdocs-redirects>=1.2.0 +mkdocs-rss-plugin>=1.12.0 +mkdocs-glightbox>=0.3.0 + +# For advanced features +pillow>=10.0.0 # For social cards +cairosvg>=2.7.0 # For social cards + +# Search enhancements +mkdocs-material[imaging]>=9.5.0 \ No newline at end of file