feat(docs): add mkdocs requirements

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again

feat(mkdocs): try mkdocs again
This commit is contained in:
perf3ct 2025-08-16 04:53:48 +00:00
parent 8d8929adc5
commit ab962bfd04
No known key found for this signature in database
GPG Key ID: 569C4EEC436F5232
18 changed files with 2348 additions and 9 deletions

153
.github/workflows/deploy-docs.yml vendored Normal file
View File

@ -0,0 +1,153 @@
# GitHub Actions workflow for deploying MkDocs documentation to Cloudflare Pages
# This workflow builds and deploys your MkDocs site when changes are pushed to main
name: Deploy MkDocs Documentation

on:
  # Trigger on push to main branch
  push:
    branches:
      - main
    # Only run when docs files change
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'requirements.txt'
      - '.github/workflows/deploy-docs.yml'
  # Allow manual triggering from Actions tab
  workflow_dispatch:
  # Run on pull requests for preview deployments
  pull_request:
    branches:
      - main
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'requirements.txt'
      - '.github/workflows/deploy-docs.yml'

jobs:
  build-and-deploy:
    name: Build and Deploy MkDocs
    runs-on: ubuntu-latest
    timeout-minutes: 10
    # Required permissions for deployment
    permissions:
      contents: read
      deployments: write
      pull-requests: write  # For PR preview comments
      id-token: write  # For OIDC authentication (if needed)
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for git info and mkdocs-git-revision-date plugin

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'
          cache-dependency-path: 'requirements.txt'

      - name: Install MkDocs and Dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt
        env:
          PIP_DISABLE_PIP_VERSION_CHECK: 1

      - name: Build MkDocs Site
        run: |
          # Build with strict mode but ignore the expected README.md warning
          # MkDocs always warns when README.md exists alongside index.md
          #
          # The strict build runs exactly once; its output is captured so the
          # fallback decision is made from that log instead of rebuilding the
          # site several more times just to grep the output.
          BUILD_LOG="$(mktemp)"
          EXIT_CODE=0
          mkdocs build --strict --verbose >"$BUILD_LOG" 2>&1 || EXIT_CODE=$?
          cat "$BUILD_LOG"

          if [ "$EXIT_CODE" -eq 0 ]; then
            exit 0
          fi

          # Count only real warning records (MkDocs prefixes each log line with
          # its level), so DEBUG lines emitted by --verbose cannot skew the count.
          WARNING_COUNT="$(grep -c '^WARNING' "$BUILD_LOG" || true)"

          # Check if the only issue is the README.md conflict
          if [ "$WARNING_COUNT" -eq 1 ] && \
             grep -q "WARNING.*README.md.*conflicts with.*index.md" "$BUILD_LOG"; then
            echo "✅ Build succeeded with expected README.md warning"
            mkdocs build --verbose
          else
            echo "❌ Build failed with unexpected errors"
            exit "$EXIT_CODE"
          fi

      - name: Validate Built Site
        run: |
          # Basic validation that important files exist
          test -f site/index.html || { echo "ERROR: site/index.html not found"; exit 1; }
          test -f site/sitemap.xml || { echo "ERROR: site/sitemap.xml not found"; exit 1; }
          test -d site/assets || { echo "ERROR: site/assets directory not found"; exit 1; }
          echo "✅ Site validation passed"

      # Deploy using Wrangler (recommended by Cloudflare)
      # Credentials are passed through the action's documented `apiToken` and
      # `accountId` inputs rather than ad-hoc environment variables.
      - name: Deploy to Cloudflare Pages
        id: deploy
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        uses: cloudflare/wrangler-action@v3
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          command: pages deploy site --project-name=readur-docs --branch=${{ github.ref_name }}

      # Deploy preview for PRs
      # Skipped for pull requests from forks: repository secrets are not exposed
      # to those runs, so the deploy could only fail. The build and validation
      # steps above still run for fork PRs.
      - name: Deploy Preview to Cloudflare Pages
        id: preview-deployment
        if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
        uses: cloudflare/wrangler-action@v3
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          command: pages deploy site --project-name=readur-docs --branch=pr-${{ github.event.pull_request.number }}

      # Post deployment URL as PR comment
      # Same fork guard as the preview deploy: without a preview there is nothing
      # to link, and GITHUB_TOKEN is read-only for fork PRs.
      - name: Comment PR with Preview URL
        if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const prNumber = context.issue.number;
            // Construct preview URL based on Cloudflare Pages pattern
            // Note: Actual URL may vary based on Cloudflare configuration
            const previewUrl = `https://pr-${prNumber}.readur-docs.pages.dev`;
            const mainUrl = 'https://readur.app';
            // Check if we already commented. Paginate so the existing bot
            // comment is still found on PRs with more than one page of
            // comments; otherwise a duplicate would be posted on every push.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              per_page: 100
            });
            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('Documentation preview is ready')
            );
            const commentBody = `📚 Documentation preview is ready!\n\n🔗 Preview URL: ${previewUrl}\n📖 Production URL: ${mainUrl}\n\n✅ All checks passed\n\n_This preview will be updated automatically with new commits._`;
            if (botComment) {
              // Update existing comment
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: commentBody
              });
            } else {
              // Create new comment
              await github.rest.issues.createComment({
                issue_number: prNumber,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: commentBody
              });
            }

12
.gitignore vendored
View File

@ -1,12 +1,22 @@
# Rust
target/ target/
# Node
client/node_modules/ client/node_modules/
node_modules/ node_modules/
.env .env
assets/ assets/
frontend/dist/ frontend/dist/
.claude/settings.local.json # This file is used to store the local Claude settings.
# Python
venv/
site/
# Testing
readur_uploads/ readur_uploads/
readur_watch/ readur_watch/
test-results/ test-results/
uploads/ uploads/
# Misc.
.claude/settings.local.json .claude/settings.local.json

View File

@ -140,3 +140,5 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
**Made with ❤️ and ☕ by the Readur team** **Made with ❤️ and ☕ by the Readur team**

106
docs/README.md Normal file
View File

@ -0,0 +1,106 @@
# Readur Documentation
This directory contains the source files for the Readur documentation site, built with MkDocs and Material for MkDocs.
## Local Development
### Prerequisites
- Python 3.8+
- pip
### Setup
1. Install dependencies (run from the repository root, where `requirements.txt` and `mkdocs.yml` live):
```bash
pip install -r requirements.txt
```
2. Start the development server:
```bash
mkdocs serve
```
The documentation will be available at `http://localhost:8000`.
### Building
To build the static site:
```bash
mkdocs build
```
The built site will be in the `site/` directory.
## Deployment
The documentation is automatically deployed to [readur.app](https://readur.app) via GitHub Actions when changes are pushed to the main branch.
### Manual Deployment
If you need to deploy manually:
1. Build the site:
```bash
mkdocs build
```
2. Deploy to Cloudflare Pages:
```bash
wrangler pages deploy site --project-name=readur-docs
```
## Structure
- `docs/` - Documentation source files (Markdown)
- `mkdocs.yml` - MkDocs configuration
- `requirements.txt` - Python dependencies
- `overrides/` - Theme customizations
- `stylesheets/` - Custom CSS
- `javascripts/` - Custom JavaScript
## Writing Documentation
### Adding New Pages
1. Create a new `.md` file in the appropriate directory
2. Add the page to the navigation in `mkdocs.yml`
3. Use Material for MkDocs features for rich content
### Markdown Extensions
We use several markdown extensions for enhanced functionality:
- **Admonitions** - For notes, warnings, tips
- **Code blocks** - With syntax highlighting
- **Tabs** - For grouped content
- **Tables** - For structured data
- **Emoji** - For visual elements
Example:
```markdown
!!! note "Important"
This is an important note.
=== "Tab 1"
Content for tab 1
=== "Tab 2"
Content for tab 2
```
## Contributing
Please follow these guidelines when contributing to the documentation:
1. Use clear, concise language
2. Include code examples where appropriate
3. Test all links and code samples
4. Run `mkdocs build --strict` before submitting
5. Update the navigation in `mkdocs.yml` for new pages
## Resources
- [MkDocs Documentation](https://www.mkdocs.org/)
- [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/)
- [Markdown Guide](https://www.markdownguide.org/)

View File

@ -612,7 +612,4 @@ Planned improvements to the error system:
## References ## References
- [Error Management Documentation](./ERROR_MANAGEMENT.md) - [API Reference](../api-reference.md)
- [API Error Response Standards](../api-reference.md#error-responses)
- [Frontend Error Handling Guide](../../frontend/ERROR_HANDLING.md)
- [Monitoring and Observability](./MONITORING.md)

View File

@ -34,7 +34,6 @@ This directory contains technical documentation for developers working on Readur
- [Configuration Reference](../configuration-reference.md) - Complete configuration options - [Configuration Reference](../configuration-reference.md) - Complete configuration options
- [User Guide](../user-guide.md) - How to use Readur features - [User Guide](../user-guide.md) - How to use Readur features
- [API Reference](../api-reference.md) - REST API documentation - [API Reference](../api-reference.md) - REST API documentation
- [New Features in 2.5.4](../new-features-2.5.4.md) - Latest features and improvements
## 🤝 Contributing ## 🤝 Contributing

View File

@ -344,7 +344,7 @@ Potential service boundaries:
## Next Steps ## Next Steps
- Review [deployment options](deployment.md) - Review [deployment options](../deployment.md)
- Explore [performance tuning](OCR_OPTIMIZATION_GUIDE.md) - Explore [performance tuning](OCR_OPTIMIZATION_GUIDE.md)
- Understand [database design](DATABASE_GUARDRAILS.md) - Understand [database design](DATABASE_GUARDRAILS.md)
- Learn about [testing strategy](TESTING.md) - Learn about [testing strategy](TESTING.md)

View File

@ -268,7 +268,7 @@ Style preferences:
## Contributing ## Contributing
We welcome contributions! Please see our [Contributing Guide](../CONTRIBUTING.md) for details. We welcome contributions!
### Getting Started ### Getting Started

View File

@ -0,0 +1,506 @@
# Configuration Guide
Configure Readur for your specific needs and optimize for your workload.
## Configuration Overview
Readur uses environment variables for configuration, making it easy to deploy in containerized environments. Configuration can be set through:
1. **Environment variables** - Direct system environment
2. **`.env` file** - Docker Compose automatically loads this
3. **`docker-compose.yml`** - Directly in the compose file
4. **Kubernetes ConfigMaps** - For K8s deployments
## Essential Configuration
### Security Settings
These MUST be changed from defaults in production:
```bash
# Generate secure secrets
JWT_SECRET=$(openssl rand -base64 32)
DB_PASSWORD=$(openssl rand -base64 32)
# Set admin password
ADMIN_PASSWORD=your_secure_password_here
# Enable HTTPS (reverse proxy recommended)
FORCE_HTTPS=true
SECURE_COOKIES=true
```
### Database Configuration
```bash
# PostgreSQL connection
DATABASE_URL=postgresql://readur:${DB_PASSWORD}@postgres:5432/readur
# Connection pool settings
DB_POOL_SIZE=20
DB_MAX_OVERFLOW=40
DB_POOL_TIMEOUT=30
# PostgreSQL specific optimizations
POSTGRES_SHARED_BUFFERS=256MB
POSTGRES_EFFECTIVE_CACHE_SIZE=1GB
```
### Storage Configuration
#### Local Storage (Default)
```bash
# File storage paths
UPLOAD_PATH=/app/uploads
TEMP_PATH=/app/temp
# Size limits
MAX_FILE_SIZE_MB=50
TOTAL_STORAGE_LIMIT_GB=100
# File types
ALLOWED_FILE_TYPES=pdf,png,jpg,jpeg,tiff,bmp,gif,txt,rtf,doc,docx
```
#### S3 Storage (Scalable)
```bash
# Enable S3 backend
STORAGE_BACKEND=s3
S3_ENABLED=true
# AWS S3
S3_BUCKET_NAME=readur-documents
S3_REGION=us-east-1
AWS_ACCESS_KEY_ID=your_access_key
AWS_SECRET_ACCESS_KEY=your_secret_key
# Or S3-compatible (MinIO, Wasabi, etc.)
S3_ENDPOINT=https://s3.example.com
S3_PATH_STYLE=true # For MinIO
```
## OCR Configuration
### Language Settings
```bash
# Single language (fastest)
OCR_LANGUAGE=eng
# Multiple languages
OCR_LANGUAGE=eng+deu+fra+spa
# Available languages (partial list):
# eng - English
# deu - German (Deutsch)
# fra - French (Français)
# spa - Spanish (Español)
# ita - Italian (Italiano)
# por - Portuguese
# rus - Russian
# chi_sim - Chinese Simplified
# jpn - Japanese
# ara - Arabic
```
### Performance Tuning
```bash
# Concurrent processing
CONCURRENT_OCR_JOBS=4 # Match CPU cores
OCR_WORKER_THREADS=2 # Threads per job
# Timeouts and limits
OCR_TIMEOUT_SECONDS=300
OCR_MAX_PAGES=500
MAX_FILE_SIZE_MB=100
# Memory management
OCR_MEMORY_LIMIT_MB=512 # Per job
ENABLE_MEMORY_PROFILING=false
# Processing options
OCR_DPI=300 # Higher = better quality, slower
ENABLE_PREPROCESSING=true
ENABLE_AUTO_ROTATION=true
ENABLE_DESKEW=true
```
### Quality vs Speed
#### High Quality (Slow)
```bash
OCR_QUALITY_PRESET=high
OCR_DPI=300
ENABLE_PREPROCESSING=true
ENABLE_DESKEW=true
ENABLE_AUTO_ROTATION=true
OCR_ENGINE_MODE=1 # LSTM only (Tesseract OEM numbering: 0 = legacy, 1 = LSTM, 2 = both, 3 = default)
```
#### Balanced (Default)
```bash
OCR_QUALITY_PRESET=balanced
OCR_DPI=200
ENABLE_PREPROCESSING=true
ENABLE_DESKEW=false
ENABLE_AUTO_ROTATION=true
OCR_ENGINE_MODE=2 # LSTM + Legacy
```
#### Fast (Lower Quality)
```bash
OCR_QUALITY_PRESET=fast
OCR_DPI=150
ENABLE_PREPROCESSING=false
ENABLE_DESKEW=false
ENABLE_AUTO_ROTATION=false
OCR_ENGINE_MODE=0 # Legacy only
```
## Source Synchronization
### Watch Folders
```bash
# Global watch folder
WATCH_FOLDER=/app/watch
WATCH_INTERVAL_SECONDS=60
FILE_STABILITY_CHECK_MS=2000
# Per-user watch folders
ENABLE_PER_USER_WATCH=true
USER_WATCH_BASE_DIR=/app/user_watch
# Processing rules
WATCH_PROCESS_HIDDEN_FILES=false
WATCH_RECURSIVE=true
WATCH_MAX_DEPTH=5
DELETE_AFTER_IMPORT=false
```
### WebDAV Sources
```bash
# Default WebDAV settings
WEBDAV_TIMEOUT_SECONDS=30
WEBDAV_MAX_RETRIES=3
WEBDAV_CHUNK_SIZE_MB=10
WEBDAV_VERIFY_SSL=true
```
### S3 Sources
```bash
# S3 sync settings
S3_SYNC_INTERVAL_MINUTES=30
S3_BATCH_SIZE=100
S3_MULTIPART_THRESHOLD_MB=100
S3_CONCURRENT_DOWNLOADS=4
```
## Authentication & Security
### Local Authentication
```bash
# Password policy
PASSWORD_MIN_LENGTH=12
PASSWORD_REQUIRE_UPPERCASE=true
PASSWORD_REQUIRE_NUMBERS=true
PASSWORD_REQUIRE_SPECIAL=true
# Session management
SESSION_TIMEOUT_MINUTES=60
REMEMBER_ME_DURATION_DAYS=30
MAX_LOGIN_ATTEMPTS=5
LOCKOUT_DURATION_MINUTES=15
```
### OIDC/SSO Configuration
```bash
# Enable OIDC
OIDC_ENABLED=true
# Provider configuration
OIDC_ISSUER=https://login.microsoftonline.com/tenant-id/v2.0
OIDC_CLIENT_ID=your-client-id
OIDC_CLIENT_SECRET=your-client-secret
OIDC_REDIRECT_URI=https://readur.example.com/auth/callback
# Optional settings
OIDC_SCOPE="openid profile email"
OIDC_USER_CLAIM=email
OIDC_GROUPS_CLAIM=groups
OIDC_ADMIN_GROUP=readur-admins
# Auto-provisioning
OIDC_AUTO_CREATE_USERS=true
OIDC_DEFAULT_ROLE=user
```
## Search Configuration
### Search Engine
```bash
# PostgreSQL Full-Text Search settings
SEARCH_LANGUAGE=english
SEARCH_RANKING_NORMALIZATION=32
ENABLE_PHRASE_SEARCH=true
ENABLE_FUZZY_SEARCH=true
FUZZY_SEARCH_DISTANCE=2
# Search results
SEARCH_RESULTS_PER_PAGE=20
SEARCH_SNIPPET_LENGTH=200
SEARCH_HIGHLIGHT_TAG=mark
```
### Search Performance
```bash
# Index management
AUTO_REINDEX=true
REINDEX_SCHEDULE="0 3 * * *" # 3 AM daily
SEARCH_CACHE_TTL_SECONDS=300
SEARCH_CACHE_SIZE_MB=100
# Query optimization
MAX_SEARCH_TERMS=10
ENABLE_SEARCH_SUGGESTIONS=true
SUGGESTION_MIN_LENGTH=3
```
## Monitoring & Logging
### Logging Configuration
```bash
# Log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL=INFO
LOG_FORMAT=json # or text
# Log outputs
LOG_TO_FILE=true
LOG_FILE_PATH=/app/logs/readur.log
LOG_FILE_MAX_SIZE_MB=100
LOG_FILE_BACKUP_COUNT=10
# Detailed logging
LOG_SQL_QUERIES=false
LOG_HTTP_REQUESTS=true
LOG_OCR_DETAILS=false
```
### Health Monitoring
```bash
# Health check endpoints
HEALTH_CHECK_ENABLED=true
HEALTH_CHECK_PATH=/health
METRICS_ENABLED=true
METRICS_PATH=/metrics
# Alerting thresholds
ALERT_QUEUE_SIZE=100
ALERT_OCR_FAILURE_RATE=0.1
ALERT_DISK_USAGE_PERCENT=80
ALERT_MEMORY_USAGE_PERCENT=90
```
## Performance Optimization
### System Resources
```bash
# Memory limits
MEMORY_LIMIT_MB=2048
MEMORY_SOFT_LIMIT_MB=1536
# CPU settings
CPU_CORES=4
WORKER_PROCESSES=auto # or specific number
WORKER_THREADS=2
# Connection limits
MAX_CONNECTIONS=100
CONNECTION_TIMEOUT=30
```
### Caching
```bash
# Enable caching layers
ENABLE_CACHE=true
CACHE_TYPE=redis # or memory
# Redis cache (if used)
REDIS_URL=redis://redis:6379/0
REDIS_MAX_CONNECTIONS=50
# Cache TTLs
DOCUMENT_CACHE_TTL=3600
SEARCH_CACHE_TTL=300
USER_CACHE_TTL=1800
```
### Queue Management
```bash
# Background job processing
QUEUE_TYPE=database # or redis
MAX_QUEUE_SIZE=1000
QUEUE_POLL_INTERVAL=5
# Job priorities
OCR_JOB_PRIORITY=5
SYNC_JOB_PRIORITY=3
CLEANUP_JOB_PRIORITY=1
# Retry configuration
MAX_JOB_RETRIES=3
RETRY_DELAY_SECONDS=60
EXPONENTIAL_BACKOFF=true
```
## Environment-Specific Configurations
### Development
```bash
# .env.development
DEBUG=true
LOG_LEVEL=DEBUG
RELOAD_ON_CHANGE=true
CONCURRENT_OCR_JOBS=1
DISABLE_RATE_LIMITING=true
```
### Staging
```bash
# .env.staging
DEBUG=false
LOG_LEVEL=INFO
CONCURRENT_OCR_JOBS=2
ENABLE_PROFILING=true
MOCK_EXTERNAL_SERVICES=true
```
### Production
```bash
# .env.production
DEBUG=false
LOG_LEVEL=WARNING
CONCURRENT_OCR_JOBS=8
ENABLE_RATE_LIMITING=true
SECURE_COOKIES=true
FORCE_HTTPS=true
```
## Configuration Validation
### Check Configuration
```bash
# Validate current configuration
docker exec readur python validate_config.py
# Test specific settings
docker exec readur python -c "
from config import settings
print(f'OCR Languages: {settings.OCR_LANGUAGE}')
print(f'Storage Backend: {settings.STORAGE_BACKEND}')
print(f'Max File Size: {settings.MAX_FILE_SIZE_MB}MB')
"
```
### Common Validation Errors
```bash
# Missing required S3 credentials
ERROR: S3_ENABLED=true but S3_BUCKET_NAME not set
# Invalid language code
ERROR: OCR_LANGUAGE 'xyz' not supported
# Insufficient resources
WARNING: CONCURRENT_OCR_JOBS=8 but only 2 CPU cores available
```
## Configuration Best Practices
### Security
1. **Never commit secrets** - Use `.env` files and add to `.gitignore`
2. **Rotate secrets regularly** - Especially JWT_SECRET
3. **Use strong passwords** - Minimum 16 characters for admin
4. **Enable HTTPS** - Always in production
5. **Restrict file types** - Only allow necessary formats
### Performance
1. **Match workers to cores** - CONCURRENT_OCR_JOBS ≤ CPU cores
2. **Monitor memory usage** - Adjust limits based on usage
3. **Use S3 for scale** - Local storage limited by disk
4. **Enable caching** - Reduces database load
5. **Tune PostgreSQL** - Adjust shared_buffers and work_mem
### Reliability
1. **Set reasonable timeouts** - Prevent hanging jobs
2. **Configure retries** - Handle transient failures
3. **Enable health checks** - For load balancer integration
4. **Set up logging** - Essential for troubleshooting
5. **Regular backups** - Automate database backups
## Configuration Examples
### Small Office (5-10 users)
```bash
# Minimal resources, local storage
CONCURRENT_OCR_JOBS=2
MEMORY_LIMIT_MB=1024
STORAGE_BACKEND=local
MAX_FILE_SIZE_MB=20
SEARCH_CACHE_TTL=600
```
### Medium Business (50-100 users)
```bash
# Balanced performance, S3 storage
CONCURRENT_OCR_JOBS=4
MEMORY_LIMIT_MB=4096
STORAGE_BACKEND=s3
MAX_FILE_SIZE_MB=50
ENABLE_CACHE=true
CACHE_TYPE=redis
```
### Enterprise (500+ users)
```bash
# High performance, full features
CONCURRENT_OCR_JOBS=16
MEMORY_LIMIT_MB=16384
STORAGE_BACKEND=s3
MAX_FILE_SIZE_MB=100
ENABLE_CACHE=true
CACHE_TYPE=redis
QUEUE_TYPE=redis
OIDC_ENABLED=true
```
## Next Steps
- [Installation Guide](installation.md) - Deploy Readur
- [User Guide](../user-guide.md) - Learn the interface
- [API Reference](../api-reference.md) - Integrate with Readur
- [Deployment Guide](../deployment.md) - Production setup

View File

@ -0,0 +1,500 @@
# Installation Guide
Deploy Readur document management system with OCR capabilities using Docker.
## Prerequisites
### System Requirements
#### Minimum Requirements
- **CPU**: 2 cores (x86_64 or ARM64)
- **RAM**: 4GB (system) + 1GB per concurrent OCR job
- **Storage**: 10GB for application + space for documents
- **OS**: Linux, macOS, or Windows with Docker support
#### Recommended for Production
- **CPU**: 4+ cores for parallel OCR processing
- **RAM**: 8GB minimum, 16GB for heavy workloads
- **Storage**: SSD for database, adequate space for document growth
- **Network**: Stable connection for source synchronization
### Software Requirements
```bash
# Check Docker version (20.10+ required)
docker --version
# Check Docker Compose version (2.0+ required)
docker-compose --version
# Verify Docker is running
docker ps
```
## Installation Methods
### Quick Start with Docker Compose (Recommended)
#### 1. Clone the Repository
```bash
# Clone the repository
git clone https://github.com/readur/readur.git
cd readur
# Review the configuration
cat docker-compose.yml
```
#### 2. Configure Environment
Create a `.env` file with your settings:
```bash
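# Security - CHANGE THESE!
# A .env file is not a shell script: Docker Compose does not evaluate $(...),
# so generate each value yourself and paste the literal result here.
#   openssl rand -base64 32   -> JWT_SECRET
#   openssl rand -hex 32      -> DB_PASSWORD (hex avoids URL-special characters in DATABASE_URL)
JWT_SECRET=paste_generated_jwt_secret_here
DB_PASSWORD=paste_generated_db_password_here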
ADMIN_PASSWORD=your_secure_password_here
# OCR Configuration
OCR_LANGUAGE=eng # or: deu, fra, spa, etc.
CONCURRENT_OCR_JOBS=2
# Storage Paths (create these directories)
UPLOAD_PATH=./data/uploads
WATCH_FOLDER=./data/watch
# Optional: S3 Storage (instead of local)
# STORAGE_BACKEND=s3
# S3_BUCKET_NAME=readur-documents
# S3_REGION=us-east-1
# AWS_ACCESS_KEY_ID=your_key
# AWS_SECRET_ACCESS_KEY=your_secret
```
#### 3. Create Required Directories
```bash
# Create data directories
mkdir -p data/{uploads,watch,postgres}
# Set appropriate permissions
chmod 755 data/uploads data/watch
```
#### 4. Start the Application
```bash
# Start all services
docker-compose up -d
# Monitor startup logs
docker-compose logs -f
# Wait for "Server started on 0.0.0.0:8000"
```
#### 5. Verify Installation
```bash
# Check service health
docker-compose ps
# Test the API endpoint
curl http://localhost:8000/health
# Expected response:
# {"status":"healthy","database":"connected","ocr":"ready"}
```
### Production Deployment with Custom Configuration
#### 1. Create Production Compose File
Create `docker-compose.prod.yml`:
```yaml
# Production stack: the Readur application and its PostgreSQL database.
services:
  readur:
    image: readur:latest
    restart: unless-stopped
    ports:
      - "8000:8000"
    # Mapping form of the environment block; values are quoted so they stay strings.
    environment:
      DATABASE_URL: "postgresql://readur:${DB_PASSWORD}@postgres:5432/readur"
      JWT_SECRET: "${JWT_SECRET}"
      SERVER_ADDRESS: "0.0.0.0:8000"
      UPLOAD_PATH: "/app/uploads"
      CONCURRENT_OCR_JOBS: "4"
      MAX_FILE_SIZE_MB: "100"
    volumes:
      - ./data/uploads:/app/uploads
      # Mounted read-only (:ro)
      - /mnt/shared/documents:/app/watch:ro
    # Start only after the postgres healthcheck below reports healthy
    depends_on:
      postgres:
        condition: service_healthy
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '2.0'

  postgres:
    image: postgres:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: "readur"
      POSTGRES_PASSWORD: "${DB_PASSWORD}"
      POSTGRES_DB: "readur"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U readur
      interval: 10s
      timeout: 5s
      retries: 5

volumes:
  postgres_data:
```
#### 2. Deploy with Production Settings
```bash
# Use production configuration
docker-compose -f docker-compose.prod.yml up -d
# Enable automatic startup
sudo systemctl enable docker
```
### Kubernetes Deployment
#### Using Helm Chart
```bash
# Add Readur Helm repository
helm repo add readur https://charts.readur.app
helm repo update
# Install with custom values
helm install readur readur/readur \
--set image.tag=latest \
--set postgresql.auth.password=$DB_PASSWORD \
--set auth.jwtSecret=$JWT_SECRET \
--set persistence.size=50Gi \
--set ingress.enabled=true \
--set ingress.hostname=readur.example.com
```
#### Using Raw Manifests
```bash
# Apply Kubernetes manifests
kubectl create namespace readur
kubectl apply -f https://raw.githubusercontent.com/readur/readur/main/k8s/
# Check deployment status
kubectl -n readur get pods
kubectl -n readur get svc
```
### Docker Run (Development Only)
For quick testing without persistence:
```bash
# Run with a throwaway SQLite file inside the container (data is lost when the container is removed)
docker run -d \
--name readur \
-p 8000:8000 \
-e DATABASE_URL=sqlite:///tmp/readur.db \
-e JWT_SECRET=dev-only-secret \
readur:latest
# Access logs
docker logs -f readur
```
## Post-Installation Setup
### Initial Login
1. **Access the Web Interface**
```
http://localhost:8000
```
2. **Login with Default Credentials**
- Username: `admin`
- Password: `readur2024`
⚠️ **Security**: Change the admin password immediately after first login
3. **Change Admin Password**
- Navigate to Settings → User Management
- Click on admin user
- Set a strong password
- Save changes
### Essential Configuration
#### 1. Configure OCR Languages
```bash
# Check available languages
docker exec readur tesseract --list-langs
# Add additional language packs if needed
docker exec readur apt-get update
docker exec readur apt-get install -y tesseract-ocr-deu # German
docker exec readur apt-get install -y tesseract-ocr-fra # French
docker exec readur apt-get install -y tesseract-ocr-spa # Spanish
```
#### 2. Set Up Document Sources
1. Navigate to Settings → Sources
2. Add your document sources:
- **Local Folders**: Mount volumes in docker-compose.yml
- **WebDAV**: Configure Nextcloud/ownCloud connections
- **S3 Buckets**: Add AWS S3 or compatible storage
#### 3. Configure User Authentication
**For Local Users:**
- Settings → User Management → Create User
- Assign appropriate roles (User or Admin)
**For SSO/OIDC:**
```bash
# Add to your .env file
OIDC_ENABLED=true
OIDC_ISSUER=https://auth.example.com
OIDC_CLIENT_ID=readur-client
OIDC_CLIENT_SECRET=your-secret
```
#### 4. Adjust Performance Settings
```bash
# Edit .env for your workload
CONCURRENT_OCR_JOBS=4 # Increase for faster processing
OCR_TIMEOUT_SECONDS=300 # Increase for large documents
MAX_FILE_SIZE_MB=100 # Adjust based on your documents
MEMORY_LIMIT_MB=2048 # Increase for better performance
```
## Verification & Health Checks
### Service Health
```bash
# Check all services are running
docker-compose ps
# Expected output:
NAME STATUS PORTS
readur running (healthy) 0.0.0.0:8000->8000/tcp
postgres running (healthy) 5432/tcp
```
### API Health Check
```bash
# Test the health endpoint
curl -s http://localhost:8000/health | jq
# Expected response:
{
"status": "healthy",
"version": "2.5.4",
"database": "connected",
"ocr_service": "ready",
"storage": "available",
"queue_size": 0
}
```
### Database Connectivity
```bash
# Test database connection
docker-compose exec postgres psql -U readur -c "SELECT version();"
# Check tables were created
docker-compose exec postgres psql -U readur -d readur -c "\dt"
```
### OCR Functionality
```bash
# Test OCR engine
docker exec readur tesseract --version
# Upload a test document
curl -X POST http://localhost:8000/api/documents/upload \
-H "Authorization: Bearer YOUR_TOKEN" \
-F "file=@test.pdf"
```
## Troubleshooting Installation
### Common Issues and Solutions
#### Port Already in Use
```bash
# Check what's using port 8000
sudo lsof -i :8000
# Solution 1: Stop the conflicting service
sudo systemctl stop conflicting-service
# Solution 2: Use a different port
# Edit docker-compose.yml:
ports:
- "8080:8000" # Change 8080 to your preferred port
```
#### Database Connection Failed
```bash
# Check PostgreSQL logs
docker-compose logs postgres
# Common fixes:
# 1. Ensure PostgreSQL is fully started
docker-compose restart postgres
sleep 10
docker-compose restart readur
# 2. Reset database (WARNING: Deletes all data)
docker-compose down -v
docker-compose up -d
```
#### OCR Processing Stuck
```bash
# Check OCR queue status
curl http://localhost:8000/api/admin/queue/status
# Restart OCR workers
docker-compose restart readur
# Increase timeout for large files
# Add to .env:
OCR_TIMEOUT_SECONDS=600
```
#### Docker Permission Denied
```bash
# Linux: Add user to docker group
sudo usermod -aG docker $USER
newgrp docker
# Verify docker access
docker ps
```
#### Insufficient Memory
```bash
# Check container memory usage
docker stats readur
# Increase memory limits in docker-compose.yml:
deploy:
resources:
limits:
memory: 4G # Increase as needed
```
### Getting Help
1. **Check Logs**
```bash
# Application logs
docker-compose logs -f readur
# Database logs
docker-compose logs -f postgres
```
2. **Enable Debug Mode**
```bash
# Add to .env
LOG_LEVEL=DEBUG
# Restart services
docker-compose restart
```
3. **Community Support**
- [GitHub Issues](https://github.com/readur/readur/issues)
- [Documentation](https://docs.readur.app)
- [Discord Community](https://discord.gg/readur)
## Next Steps
### Essential Reading
1. **[User Guide](../user-guide.md)**
- Upload and manage documents
- Configure OCR processing
- Master search features
- Organize with labels
2. **[Configuration Reference](../configuration-reference.md)**
- Complete environment variable list
- Performance tuning
- Storage configuration
- Security settings
3. **[Deployment Guide](../deployment.md)**
- SSL/TLS setup with reverse proxy
- Backup and restore procedures
- Monitoring and alerts
- Scaling strategies
### Advanced Setup
4. **[Sources Guide](../sources-guide.md)**
- WebDAV integration
- S3 bucket synchronization
- Watch folder configuration
- Automated imports
5. **[OIDC Setup](../oidc-setup.md)**
- Enterprise SSO integration
- Azure AD configuration
- Google Workspace setup
- Keycloak integration
6. **[API Reference](../api-reference.md)**
- REST API endpoints
- Authentication
- Automation examples
- Webhook integration
### Quick Test
Upload your first document:
```bash
# 1. Login to get token
TOKEN=$(curl -s -X POST http://localhost:8000/api/auth/login \
-H "Content-Type: application/json" \
-d '{"username":"admin","password":"readur2024"}' | jq -r .token)
# 2. Upload a PDF
curl -X POST http://localhost:8000/api/documents/upload \
-H "Authorization: Bearer $TOKEN" \
-F "file=@sample.pdf"
# 3. Check OCR status
curl -H "Authorization: Bearer $TOKEN" \
http://localhost:8000/api/documents
```

View File

@ -0,0 +1,252 @@
# Quick Start Guide
Get Readur running and process your first documents in under 5 minutes.
## Prerequisites
Ensure you have Docker and Docker Compose installed:
```bash
docker --version # Should be 20.10+
docker-compose --version # Should be 2.0+
```
## 5-Minute Setup
### Step 1: Get Readur
```bash
# Clone and enter the repository
git clone https://github.com/readur/readur.git
cd readur
```
### Step 2: Start Services
```bash
# Start with default configuration
docker-compose up -d
# Watch the logs (optional)
docker-compose logs -f
```
Wait about 30 seconds for services to initialize.
### Step 3: Access the Interface
Open your browser and navigate to:
```
http://localhost:8000
```
Log in with the default credentials:
- **Username**: `admin`
- **Password**: `readur2024`
### Step 4: Upload Your First Document
#### Via Web Interface
1. Click the **Upload** button in the top navigation
2. Drag and drop a PDF or image file
3. Click **Upload** to start processing
4. Wait for the OCR indicator to turn green
#### Via API (Optional)
```bash
# Get authentication token
TOKEN=$(curl -s -X POST http://localhost:8000/api/auth/login \
-H "Content-Type: application/json" \
-d '{"username":"admin","password":"readur2024"}' | jq -r .token)
# Upload a document
curl -X POST http://localhost:8000/api/documents/upload \
-H "Authorization: Bearer $TOKEN" \
-F "file=@your-document.pdf"
```
### Step 5: Search Your Documents
Once OCR processing completes (green indicator):
1. Use the **Search** bar at the top
2. Enter any text from your document
3. Press Enter to see results
4. Click on a result to view the document
## Common First Tasks
### Change Admin Password
**Important**: Do this immediately after installation.
1. Navigate to **Settings** → **User Management**
2. Click on the admin user
3. Enter a new secure password
4. Click **Save**
### Add Your First Source
Automatically import documents from external storage:
1. Go to **Settings** → **Sources**
2. Click **Add Source**
3. Choose your source type:
- **Local Folder**: For directories on the server
- **WebDAV**: For Nextcloud/ownCloud
- **S3**: For cloud storage
4. Configure connection details
5. Test and save
### Create Document Labels
Organize your documents with labels:
1. Navigate to **Settings** → **Labels**
2. Click **Create Label**
3. Enter a name and choose a color
4. Save the label
5. Apply to documents via:
- Document details page
- Bulk selection
- During upload
### Set Up Watch Folder
Monitor a directory for automatic document import:
```bash
# Create a watch directory
mkdir -p ./data/watch
# Add to docker-compose.yml volumes:
volumes:
- ./data/watch:/app/watch
# Restart Readur
docker-compose restart readur
```
Drop files into `./data/watch` - they'll be automatically imported.
## Essential Keyboard Shortcuts
| Shortcut | Action |
|----------|--------|
| `/` or `Ctrl+K` | Focus search bar |
| `Ctrl+U` | Open upload dialog |
| `Esc` | Close dialogs |
| `G then D` | Go to documents |
| `G then S` | Go to settings |
## Sample Workflow
### Legal Document Management
```text
# 1. Create label structure
Labels: "Contracts", "Invoices", "Legal", "2024"
# 2. Set up source folder
Source: /shared/legal-docs (WebDAV)
Sync: Every 30 minutes
# 3. Configure OCR
Language: English
Quality: High
Concurrent Jobs: 4
# 4. Upload initial batch
Select all PDFs → Upload → Apply "2024" label
# 5. Create saved search
Search: label:Contracts AND date:2024
Save as: "2024 Contracts"
```
### Research Paper Archive
```text
# 1. Configure for academic documents
OCR Language: Multiple (eng+deu+fra)
Max File Size: 100MB
# 2. Create categories
Labels: "Published", "Draft", "Review", "Citations"
# 3. Set up automated import
Watch Folder: /research/papers
Process: Auto-OCR and label by folder
# 4. Advanced search setup
Boolean search: enabled
Fuzzy matching: 2 (for OCR errors)
```
## Performance Tips
### For Faster OCR Processing
```bash
# Increase concurrent jobs (if you have spare CPU cores)
CONCURRENT_OCR_JOBS=8
# Optimize for your document types
OCR_LANGUAGE=eng # Single language is faster
ENABLE_PREPROCESSING=false # Skip if documents are clean
```
### For Large Document Collections
```bash
# Use S3 storage instead of local
S3_ENABLED=true
S3_BUCKET_NAME=readur-docs
# Increase memory limits
MEMORY_LIMIT_MB=4096
# Enable compression
ENABLE_COMPRESSION=true
```
## Troubleshooting Quick Fixes
### OCR Not Starting
```bash
# Check the queue
curl http://localhost:8000/api/admin/queue/status
# Restart OCR workers
docker-compose restart readur
```
### Can't Login
```bash
# Reset to default password
docker exec readur python reset_admin_password.py
```
### Slow Search
```bash
# Rebuild search index
docker exec readur python rebuild_index.py
```
## Next Steps
Now that you have Readur running:
1. **[Configure OCR](../multi-language-ocr-guide.md)** for your language
2. **[Set up Sources](../sources-guide.md)** for automated import
3. **[Create Labels](../labels-and-organization.md)** for organization
4. **[Learn Advanced Search](../advanced-search.md)** techniques
5. **[Configure Backups](../deployment.md#backup-strategy)** for data safety
## Getting Help
- **Documentation**: [Full User Guide](../user-guide.md)
- **API Reference**: [REST API Docs](../api-reference.md)
- **Community**: [GitHub Discussions](https://github.com/readur/readur/discussions)
- **Issues**: [Report Bugs](https://github.com/readur/readur/issues)

192
docs/guide/overview.md Normal file
View File

@ -0,0 +1,192 @@
# Readur User Guide Overview
Welcome to the comprehensive guide for using Readur's document management system. This guide covers everything from basic operations to advanced features.
## Guide Structure
### Getting Started
- **[Installation](../getting-started/installation.md)** - Deploy Readur with Docker
- **[Quick Start](../getting-started/quickstart.md)** - 5-minute setup guide
- **[Configuration](../getting-started/configuration.md)** - Customize your deployment
### Core Features
- **[Document Management](../user-guide.md#document-management)** - Upload, organize, and manage documents
- **[OCR Processing](../user-guide.md#ocr-processing)** - Extract text from scanned documents
- **[Search & Discovery](../user-guide.md#search-features)** - Find information quickly
- **[Labels & Organization](../labels-and-organization.md)** - Categorize and structure content
### Advanced Features
- **[Sources & Sync](../sources-guide.md)** - Automated document import
- **[Advanced Search](../advanced-search.md)** - Complex queries and filters
- **[User Management](../user-management-guide.md)** - Roles and permissions
- **[API Integration](../api-reference.md)** - Programmatic access
### Administration
- **[Deployment](../deployment.md)** - Production setup and scaling
- **[Monitoring](../health-monitoring-guide.md)** - System health and metrics
- **[Backup & Recovery](../deployment.md#backup-strategy)** - Data protection
- **[Migration](../migration-guide.md)** - Upgrades and data migration
## Quick Navigation
### By User Type
#### Document Users
Start here if you need to:
- Upload and organize documents
- Search for specific content
- Export and share documents
**Key Guides:**
1. [User Guide](../user-guide.md)
2. [Search Features](../advanced-search.md)
3. [Labels Guide](../labels-and-organization.md)
#### System Administrators
Start here if you need to:
- Deploy and configure Readur
- Manage users and permissions
- Monitor system health
- Set up integrations
**Key Guides:**
1. [Installation](../getting-started/installation.md)
2. [Configuration](../configuration-reference.md)
3. [User Management](../user-management-guide.md)
4. [Deployment](../deployment.md)
#### Developers
Start here if you need to:
- Integrate with the API
- Customize Readur
- Contribute to development
**Key Guides:**
1. [API Reference](../api-reference.md)
2. [Development Setup](../dev/development.md)
3. [Architecture](../dev/architecture.md)
### By Task
#### Initial Setup
1. [Install Readur](../getting-started/installation.md)
2. [Configure OCR languages](../multi-language-ocr-guide.md)
3. [Set up authentication](../oidc-setup.md)
4. [Create users](../user-management-guide.md)
#### Document Processing
1. [Upload documents](../file-upload-guide.md)
2. [Configure OCR](../user-guide.md#ocr-processing)
3. [Monitor processing](../user-guide.md#ocr-status-indicators)
4. [Troubleshoot OCR](../dev/OCR_OPTIMIZATION_GUIDE.md)
#### Search & Organization
1. [Basic search](../user-guide.md#search-features)
2. [Advanced search syntax](../advanced-search.md)
3. [Create labels](../labels-and-organization.md)
4. [Save searches](../user-guide.md#smart-collections)
#### Integration & Automation
1. [Set up sources](../sources-guide.md)
2. [Configure watch folders](../WATCH_FOLDER.md)
3. [Use the API](../api-reference.md)
4. [Automate workflows](../api-reference.md#automation-examples)
## Feature Highlights
### Document Intelligence
- **OCR in 100+ Languages**: Process documents in virtually any language
- **Format Support**: PDF, images, Office documents, and text files
- **Batch Processing**: Handle thousands of documents efficiently
- **Quality Enhancement**: Automatic rotation, deskewing, and preprocessing
### Search Capabilities
- **Full-Text Search**: Search within document content
- **Boolean Logic**: Complex queries with AND, OR, NOT
- **Fuzzy Matching**: Handle OCR errors and typos
- **Filters**: By date, type, size, labels, and more
### Organization Tools
- **Flexible Labels**: Create custom categorization systems
- **Bulk Operations**: Apply changes to multiple documents
- **Smart Collections**: Saved searches that update automatically
- **Multiple Views**: List and grid layouts
### Integration Options
- **REST API**: Complete programmatic access
- **Source Sync**: WebDAV, S3, local folders
- **SSO/OIDC**: Enterprise authentication
- **Webhooks**: Event-driven automation
## Best Practices
### Document Organization
1. **Consistent Naming**: Use descriptive, standardized file names
2. **Label Strategy**: Create a hierarchical label structure
3. **Regular Cleanup**: Archive or remove outdated documents
4. **Folder Structure**: Organize source folders logically
### Performance Optimization
1. **OCR Settings**: Balance quality vs. speed for your needs
2. **Concurrent Jobs**: Match to available CPU cores
3. **Storage Backend**: Use S3 for large collections
4. **Search Indexing**: Schedule reindexing during off-hours
### Security
1. **Change Defaults**: Always change default passwords
2. **Enable HTTPS**: Use SSL/TLS in production
3. **Regular Backups**: Automate database backups
4. **Access Control**: Use roles and permissions appropriately
### Workflow Efficiency
1. **Bulk Upload**: Process similar documents together
2. **Automation**: Set up sources for automatic import
3. **Saved Searches**: Create shortcuts for common queries
4. **Keyboard Shortcuts**: Learn shortcuts for faster navigation
## Troubleshooting Resources
### Common Issues
- [OCR not starting](../user-guide.md#common-issues)
- [Search not finding documents](../advanced-search.md#troubleshooting)
- [Slow performance](../dev/OCR_OPTIMIZATION_GUIDE.md)
- [Upload failures](../file-upload-guide.md#troubleshooting)
### Getting Help
- **Documentation Search**: Use the search bar above
- **GitHub Issues**: [Report bugs](https://github.com/readur/readur/issues)
- **Community Forum**: [Ask questions](https://github.com/readur/readur/discussions)
- **System Logs**: Check logs for detailed error information
## Version Information
This documentation covers Readur version 2.5.4 and later. Key features in recent versions:
### Version 2.5.4
- S3 storage backend support
- Enhanced source synchronization
- Per-user watch directories
- Improved health monitoring
### Version 2.5.0
- OIDC/SSO authentication
- Advanced search operators
- Bulk operations
- Performance improvements
## Next Steps
### New Users
1. Start with the [Quick Start Guide](../getting-started/quickstart.md)
2. Read the [User Guide](../user-guide.md)
3. Learn about [Search Features](../advanced-search.md)
### Administrators
1. Review [Configuration Options](../configuration-reference.md)
2. Set up [Monitoring](../health-monitoring-guide.md)
3. Plan [Backup Strategy](../deployment.md#backup-strategy)
### Advanced Users
1. Explore [API Integration](../api-reference.md)
2. Configure [Sources](../sources-guide.md)
3. Optimize [OCR Performance](../dev/OCR_OPTIMIZATION_GUIDE.md)

136
docs/index.md Normal file
View File

@ -0,0 +1,136 @@
# Readur Documentation
## Intelligent Document Management with OCR
Readur is a powerful document management system that transforms your unstructured documents into a searchable, organized knowledge base. Built for teams and individuals who need to efficiently manage, search, and access large document collections.
## Key Capabilities
<div class="grid cards" markdown>
- **[Getting Started](getting-started/installation.md)**
Deploy Readur with Docker in minutes - single command setup
- **[User Guide](user-guide.md)**
Master document management, OCR processing, and search features
- **[API Reference](api-reference.md)**
Automate workflows with REST API integration
- **[Deployment Guide](deployment.md)**
Production deployment with SSL, monitoring, and scaling
</div>
## Core Features
### Document Processing
- **Automatic OCR**: Extract text from scanned PDFs and images in 100+ languages
- **Multiple Formats**: Support for PDF, PNG, JPG, TIFF, TXT, and Office documents
- **Batch Processing**: Upload and process hundreds of documents simultaneously
- **Smart Queue**: Priority-based OCR processing with configurable concurrency
### Search & Discovery
- **Full-Text Search**: Find content within documents instantly
- **Advanced Filtering**: Search by date, type, size, labels, and OCR status
- **Boolean Operators**: Complex queries with AND, OR, NOT logic
- **Fuzzy Matching**: Handle typos and OCR errors automatically
### Organization
- **Flexible Labels**: Create custom taxonomies for document categorization
- **Bulk Operations**: Apply changes to multiple documents at once
- **Smart Collections**: Saved searches that update automatically
- **Multiple Views**: List and grid layouts with customizable sorting
### Integration & Automation
- **Source Synchronization**: Auto-import from WebDAV, S3, and local folders
- **REST API**: Complete programmatic access for automation
- **Watch Folders**: Monitor directories for automatic document ingestion
- **SSO Support**: Enterprise authentication with OIDC/OAuth2
## Quick Start Example
```bash
# 1. Clone the repository
git clone https://github.com/readur/readur.git
cd readur
# 2. Start with Docker Compose
docker-compose up -d
# 3. Access the interface
open http://localhost:8000
# Default credentials: admin / readur2024
```
## Common Use Cases
### Digital Archives
Convert paper documents into searchable digital archives. Perfect for:
- Legal firms managing contracts and case files
- Medical practices digitizing patient records
- Government agencies preserving historical documents
- Libraries and research institutions
### Business Document Management
Streamline document workflows and compliance:
- Invoice and receipt processing
- Contract management and search
- Policy and procedure documentation
- Compliance document tracking
### Personal Knowledge Base
Organize personal documents and research:
- Academic papers and research notes
- Tax documents and financial records
- Technical documentation and manuals
- Personal correspondence and archives
## System Architecture
```mermaid
graph LR
A[Document Upload] --> B[OCR Queue]
B --> C[Text Extraction]
C --> D[PostgreSQL Database]
D --> E[Full-Text Search]
F[Source Sync] --> B
G[Watch Folders] --> B
H[API] --> D
```
## Performance & Scalability
- **Concurrent Processing**: Handle multiple OCR jobs in parallel
- **Efficient Storage**: S3-compatible backend for unlimited scaling
- **PostgreSQL**: Enterprise-grade database with full-text search
- **Docker Native**: Container-based architecture for easy deployment
- **Resource Management**: Configurable memory and CPU limits
## Getting Help
### Documentation
- [User Guide](user-guide.md) - Complete feature documentation
- [Configuration Reference](configuration-reference.md) - All environment variables
- [S3 Troubleshooting](s3-troubleshooting.md) - Common S3 storage issues and solutions
- [Migration Guide](migration-guide.md) - Upgrade and migration procedures
### Community & Support
- [GitHub Issues](https://github.com/readur/readur/issues) - Bug reports and feature requests
- [GitHub Discussions](https://github.com/readur/readur/discussions) - Community help
- [Developer Documentation](dev/README.md) - Architecture and development setup
## Latest Updates
### Version 2.5.4
- S3 storage backend support for unlimited scaling
- Enhanced source synchronization with health monitoring
- Improved OCR performance and accuracy
- OIDC/SSO authentication support
- Advanced search with boolean operators
---
!!! tip "Production Ready"
Readur is actively used in production environments processing millions of documents. See our [Deployment Guide](deployment.md) for best practices.

97
docs/javascripts/extra.js Normal file
View File

@ -0,0 +1,97 @@
// Custom JavaScript for Readur documentation
// Page enhancements applied once the initial document has been parsed:
//   1. a "Copy" button on every <pre><code> block,
//   2. smooth scrolling for in-page anchor links,
//   3. new-tab behaviour and a marker class for external links,
//   4. an analytics page view when gtag is available.
// NOTE(review): mkdocs.yml enables `navigation.instant`; presumably the theme then
// swaps page content without firing DOMContentLoaded again, so these enhancements
// may only apply to the first page loaded — confirm against the theme docs.
document.addEventListener('DOMContentLoaded', function() {
    // Initialize copy buttons for code blocks (if not already handled by theme)
    const codeBlocks = document.querySelectorAll('pre > code');
    codeBlocks.forEach(function(codeBlock) {
        // Check if copy button already exists
        if (codeBlock.parentElement.querySelector('.copy-button')) {
            return;
        }

        const button = document.createElement('button');
        button.className = 'copy-button';
        button.textContent = 'Copy';
        button.setAttribute('aria-label', 'Copy code to clipboard');

        button.addEventListener('click', function() {
            const code = codeBlock.textContent;
            navigator.clipboard.writeText(code).then(function() {
                // Brief visual confirmation, then restore the label
                button.textContent = 'Copied!';
                setTimeout(function() {
                    button.textContent = 'Copy';
                }, 2000);
            }).catch(function(err) {
                console.error('Failed to copy code: ', err);
            });
        });

        // The <pre> becomes the positioning context for the button
        codeBlock.parentElement.style.position = 'relative';
        codeBlock.parentElement.appendChild(button);
    });

    // Smooth scroll for anchor links
    document.querySelectorAll('a[href^="#"]').forEach(anchor => {
        anchor.addEventListener('click', function(e) {
            const href = this.getAttribute('href');
            if (href === '#' || href === '#!') {
                return;
            }

            // Look the target up by id instead of querySelector(href): fragments
            // such as "#5-minute-setup" (leading digit) or ones containing dots
            // are not valid CSS selectors and would make querySelector throw.
            let targetId = href.slice(1);
            try {
                targetId = decodeURIComponent(targetId);
            } catch (err) {
                // Malformed percent-escape: fall back to the raw fragment
            }

            const target = document.getElementById(targetId);
            if (!target) {
                // Nothing to scroll to: leave the click to the browser
                return;
            }

            e.preventDefault();
            target.scrollIntoView({
                behavior: 'smooth',
                block: 'start'
            });

            // Cancelling the default also suppresses the URL update, so record the
            // fragment explicitly to keep permalinks and the back button working.
            if (window.history && typeof window.history.pushState === 'function') {
                window.history.pushState(null, '', href);
            }
        });
    });

    // Add external link indicators
    const externalLinks = document.querySelectorAll('a[href^="http"]:not([href*="readur.app"])');
    externalLinks.forEach(link => {
        link.setAttribute('target', '_blank');
        // noopener/noreferrer: the opened page gets no window.opener handle
        link.setAttribute('rel', 'noopener noreferrer');
        link.classList.add('external-link');
    });

    // Track documentation page views (if analytics enabled)
    if (typeof gtag !== 'undefined') {
        gtag('event', 'page_view', {
            page_title: document.title,
            page_location: window.location.href,
            page_path: window.location.pathname
        });
    }
});
// Keyboard shortcuts for the documentation search field:
//   Ctrl/Cmd + K  -> focus the search input
//   Escape        -> blur the search input when it has focus
document.addEventListener('keydown', (event) => {
    const findSearchField = () => document.querySelector('.md-search__input');
    const hasModifier = event.ctrlKey || event.metaKey;

    if (hasModifier && event.key === 'k') {
        // Always take over the browser's own Ctrl/Cmd+K binding
        event.preventDefault();
        const field = findSearchField();
        if (field) {
            field.focus();
        }
    }

    if (event.key !== 'Escape') {
        return;
    }

    const field = findSearchField();
    if (field && field === document.activeElement) {
        field.blur();
    }
});
// Custom console message
// Printed once when the script loads. The first call uses the `%c` directive so
// the second argument is applied as CSS to the greeting; the second call points
// readers at the issue tracker.
console.log(
'%c Welcome to Readur Documentation! ',
'background: #4051b5; color: white; padding: 5px 10px; border-radius: 3px;'
);
console.log(
'Found an issue? Report it at https://github.com/readur/readur/issues'
);

View File

@ -573,7 +573,7 @@ Sources are continuously monitored and assigned health scores (0-100):
## Next Steps ## Next Steps
- Configure [notifications](notifications.md) for sync events - Configure [notifications](notifications-guide.md) for sync events
- Set up [advanced search](advanced-search.md) to find synced documents - Set up [advanced search](advanced-search.md) to find synced documents
- Review [OCR optimization](dev/OCR_OPTIMIZATION_GUIDE.md) for processing improvements - Review [OCR optimization](dev/OCR_OPTIMIZATION_GUIDE.md) for processing improvements
- Explore [labels and organization](labels-and-organization.md) for document management - Explore [labels and organization](labels-and-organization.md) for document management

119
docs/stylesheets/extra.css Normal file
View File

@ -0,0 +1,119 @@
/* Custom styles for Readur documentation */
/* Brand colors */
:root {
    --readur-primary: #4051b5;
    --readur-accent: #526cfe;
}

/* Grid cards for homepage: as many columns as fit, each at least 250px wide */
.grid.cards {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
    gap: 1rem;
    margin: 2rem 0;
}

.grid.cards > * {
    padding: 1rem;
    border: 1px solid var(--md-default-fg-color--lightest);
    border-radius: 0.5rem;
    /* Animate only what the hover rule changes instead of `all` */
    transition: box-shadow 0.3s ease, transform 0.3s ease;
}

/* Hover: lift the card slightly and add a soft shadow */
.grid.cards > *:hover {
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    transform: translateY(-2px);
}

/* Honour the reduced-motion preference: no animation, no lift */
@media (prefers-reduced-motion: reduce) {
    .grid.cards > * {
        transition: none;
    }

    .grid.cards > *:hover {
        transform: none;
    }
}
/* Custom admonitions: green accent for "example" call-outs,
   in both the static (.admonition) and collapsible (details) forms */
.md-typeset .admonition.example,
.md-typeset details.example {
    border-color: #2b9b46;
}

/* Title bar: tinted background with a matching border */
.md-typeset .example > .admonition-title,
.md-typeset .example > summary {
    border-color: #2b9b46;
    background-color: rgba(43, 155, 70, .1);
}

/* Title icon: solid green painted through the theme's example icon mask */
.md-typeset .example > .admonition-title::before,
.md-typeset .example > summary::before {
    -webkit-mask-image: var(--md-admonition-icon--example);
    mask-image: var(--md-admonition-icon--example);
    background-color: #2b9b46;
}
/* Code blocks: slightly smaller text */
.md-typeset pre > code {
    font-size: .85rem;
}

/* Tables without an explicit class: smaller text ... */
.md-typeset table:not([class]) {
    font-size: .9rem;
}

/* ... and header cells in the theme's primary colours */
.md-typeset table:not([class]) th {
    color: var(--md-primary-bg-color);
    background-color: var(--md-primary-fg-color);
}

/* Images never overflow their container and keep their aspect ratio */
.md-typeset img {
    height: auto;
    max-width: 100%;
}
/* Custom badges: small inline pill; colour comes from a modifier class */
.badge {
    /* box */
    display: inline-block;
    padding: .25em .5em;
    border-radius: .25rem;
    vertical-align: baseline;

    /* text */
    font-size: .75rem;
    font-weight: 600;
    line-height: 1;
    text-align: center;
    white-space: nowrap;
}

/* Modifier: new feature (green) */
.badge-new {
    color: white;
    background-color: #28a745;
}

/* Modifier: beta feature (amber, dark text) */
.badge-beta {
    color: #333;
    background-color: #ffc107;
}

/* Modifier: deprecated feature (red) */
.badge-deprecated {
    color: white;
    background-color: #dc3545;
}
/* Search results: lighter, smaller meta line */
.md-search-result__meta {
    font-size: .75rem;
    color: var(--md-default-fg-color--light);
}

/* Footer meta row: items spread across the row, vertically centred */
.md-footer-meta__inner {
    display: flex;
    align-items: center;
    justify-content: space-between;
}

/* Announcement bar: brand accent background with white text */
.md-banner {
    color: white;
    background-color: var(--readur-accent);
}

/* Links inside the bar stay white, so underline them to remain recognisable */
.md-banner a {
    text-decoration: underline;
    color: white;
}

250
mkdocs.yml Normal file
View File

@ -0,0 +1,250 @@
# MkDocs configuration for readur.app documentation

# Site identity
site_name: 'Readur Documentation'
site_url: 'https://readur.app'
site_description: >-
  Intelligent Document Management System with OCR - Transform your documents
  into a searchable knowledge base
site_author: 'Readur Team'

# Repository information (optional, adds edit links)
repo_name: 'readur/readur'
repo_url: 'https://github.com/readur/readur'
edit_uri: 'edit/main/docs/'

# Copyright
copyright: 'Copyright &copy; 2025 Readur'
# Theme configuration
theme:
  name: material

  # Color scheme: one palette per OS preference, each with a toggle to the other
  palette:
    # Light mode
    - media: "(prefers-color-scheme: light)"
      scheme: default
      primary: indigo
      accent: indigo
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode
    # Dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      primary: indigo
      accent: indigo
      toggle:
        icon: material/brightness-4
        name: Switch to light mode

  # Font configuration
  font:
    text: Roboto
    code: Roboto Mono

  # Features
  # `navigation.indexes` is deliberately not enabled: Material documents it as
  # incompatible with `toc.integrate` (enabled below), and the nav in this file
  # declares no section index pages that would need it.
  features:
    - announce.dismiss
    - content.action.edit
    - content.action.view
    - content.code.annotate
    - content.code.copy
    - content.tooltips
    - navigation.footer
    - navigation.instant
    - navigation.instant.prefetch
    - navigation.instant.progress
    - navigation.path
    - navigation.prune
    - navigation.sections
    - navigation.tabs
    - navigation.tabs.sticky
    - navigation.top
    - navigation.tracking
    - search.highlight
    - search.share
    - search.suggest
    - toc.follow
    - toc.integrate

  # Icons
  icon:
    logo: material/book-open-page-variant
    repo: fontawesome/brands/github
# Plugins
plugins:
  # Built-in search; the separator regex controls how text is split into tokens
  - search:
      separator: '[\s\-,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
      lang:
        - en

  # Minify the generated HTML plus the custom JS/CSS assets
  - minify:
      minify_html: true
      minify_js: true
      minify_css: true
      htmlmin_opts:
        remove_comments: true
      # minify_js / minify_css only process files listed explicitly; without
      # these lists the two flags above have no effect.
      js_files:
        - javascripts/extra.js
      css_files:
        - stylesheets/extra.css

  # Per-page created/updated dates taken from git history
  - git-revision-date-localized:
      enable_creation_date: true
      type: iso_datetime
      fallback_to_build_date: true
# Extensions
markdown_extensions:
  # Python Markdown
  - abbr
  - admonition
  - attr_list
  - def_list
  - footnotes
  - md_in_html
  - toc:
      permalink: true
      permalink_title: 'Anchor link to this section for reference'

  # Python Markdown Extensions
  - pymdownx.arithmatex:
      generic: true
  - pymdownx.betterem:
      smart_enable: all
  - pymdownx.caret
  - pymdownx.details
  # The !!python/name tags below resolve to Python callables when the config loads
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - pymdownx.highlight:
      anchor_linenums: true
      line_spans: __span
      pygments_lang_class: true
  - pymdownx.inlinehilite
  - pymdownx.keys
  - pymdownx.mark
  - pymdownx.smartsymbols
  - pymdownx.snippets
  # Custom fence so ```mermaid blocks are emitted with class "mermaid"
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_code_format
  - pymdownx.tabbed:
      alternate_style: true
      combine_header_slug: true
  - pymdownx.tasklist:
      custom_checkbox: true
  - pymdownx.tilde
# Extra CSS and JavaScript (paths are relative to the docs directory)
extra_css:
  - stylesheets/extra.css

extra_javascript:
  - javascripts/extra.js
  # MathJax for mathematical notation (optional)
  # NOTE(review): javascripts/mathjax.js is not among the files visible in this
  # change — confirm it exists under docs/javascripts/.
  - javascripts/mathjax.js
  - 'https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js'
# Extra configuration
extra:
  # Social links
  social:
    - icon: fontawesome/brands/github
      link: 'https://github.com/readur/readur'
    - icon: fontawesome/brands/twitter
      link: 'https://twitter.com/readur'
    - icon: fontawesome/brands/discord
      link: 'https://discord.gg/readur'

  # Analytics (optional)
  # NOTE(review): the property below is still a placeholder — confirm a real
  # measurement ID is supplied before relying on analytics or feedback data.
  analytics:
    provider: google
    property: G-XXXXXXXXXX  # Replace with your Google Analytics ID
    # "Was this page helpful?" widget; `data` is the value recorded per rating
    feedback:
      title: 'Was this page helpful?'
      ratings:
        - icon: material/emoticon-happy-outline
          name: 'This page was helpful'
          data: 1
          note: 'Thanks for your feedback!'
        - icon: material/emoticon-sad-outline
          name: 'This page could be improved'
          data: 0
          note: >-
            Thanks for your feedback! Help us improve this page by
            <a href="https://github.com/readur/readur/issues/new/?title=[Feedback]+{title}+-+{url}" target="_blank" rel="noopener">opening an issue</a>.

  # Versioning (optional)
  # NOTE(review): presumably requires the site to be published with mike —
  # confirm the deploy workflow does so.
  version:
    provider: mike
    default: stable

  # Cookie consent (optional)
  consent:
    title: 'Cookie consent'
    description: >-
      We use cookies to recognize your repeated visits and preferences,
      as well as to measure the effectiveness of our documentation and
      whether users find what they're searching for. With your consent,
      you're helping us to make our documentation better.
# Navigation structure
# Maps navigation titles to Markdown files; paths are relative to the docs
# directory (e.g. docs/index.md is referenced as index.md).
nav:
- Home: index.md
- Getting Started:
- Installation Guide: getting-started/installation.md
- Quick Start (5 min): getting-started/quickstart.md
- Configuration: getting-started/configuration.md
- User Guide:
- Overview: guide/overview.md
- Complete Guide: user-guide.md
- Labels & Organization: labels-and-organization.md
- Features:
- Advanced Search: advanced-search.md
- Sources & Sync: sources-guide.md
- File Upload: file-upload-guide.md
- Multi-Language OCR: multi-language-ocr-guide.md
- Analytics Dashboard: analytics-dashboard-guide.md
- Notifications: notifications-guide.md
- Administration:
- Deployment: deployment.md
- User Management: user-management-guide.md
- OIDC/SSO Setup: oidc-setup.md
- Health Monitoring: health-monitoring-guide.md
- Storage Migration: administration/storage-migration.md
- CLI Tools: administration/cli-tools.md
- Storage & Backend:
- S3 Storage Guide: s3-storage-guide.md
- S3 Troubleshooting: s3-troubleshooting.md
- Watch Folders: WATCH_FOLDER.md
- Per-User Directories: per-user-watch-directories.md
- Migration Guide: migration-guide.md
- API & Integration:
- API Reference: api-reference.md
- Swagger UI: swagger-ui-guide.md
- Reverse Proxy: REVERSE_PROXY.md
- Configuration Reference: configuration-reference.md
- Development:
- Developer Guide: dev/README.md
- Architecture: dev/architecture.md
- Development Setup: dev/development.md
- Testing: dev/TESTING.md
- E2E Testing: dev/README-E2E.md
- Test Infrastructure: dev/test-infrastructure.md
- Optimization:
- OCR Optimization: dev/OCR_OPTIMIZATION_GUIDE.md
- Queue Architecture: dev/QUEUE_IMPROVEMENTS.md
- Database Guardrails: dev/DATABASE_GUARDRAILS.md
- Error System: dev/ERROR_SYSTEM.md
- Label Testing: dev/LABEL_TESTING.md
- Troubleshooting:
- Migration Issues: administration/migration-troubleshooting.md
# NOTE(review): s3-troubleshooting.md is also listed above as "S3 Troubleshooting";
# confirm the duplicate nav entry is intentional.
- S3 Issues: s3-troubleshooting.md

20
requirements.txt Normal file
View File

@ -0,0 +1,20 @@
# MkDocs and Material theme requirements
mkdocs>=1.6.0
mkdocs-material>=9.5.0
mkdocs-material-extensions>=1.3.0
# Essential plugins
mkdocs-minify-plugin>=0.8.0
mkdocs-git-revision-date-localized-plugin>=1.2.0
# Optional but recommended plugins
mkdocs-redirects>=1.2.0
mkdocs-rss-plugin>=1.12.0
mkdocs-glightbox>=0.3.0
# For advanced features
pillow>=10.0.0 # For social cards
cairosvg>=2.7.0 # For social cards
# Image processing support for social cards (the imaging extra pulls in Pillow and CairoSVG)
mkdocs-material[imaging]>=9.5.0