{error}
- - )} - > - )} - +{error}
+diff --git a/.env.example b/.env.example index 4e21839..5f0300d 100644 --- a/.env.example +++ b/.env.example @@ -17,6 +17,15 @@ CORS_ORIGINS=http://localhost:3000 # Upload Configuration MAX_UPLOAD_SIZE_MB=100 +# PostgreSQL Configuration +POSTGRES_USER=ocr_user +POSTGRES_PASSWORD=ocr_password +POSTGRES_DB=ocr_db +DATABASE_URL=postgresql://ocr_user:ocr_password@postgres:5432/ocr_db + +# OCR Image Storage (host path mounted into container) +OCR_IMAGES_DIR=/data/ocr_images + # Processing Configuration BASE_SIZE=1024 IMAGE_SIZE=640 diff --git a/backend/database.py b/backend/database.py new file mode 100644 index 0000000..e04068b --- /dev/null +++ b/backend/database.py @@ -0,0 +1,71 @@ +import os +import psycopg2 +import psycopg2.extras +from contextlib import contextmanager +from decouple import config as env_config + +DATABASE_URL = env_config( + "DATABASE_URL", + default="postgresql://ocr_user:ocr_password@postgres:5432/ocr_db" +) + + +def _get_conn(): + return psycopg2.connect(DATABASE_URL, cursor_factory=psycopg2.extras.RealDictCursor) + + +def init_db(): + """Create tables if they don't exist. Called once at startup.""" + conn = None + try: + conn = _get_conn() + with conn.cursor() as cur: + cur.execute(""" + CREATE TABLE IF NOT EXISTS ocr_jobs ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + author TEXT, + book TEXT, + chapter TEXT, + page TEXT, + submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + image_path TEXT NOT NULL, + original_filename TEXT, + ocr_text TEXT, + status TEXT NOT NULL DEFAULT 'unreviewed', + reviewed_text TEXT, + reviewer_name TEXT, + reviewed_at TIMESTAMPTZ, + mode TEXT + ) + """) + # Index for fast full-text-style searches on common fields + cur.execute(""" + CREATE INDEX IF NOT EXISTS ocr_jobs_status_idx ON ocr_jobs(status) + """) + cur.execute(""" + CREATE INDEX IF NOT EXISTS ocr_jobs_submitted_at_idx ON ocr_jobs(submitted_at DESC) + """) + conn.commit() + print("Database initialized.") + except Exception as exc: + print(f"Database init failed: {exc}") + if conn: + conn.rollback() + raise + finally: + if conn: + conn.close() + + +@contextmanager +def get_db(): + """Yield a connection and auto-commit/rollback.""" + conn = _get_conn() + try: + yield conn + conn.commit() + except Exception: + conn.rollback() + raise + finally: + conn.close() diff --git a/backend/main.py b/backend/main.py index 7283956..59b4fe8 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,14 +1,17 @@ import os import re +import uuid import tempfile import shutil import base64 from typing import List, Dict, Any, Optional from contextlib import asynccontextmanager +from datetime import datetime, timezone -from fastapi import FastAPI, File, UploadFile, Form, HTTPException +from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Query from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.responses import JSONResponse, StreamingResponse, FileResponse +from pydantic import BaseModel import torch from transformers import AutoModel, AutoTokenizer from PIL import Image @@ -24,6 +27,9 @@ from pdf_utils import ( clean_markdown_content ) from format_converter import DocumentConverter +from database import init_db, get_db + +OCR_IMAGES_DIR = env_config("OCR_IMAGES_DIR", default="/data/ocr_images") # ----------------------------- # Lifespan context for model loading @@ -36,6 +42,15 @@ async def lifespan(app: FastAPI): """Load model on startup, cleanup on shutdown""" global model, tokenizer + # Image storage directory + os.makedirs(OCR_IMAGES_DIR, exist_ok=True) + + # Database + try: + init_db() + except Exception as exc: + print(f"Warning: database initialization failed: {exc}") + # Environment setup os.environ.pop("TRANSFORMERS_CACHE", None) MODEL_NAME = env_config("MODEL_NAME", default="deepseek-ai/DeepSeek-OCR") @@ -581,6 +596,238 @@ async def process_pdf( print(traceback.format_exc()) raise HTTPException(status_code=500, detail="An internal error occurred during PDF processing.") +# ----------------------------- +# Job management routes +# ----------------------------- + +class ReviewRequest(BaseModel): + reviewed_text: str + reviewer_name: str + + +def _job_row_to_dict(row) -> Dict[str, Any]: + """Convert a DB row (RealDictRow) to a plain dict with serialisable values.""" + d = dict(row) + for key, val in d.items(): + if isinstance(val, datetime): + d[key] = val.isoformat() + elif val is not None and hasattr(val, '__str__') and type(val).__name__ == 'UUID': + d[key] = str(val) + return d + + +@app.post("/api/jobs") +async def commit_job( + image: UploadFile = File(...), + author: str = Form(""), + book: str = Form(""), + chapter: str = Form(""), + page: str = Form(""), + ocr_text: str = Form(""), + mode: str = Form("plain_ocr"), +): + """Commit an OCR job: save the image and insert a DB record.""" + job_id = str(uuid.uuid4()) + + # Determine file extension from original filename or content type + original_filename = image.filename or "image" + ext = os.path.splitext(original_filename)[1].lower() + if not ext: + ct = (image.content_type or "").lower() + ext_map = { + "image/png": ".png", "image/jpeg": ".jpg", "image/jpg": ".jpg", + "image/webp": ".webp", "image/gif": ".gif", "image/bmp": ".bmp", + } + ext = ext_map.get(ct, ".png") + + image_path = os.path.join(OCR_IMAGES_DIR, f"{job_id}{ext}") + + try: + content = await image.read() + with open(image_path, "wb") as f: + f.write(content) + except Exception as exc: + raise HTTPException(status_code=500, detail="Failed to save image file.") + + try: + with get_db() as conn: + with conn.cursor() as cur: + cur.execute( + """ + INSERT INTO ocr_jobs + (id, author, book, chapter, page, image_path, original_filename, + ocr_text, mode, status) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, 'unreviewed') + RETURNING * + """, + (job_id, author or None, book or None, chapter or None, + page or None, image_path, original_filename, + ocr_text or None, mode), + ) + row = cur.fetchone() + except Exception as exc: + # Clean up saved image if DB insert fails + try: + os.remove(image_path) + except Exception: + pass + print(f"Job commit DB error: {exc}") + raise HTTPException(status_code=500, detail="Failed to save job to database.") + + return JSONResponse(_job_row_to_dict(row), status_code=201) + + +@app.get("/api/jobs") +async def list_jobs( + search: Optional[str] = Query(None, description="General text search across all fields"), + author: Optional[str] = Query(None), + book: Optional[str] = Query(None), + chapter: Optional[str] = Query(None), + status: Optional[str] = Query(None, description="unreviewed | reviewed"), + limit: int = Query(20, ge=1, le=200), + offset: int = Query(0, ge=0), +): + """Search and list jobs. All filters are optional and combinable.""" + conditions = [] + params: List[Any] = [] + + if search: + conditions.append( + "(author ILIKE %s OR book ILIKE %s OR chapter ILIKE %s " + "OR page ILIKE %s OR ocr_text ILIKE %s OR reviewer_name ILIKE %s)" + ) + like = f"%{search}%" + params.extend([like, like, like, like, like, like]) + + if author: + conditions.append("author ILIKE %s") + params.append(f"%{author}%") + + if book: + conditions.append("book ILIKE %s") + params.append(f"%{book}%") + + if chapter: + conditions.append("chapter ILIKE %s") + params.append(f"%{chapter}%") + + if status: + conditions.append("status = %s") + params.append(status) + + where = ("WHERE " + " AND ".join(conditions)) if conditions else "" + + try: + with get_db() as conn: + with conn.cursor() as cur: + cur.execute( + f"SELECT COUNT(*) AS total FROM ocr_jobs {where}", + params, + ) + total = cur.fetchone()["total"] + + cur.execute( + f""" + SELECT id, author, book, chapter, page, submitted_at, status, + reviewer_name, reviewed_at, mode, original_filename + FROM ocr_jobs {where} + ORDER BY submitted_at DESC + LIMIT %s OFFSET %s + """, + params + [limit, offset], + ) + rows = [_job_row_to_dict(r) for r in cur.fetchall()] + except Exception as exc: + print(f"list_jobs DB error: {exc}") + raise HTTPException(status_code=500, detail="Database error.") + + return JSONResponse({"total": total, "limit": limit, "offset": offset, "jobs": rows}) + + +@app.get("/api/jobs/{job_id}") +async def get_job(job_id: str): + """Retrieve full job record including OCR text.""" + try: + uuid.UUID(job_id) + except ValueError: + raise HTTPException(status_code=400, detail="Invalid job ID.") + + try: + with get_db() as conn: + with conn.cursor() as cur: + cur.execute("SELECT * FROM ocr_jobs WHERE id = %s", (job_id,)) + row = cur.fetchone() + except Exception as exc: + print(f"get_job DB error: {exc}") + raise HTTPException(status_code=500, detail="Database error.") + + if not row: + raise HTTPException(status_code=404, detail="Job not found.") + + return JSONResponse(_job_row_to_dict(row)) + + +@app.get("/api/jobs/{job_id}/image") +async def get_job_image(job_id: str): + """Serve the stored image for a job.""" + try: + uuid.UUID(job_id) + except ValueError: + raise HTTPException(status_code=400, detail="Invalid job ID.") + + try: + with get_db() as conn: + with conn.cursor() as cur: + cur.execute("SELECT image_path FROM ocr_jobs WHERE id = %s", (job_id,)) + row = cur.fetchone() + except Exception as exc: + print(f"get_job_image DB error: {exc}") + raise HTTPException(status_code=500, detail="Database error.") + + if not row: + raise HTTPException(status_code=404, detail="Job not found.") + + path = row["image_path"] + if not os.path.isfile(path): + raise HTTPException(status_code=404, detail="Image file not found on disk.") + + return FileResponse(path) + + +@app.put("/api/jobs/{job_id}/review") +async def review_job(job_id: str, body: ReviewRequest): + """Mark a job as reviewed with the corrected text and reviewer name.""" + try: + uuid.UUID(job_id) + except ValueError: + raise HTTPException(status_code=400, detail="Invalid job ID.") + + try: + with get_db() as conn: + with conn.cursor() as cur: + cur.execute( + """ + UPDATE ocr_jobs + SET status = 'reviewed', + reviewed_text = %s, + reviewer_name = %s, + reviewed_at = NOW() + WHERE id = %s + RETURNING * + """, + (body.reviewed_text, body.reviewer_name, job_id), + ) + row = cur.fetchone() + except Exception as exc: + print(f"review_job DB error: {exc}") + raise HTTPException(status_code=500, detail="Database error.") + + if not row: + raise HTTPException(status_code=404, detail="Job not found.") + + return JSONResponse(_job_row_to_dict(row)) + + if __name__ == "__main__": host = env_config("API_HOST", default="0.0.0.0") port = env_config("API_PORT", default=8000, cast=int) diff --git a/backend/requirements.txt b/backend/requirements.txt index 49259d1..2eaad54 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -15,3 +15,4 @@ PyMuPDF>=1.23.0 img2pdf>=0.5.0 python-docx>=1.1.0 markdown>=3.5.0 +psycopg2-binary>=2.9.0 diff --git a/docker-compose.yml b/docker-compose.yml index ae5946d..0ba5d28 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,19 @@ services: + postgres: + image: postgres:16-alpine + container_name: deepseek-ocr-postgres + environment: + POSTGRES_USER: ${POSTGRES_USER:-ocr_user} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-ocr_password} + POSTGRES_DB: ${POSTGRES_DB:-ocr_db} + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-ocr_user} -d ${POSTGRES_DB:-ocr_db}"] + interval: 5s + timeout: 5s + retries: 10 + backend: build: ./backend container_name: deepseek-ocr-backend @@ -10,8 +25,14 @@ services: API_HOST: ${API_HOST:-0.0.0.0} API_PORT: ${API_PORT:-8000} MAX_UPLOAD_SIZE_MB: ${MAX_UPLOAD_SIZE_MB:-100} + DATABASE_URL: ${DATABASE_URL:-postgresql://ocr_user:ocr_password@postgres:5432/ocr_db} + OCR_IMAGES_DIR: ${OCR_IMAGES_DIR:-/data/ocr_images} volumes: - ./models:/models + - ./ocr_images:/data/ocr_images + depends_on: + postgres: + condition: service_healthy deploy: resources: reservations: @@ -22,8 +43,6 @@ services: shm_size: "4g" ports: - "${API_PORT:-8000}:${API_PORT:-8000}" - networks: - - ocr-network frontend: build: ./frontend @@ -32,9 +51,10 @@ services: - "${FRONTEND_PORT:-3000}:80" depends_on: - backend - networks: - - ocr-network + +volumes: + postgres_data: networks: - ocr-network: - driver: bridge + default: + name: rw-research diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 6fe19cb..39ca412 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,16 +1,21 @@ import { useState, useCallback } from 'react' import { motion, AnimatePresence } from 'framer-motion' -import { Sparkles, Zap, Loader2, Settings, Image as ImageIcon, FileText } from 'lucide-react' +import { Sparkles, Zap, Loader2, Settings, Image as ImageIcon, FileText, Layers } from 'lucide-react' import ImageUpload from './components/ImageUpload' import ModeSelector from './components/ModeSelector' import ResultPanel from './components/ResultPanel' import AdvancedSettings from './components/AdvancedSettings' import PDFProcessor from './components/PDFProcessor' +import MetadataForm from './components/MetadataForm' +import JobsPanel from './components/JobsPanel' import axios from 'axios' const API_BASE = import.meta.env.VITE_API_URL || '/api' function App() { + const [view, setView] = useState('new_job') // 'new_job' | 'jobs' + + // OCR state const [mode, setMode] = useState('plain_ocr') const [fileType, setFileType] = useState('image') // 'image' or 'pdf' const [image, setImage] = useState(null) @@ -20,7 +25,7 @@ function App() { const [error, setError] = useState(null) const [showAdvanced, setShowAdvanced] = useState(false) const [includeCaption, setIncludeCaption] = useState(false) - + // Form state const [prompt, setPrompt] = useState('') const [findTerm, setFindTerm] = useState('') @@ -31,12 +36,16 @@ function App() { test_compress: false }) + // Job metadata + const [metadata, setMetadata] = useState({ author: '', book: '', chapter: '', page: '' }) + + // Job commit state + const [commitLoading, setCommitLoading] = useState(false) + const [commitResult, setCommitResult] = useState(null) + const handleFileTypeChange = useCallback((newType) => { - // Clear current file when switching types setImage(null) - if (imagePreview) { - URL.revokeObjectURL(imagePreview) - } + if (imagePreview) URL.revokeObjectURL(imagePreview) setImagePreview(null) setError(null) setResult(null) @@ -45,24 +54,17 @@ function App() { const handleImageSelect = useCallback((file) => { if (file === null) { - // Clear everything when removing image setImage(null) - if (imagePreview && fileType === 'image') { - URL.revokeObjectURL(imagePreview) - } + if (imagePreview && fileType === 'image') URL.revokeObjectURL(imagePreview) setImagePreview(null) setError(null) setResult(null) } else { setImage(file) - // Only create preview URL for images, not PDFs - if (fileType === 'image') { - setImagePreview(URL.createObjectURL(file)) - } else { - setImagePreview(file) // Just store the file for PDFs - } + setImagePreview(fileType === 'image' ? URL.createObjectURL(file) : file) setError(null) setResult(null) + setCommitResult(null) } }, [imagePreview, fileType]) @@ -71,16 +73,15 @@ function App() { setError('Please upload an image first') return } - setLoading(true) setError(null) + setCommitResult(null) try { const formData = new FormData() formData.append('image', image) formData.append('mode', mode) formData.append('prompt', prompt) - // Enable grounding only for find mode formData.append('grounding', mode === 'find_ref') formData.append('include_caption', includeCaption) formData.append('find_term', findTerm) @@ -91,11 +92,8 @@ function App() { formData.append('test_compress', advancedSettings.test_compress) const response = await axios.post(`${API_BASE}/ocr`, formData, { - headers: { - 'Content-Type': 'multipart/form-data', - }, + headers: { 'Content-Type': 'multipart/form-data' }, }) - setResult(response.data) } catch (err) { setError(err.response?.data?.detail || err.message || 'An error occurred') @@ -104,23 +102,38 @@ function App() { } } - const handleCopy = useCallback(() => { - if (result?.text) { - navigator.clipboard.writeText(result.text) + const handleCommitJob = useCallback(async () => { + if (!image || !result?.text) return + setCommitLoading(true) + setCommitResult(null) + try { + const formData = new FormData() + formData.append('image', image) + formData.append('author', metadata.author) + formData.append('book', metadata.book) + formData.append('chapter', metadata.chapter) + formData.append('page', metadata.page) + formData.append('ocr_text', result.text) + formData.append('mode', mode) + + const response = await axios.post(`${API_BASE}/jobs`, formData, { + headers: { 'Content-Type': 'multipart/form-data' }, + }) + setCommitResult({ success: true, job: response.data }) + } catch (err) { + setCommitResult({ success: false, error: err.response?.data?.detail || err.message }) + } finally { + setCommitLoading(false) } + }, [image, result, metadata, mode]) + + const handleCopy = useCallback(() => { + if (result?.text) navigator.clipboard.writeText(result.text) }, [result]) const handleDownload = useCallback(() => { if (!result?.text) return - - const extensions = { - plain_ocr: 'txt', - describe: 'txt', - find_ref: 'txt', - freeform: 'txt', - } - - const ext = extensions[mode] || 'txt' + const ext = { plain_ocr: 'txt', describe: 'txt', find_ref: 'txt', freeform: 'txt' }[mode] || 'txt' const blob = new Blob([result.text], { type: 'text/plain' }) const url = URL.createObjectURL(blob) const a = document.createElement('a') @@ -138,27 +151,13 @@ function App() {
{error}
- - )} - > - )} - +{error}
+