Files
rw-deepseek-ocr/backend/database.py
2026-06-19 17:47:53 +01:00

92 lines
3.0 KiB
Python

import os
import psycopg2
import psycopg2.extras
from contextlib import contextmanager
from decouple import config as env_config
# Connection string for the OCR PostgreSQL database. It is looked up under the
# "DATABASE_URL" key through decouple's ``config``; the fallback below is used
# when no value is configured.
# NOTE(review): the fallback embeds a username/password and the host name
# "postgres" -- presumably a local/containerised setup; confirm it is never
# relied on in production.
_DEFAULT_DATABASE_URL = "postgresql://ocr_user:ocr_password@postgres:5432/ocr_db"

DATABASE_URL = env_config("DATABASE_URL", default=_DEFAULT_DATABASE_URL)
def _get_conn():
    """Open and return a new psycopg2 connection to ``DATABASE_URL``.

    The connection is created with ``RealDictCursor`` as its cursor factory,
    so cursors obtained from it use that cursor class by default. A fresh
    connection is opened on every call; the caller is responsible for
    committing/rolling back and closing it.
    """
    connection = psycopg2.connect(
        DATABASE_URL,
        cursor_factory=psycopg2.extras.RealDictCursor,
    )
    return connection
def init_db():
    """Create or upgrade the ``ocr_jobs`` schema. Intended to run once at startup.

    Opens a connection with ``_get_conn()``, executes the schema statements
    below in order on one cursor, and commits them with a single ``commit()``.
    Every statement uses ``IF NOT EXISTS``, so running this repeatedly against
    an already-initialised database is safe.

    Raises:
        Exception: any error raised while connecting or executing is printed,
            the open transaction (if a connection exists) is rolled back, and
            the exception is re-raised. The connection is always closed.
    """
    # Ordered DDL: later statements depend on the table created by the first.
    schema_statements = (
        # Base table.
        # NOTE(review): gen_random_uuid() is built in from PostgreSQL 13;
        # older servers need the pgcrypto extension -- confirm target version.
        """
        CREATE TABLE IF NOT EXISTS ocr_jobs (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            author TEXT,
            book TEXT,
            chapter TEXT,
            page TEXT,
            submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            image_path TEXT NOT NULL,
            original_filename TEXT,
            ocr_text TEXT,
            status TEXT NOT NULL DEFAULT 'unreviewed',
            reviewed_text TEXT,
            reviewer_name TEXT,
            reviewed_at TIMESTAMPTZ,
            mode TEXT
        )
        """,
        # Secondary indexes on status and on submitted_at (descending) --
        # presumably for status filters and newest-first listings. These are
        # ordinary indexes, not full-text search indexes.
        """
        CREATE INDEX IF NOT EXISTS ocr_jobs_status_idx ON ocr_jobs(status)
        """,
        """
        CREATE INDEX IF NOT EXISTS ocr_jobs_submitted_at_idx ON ocr_jobs(submitted_at DESC)
        """,
        # Columns introduced after the initial schema (safe to run repeatedly).
        """
        ALTER TABLE ocr_jobs
        ADD COLUMN IF NOT EXISTS describe_text TEXT
        """,
        """
        ALTER TABLE ocr_jobs
        ADD COLUMN IF NOT EXISTS freeform_text TEXT
        """,
        """
        ALTER TABLE ocr_jobs
        ADD COLUMN IF NOT EXISTS qdrant_synced_at TIMESTAMPTZ
        """,
        # Partial unique index: rejects duplicate (author, chapter, page)
        # rows, but only when all three fields are non-null.
        # NOTE(review): `book` is not part of the key -- confirm that two
        # books by the same author can never share a chapter/page pair.
        """
        CREATE UNIQUE INDEX IF NOT EXISTS ocr_jobs_author_chapter_page_unique
        ON ocr_jobs (author, chapter, page)
        WHERE author IS NOT NULL AND chapter IS NOT NULL AND page IS NOT NULL
        """,
    )

    connection = None
    try:
        connection = _get_conn()
        with connection.cursor() as cursor:
            for statement in schema_statements:
                cursor.execute(statement)
        connection.commit()
        print("Database initialized.")
    except Exception as exc:
        print(f"Database init failed: {exc}")
        # The connection may not exist if _get_conn() itself failed.
        if connection is not None:
            connection.rollback()
        raise
    finally:
        if connection is not None:
            connection.close()
@contextmanager
def get_db():
    """Context manager yielding a fresh database connection.

    A new connection is opened with ``_get_conn()`` and handed to the ``with``
    body. If the body finishes without raising, the transaction is committed.
    If the body (or the commit itself) raises an ``Exception``, the
    transaction is rolled back and the exception propagates. The connection
    is closed on exit in every case.
    """
    connection = _get_conn()
    try:
        try:
            yield connection
            # Commit stays inside the guarded region so that a failing
            # commit is also followed by a rollback.
            connection.commit()
        except Exception:
            connection.rollback()
            raise
    finally:
        connection.close()