92 lines
3.0 KiB
Python
92 lines
3.0 KiB
Python
import os
|
|
import psycopg2
|
|
import psycopg2.extras
|
|
from contextlib import contextmanager
|
|
from decouple import config as env_config
|
|
|
|
# PostgreSQL connection string used by every helper in this module.
# Looked up under the "DATABASE_URL" setting via decouple's config; the
# literal below is used only when that setting is absent.
_FALLBACK_DATABASE_URL = "postgresql://ocr_user:ocr_password@postgres:5432/ocr_db"
DATABASE_URL = env_config("DATABASE_URL", default=_FALLBACK_DATABASE_URL)
|
|
|
|
|
|
def _get_conn():
    """Open and return a new psycopg2 connection to ``DATABASE_URL``.

    Cursors created from the connection use ``RealDictCursor``. The caller
    owns the connection and is responsible for committing and closing it.
    """
    cursor_cls = psycopg2.extras.RealDictCursor
    return psycopg2.connect(DATABASE_URL, cursor_factory=cursor_cls)
|
|
|
|
|
|
def init_db():
    """Create the ``ocr_jobs`` table, its indexes and later-added columns.

    Every statement is written with ``IF NOT EXISTS`` so the function can be
    run repeatedly. All statements run on a single connection and are
    committed together. On any error the failure is printed, the transaction
    is rolled back and the exception is re-raised. The connection is closed
    in every case.
    """
    # Executed in order, inside one transaction.
    schema_statements = (
        # Base table.
        """
        CREATE TABLE IF NOT EXISTS ocr_jobs (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            author TEXT,
            book TEXT,
            chapter TEXT,
            page TEXT,
            submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            image_path TEXT NOT NULL,
            original_filename TEXT,
            ocr_text TEXT,
            status TEXT NOT NULL DEFAULT 'unreviewed',
            reviewed_text TEXT,
            reviewer_name TEXT,
            reviewed_at TIMESTAMPTZ,
            mode TEXT
        )
        """,
        # Plain indexes on status and on submitted_at (descending).
        """
        CREATE INDEX IF NOT EXISTS ocr_jobs_status_idx ON ocr_jobs(status)
        """,
        """
        CREATE INDEX IF NOT EXISTS ocr_jobs_submitted_at_idx ON ocr_jobs(submitted_at DESC)
        """,
        # Columns introduced after the initial schema.
        """
        ALTER TABLE ocr_jobs
        ADD COLUMN IF NOT EXISTS describe_text TEXT
        """,
        """
        ALTER TABLE ocr_jobs
        ADD COLUMN IF NOT EXISTS freeform_text TEXT
        """,
        """
        ALTER TABLE ocr_jobs
        ADD COLUMN IF NOT EXISTS qdrant_synced_at TIMESTAMPTZ
        """,
        # Partial unique index: at most one row per (author, chapter, page),
        # enforced only when all three values are non-null.
        # NOTE(review): `book` is not part of this key - confirm that is intended.
        """
        CREATE UNIQUE INDEX IF NOT EXISTS ocr_jobs_author_chapter_page_unique
        ON ocr_jobs (author, chapter, page)
        WHERE author IS NOT NULL AND chapter IS NOT NULL AND page IS NOT NULL
        """,
    )

    connection = None  # stays None if connecting itself fails
    try:
        connection = _get_conn()
        with connection.cursor() as cursor:
            for ddl in schema_statements:
                cursor.execute(ddl)
        connection.commit()
        print("Database initialized.")
    except Exception as exc:
        print(f"Database init failed: {exc}")
        if connection is not None:
            connection.rollback()
        raise
    finally:
        if connection is not None:
            connection.close()
|
|
|
|
|
|
@contextmanager
def get_db():
    """Context manager yielding a fresh connection from ``_get_conn``.

    When the ``with`` body finishes normally the transaction is committed.
    If the body (or the commit) raises an ``Exception`` the transaction is
    rolled back and the exception propagates. The connection is closed on
    every exit path.
    """
    db_conn = _get_conn()
    try:
        try:
            yield db_conn
            # Kept inside the guarded region so a failing commit is also
            # followed by a rollback.
            db_conn.commit()
        except Exception:
            db_conn.rollback()
            raise
    finally:
        db_conn.close()
|