Files
rw-deepseek-ocr/backend/database.py
Aaron Roberts 1d15b5f0c1 Add unique constraint to prevent duplicate (author, chapter, page) submissions
Adds a PostgreSQL partial unique index on (author, chapter, page) where all
three fields are non-null, and returns HTTP 409 when a duplicate is detected.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-09 18:19:54 +01:00

79 lines
2.5 KiB
Python

import os
import psycopg2
import psycopg2.extras
from contextlib import contextmanager
from decouple import config as env_config
# Connection string used by every database helper in this module. It is looked
# up under the "DATABASE_URL" setting through decouple's `config` (imported
# here as `env_config`); the fallback below is used when no value is supplied.
# NOTE(review): the fallback embeds a username and password and points at a
# host named "postgres" - presumably a local/dev container setup; confirm it is
# never relied on in production.
_DEFAULT_DATABASE_URL = "postgresql://ocr_user:ocr_password@postgres:5432/ocr_db"
DATABASE_URL = env_config("DATABASE_URL", default=_DEFAULT_DATABASE_URL)
def _get_conn():
    """Open and return a new psycopg2 connection to ``DATABASE_URL``.

    The connection is created with ``RealDictCursor`` as its cursor factory,
    so cursors obtained from it return rows as dictionaries keyed by column
    name. The caller owns the connection and is responsible for committing,
    rolling back and closing it.
    """
    dict_row_cursor = psycopg2.extras.RealDictCursor
    return psycopg2.connect(DATABASE_URL, cursor_factory=dict_row_cursor)
# Schema DDL executed, in this order, by init_db(). Every statement is guarded
# by IF NOT EXISTS, so objects that already exist are left as they are.
# The SQL is kept flush-left so the statement text sent to the server is
# unchanged.
_SCHEMA_STATEMENTS = (
    # Main table: one row per submitted OCR job.
    # NOTE(review): gen_random_uuid() is built into recent PostgreSQL releases;
    # older servers need the pgcrypto extension - confirm the target version.
    """
CREATE TABLE IF NOT EXISTS ocr_jobs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
author TEXT,
book TEXT,
chapter TEXT,
page TEXT,
submitted_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
image_path TEXT NOT NULL,
original_filename TEXT,
ocr_text TEXT,
status TEXT NOT NULL DEFAULT 'unreviewed',
reviewed_text TEXT,
reviewer_name TEXT,
reviewed_at TIMESTAMPTZ,
mode TEXT
)
""",
    # Plain index on the status column.
    """
CREATE INDEX IF NOT EXISTS ocr_jobs_status_idx ON ocr_jobs(status)
""",
    # Plain index on submission time, newest first.
    """
CREATE INDEX IF NOT EXISTS ocr_jobs_submitted_at_idx ON ocr_jobs(submitted_at DESC)
""",
    # Partial unique index: rejects a second row with the same
    # (author, chapter, page), but only when all three values are non-null.
    # NOTE(review): `book` is not part of the key, so the same
    # (author, chapter, page) in two different books would collide - confirm
    # that this is intended.
    """
CREATE UNIQUE INDEX IF NOT EXISTS ocr_jobs_author_chapter_page_unique
ON ocr_jobs (author, chapter, page)
WHERE author IS NOT NULL AND chapter IS NOT NULL AND page IS NOT NULL
""",
)


def init_db():
    """Create the ``ocr_jobs`` table and its indexes if they are missing.

    Meant to be called once at startup. Opens its own connection, runs every
    statement in ``_SCHEMA_STATEMENTS``, commits once after all of them have
    run, and prints a confirmation message.

    Raises:
        Exception: any error raised while connecting or executing the DDL is
            reported with ``print``, the transaction is rolled back (when a
            connection was opened), and the error is re-raised unchanged.

    The connection is always closed before returning or propagating.
    """
    # Stays None if connecting itself fails, so the cleanup below can tell
    # whether there is anything to roll back or close.
    conn = None
    try:
        conn = _get_conn()
        with conn.cursor() as cursor:
            for ddl in _SCHEMA_STATEMENTS:
                cursor.execute(ddl)
        conn.commit()
        print("Database initialized.")
    except Exception as exc:
        print(f"Database init failed: {exc}")
        if conn is not None:
            conn.rollback()
        raise
    finally:
        if conn is not None:
            conn.close()
@contextmanager
def get_db():
    """Context manager that hands out a fresh database connection.

    Yields:
        A new connection obtained from ``_get_conn()``.

    When the ``with`` body finishes normally the transaction is committed.
    If the body (or the commit itself) raises an ``Exception``, the
    transaction is rolled back and the exception is re-raised. The connection
    is closed in every case.
    """
    connection = _get_conn()
    try:
        try:
            yield connection
            # Reached only when the caller's block completed without raising.
            connection.commit()
        except Exception:
            connection.rollback()
            raise
    finally:
        connection.close()