Files
rw-deepseek-ocr/backend/test_security.py
Ray Dumasia 3dac0741b1 Fix RCE vulnerability and harden security
- Replace eval() with ast.literal_eval() in pdf_utils.py to fix
  unauthenticated remote code execution via crafted PDF uploads
  (reported by OX Security)
- Sanitize HTML output with DOMPurify to prevent XSS
- Restrict CORS origins (configurable via CORS_ORIGINS env var)
- Suppress raw exception details in API error responses
- Cap Image.MAX_IMAGE_PIXELS to prevent decompression bomb DoS
- Add security regression test suite

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-31 09:01:52 +01:00

151 lines
4.9 KiB
Python

"""
Security regression tests for the eval() RCE vulnerability (OX Security disclosure).
The vulnerability allowed arbitrary code execution via crafted OCR output
that was passed to eval() in parse_coordinates(). The fix uses ast.literal_eval()
which only allows literal data structures.
This test is self-contained and does not require backend dependencies.
Run: python test_security.py
"""
import ast
def parse_coordinates(ref_text, image_width, image_height):
    """
    Self-contained stand-in for pdf_utils.parse_coordinates (patched form).

    The coordinate string is parsed with ast.literal_eval(), which only
    accepts Python literals, rather than with eval().

    Args:
        ref_text: Indexable sequence; element 1 is used as the label and
            element 2 is the textual list of [x1, y1, x2, y2] boxes.
        image_width: Factor applied to x coordinates after dividing by 999.
        image_height: Factor applied to y coordinates after dividing by 999.

    Returns:
        A dict with keys 'label' and 'boxes' (a list of four-int lists), or
        None when any exception is raised while parsing or scaling. In the
        failure case a "[Blocked]" line describing the exception is printed.
    """
    try:
        label = ref_text[1]
        raw_boxes = ast.literal_eval(ref_text[2])
        # Each entry must unpack into exactly four values; anything else
        # raises and is handled below like every other failure.
        boxes = [
            [
                int(left / 999 * image_width),
                int(top / 999 * image_height),
                int(right / 999 * image_width),
                int(bottom / 999 * image_height),
            ]
            for left, top, right, bottom in raw_boxes
        ]
    except Exception as err:
        print(f" [Blocked] {type(err).__name__}: {err}")
        return None
    return {'label': label, 'boxes': boxes}
def test_legitimate_coordinates():
    """Verify that normal coordinate parsing still works.

    Checks the scaled values themselves, not just the number of boxes, so a
    regression in the scaling arithmetic is caught as well.
    """
    ref_text = ("full_match", "text", "[[312, 339, 480, 681]]")
    result = parse_coordinates(ref_text, 1000, 1000)
    assert result is not None, "Legitimate coordinates should parse successfully"
    assert result['label'] == 'text'
    assert len(result['boxes']) == 1
    assert result['boxes'] == [[312, 339, 480, 681]], "Scaled box values are wrong"

    # A non-square image distinguishes the axes: x values must scale with the
    # width and y values with the height. 0 and 999 scale exactly, so the
    # expected integers do not depend on float rounding.
    full_page = parse_coordinates(("full_match", "text", "[[0, 0, 999, 999]]"), 640, 480)
    assert full_page is not None, "Full-range coordinates should parse successfully"
    assert full_page['boxes'] == [[0, 0, 640, 480]], "x/y must scale by width/height"
    print("PASS: Legitimate coordinates parse correctly")
def test_multiple_boxes():
    """Verify multiple bounding boxes still work.

    Asserts the exact scaled boxes so that dropped, reordered or mis-scaled
    entries are detected, not only a wrong count.
    """
    ref_text = ("full_match", "image", "[[100, 200, 300, 400], [500, 600, 700, 800]]")
    result = parse_coordinates(ref_text, 1000, 1000)
    assert result is not None, "Multiple boxes should parse successfully"
    assert result['label'] == 'image'
    assert len(result['boxes']) == 2
    assert result['boxes'] == [
        [100, 200, 300, 400],
        [500, 600, 700, 800],
    ], "Boxes must keep their order and scaled values"
    print("PASS: Multiple bounding boxes parse correctly")
def test_rce_blocked_import_os():
    """The original exploit: __import__('os').system('...') must be blocked.

    A None result alone does not prove the payload was not executed:
    parse_coordinates() returns None for *any* exception, including the
    TypeError raised when it tries to iterate the integer that os.system()
    returns. The test therefore also stubs os.system and asserts that it was
    never called.
    """
    import os
    from unittest import mock

    malicious = "__import__('os').system('echo HACKED')"
    ref_text = ("full_match", "exploit", malicious)
    # The stub records a call instead of spawning a shell if the payload is
    # ever evaluated.
    with mock.patch.object(os, "system") as fake_system:
        result = parse_coordinates(ref_text, 1000, 1000)
    assert not fake_system.called, "Payload was executed: os.system() was called"
    assert result is None, "Code execution payload should be rejected"
    print("PASS: __import__('os').system() payload is blocked")
def test_rce_blocked_exec():
    """exec() based payloads must be blocked.

    exec() returns None, and iterating None raises a TypeError that
    parse_coordinates() converts into a None result, so checking the result
    alone would pass even if the payload ran. os.system is stubbed and must
    remain uncalled.
    """
    import os
    from unittest import mock

    malicious = "exec('import os; os.system(\"echo HACKED\")')"
    ref_text = ("full_match", "exploit", malicious)
    with mock.patch.object(os, "system") as fake_system:
        result = parse_coordinates(ref_text, 1000, 1000)
    assert not fake_system.called, "Payload was executed: os.system() was called"
    assert result is None, "exec() payload should be rejected"
    print("PASS: exec() payload is blocked")
def test_rce_blocked_eval():
    """Nested eval() payloads must be blocked.

    If the payload ran, popen().read() would return a string whose
    characters cannot be unpacked into four coordinates; parse_coordinates()
    would swallow that error and still return None. os.popen is stubbed and
    must remain uncalled to prove nothing was executed.
    """
    import os
    from unittest import mock

    malicious = "eval('__import__(\"os\").popen(\"id\").read()')"
    ref_text = ("full_match", "exploit", malicious)
    with mock.patch.object(os, "popen") as fake_popen:
        result = parse_coordinates(ref_text, 1000, 1000)
    assert not fake_popen.called, "Payload was executed: os.popen() was called"
    assert result is None, "Nested eval() payload should be rejected"
    print("PASS: Nested eval() payload is blocked")
def test_rce_blocked_lambda():
    """Lambda-based payloads must be blocked.

    The immediately-invoked lambda would return os.system()'s integer exit
    status; iterating it raises a TypeError that parse_coordinates() turns
    into None. os.system is stubbed and must remain uncalled so that the
    test cannot pass after the payload has run.
    """
    import os
    from unittest import mock

    malicious = "(lambda: __import__('os').system('echo HACKED'))()"
    ref_text = ("full_match", "exploit", malicious)
    with mock.patch.object(os, "system") as fake_system:
        result = parse_coordinates(ref_text, 1000, 1000)
    assert not fake_system.called, "Payload was executed: os.system() was called"
    assert result is None, "Lambda payload should be rejected"
    print("PASS: Lambda payload is blocked")
def test_rce_blocked_comprehension():
    """List comprehension code execution must be blocked.

    If evaluated, the comprehension would yield a list holding os.system()'s
    integer result, which fails coordinate unpacking and is reported as None
    by parse_coordinates(). os.system is stubbed and must remain uncalled so
    that execution of the payload is detected directly.
    """
    import os
    from unittest import mock

    malicious = "[__import__('os').system('echo HACKED') for x in [1]]"
    ref_text = ("full_match", "exploit", malicious)
    with mock.patch.object(os, "system") as fake_system:
        result = parse_coordinates(ref_text, 1000, 1000)
    assert not fake_system.called, "Payload was executed: os.system() was called"
    assert result is None, "List comprehension payload should be rejected"
    print("PASS: List comprehension payload is blocked")
if __name__ == "__main__":
    # Script entry point: run every regression test in order, print one
    # PASS/FAIL/ERROR line per test followed by a summary, and report the
    # outcome through the process exit status.
    print("=" * 60)
    print("Security Regression Tests (OX Security RCE disclosure)")
    print("=" * 60)
    print()
    tests = [
        test_legitimate_coordinates,
        test_multiple_boxes,
        test_rce_blocked_import_os,
        test_rce_blocked_exec,
        test_rce_blocked_eval,
        test_rce_blocked_lambda,
        test_rce_blocked_comprehension,
    ]
    passed = 0
    failed = 0
    for test in tests:
        try:
            test()
            passed += 1
        except AssertionError as e:
            # A failed expectation inside the test.
            print(f"FAIL: {test.__name__}: {e}")
            failed += 1
        except Exception as e:
            # Anything else means the test itself broke; count it as a
            # failure but keep running the remaining tests.
            print(f"ERROR: {test.__name__}: {e}")
            failed += 1
    print()
    print(f"Results: {passed} passed, {failed} failed out of {len(tests)} tests")
    if failed == 0:
        print("All security tests passed - RCE vulnerability is patched.")
    else:
        print("WARNING: Some tests failed!")
        # Exit non-zero so CI jobs and shell callers can detect the failure;
        # without this the script reports success (status 0) regardless.
        raise SystemExit(1)