Files
training-software/api/upload_validators.py
Paperclip CTO 8054c1e1e4 feat(TRA-233): Django M1 foundation scaffold
- Environment-split settings: base/local/test/prod with django-environ
- Postgres + Redis + Celery wiring (broker, beat, result backend)
- All 9 domain app stubs: accounts, courses, cms, tracking, quizzes,
  training, certificates, reports, notifications
- api app: /healthz/ endpoint, custom DRF exception handler,
  SecurityAuditMiddleware, permissions/throttle/upload-validation stubs
- DRF global baseline: JWT+session auth, closed-by-default permissions,
  cursor/page pagination, drf-spectacular schema generation
- Dockerfile (multi-env build arg), docker-compose.yml (local),
  docker-compose.test.yml (CI-friendly tmpfs Postgres)
- pytest.ini with smoke + settings marker definitions
- tests/test_smoke.py: startup, URL resolution, healthcheck shape
- tests/test_settings_matrix.py: per-profile security assertions
- .github/workflows/ci.yml: test, lint, schema CI jobs
- .env.example with all required vars documented
- .gitignore

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-05-07 09:11:23 +02:00

88 lines
2.7 KiB
Python

import os
from django.conf import settings
from django.core.exceptions import ValidationError
from django.utils.translation import gettext_lazy as _
# Maps a lower-cased file extension to the list of (offset, magic_bytes)
# signatures accepted for it. A file is accepted when any one of the listed
# byte strings is found at its offset.
_SIGNATURE_MAP: dict[str, list[tuple[int, bytes]]] = {
    ".pdf": [(0, b"%PDF")],
    # Both JPEG spellings share the same leading marker bytes.
    **{ext: [(0, b"\xFF\xD8\xFF")] for ext in (".jpg", ".jpeg")},
    ".png": [(0, b"\x89PNG")],
    ".gif": [(0, b"GIF87a"), (0, b"GIF89a")],
    # ZIP-based Office formats (DOCX, XLSX, PPTX)
    **{ext: [(0, b"PK\x03\x04")] for ext in (".docx", ".xlsx", ".pptx")},
    # Legacy Office (compound document)
    **{ext: [(0, b"\xD0\xCF\x11\xE0")] for ext in (".doc", ".xls", ".ppt")},
    # MP4 (ftyp box at offset 4)
    ".mp4": [(4, b"ftyp")],
    ".webm": [(0, b"\x1aE\xdf\xa3")],
}

# Extensions that are accepted without any signature check: the plain-text
# formats (.csv, .txt) plus .avi and .mov.
# NOTE(review): .avi and .mov are binary containers, not plain text -- confirm
# that skipping their signature check is intended.
_NO_SIGNATURE_EXTS = {
    ".csv",
    ".txt",
    ".avi",
    ".mov",
}
def _allowed_extensions() -> set[str]:
    """Return the configured upload extensions, lower-cased.

    Reads ``settings.ALLOWED_UPLOAD_EXTENSIONS`` on every call; when the
    setting is absent the result is an empty set, so no extension is allowed.
    """
    configured = getattr(settings, "ALLOWED_UPLOAD_EXTENSIONS", [])
    normalized = set()
    for raw_ext in configured:
        normalized.add(raw_ext.lower())
    return normalized
def validate_file_extension(file) -> None:
    """Reject a file whose extension is not in the allowed set.

    The extension is taken from ``file.name`` with ``os.path.splitext`` and
    lower-cased, so it includes the leading dot (or is empty when the name
    has no extension).

    Raises:
        ValidationError: with code ``invalid_extension`` when the extension
            is not returned by ``_allowed_extensions()``.
    """
    suffix = os.path.splitext(file.name)[1].lower()
    if suffix in _allowed_extensions():
        return

    message = _("File type '%(ext)s' is not permitted.")
    raise ValidationError(
        message,
        params={"ext": suffix},
        code="invalid_extension",
    )
def validate_file_size(file) -> None:
    """Reject a file larger than the configured upload limit.

    The limit is ``settings.MAX_UPLOAD_SIZE_BYTES`` and falls back to
    100 MiB when the setting is absent. A file exactly at the limit passes.

    Raises:
        ValidationError: with code ``file_too_large`` when ``file.size``
            exceeds the limit.
    """
    default_limit = 100 * 1024 * 1024
    limit = getattr(settings, "MAX_UPLOAD_SIZE_BYTES", default_limit)
    if not file.size > limit:
        return

    # Sizes are reported in megabytes: the actual size as a float, the limit
    # floored to a whole number.
    details = {
        "size_mb": file.size / 1024 / 1024,
        "max_mb": limit // 1024 // 1024,
    }
    raise ValidationError(
        _("File size %(size_mb).1f MB exceeds the %(max_mb)d MB limit."),
        params=details,
        code="file_too_large",
    )
def validate_file_signature(file) -> None:
    """Reject files whose magic bytes contradict the declared extension.

    The extension is taken from ``file.name`` (lower-cased). Extensions listed
    in ``_NO_SIGNATURE_EXTS`` and extensions with no entry in
    ``_SIGNATURE_MAP`` are accepted without inspecting the content. For all
    others the start of the file is read and compared against each known
    (offset, magic_bytes) signature; one match is enough.

    The file is rewound to position 0 afterwards, even if reading fails, so
    later consumers see the whole content.

    Raises:
        ValidationError: with code ``invalid_file_signature`` when none of
            the signatures for the extension match.
    """
    ext = os.path.splitext(file.name)[1].lower()
    if ext in _NO_SIGNATURE_EXTS:
        return
    signatures = _SIGNATURE_MAP.get(ext)
    if not signatures:
        return

    # Read exactly as far as the furthest-reaching signature needs instead of
    # a fixed byte count, so a signature at a larger offset cannot be
    # silently truncated and never match.
    header_len = max(offset + len(magic) for offset, magic in signatures)

    file.seek(0)
    try:
        header = file.read(header_len)
    finally:
        # Always rewind so a failed read does not leave the stream mid-file.
        file.seek(0)

    for offset, magic in signatures:
        if header[offset : offset + len(magic)] == magic:
            return
    raise ValidationError(
        _("File content does not match the declared file type '%(ext)s'."),
        params={"ext": ext},
        code="invalid_file_signature",
    )
def validate_upload(file) -> None:
    """Run all upload validators in sequence (extension → size → signature).

    The validators run in that fixed order and the first one that raises
    ``ValidationError`` stops the chain, so later checks are not executed.
    """
    checks = (
        validate_file_extension,
        validate_file_size,
        validate_file_signature,
    )
    for check in checks:
        check(file)