|
| 1 | +"""Enhanced spectrum submission form with client-side processing and multi-spectrum support.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import json |
| 6 | +from typing import TYPE_CHECKING, TypedDict |
| 7 | + |
| 8 | +from django import forms |
| 9 | +from django.apps import apps |
| 10 | +from django.db import transaction |
| 11 | +from django.utils.text import slugify |
| 12 | + |
| 13 | +from proteins.extrest.entrez import is_valid_doi |
| 14 | +from proteins.models import Dye, DyeState, FluorState, Spectrum, State |
| 15 | +from references.models import Reference |
| 16 | + |
| 17 | +if TYPE_CHECKING: |
| 18 | + from django.contrib.auth.models import User |
| 19 | + |
| 20 | + |
class SpectrumJSONData(TypedDict):
    """Type definition for one spectrum entry in the JSON payload from the frontend.

    Must match the SpectrumJSON typedef in form-controller.js.

    The frontend is expected to send every field. Fields marked with | None
    can have null values when not applicable (e.g., ph/solvent for non-bio
    categories, scale_factor when not provided, peak_wave when not found).

    NOTE(review): on the server, ``_validate_spectrum_json`` only checks
    ``data``, ``category``, ``subtype`` and ``owner``; the other keys are read
    with ``.get()`` in ``SpectrumFormV2.save``, so their presence is not
    enforced. Confirm that this is intended.
    """

    # Required string/list fields (never None)
    data: list[list[float]]  # list of [wavelength, value] pairs
    category: str  # must be a key of Spectrum.CATEGORIES
    owner: str  # display name of the owner; must be non-blank
    subtype: str  # must be a key of Spectrum.SUBTYPE_CHOICES
    column_name: str  # presumably the source column in the uploaded file — TODO confirm

    # Always present but can be None
    owner_slug: str | None  # Protein slug for autocomplete categories
    scale_factor: float | None
    ph: float | None
    solvent: str | None
    peak_wave: int | None
| 45 | + |
# Upper bound on the number of spectra accepted in a single submission;
# _validate_spectrum_json rejects payloads with more entries than this.
MAX_SPECTRA_PER_SUBMISSION = 20
# Upper bound on the number of [wavelength, value] points in one spectrum;
# _validate_spectrum_json rejects spectra with more points than this.
MAX_DATA_POINTS_PER_SPECTRUM = 2000
| 48 | + |
| 49 | + |
def _validate_spectrum_json(raw: str | bytes) -> list[SpectrumJSONData]:
    """Parse and validate the JSON array of processed spectra sent by the frontend.

    The payload is untrusted user input, so every structural problem is reported
    as a ``forms.ValidationError`` rather than being allowed to surface as an
    unhandled ``TypeError``/``AttributeError``.

    Checks performed, in order:
      * the payload is a non-empty JSON array of at most
        ``MAX_SPECTRA_PER_SUBMISSION`` objects;
      * each spectrum has a ``data`` list of 2..``MAX_DATA_POINTS_PER_SPECTRUM``
        ``[wavelength, value]`` pairs made of finite numbers (booleans rejected);
      * ``category`` and ``subtype`` are keys of ``Spectrum.CATEGORIES`` and
        ``Spectrum.SUBTYPE_CHOICES`` respectively;
      * ``owner`` is a non-blank string;
      * no two spectra share the same (category, owner, subtype), comparing the
        owner case-insensitively with surrounding whitespace ignored.

    Args:
        raw: The raw JSON text (or bytes) from the hidden form field.

    Returns:
        The decoded list of spectrum dicts, unmodified.

    Raises:
        forms.ValidationError: If the payload is missing, malformed, or fails
            any of the checks above.
    """
    import math  # local import: only needed for the finiteness check below

    if not raw or raw == "[]":
        raise forms.ValidationError("No spectrum data provided.")

    try:
        spectra = json.loads(raw)
    except (ValueError, RecursionError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError (bytes
        # input that cannot be decoded); RecursionError covers pathologically
        # nested input.
        raise forms.ValidationError(f"Invalid JSON: {e}") from e

    if not isinstance(spectra, list) or len(spectra) == 0:
        raise forms.ValidationError("Expected a non-empty array of spectra.")

    if len(spectra) > MAX_SPECTRA_PER_SUBMISSION:
        raise forms.ValidationError(
            f"Too many spectra ({len(spectra)}). "
            f"Maximum {MAX_SPECTRA_PER_SUBMISSION} per submission."
        )

    valid_subtypes = dict(Spectrum.SUBTYPE_CHOICES)
    valid_categories = dict(Spectrum.CATEGORIES)

    for i, spec in enumerate(spectra):
        if not isinstance(spec, dict):
            raise forms.ValidationError(f"Spectrum {i + 1} is not a valid object.")

        # Validate data
        if "data" not in spec:
            raise forms.ValidationError(f"Spectrum {i + 1} is missing 'data' field.")

        data = spec["data"]
        if not isinstance(data, list) or len(data) < 2:
            raise forms.ValidationError(f"Spectrum {i + 1} must have at least 2 data points.")

        if len(data) > MAX_DATA_POINTS_PER_SPECTRUM:
            raise forms.ValidationError(
                f"Spectrum {i + 1} has too many data points ({len(data)}). "
                f"Maximum {MAX_DATA_POINTS_PER_SPECTRUM}."
            )

        for j, point in enumerate(data):
            if not isinstance(point, list) or len(point) != 2:
                raise forms.ValidationError(
                    f"Spectrum {i + 1}, point {j + 1}: must be [wavelength, value]."
                )
            # bool is a subclass of int, so JSON true/false must be excluded explicitly.
            if not all(isinstance(v, (int, float)) and not isinstance(v, bool) for v in point):
                raise forms.ValidationError(
                    f"Spectrum {i + 1}, point {j + 1}: values must be numbers."
                )
            # json.loads accepts NaN/Infinity literals. Only floats can be
            # non-finite, and restricting the check to floats avoids an
            # OverflowError on very large ints.
            if any(isinstance(v, float) and not math.isfinite(v) for v in point):
                raise forms.ValidationError(
                    f"Spectrum {i + 1}, point {j + 1}: values must be finite numbers."
                )

        # Validate category (type-checked first so unhashable values cannot
        # raise TypeError on the membership test)
        category = spec.get("category")
        if not category:
            raise forms.ValidationError(f"Spectrum {i + 1} is missing category.")
        if not isinstance(category, str) or category not in valid_categories:
            raise forms.ValidationError(f"Spectrum {i + 1} has invalid category: {category}")

        # Validate subtype
        subtype = spec.get("subtype")
        if not subtype:
            raise forms.ValidationError(f"Spectrum {i + 1} is missing subtype.")
        if not isinstance(subtype, str) or subtype not in valid_subtypes:
            raise forms.ValidationError(f"Spectrum {i + 1} has invalid subtype: {subtype}")

        # Validate owner (null or non-string owners are treated as missing)
        owner = spec.get("owner")
        if not isinstance(owner, str) or not owner.strip():
            raise forms.ValidationError(f"Spectrum {i + 1} is missing owner.")

    # Check for duplicate spectra within this submission.
    # Use (category, owner, subtype) as the unique key. This is a second pass so
    # that structural errors anywhere in the payload are reported first.
    seen: dict[tuple[str, str, str], int] = {}
    for i, spec in enumerate(spectra):
        key = (spec["category"], spec["owner"].strip().lower(), spec["subtype"])
        if key in seen:
            first_idx = seen[key]
            raise forms.ValidationError(
                f"Duplicate spectrum detected: Spectra {first_idx + 1} and {i + 1} have the same "
                f"owner ({spec['owner']}), category, and subtype ({spec['subtype']})."
            )
        seen[key] = i

    return spectra
| 131 | + |
| 132 | + |
class SpectrumFormV2(forms.Form):
    """Enhanced spectrum submission form supporting multi-spectrum file uploads.

    This form handles client-side processing of spectrum data. The JavaScript frontend
    parses CSV/TSV files, allows column selection, normalizes data, and sends processed
    spectra as JSON with per-spectrum metadata (category, owner, subtype, etc.).

    The view is expected to pass the submitting user via the ``user`` keyword
    argument; it is recorded as ``created_by``/``updated_by`` on created objects,
    and staff users' spectra are saved as approved instead of pending.
    """

    # Lookup for non-protein, non-dye categories (filter/camera/light):
    # category -> (name of the matching Spectrum owner field, model name in the "proteins" app)
    OWNER_LOOKUP = {
        Spectrum.FILTER: ("owner_filter", "Filter"),
        Spectrum.CAMERA: ("owner_camera", "Camera"),
        Spectrum.LIGHT: ("owner_light", "Light"),
    }

    # Hidden field containing JSON array of processed spectra from JavaScript
    # Structure: [{ "data": [[wave, value]...], "category": "p", "owner": "EGFP",
    #              "subtype": "ex", "peak_wave": 488, ... }, ...]
    spectra_json = forms.CharField(
        widget=forms.HiddenInput(),
        required=True,
        error_messages={"required": "Please upload a file and configure your spectra."},
    )

    # File upload field (for initial parsing by JavaScript - not required on POST)
    file = forms.FileField(
        required=False,
        label="Spectrum File",
        help_text="Upload CSV or TSV file.",
    )

    # Shared source fields
    source = forms.CharField(
        max_length=200,
        required=False,
        label="Source",
        help_text="Citation or source of the data",
    )

    primary_reference = forms.CharField(
        max_length=200,
        required=False,
        label="Primary Reference (DOI)",
        help_text="Enter a valid DOI (e.g., 10.1234/example)",
    )

    # Confirmation checkbox
    confirmation = forms.BooleanField(
        required=True,
        label="I confirm the validity of this data",
    )

    def __init__(self, *args, **kwargs):
        """Initialize the form, consuming the optional ``user`` keyword argument."""
        # Pop before calling super(): forms.Form does not accept a ``user`` kwarg.
        self.user: User | None = kwargs.pop("user", None)
        super().__init__(*args, **kwargs)

    def clean_spectra_json(self) -> list[SpectrumJSONData]:
        """Parse and validate the JSON array of processed spectra."""
        raw = self.cleaned_data.get("spectra_json", "")
        return _validate_spectrum_json(raw)

    def clean_primary_reference(self) -> str:
        """Validate that the DOI is resolvable if provided.

        Returns:
            The stripped DOI string (empty when none was given).

        Raises:
            forms.ValidationError: If a DOI was given but ``is_valid_doi`` rejects it.
        """
        doi = self.cleaned_data.get("primary_reference", "").strip()
        if doi and not is_valid_doi(doi):
            raise forms.ValidationError(
                f"Could not find a reference for DOI: {doi}. Please check that it is correct."
            )

        return doi

    def clean(self):
        """Validate that at least one of source or primary_reference is provided.

        Raises:
            forms.ValidationError: If the user supplied neither a source nor a
                primary reference.
        """
        cleaned_data = super().clean()
        source = cleaned_data.get("source", "").strip()
        reference = cleaned_data.get("primary_reference", "").strip()

        # Check if the user attempted to provide either value (even if it failed
        # field validation and is therefore absent from cleaned_data) by looking
        # at the raw data. The field's own error is enough in that case; adding
        # "please provide ..." on top would be misleading.
        attempted_source = self.data.get("source", "").strip()
        attempted_reference = self.data.get("primary_reference", "").strip()

        if not (source or reference or attempted_source or attempted_reference):
            raise forms.ValidationError(
                "Please provide at least one of Source or Primary Reference."
            )

        return cleaned_data

    def _get_or_create_owner(self, category: str, owner_name: str, owner_slug: str | None = None):
        """Get or create owner objects based on category.

        Args:
            category: The spectrum category (protein, dye, filter, etc.)
            owner_name: Display name of the owner; surrounding whitespace is ignored
            owner_slug: For proteins, this is the Protein.slug from Select2 autocomplete

        Returns:
            Tuple of (owner_fluor, owner_filter, owner_camera, owner_light); at most
            one element is set, the rest are None. All are None for a category
            that is not handled here.

        Raises:
            forms.ValidationError: For a protein category when no slug was given
                or no State exists for that protein slug.
        """
        # Validation and duplicate detection compare owners on the stripped name,
        # so look up / create with the stripped name too. Otherwise " EGFP "
        # would create a second owner next to an existing "EGFP".
        owner_name = owner_name.strip()

        owners = {
            "owner_fluor": None,
            "owner_filter": None,
            "owner_camera": None,
            "owner_light": None,
        }

        if category == Spectrum.PROTEIN:
            # For proteins, owner_slug is the Protein.slug from Select2 autocomplete
            if not owner_slug:
                raise forms.ValidationError(
                    f"Protein '{owner_name}' must be selected from the autocomplete dropdown."
                )
            # One query covers both the single-state and multi-state cases.
            # NOTE(review): with several states this picks the first one in
            # queryset order, which is not necessarily the protein's default
            # state. Confirm whether a dedicated default should be used.
            state = (
                State.objects.select_related("protein").filter(protein__slug=owner_slug).first()
            )
            if state is None:
                raise forms.ValidationError(f"Protein not found: {owner_name}")
            owners["owner_fluor"] = state

        elif category == Spectrum.DYE:
            dye, created = Dye.objects.get_or_create(
                slug=slugify(owner_name),
                defaults={"name": owner_name, "created_by": self.user},
            )
            if not created and self.user:
                dye.updated_by = self.user
                dye.save()

            dye_state, _ = DyeState.objects.get_or_create(
                dye=dye,
                name=FluorState.DEFAULT_NAME,
                defaults={"created_by": self.user},
            )
            owners["owner_fluor"] = dye_state

        elif category in self.OWNER_LOOKUP:
            field_name, model_name = self.OWNER_LOOKUP[category]
            owner_model = apps.get_model("proteins", model_name)
            owner_obj, created = owner_model.objects.get_or_create(
                name=owner_name,
                defaults={"created_by": self.user},
            )
            if not created and self.user:
                owner_obj.updated_by = self.user
                owner_obj.save()
            owners[field_name] = owner_obj

        return (
            owners["owner_fluor"],
            owners["owner_filter"],
            owners["owner_camera"],
            owners["owner_light"],
        )

    @transaction.atomic
    def save(self) -> list[Spectrum]:
        """Create Spectrum objects for each processed spectrum.

        Must be called only after the form has validated successfully. Runs in a
        single transaction, so a failure on any spectrum rolls back every object
        created for this submission.

        Returns:
            List of created Spectrum objects.

        Raises:
            forms.ValidationError: If an owner cannot be resolved (see
                ``_get_or_create_owner``) or ``Spectrum.full_clean()`` rejects
                a spectrum.
        """
        spectra_data = self.cleaned_data["spectra_json"]
        source = self.cleaned_data.get("source", "")

        # Convert DOI string to Reference instance if provided
        reference_doi = self.cleaned_data.get("primary_reference", "").strip()
        reference = None
        if reference_doi:
            reference, _ = Reference.objects.get_or_create(doi=reference_doi)

        # Staff submissions are approved immediately; everything else is pending.
        # This is the same for every spectrum in the submission, so compute it once.
        if self.user and self.user.is_staff:
            status = Spectrum.STATUS.approved
        else:
            status = Spectrum.STATUS.pending

        created_spectra = []

        for spec_data in spectra_data:
            category = spec_data["category"]

            owner_fluor, owner_filter, owner_camera, owner_light = self._get_or_create_owner(
                category, spec_data["owner"], spec_data.get("owner_slug")
            )

            spectrum = Spectrum(
                category=category,
                subtype=spec_data["subtype"],
                owner_fluor=owner_fluor,
                owner_filter=owner_filter,
                owner_camera=owner_camera,
                owner_light=owner_light,
                ph=spec_data.get("ph"),
                solvent=spec_data.get("solvent") or "",
                source=source,
                reference=reference,
                created_by=self.user,
                status=status,
            )

            # Set data (handles normalization)
            spectrum.data = spec_data["data"]

            # Override computed values if provided (null/0 values are ignored)
            if spec_data.get("peak_wave"):
                spectrum.peak_wave = spec_data["peak_wave"]
            if spec_data.get("scale_factor"):
                spectrum.scale_factor = spec_data["scale_factor"]

            spectrum.full_clean()
            spectrum.save()
            created_spectra.append(spectrum)

        return created_spectra
0 commit comments