Skip to content

Commit ff36d59

Browse files
authored
Refactor Spectrum schema, and spectrum submit form (#396)
* Refactor Spectrum model for optimized data storage and migration; remove legacy data field and update related properties * stub spectrum Data * Add peak_wave field and compute its value during spectrum data migration * v2 spectrum form * Refactor SpectrumFormV2 validation logic and add end-to-end tests for form submission * clean code * duplicate checking * use udsv * Enhance spectrum processing logic to conditionally normalize data based on category selection and update chart y-axis configuration accordingly * Update status indicator emojis and enhance chart processing for dual axis support * better typing * Add duplicate spectrum validation in form processing and backend * cleanup * add violation error message * Refactor spectrum migration and model constraints for clarity and validation * cleanup * fix type * more tests * fix remove all * use new form by default * change links * Add DOI validation and enhance spectrum submission form - Implemented `is_valid_doi` function to check DOI existence via Crossref API. - Integrated DOI validation in `clean_primary_reference` method of SpectrumFormV2. - Updated spectrum form template to display form errors. - Improved CSV parser to validate column count and wavelength data. - Enhanced form controller to manage confirmation checkbox state and restore form state after validation errors. - Added tests for DOI validation and form submission scenarios. * refactor and add test * fix capitalization bug * preserve precision * feat: Enhance Spectrum Model and Form Functionality - Added a unique constraint to the Spectrum model to ensure unique (owner, subtype) combinations. - Updated the spectrum form template to improve UI with a smaller font size for the column picker. - Implemented category code retrieval for spectrum owners in the similar spectrum owners AJAX view. - Introduced new tests for peak snapping behavior in the spectrum form, ensuring correct peak selection and normalization. 
- Enhanced duplicate detection logic for proteins and dyes, disabling existing subtypes in the dropdown. - Improved the column picker JavaScript to adjust preview rows and table height. - Refactored duplicate checker logic to return existing subtypes and handle exact matches more effectively. - Updated form controller to manage owner slug and improve user experience with scale factor selection. * better parsing * final touches * add contact to error * feat: Add unique constraint for spectrum owner and subtype; update URL for slug-based submission * fix: Update ignore patterns for WebKit and Firefox request cancellations in e2e tests
1 parent 588555f commit ff36d59

30 files changed

+4911
-312
lines changed

backend/proteins/extrest/entrez.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import time
99
from collections.abc import MutableMapping, Sequence
1010
from typing import TYPE_CHECKING, Literal, TypedDict, cast
11+
from urllib.parse import quote
1112

13+
import requests
1214
from Bio import Entrez, SeqIO
1315
from django.core.cache import cache
1416
from habanero import Crossref
@@ -109,6 +111,22 @@ def _merge_info(dict1: MutableMapping, dict2: MutableMapping, exclude=()) -> Mut
109111
return dict1
110112

111113

114+
def is_valid_doi(doi: str) -> bool:
    """Report whether Crossref knows about the given DOI.

    A HEAD request is sent to the Crossref ``works`` endpoint for the
    percent-encoded DOI; only an HTTP 200 answer counts as "exists".

    Args:
        doi: The DOI string to check.

    Returns:
        True when Crossref responds with status 200. False for an empty
        value, for a value longer than 200 characters (rejected without any
        network call), for any other status code, and whenever the request
        raises ``requests.RequestException``.
    """
    if not doi:
        return False
    if len(doi) > 200:
        return False

    # Encode every reserved character (including "/") so the DOI is a single
    # path segment of the URL.
    url = "https://api.crossref.org/works/" + quote(doi, safe="")
    try:
        response = requests.head(url, timeout=3, allow_redirects=True)
    except requests.RequestException:
        return False
    return response.status_code == 200
128+
129+
112130
def doi_lookup(doi: str) -> DoiInfo:
113131
info = _crossref(doi)
114132
pmid = _doi2pmid(doi)

backend/proteins/forms/spectrum.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,11 @@ def clean(self):
134134
return cleaned_data
135135

136136
def save(self, commit=True):
137+
# Set spectrum data from form - data is now a property, not a model field,
138+
# so ModelForm won't automatically set it on the instance
139+
if self.cleaned_data.get("data"):
140+
self.instance.data = self.cleaned_data["data"]
141+
137142
cat = self.cleaned_data.get("category")
138143
if cat == Spectrum.DYE:
139144
# Dyes require special handling: create Dye first, then DyeState
Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
"""Enhanced spectrum submission form with client-side processing and multi-spectrum support."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from typing import TYPE_CHECKING, TypedDict
7+
8+
from django import forms
9+
from django.apps import apps
10+
from django.db import transaction
11+
from django.utils.text import slugify
12+
13+
from proteins.extrest.entrez import is_valid_doi
14+
from proteins.models import Dye, DyeState, FluorState, Spectrum, State
15+
from references.models import Reference
16+
17+
if TYPE_CHECKING:
18+
from django.contrib.auth.models import User
19+
20+
21+
class SpectrumJSONData(TypedDict):
    """Type definition for spectrum JSON data from frontend.

    Must match the SpectrumJSON typedef in form-controller.js.

    All fields are always present in the dict. Fields marked with | None
    can have null values when not applicable (e.g., ph/solvent for non-bio
    categories, scale_factor when not provided, peak_wave when not found).

    Note that this describes the payload the frontend is expected to send;
    the server-side validator in this module only checks ``data``,
    ``category``, ``subtype`` and ``owner``.
    """

    # Required string/list fields (never None)
    data: list[list[float]]  # sequence of [wavelength, value] pairs
    category: str  # validated against the keys of Spectrum.CATEGORIES
    owner: str  # display name of the spectrum owner
    subtype: str  # validated against the keys of Spectrum.SUBTYPE_CHOICES
    column_name: str  # presumably the source-file column this came from -- TODO confirm

    # Always present but can be None
    owner_slug: str | None  # Protein slug for autocomplete categories
    scale_factor: float | None  # when truthy, overrides Spectrum.scale_factor on save
    ph: float | None
    solvent: str | None  # stored as "" on the Spectrum when null
    peak_wave: int | None  # when truthy, overrides Spectrum.peak_wave on save
44+
45+
46+
# Upper bound on the number of spectra accepted in one submitted JSON array.
MAX_SPECTRA_PER_SUBMISSION = 20
# Upper bound on the number of [wavelength, value] points in a single spectrum.
MAX_DATA_POINTS_PER_SPECTRUM = 2000
48+
49+
50+
def _is_finite_number(value: object) -> bool:
    """Return True for a finite int/float; reject bools, NaN and +/-Infinity."""
    # bool is a subclass of int, so JSON true/false must be excluded explicitly.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    # The chained comparison is False for NaN and for both infinities, and
    # (unlike math.isfinite) it cannot overflow on arbitrarily large JSON ints.
    return float("-inf") < value < float("inf")


def _validate_spectrum_json(raw: str | bytes) -> list[SpectrumJSONData]:
    """Parse and validate the JSON array of processed spectra sent by the frontend.

    The payload is untrusted input, so every field that is used later is
    type-checked here before it is touched.

    Args:
        raw: JSON text (or bytes) that should decode to a non-empty array of
            spectrum objects.

    Returns:
        The decoded list of spectrum dicts, unmodified.

    Raises:
        forms.ValidationError: If the payload is empty, is not decodable JSON,
            is not a non-empty list, exceeds the per-submission or per-spectrum
            size limits, contains a spectrum with malformed ``data``,
            ``category``, ``subtype`` or ``owner``, or contains two spectra
            sharing the same (category, owner, subtype).
    """
    if not raw or raw == "[]":
        raise forms.ValidationError("No spectrum data provided.")

    try:
        spectra = json.loads(raw)
    except (json.JSONDecodeError, UnicodeDecodeError, RecursionError) as e:
        # UnicodeDecodeError: undecodable bytes input; RecursionError: absurdly
        # deep nesting. Both are bad client input, not server errors.
        raise forms.ValidationError(f"Invalid JSON: {e}") from e

    if not isinstance(spectra, list) or not spectra:
        raise forms.ValidationError("Expected a non-empty array of spectra.")

    if len(spectra) > MAX_SPECTRA_PER_SUBMISSION:
        raise forms.ValidationError(
            f"Too many spectra ({len(spectra)}). "
            f"Maximum {MAX_SPECTRA_PER_SUBMISSION} per submission."
        )

    valid_subtypes = dict(Spectrum.SUBTYPE_CHOICES)
    valid_categories = dict(Spectrum.CATEGORIES)

    for i, spec in enumerate(spectra):
        if not isinstance(spec, dict):
            raise forms.ValidationError(f"Spectrum {i + 1} is not a valid object.")

        # Validate data
        if "data" not in spec:
            raise forms.ValidationError(f"Spectrum {i + 1} is missing 'data' field.")

        data = spec["data"]
        if not isinstance(data, list) or len(data) < 2:
            raise forms.ValidationError(f"Spectrum {i + 1} must have at least 2 data points.")

        if len(data) > MAX_DATA_POINTS_PER_SPECTRUM:
            raise forms.ValidationError(
                f"Spectrum {i + 1} has too many data points ({len(data)}). "
                f"Maximum {MAX_DATA_POINTS_PER_SPECTRUM}."
            )

        for j, point in enumerate(data):
            if not isinstance(point, list) or len(point) != 2:
                raise forms.ValidationError(
                    f"Spectrum {i + 1}, point {j + 1}: must be [wavelength, value]."
                )
            # json.loads accepts NaN/Infinity and maps true/false to bool;
            # none of those are usable wavelengths or intensities.
            if not all(_is_finite_number(v) for v in point):
                raise forms.ValidationError(
                    f"Spectrum {i + 1}, point {j + 1}: values must be numbers."
                )

        # Validate category
        category = spec.get("category")
        if not category:
            raise forms.ValidationError(f"Spectrum {i + 1} is missing category.")
        # The isinstance guard keeps unhashable values (list/dict) from raising
        # TypeError in the membership test.
        if not isinstance(category, str) or category not in valid_categories:
            raise forms.ValidationError(f"Spectrum {i + 1} has invalid category: {category}")

        # Validate subtype
        subtype = spec.get("subtype")
        if not subtype:
            raise forms.ValidationError(f"Spectrum {i + 1} is missing subtype.")
        if not isinstance(subtype, str) or subtype not in valid_subtypes:
            raise forms.ValidationError(f"Spectrum {i + 1} has invalid subtype: {subtype}")

        # Validate owner: must be a non-blank string (null or numeric owners
        # previously crashed on .strip()).
        owner = spec.get("owner")
        if not isinstance(owner, str) or not owner.strip():
            raise forms.ValidationError(f"Spectrum {i + 1} is missing owner.")

    # Check for duplicate spectra within this submission
    # Use (category, owner, subtype) as the unique key; owner is compared
    # case-insensitively and ignoring surrounding whitespace.
    seen = {}
    for i, spec in enumerate(spectra):
        key = (spec["category"], spec["owner"].strip().lower(), spec["subtype"])
        if key in seen:
            first_idx = seen[key]
            raise forms.ValidationError(
                f"Duplicate spectrum detected: Spectra {first_idx + 1} and {i + 1} have the same "
                f"owner ({spec['owner']}), category, and subtype ({spec['subtype']})."
            )
        seen[key] = i

    return spectra
131+
132+
133+
class SpectrumFormV2(forms.Form):
    """Enhanced spectrum submission form supporting multi-spectrum file uploads.

    This form handles client-side processing of spectrum data. The JavaScript frontend
    parses CSV/TSV files, allows column selection, normalizes data, and sends processed
    spectra as JSON with per-spectrum metadata (category, owner, subtype, etc.).

    Pass ``user=<User>`` when constructing the form; it is recorded as the
    creator/updater of any objects written by :meth:`save`.
    """

    # Lookup for non-protein, non-dye categories (filter/camera/light):
    # category code -> (Spectrum owner field name, model name in the "proteins" app)
    OWNER_LOOKUP = {
        Spectrum.FILTER: ("owner_filter", "Filter"),
        Spectrum.CAMERA: ("owner_camera", "Camera"),
        Spectrum.LIGHT: ("owner_light", "Light"),
    }

    # Hidden field containing JSON array of processed spectra from JavaScript
    # Structure: [{ "data": [[wave, value]...], "category": "p", "owner": "EGFP",
    #              "subtype": "ex", "peak_wave": 488, ... }, ...]
    spectra_json = forms.CharField(
        widget=forms.HiddenInput(),
        required=True,
        error_messages={"required": "Please upload a file and configure your spectra."},
    )

    # File upload field (for initial parsing by JavaScript - not required on POST)
    file = forms.FileField(
        required=False,
        label="Spectrum File",
        help_text="Upload CSV or TSV file.",
    )

    # Shared source fields
    source = forms.CharField(
        max_length=200,
        required=False,
        label="Source",
        help_text="Citation or source of the data",
    )

    primary_reference = forms.CharField(
        max_length=200,
        required=False,
        label="Primary Reference (DOI)",
        help_text="Enter a valid DOI (e.g., 10.1234/example)",
    )

    # Confirmation checkbox
    confirmation = forms.BooleanField(
        required=True,
        label="I confirm the validity of this data",
    )

    def __init__(self, *args, **kwargs):
        """Pop the optional ``user`` keyword before delegating to ``forms.Form``."""
        self.user: User | None = kwargs.pop("user", None)
        super().__init__(*args, **kwargs)

    def clean_spectra_json(self) -> list[SpectrumJSONData]:
        """Parse and validate the JSON array of processed spectra."""
        raw = self.cleaned_data.get("spectra_json", "")
        return _validate_spectrum_json(raw)

    def clean_primary_reference(self) -> str:
        """Validate that the DOI is resolvable if provided.

        Returns:
            The stripped DOI, or an empty string when none was entered.

        Raises:
            forms.ValidationError: If a DOI was entered but ``is_valid_doi``
                reports it as not found.
        """
        doi = self.cleaned_data.get("primary_reference", "").strip()
        if doi and not is_valid_doi(doi):
            raise forms.ValidationError(
                f"Could not find a reference for DOI: {doi}. Please check that it is correct."
            )

        return doi

    def clean(self):
        """Validate that at least one of source or primary_reference is provided."""
        cleaned_data = super().clean()
        source = cleaned_data.get("source", "").strip()
        reference = cleaned_data.get("primary_reference", "").strip()

        # Check if user attempted to provide a reference (even if it failed validation)
        # by looking at the raw data, not just cleaned_data. This avoids stacking a
        # "provide a source" error on top of the DOI error the user already gets.
        attempted_reference = self.data.get("primary_reference", "").strip()

        if not source and not reference and not attempted_reference:
            raise forms.ValidationError(
                "Please provide at least one of Source or Primary Reference."
            )

        return cleaned_data

    def _get_or_create_owner(self, category: str, owner_name: str, owner_slug: str | None = None):
        """Get or create owner objects based on category.

        Args:
            category: The spectrum category (protein, dye, filter, etc.)
            owner_name: Display name of the owner. Surrounding whitespace is
                ignored.
            owner_slug: For proteins, this is the Protein.slug from Select2 autocomplete

        Returns:
            Tuple of (owner_fluor, owner_filter, owner_camera, owner_light).
            At most one element is set; all are None for an unrecognized
            category.

        Raises:
            forms.ValidationError: For the protein category, when no slug was
                supplied or no State exists for that protein slug.
        """
        owner_fluor = owner_filter = owner_camera = owner_light = None

        # Validation treats owners as equal after stripping whitespace, so
        # look up / create with the stripped name too; otherwise " Foo " and
        # "Foo" would become two different owners.
        owner_name = owner_name.strip()

        if category == Spectrum.PROTEIN:
            # For proteins, owner_slug is the Protein.slug from Select2 autocomplete
            if not owner_slug:
                raise forms.ValidationError(
                    f"Protein '{owner_name}' must be selected from the autocomplete dropdown."
                )
            # A protein may have several states; take the first one in the
            # queryset's ordering.
            # NOTE(review): this is not necessarily the protein's designated
            # default state -- confirm whether that is what is wanted here.
            owner_fluor = (
                State.objects.select_related("protein").filter(protein__slug=owner_slug).first()
            )
            if owner_fluor is None:
                raise forms.ValidationError(f"Protein not found: {owner_name}")

        elif category == Spectrum.DYE:
            # Dyes are keyed by the slug derived from the submitted name.
            dye, created = Dye.objects.get_or_create(
                slug=slugify(owner_name),
                defaults={"name": owner_name, "created_by": self.user},
            )
            if not created and self.user:
                dye.updated_by = self.user
                dye.save()

            # Spectra attach to a DyeState, not to the Dye itself.
            dye_state, _ = DyeState.objects.get_or_create(
                dye=dye,
                name=FluorState.DEFAULT_NAME,
                defaults={"created_by": self.user},
            )
            owner_fluor = dye_state

        elif category in self.OWNER_LOOKUP:
            _, model_name = self.OWNER_LOOKUP[category]
            owner_model = apps.get_model("proteins", model_name)
            owner_obj, created = owner_model.objects.get_or_create(
                name=owner_name,
                defaults={"created_by": self.user},
            )
            if not created and self.user:
                owner_obj.updated_by = self.user
                owner_obj.save()

            if category == Spectrum.FILTER:
                owner_filter = owner_obj
            elif category == Spectrum.CAMERA:
                owner_camera = owner_obj
            elif category == Spectrum.LIGHT:
                owner_light = owner_obj

        return owner_fluor, owner_filter, owner_camera, owner_light

    @transaction.atomic
    def save(self) -> list[Spectrum]:
        """Create Spectrum objects for each processed spectrum.

        Runs inside a single transaction, so an error on any spectrum rolls
        back every object written by this call.

        Returns:
            List of created Spectrum objects.

        Raises:
            forms.ValidationError: If an owner cannot be resolved (see
                ``_get_or_create_owner``).
            django.core.exceptions.ValidationError: If ``full_clean()`` rejects
                a spectrum.
        """
        spectra_data = self.cleaned_data["spectra_json"]
        source = self.cleaned_data.get("source", "")

        # Convert DOI string to Reference instance if provided
        reference_doi = self.cleaned_data.get("primary_reference", "").strip()
        reference = None
        if reference_doi:
            reference, _ = Reference.objects.get_or_create(doi=reference_doi)

        # Staff submissions are approved immediately; everything else is
        # queued as pending. The same for every spectrum, so compute once.
        if self.user and self.user.is_staff:
            status = Spectrum.STATUS.approved
        else:
            status = Spectrum.STATUS.pending

        created_spectra = []

        for spec_data in spectra_data:
            category = spec_data["category"]
            owner_fluor, owner_filter, owner_camera, owner_light = self._get_or_create_owner(
                category, spec_data["owner"], spec_data.get("owner_slug")
            )

            spectrum = Spectrum(
                category=category,
                subtype=spec_data["subtype"],
                owner_fluor=owner_fluor,
                owner_filter=owner_filter,
                owner_camera=owner_camera,
                owner_light=owner_light,
                ph=spec_data.get("ph"),
                solvent=spec_data.get("solvent") or "",
                source=source,
                reference=reference,
                created_by=self.user,
                status=status,
            )

            # Set data (handles normalization)
            spectrum.data = spec_data["data"]

            # Override computed values if provided; this must happen after
            # assigning data so the client-supplied values win.
            if spec_data.get("peak_wave"):
                spectrum.peak_wave = spec_data["peak_wave"]
            if spec_data.get("scale_factor"):
                spectrum.scale_factor = spec_data["scale_factor"]

            spectrum.full_clean()
            spectrum.save()
            created_spectra.append(spectrum)

        return created_spectra

0 commit comments

Comments
 (0)