From e52e43d12d205ca891963b336df965c488ed53a0 Mon Sep 17 00:00:00 2001 From: costero-e Date: Tue, 12 Nov 2024 18:57:31 +0100 Subject: [PATCH] adding assemblyId required if refName is chr --- beacon/connections/mongo/filters.py | 2 +- beacon/connections/mongo/reindex.py | 7 ++-- beacon/request/parameters.py | 57 +++++++++++++++++++++++++---- 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/beacon/connections/mongo/filters.py b/beacon/connections/mongo/filters.py index 6976c8f..fdbdf2d 100644 --- a/beacon/connections/mongo/filters.py +++ b/beacon/connections/mongo/filters.py @@ -812,7 +812,7 @@ def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, col elif filter.value == 'Y': dict_regex['$regex']='^NC_0000'+'24' else: - dict_regex['$regex']='^NC_0000'+filter.value+'.'+'10:g'+'|'+'^NC_0000'+filter.value+'.'+'11:g'+'|'+'^NC_0000'+filter.value+'.'+'9:g' + dict_regex['$regex']='^NC_0000'+filter.value elif '>' in filter.value:# pragma: no cover dict_regex=filter.value elif '.' in filter.value:# pragma: no cover diff --git a/beacon/connections/mongo/reindex.py b/beacon/connections/mongo/reindex.py index a700e06..8ca8fc4 100644 --- a/beacon/connections/mongo/reindex.py +++ b/beacon/connections/mongo/reindex.py @@ -65,13 +65,12 @@ #client.beacon.genomicVariations.create_index([("caseLevelData.biosampleId", 1)]) #client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1)]) client.beacon.genomicVariations.create_index([("datasetId", 1)]) -#client.beacon.genomicVariations.create_index([("variantInternalId", 1)]) +client.beacon.genomicVariations.create_index([("variantInternalId", 1)]) client.beacon.genomicVariations.create_index([("variation.location.interval.start.value", 1)]) #client.beacon.genomicVariations.create_index([("variation.location.interval.start.value", 1), ("variation.location.interval.end.value", -1)]) -#client.beacon.genomicVariations.create_index([("identifiers.genomicHGVSId", 1), ("variation.location.interval.start.value", 1), ("caseLevelData.biosampleId", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) -#client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) +client.beacon.genomicVariations.create_index([("identifiers.genomicHGVSId", 1)]) #client.beacon.genomicVariations.create_index([("datasetId", 1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) -client.beacon.genomicVariations.create_index([("molecularAttributes.geneIds", 1), ("variantInternalId", 1), ("variation.variantType", 1)]) +client.beacon.genomicVariations.create_index([("molecularAttributes.geneIds", 1), ("variation.variantType", 1)]) #client.beacon.individuals.create_index([("$**", "text")]) #client.beacon.runs.create_index([("$**", "text")]) #collection_name = client.beacon.analyses diff --git a/beacon/request/parameters.py b/beacon/request/parameters.py index a0c0b29..d159402 100644 --- a/beacon/request/parameters.py +++ b/beacon/request/parameters.py @@ -4,7 +4,8 @@ ValidationError, field_validator, Field, - PrivateAttr + PrivateAttr, + model_validator ) from strenum import StrEnum from typing import List, Optional, Union @@ -92,7 +93,22 @@ class SequenceQuery(BaseModel): referenceBases: str clinicalRelevance: Optional[str] =None mateName: Optional[str] =None - assemblyId: Optional[str] ='GRCh38' + assemblyId: Optional[str] =None + @model_validator(mode='after') + @classmethod + def referenceName_must_have_assemblyId_if_not_HGVSId(cls, values): + if values.referenceName in ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','X','Y','MT',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]: + try: + if values.assemblyId == None: + err = 'if referenceName is just the chromosome: assemblyId parameter is required' + errcode=400 + raise_exception(err, errcode) + else: + pass + except Exception as e: + raise ValueError + else: + raise ValueError class RangeQuery(BaseModel): referenceName: Union[str,int] @@ -105,7 +121,22 @@ class RangeQuery(BaseModel): variantMaxLength: Optional[int] =None clinicalRelevance: Optional[str] =None mateName: Optional[str] =None - assemblyId: Optional[str] ='GRCh38' + assemblyId: Optional[str] =None + @model_validator(mode='after') + @classmethod + def referenceName_must_have_assemblyId_if_not_HGVSId_2(cls, values): + if values.referenceName in ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','X','Y','MT',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]: + try: + if values.assemblyId == None: + err = 'if referenceName is just the chromosome: assemblyId parameter is required' + errcode=400 + raise_exception(err, errcode) + else: + pass + except Exception as e: + raise ValueError + else: + raise ValueError class DatasetsRequested(BaseModel): datasets: list @@ -117,7 +148,6 @@ class GeneIdQuery(BaseModel): aminoacidChange: Optional[str] =None variantMinLength: Optional[int] =None variantMaxLength: Optional[int] =None - assemblyId: Optional[str] ='GRCh38' class BracketQuery(BaseModel): referenceName: Union[str,int] @@ -126,7 +156,7 @@ class BracketQuery(BaseModel): variantType: Optional[str] =None clinicalRelevance: Optional[str] =None mateName: Optional[str] =None - assemblyId: Optional[str] ='GRCh38' + assemblyId: Optional[str] =None @field_validator('start') @classmethod def start_must_be_array_of_integers(cls, v: list) -> list: @@ -143,15 +173,28 @@ def end_must_be_array_of_integers(cls, v: list) -> list: pass else: raise ValueError + @model_validator(mode='after') + @classmethod + def referenceName_must_have_assemblyId_if_not_HGVSId_3(cls, values): + if values.referenceName in ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','X','Y','MT',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23]: + try: + if values.assemblyId == None: + err = 'if referenceName is just the chromosome: assemblyId parameter is required' + errcode=400 + raise_exception(err, errcode) + else: + pass + except Exception as e: + raise ValueError + else: + raise ValueError class GenomicAlleleQuery(BaseModel): genomicAlleleShortForm: str - assemblyId: Optional[str] ='GRCh38' class AminoacidChangeQuery(BaseModel): aminoacidChange: str geneId: str - assemblyId: Optional[str] ='GRCh38' class RequestParams(CamelModel): meta: RequestMeta = RequestMeta()