From 74809468a3f178d564fafc6460d5a354fe7fb5a9 Mon Sep 17 00:00:00 2001 From: costero-e Date: Tue, 12 Nov 2024 14:49:43 +0100 Subject: [PATCH] new index and making genome ref 38 by default --- beacon/connections/mongo/reindex.py | 8 ++-- .../connections/mongo/request_parameters.py | 42 +++++++++++++++++-- beacon/connections/mongo/utils.py | 12 ++---- beacon/request/parameters.py | 12 +++--- 4 files changed, 53 insertions(+), 21 deletions(-) diff --git a/beacon/connections/mongo/reindex.py b/beacon/connections/mongo/reindex.py index 4628952..a700e06 100644 --- a/beacon/connections/mongo/reindex.py +++ b/beacon/connections/mongo/reindex.py @@ -65,10 +65,12 @@ #client.beacon.genomicVariations.create_index([("caseLevelData.biosampleId", 1)]) #client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1)]) client.beacon.genomicVariations.create_index([("datasetId", 1)]) -client.beacon.genomicVariations.create_index([("variantInternalId", 1)]) +#client.beacon.genomicVariations.create_index([("variantInternalId", 1)]) +client.beacon.genomicVariations.create_index([("variation.location.interval.start.value", 1)]) +#client.beacon.genomicVariations.create_index([("variation.location.interval.start.value", 1), ("variation.location.interval.end.value", -1)]) #client.beacon.genomicVariations.create_index([("identifiers.genomicHGVSId", 1), ("variation.location.interval.start.value", 1), ("caseLevelData.biosampleId", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) -client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) -client.beacon.genomicVariations.create_index([("datasetId", 1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) +#client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) +#client.beacon.genomicVariations.create_index([("datasetId", 1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) client.beacon.genomicVariations.create_index([("molecularAttributes.geneIds", 1), ("variantInternalId", 1), ("variation.variantType", 1)]) #client.beacon.individuals.create_index([("$**", "text")]) #client.beacon.runs.create_index([("$**", "text")]) diff --git a/beacon/connections/mongo/request_parameters.py b/beacon/connections/mongo/request_parameters.py index 2d3cd91..90c9b3a 100644 --- a/beacon/connections/mongo/request_parameters.py +++ b/beacon/connections/mongo/request_parameters.py @@ -45,14 +45,36 @@ def generate_position_filter_start(self, key: str, value: List[int]) -> List[Alp return filters @log_with_args(level) -def generate_position_filter_end(self, key: str, value: List[int]) -> List[AlphanumericFilter]: +def generate_position_filter_start_equal(self, key: str, value: List[int]) -> List[AlphanumericFilter]: filters = [] if len(value) == 1: filters.append(AlphanumericFilter( id=VARIANTS_PROPERTY_MAP[key], value=value[0], + operator=Operator.EQUAL + )) + elif len(value) == 2:# pragma: no cover + filters.append(AlphanumericFilter( + id=VARIANTS_PROPERTY_MAP[key], + value=value[0], + operator=Operator.GREATER_EQUAL + )) + filters.append(AlphanumericFilter( + id=VARIANTS_PROPERTY_MAP[key], + value=value[1], operator=Operator.LESS_EQUAL )) + return filters + +@log_with_args(level) +def generate_position_filter_end(self, key: str, value: List[int]) -> List[AlphanumericFilter]: + filters = [] + if len(value) == 1: + filters.append(AlphanumericFilter( + id=VARIANTS_PROPERTY_MAP["start"], + value=value[0], + operator=Operator.LESS + )) elif len(value) == 2:# pragma: no cover filters.append(AlphanumericFilter( id=VARIANTS_PROPERTY_MAP[key], @@ -79,11 +101,18 @@ def apply_request_parameters(self, query: Dict[str, List[dict]], qparams: Reques subquery["$and"] = [] subqueryor={} subqueryor["$or"] = [] + equal=True + for k, v in reqparam.items(): + if k == 'end': + equal=False for k, v in reqparam.items(): if k == "start": if isinstance(v, str): v = v.split(',') - filters = generate_position_filter_start(self, k, v) + if equal == False: + filters = generate_position_filter_start(self, k, v) + else: + filters = generate_position_filter_start_equal(self, k, v) for filter in filters: subquery["$and"].append(apply_alphanumeric_filter({}, filter, collection, dataset)) elif k == "end": @@ -149,11 +178,18 @@ def apply_request_parameters(self, query: Dict[str, List[dict]], qparams: Reques subquery["$and"] = [] subqueryor={} subqueryor["$or"] = [] + equal=True + for k, v in qparams.query.request_parameters.items(): + if k == 'end': + equal=False for k, v in qparams.query.request_parameters.items(): if k == "start": if isinstance(v, str): v = v.split(',') - filters = generate_position_filter_start(self, k, v) + if equal == False: + filters = generate_position_filter_start(self, k, v) + else: + filters = generate_position_filter_start_equal(self, k, v) for filter in filters: query["$and"].append(apply_alphanumeric_filter(self, {}, filter, collection, dataset)) elif k == "end": diff --git a/beacon/connections/mongo/utils.py b/beacon/connections/mongo/utils.py index 69250db..a6da8b7 100644 --- a/beacon/connections/mongo/utils.py +++ b/beacon/connections/mongo/utils.py @@ -59,23 +59,16 @@ def get_count(self, collection: Collection, query: dict) -> int: try: counts=list(counts) if counts == []: - match_dict={} - match_dict['$match']=query - count_dict={} - aggregated_query=[] - count_dict["$count"]='Total' - aggregated_query.append(match_dict) - aggregated_query.append(count_dict) - total=list(collection.aggregate(aggregated_query)) + total_counts=collection.count_documents(query) insert_dict={} insert_dict['id']=str(query) - total_counts=total[0]['Total'] insert_dict['num_results']=total_counts# pragma: no cover insert_dict['collection']=str(collection)# pragma: no cover insert_total=client.beacon.counts.insert_one(insert_dict)# pragma: no cover else: total_counts=counts[0]["num_results"] except Exception as e:# pragma: no cover + LOG.debug(e) insert_dict={} insert_dict['id']=str(query) total_counts=0 @@ -121,6 +114,7 @@ def get_docs_by_response_type(self, include: str, query: dict, dataset: str, lim queryid={} queryid['datasetId']=dataset query_count["$or"].append(queryid) + LOG.debug(query_count) if query_count["$or"]!=[]: dataset_count = get_count(self, mongo_collection, query_count) if dataset_count == 0: diff --git a/beacon/request/parameters.py b/beacon/request/parameters.py index 35c66e1..a0c0b29 100644 --- a/beacon/request/parameters.py +++ b/beacon/request/parameters.py @@ -92,7 +92,7 @@ class SequenceQuery(BaseModel): referenceBases: str clinicalRelevance: Optional[str] =None mateName: Optional[str] =None - assemblyId: Optional[str] =None + assemblyId: Optional[str] ='GRCh38' class RangeQuery(BaseModel): referenceName: Union[str,int] @@ -105,7 +105,7 @@ class RangeQuery(BaseModel): variantMaxLength: Optional[int] =None clinicalRelevance: Optional[str] =None mateName: Optional[str] =None - assemblyId: Optional[str] =None + assemblyId: Optional[str] ='GRCh38' class DatasetsRequested(BaseModel): datasets: list @@ -117,7 +117,7 @@ class GeneIdQuery(BaseModel): aminoacidChange: Optional[str] =None variantMinLength: Optional[int] =None variantMaxLength: Optional[int] =None - assemblyId: Optional[str] =None + assemblyId: Optional[str] ='GRCh38' class BracketQuery(BaseModel): referenceName: Union[str,int] @@ -126,7 +126,7 @@ class BracketQuery(BaseModel): variantType: Optional[str] =None clinicalRelevance: Optional[str] =None mateName: Optional[str] =None - assemblyId: Optional[str] =None + assemblyId: Optional[str] ='GRCh38' @field_validator('start') @classmethod def start_must_be_array_of_integers(cls, v: list) -> list: @@ -146,12 +146,12 @@ def end_must_be_array_of_integers(cls, v: list) -> list: class GenomicAlleleQuery(BaseModel): genomicAlleleShortForm: str - assemblyId: Optional[str] =None + assemblyId: Optional[str] ='GRCh38' class AminoacidChangeQuery(BaseModel): aminoacidChange: str geneId: str - assemblyId: Optional[str] =None + assemblyId: Optional[str] ='GRCh38' class RequestParams(CamelModel): meta: RequestMeta = RequestMeta()