Skip to content

Commit

Permalink
Merge pull request #22 from EGA-archive/zygosity
Browse files: browse the repository at this point in the history
Adding zygosity queries
  • Loading branch information
costero-e authored Dec 20, 2024
2 parents 3fa2651 + 8d26daf commit 164671f
Show file tree
Hide file tree
Showing 19 changed files with 351,826 additions and 26 deletions.
14 changes: 13 additions & 1 deletion beacon/connections/mongo/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,19 @@ def get_variants_of_analysis(self, entry_id: Optional[str], qparams: RequestPara
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
query_cl={ position: "y", "datasetId": dataset}
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
Expand Down
14 changes: 13 additions & 1 deletion beacon/connections/mongo/biosamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,19 @@ def get_variants_of_biosample(self, entry_id: Optional[str], qparams: RequestPar
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
query_cl={ position: "y", "datasetId": dataset}
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
Expand Down
2 changes: 1 addition & 1 deletion beacon/connections/mongo/data/caseLevelData.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion beacon/connections/mongo/data/genomicVariations.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion beacon/connections/mongo/data/targets.json

Large diffs are not rendered by default.

96 changes: 84 additions & 12 deletions beacon/connections/mongo/g_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,27 @@ def get_biosamples_of_variant(self, entry_id: Optional[str], qparams: RequestPar
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
finalids=biosampleIds
try:
finalids=[]
Expand Down Expand Up @@ -138,9 +156,27 @@ def get_runs_of_variant(self, entry_id: Optional[str], qparams: RequestParams, d
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
try:
finalids=[]
for bioid in biosampleIds:
Expand Down Expand Up @@ -188,9 +224,27 @@ def get_analyses_of_variant(self, entry_id: Optional[str], qparams: RequestParam
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
try:
finalids=[]
for bioid in biosampleIds:
Expand Down Expand Up @@ -238,9 +292,27 @@ def get_individuals_of_variant(self, entry_id: Optional[str], qparams: RequestPa
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
biosampleIds.append(list_of_targets[int(key)])
qparams.query.filters.remove(filter)
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
else:
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
try:
finalquery={}
finalquery["$or"]=[]
Expand Down
14 changes: 13 additions & 1 deletion beacon/connections/mongo/individuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,19 @@ def get_variants_of_individual(self, entry_id: Optional[str], qparams: RequestPa
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
query_cl={ position: "y", "datasetId": dataset}
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
Expand Down
14 changes: 13 additions & 1 deletion beacon/connections/mongo/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,19 @@ def get_variants_of_run(self, entry_id: Optional[str], qparams: RequestParams, d
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
query_cl={ position: "y", "datasetId": dataset}
filters=qparams.query.filters
if filters != []:
for filter in filters:
if filter['id']=='GENO:GENO_0000458':
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
elif filter['id']=='GENO:GENO_0000136':
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
qparams.query.filters.remove(filter)
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
else:
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
Expand Down
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ services:
volumes:
- ./beacon/connections/mongo/mongo-init/:/docker-entrypoint-initdb.d/:ro
- ./beacon/connections/mongo/data/:/data
- ./beacon/connections/mongo/data/db:/data/db
- ./beacon/connections/mongo/data/configdb:/data/configdb
- ./beacon/connections/mongo/data/caseLevelData:/data/caseLevelData
networks:
- pub
#command: --verbose
Expand Down
3 changes: 2 additions & 1 deletion ri-tools/conf/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

#### VCF Conversion config parameters ####
allele_frequency=1 # Enter a float; leave at 1 to convert all the variants
reference_genome='GRCh37' # Choose one of: NCBI36, GRCh37, GRCh38
reference_genome='GRCh38' # Choose one of: NCBI36, GRCh37, GRCh38
# NOTE(review): presumably the dataset identifier attached to the converted records — confirm
datasetId='CINECA_synthetic_cohort_EUROPE_UK1'
# NOTE(review): presumably toggles generation of per-sample (case-level) data — confirm
case_level_data=True
# NOTE(review): looks like a cap on the number of rows processed — confirm against the converter
num_rows=7000000
# NOTE(review): presumably selects which population's frequencies are used — confirm
population='Finnish'
# NOTE(review): presumably enables recording zygosity during conversion — confirm
zygosity=True

### MongoDB parameters ###
database_host = 'mongo'
Expand Down
30,051 changes: 30,050 additions & 1 deletion ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/analyses.json

Large diffs are not rendered by default.

90,147 changes: 90,146 additions & 1 deletion ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/biosamples.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

117 changes: 116 additions & 1 deletion ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/cohorts.json
Original file line number Diff line number Diff line change
@@ -1 +1,116 @@
[{"cohortType": "study-defined", "collectionEvents": [{"eventDiseases": {"availability": true, "availabilityCount": 1705, "distribution": {"diseases": {"acutebronchitis": 121, "agranulocytosis": 111, "asthma": 134, "bipolaraffectivedisorder": 134, "cardiomyopathy": 133, "dentalcaries": 139, "eatingdisorders": 134, "fibrosisandcirrhosisofliver": 132, "gastro-oesophagealrefluxdisease": 140, "haemorrhoids": 127, "influenzaduetocertainidentifiedinfluenzavirus": 135, "insulin-dependentdiabetesmellitus": 165, "irondeficiencyanaemia": 142, "multiplesclerosis": 125, "obesity": 136, "sarcoidosis": 136, "schizophrenia": 138, "thyroiditis": 141, "varicoseveinsoflowerextremities": 139}}}, "eventEthnicities": {"availability": true, "availabilityCount": 2287, "distribution": {"ethnicities": {"African": 119, "AnyotherAsianbackground": 120, "AnyotherBlackbackground": 104, "Anyothermixedbackground": 92, "Anyotherwhitebackground": 114, "AsianorAsianBritish": 125, "Bangladeshi": 96, "BlackorBlackBritish": 131, "British": 114, "Caribbean": 127, "Chinese": 100, "Indian": 110, "Irish": 111, "Mixed": 127, "Otherethnicgroup": 116, "Pakistani": 115, "White": 105, "WhiteandAsian": 114, "WhiteandBlackAfrican": 115, "WhiteandBlackCaribbean": 132}}}, "eventGenders": {"availability": true, "availabilityCount": 1597, "distribution": {"genders": {"female": 1271, "male": 1233}}}}], "id": "CINECA_synthetic_cohort_UK1", "inclusionCriteria": {"ageRange": {"end": {"iso8601duration": "P65Y"}, "start": {"iso8601duration": "P18Y"}}, "genders": [{"id": "NCIT:C16576", "label": "female"}, {"id": "NCIT:C20197", "label": "male"}], "locations": [{"id": "GAZ:00150372", "label": "UK"}]}, "name": "CINECA synthetic cohort UK1"}]
[
{
"datasetId": "CINECA_synthetic_cohort_EUROPE_UK1",
"cohortType": "study-defined",
"collectionEvents": [
{
"eventDiseases": {
"availability": true,
"availabilityCount": 1705,
"distribution": {
"diseases": {
"acutebronchitis": 121,
"agranulocytosis": 111,
"asthma": 134,
"bipolaraffectivedisorder": 134,
"cardiomyopathy": 133,
"dentalcaries": 139,
"eatingdisorders": 134,
"fibrosisandcirrhosisofliver": 132,
"gastro-oesophagealrefluxdisease": 140,
"haemorrhoids": 127,
"influenzaduetocertainidentifiedinfluenzavirus": 135,
"insulin-dependentdiabetesmellitus": 165,
"irondeficiencyanaemia": 142,
"multiplesclerosis": 125,
"obesity": 136,
"sarcoidosis": 136,
"schizophrenia": 138,
"thyroiditis": 141,
"varicoseveinsoflowerextremities": 139
}
}
},
"eventEthnicities": {
"availability": true,
"availabilityCount": 2287,
"distribution": {
"ethnicities": {
"African": 119,
"AnyotherAsianbackground": 120,
"AnyotherBlackbackground": 104,
"Anyothermixedbackground": 92,
"Anyotherwhitebackground": 114,
"AsianorAsianBritish": 125,
"Bangladeshi": 96,
"BlackorBlackBritish": 131,
"British": 114,
"Caribbean": 127,
"Chinese": 100,
"Indian": 110,
"Irish": 111,
"Mixed": 127,
"Otherethnicgroup": 116,
"Pakistani": 115,
"White": 105,
"WhiteandAsian": 114,
"WhiteandBlackAfrican": 115,
"WhiteandBlackCaribbean": 132
}
}
},
"eventGenders": {
"availability": true,
"availabilityCount": 1597,
"distribution": {
"genders": {
"female": 1271,
"male": 1233
}
}
},
"eventLocations": {
"availability": true,
"availabilityCount": 1597,
"distribution": {
"locations": {
"England": 322,
"NorthernIreland": 317,
"RepublicofIreland": 311,
"Scotland": 308,
"Wales": 339
}
}
}
}
],
"id": "CINECA_synthetic_cohort_UK1",
"inclusionCriteria": {
"ageRange": {
"end": {
"iso8601duration": "P65Y"
},
"start": {
"iso8601duration": "P18Y"
}
},
"genders": [
{
"id": "NCIT:C16576",
"label": "female"
},
{
"id": "NCIT:C20197",
"label": "male"
}
],
"locations": [
{
"id": "GAZ:00150372",
"label": "UK"
}
]
},
"name": "CINECA synthetic cohort UK1"
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[
{
"type": "alphanumeric",
"id": "libraryStrategy",
"scopes": [
"run"
]
},
{
"type": "alphanumeric",
"id": "molecularAttributes.geneIds",
"scopes": [
"genomicVariation"
]
},
{
"type": "alphanumeric",
"id": "diseases.ageOfOnset.iso8601duration",
"scopes": [
"individual"
]
}
]

Large diffs are not rendered by default.

183,767 changes: 183,766 additions & 1 deletion ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/individuals.json

Large diffs are not rendered by default.

47,579 changes: 47,578 additions & 1 deletion ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/runs.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

0 comments on commit 164671f

Please sign in to comment.