Skip to content

Commit 164671f

Browse files
authored
Merge pull request #22 from EGA-archive/zygosity
Adding zygosity queries
2 parents 3fa2651 + 8d26daf commit 164671f

File tree

19 files changed

+351826
-26
lines changed

19 files changed

+351826
-26
lines changed

beacon/connections/mongo/analyses.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,19 @@ def get_variants_of_analysis(self, entry_id: Optional[str], qparams: RequestPara
7171
schema = DefaultSchemas.GENOMICVARIATIONS
7272
return schema, 0, -1, None, dataset
7373
position=str(position)
74-
query_cl={ position: "y", "datasetId": dataset}
74+
filters=qparams.query.filters
75+
if filters != []:
76+
for filter in filters:
77+
if filter['id']=='GENO:GENO_0000458':
78+
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
79+
qparams.query.filters.remove(filter)
80+
elif filter['id']=='GENO:GENO_0000136':
81+
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
82+
qparams.query.filters.remove(filter)
83+
else:
84+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
85+
else:
86+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
7587
string_of_ids = client.beacon.caseLevelData \
7688
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
7789
HGVSIds=list(string_of_ids)

beacon/connections/mongo/biosamples.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,19 @@ def get_variants_of_biosample(self, entry_id: Optional[str], qparams: RequestPar
6666
schema = DefaultSchemas.GENOMICVARIATIONS
6767
return schema, 0, -1, None, dataset
6868
position=str(position)
69-
query_cl={ position: "y", "datasetId": dataset}
69+
filters=qparams.query.filters
70+
if filters != []:
71+
for filter in filters:
72+
if filter['id']=='GENO:GENO_0000458':
73+
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
74+
qparams.query.filters.remove(filter)
75+
elif filter['id']=='GENO:GENO_0000136':
76+
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
77+
qparams.query.filters.remove(filter)
78+
else:
79+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
80+
else:
81+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
7082
string_of_ids = client.beacon.caseLevelData \
7183
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
7284
HGVSIds=list(string_of_ids)

beacon/connections/mongo/data/caseLevelData.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

beacon/connections/mongo/data/genomicVariations.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

beacon/connections/mongo/data/targets.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

beacon/connections/mongo/g_variants.py

Lines changed: 84 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,27 @@ def get_biosamples_of_variant(self, entry_id: Optional[str], qparams: RequestPar
8787
list_of_targets=targets[0]["biosampleIds"]
8888
list_of_positions_strings= string_of_ids[0]
8989
biosampleIds=[]
90-
for key, value in list_of_positions_strings.items():
91-
if key != 'datasetId' and key != 'id' and key != '_id':
92-
biosampleIds.append(list_of_targets[int(key)])
90+
filters=qparams.query.filters
91+
if filters != []:
92+
for filter in filters:
93+
if filter['id']=='GENO:GENO_0000458':
94+
for key, value in list_of_positions_strings.items():
95+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
96+
biosampleIds.append(list_of_targets[int(key)])
97+
qparams.query.filters.remove(filter)
98+
elif filter['id']=='GENO:GENO_0000136':
99+
for key, value in list_of_positions_strings.items():
100+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
101+
biosampleIds.append(list_of_targets[int(key)])
102+
qparams.query.filters.remove(filter)
103+
else:
104+
for key, value in list_of_positions_strings.items():
105+
if key != 'datasetId' and key != 'id' and key != '_id':
106+
biosampleIds.append(list_of_targets[int(key)])
107+
else:
108+
for key, value in list_of_positions_strings.items():
109+
if key != 'datasetId' and key != 'id' and key != '_id':
110+
biosampleIds.append(list_of_targets[int(key)])
93111
finalids=biosampleIds
94112
try:
95113
finalids=[]
@@ -138,9 +156,27 @@ def get_runs_of_variant(self, entry_id: Optional[str], qparams: RequestParams, d
138156
list_of_targets=targets[0]["biosampleIds"]
139157
list_of_positions_strings= string_of_ids[0]
140158
biosampleIds=[]
141-
for key, value in list_of_positions_strings.items():
142-
if key != 'datasetId' and key != 'id' and key != '_id':
143-
biosampleIds.append(list_of_targets[int(key)])
159+
filters=qparams.query.filters
160+
if filters != []:
161+
for filter in filters:
162+
if filter['id']=='GENO:GENO_0000458':
163+
for key, value in list_of_positions_strings.items():
164+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
165+
biosampleIds.append(list_of_targets[int(key)])
166+
qparams.query.filters.remove(filter)
167+
elif filter['id']=='GENO:GENO_0000136':
168+
for key, value in list_of_positions_strings.items():
169+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
170+
biosampleIds.append(list_of_targets[int(key)])
171+
qparams.query.filters.remove(filter)
172+
else:
173+
for key, value in list_of_positions_strings.items():
174+
if key != 'datasetId' and key != 'id' and key != '_id':
175+
biosampleIds.append(list_of_targets[int(key)])
176+
else:
177+
for key, value in list_of_positions_strings.items():
178+
if key != 'datasetId' and key != 'id' and key != '_id':
179+
biosampleIds.append(list_of_targets[int(key)])
144180
try:
145181
finalids=[]
146182
for bioid in biosampleIds:
@@ -188,9 +224,27 @@ def get_analyses_of_variant(self, entry_id: Optional[str], qparams: RequestParam
188224
list_of_targets=targets[0]["biosampleIds"]
189225
list_of_positions_strings= string_of_ids[0]
190226
biosampleIds=[]
191-
for key, value in list_of_positions_strings.items():
192-
if key != 'datasetId' and key != 'id' and key != '_id':
193-
biosampleIds.append(list_of_targets[int(key)])
227+
filters=qparams.query.filters
228+
if filters != []:
229+
for filter in filters:
230+
if filter['id']=='GENO:GENO_0000458':
231+
for key, value in list_of_positions_strings.items():
232+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
233+
biosampleIds.append(list_of_targets[int(key)])
234+
qparams.query.filters.remove(filter)
235+
elif filter['id']=='GENO:GENO_0000136':
236+
for key, value in list_of_positions_strings.items():
237+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
238+
biosampleIds.append(list_of_targets[int(key)])
239+
qparams.query.filters.remove(filter)
240+
else:
241+
for key, value in list_of_positions_strings.items():
242+
if key != 'datasetId' and key != 'id' and key != '_id':
243+
biosampleIds.append(list_of_targets[int(key)])
244+
else:
245+
for key, value in list_of_positions_strings.items():
246+
if key != 'datasetId' and key != 'id' and key != '_id':
247+
biosampleIds.append(list_of_targets[int(key)])
194248
try:
195249
finalids=[]
196250
for bioid in biosampleIds:
@@ -238,9 +292,27 @@ def get_individuals_of_variant(self, entry_id: Optional[str], qparams: RequestPa
238292
list_of_targets=targets[0]["biosampleIds"]
239293
list_of_positions_strings= string_of_ids[0]
240294
biosampleIds=[]
241-
for key, value in list_of_positions_strings.items():
242-
if key != 'datasetId' and key != 'id' and key != '_id':
243-
biosampleIds.append(list_of_targets[int(key)])
295+
filters=qparams.query.filters
296+
if filters != []:
297+
for filter in filters:
298+
if filter['id']=='GENO:GENO_0000458':
299+
for key, value in list_of_positions_strings.items():
300+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '11':
301+
biosampleIds.append(list_of_targets[int(key)])
302+
qparams.query.filters.remove(filter)
303+
elif filter['id']=='GENO:GENO_0000136':
304+
for key, value in list_of_positions_strings.items():
305+
if key != 'datasetId' and key != 'id' and key != '_id' and value != '10' and value != '01':
306+
biosampleIds.append(list_of_targets[int(key)])
307+
qparams.query.filters.remove(filter)
308+
else:
309+
for key, value in list_of_positions_strings.items():
310+
if key != 'datasetId' and key != 'id' and key != '_id':
311+
biosampleIds.append(list_of_targets[int(key)])
312+
else:
313+
for key, value in list_of_positions_strings.items():
314+
if key != 'datasetId' and key != 'id' and key != '_id':
315+
biosampleIds.append(list_of_targets[int(key)])
244316
try:
245317
finalquery={}
246318
finalquery["$or"]=[]

beacon/connections/mongo/individuals.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,19 @@ def get_variants_of_individual(self, entry_id: Optional[str], qparams: RequestPa
6666
schema = DefaultSchemas.GENOMICVARIATIONS
6767
return schema, 0, -1, None, dataset
6868
position=str(position)
69-
query_cl={ position: "y", "datasetId": dataset}
69+
filters=qparams.query.filters
70+
if filters != []:
71+
for filter in filters:
72+
if filter['id']=='GENO:GENO_0000458':
73+
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
74+
qparams.query.filters.remove(filter)
75+
elif filter['id']=='GENO:GENO_0000136':
76+
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
77+
qparams.query.filters.remove(filter)
78+
else:
79+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
80+
else:
81+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
7082
string_of_ids = client.beacon.caseLevelData \
7183
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
7284
HGVSIds=list(string_of_ids)

beacon/connections/mongo/runs.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,19 @@ def get_variants_of_run(self, entry_id: Optional[str], qparams: RequestParams, d
6767
schema = DefaultSchemas.GENOMICVARIATIONS
6868
return schema, 0, -1, None, dataset
6969
position=str(position)
70-
query_cl={ position: "y", "datasetId": dataset}
70+
filters=qparams.query.filters
71+
if filters != []:
72+
for filter in filters:
73+
if filter['id']=='GENO:GENO_0000458':
74+
query_cl={"$or": [{ position: "10", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
75+
qparams.query.filters.remove(filter)
76+
elif filter['id']=='GENO:GENO_0000136':
77+
query_cl={"$or": [{ position: "11", "datasetId": dataset}]}
78+
qparams.query.filters.remove(filter)
79+
else:
80+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
81+
else:
82+
query_cl={"$or": [{ position: "10", "datasetId": dataset},{ position: "11", "datasetId": dataset}, { position: "01", "datasetId": dataset}]}
7183
string_of_ids = client.beacon.caseLevelData \
7284
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
7385
HGVSIds=list(string_of_ids)

docker-compose.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ services:
7979
volumes:
8080
- ./beacon/connections/mongo/mongo-init/:/docker-entrypoint-initdb.d/:ro
8181
- ./beacon/connections/mongo/data/:/data
82+
- ./beacon/connections/mongo/data/db:/data/db
83+
- ./beacon/connections/mongo/data/configdb:/data/configdb
84+
- ./beacon/connections/mongo/data/caseLevelData:/data/caseLevelData
8285
networks:
8386
- pub
8487
#command: --verbose

ri-tools/conf/conf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44

55
#### VCF Conversion config parameters ####
66
allele_frequency=1 # introduce float number, leave 1 if you want to convert all the variants
7-
reference_genome='GRCh37' # Choose one between NCBI36, GRCh37, GRCh38
7+
reference_genome='GRCh38' # Choose one between NCBI36, GRCh37, GRCh38
88
datasetId='CINECA_synthetic_cohort_EUROPE_UK1'
99
case_level_data=True
1010
num_rows=7000000
1111
population='Finnish'
12+
zygosity=True
1213

1314
### MongoDB parameters ###
1415
database_host = 'mongo'

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/analyses.json

Lines changed: 30050 additions & 1 deletion
Large diffs are not rendered by default.

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/biosamples.json

Lines changed: 90146 additions & 1 deletion
Large diffs are not rendered by default.

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/caseLevelData.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,116 @@
1-
[{"cohortType": "study-defined", "collectionEvents": [{"eventDiseases": {"availability": true, "availabilityCount": 1705, "distribution": {"diseases": {"acutebronchitis": 121, "agranulocytosis": 111, "asthma": 134, "bipolaraffectivedisorder": 134, "cardiomyopathy": 133, "dentalcaries": 139, "eatingdisorders": 134, "fibrosisandcirrhosisofliver": 132, "gastro-oesophagealrefluxdisease": 140, "haemorrhoids": 127, "influenzaduetocertainidentifiedinfluenzavirus": 135, "insulin-dependentdiabetesmellitus": 165, "irondeficiencyanaemia": 142, "multiplesclerosis": 125, "obesity": 136, "sarcoidosis": 136, "schizophrenia": 138, "thyroiditis": 141, "varicoseveinsoflowerextremities": 139}}}, "eventEthnicities": {"availability": true, "availabilityCount": 2287, "distribution": {"ethnicities": {"African": 119, "AnyotherAsianbackground": 120, "AnyotherBlackbackground": 104, "Anyothermixedbackground": 92, "Anyotherwhitebackground": 114, "AsianorAsianBritish": 125, "Bangladeshi": 96, "BlackorBlackBritish": 131, "British": 114, "Caribbean": 127, "Chinese": 100, "Indian": 110, "Irish": 111, "Mixed": 127, "Otherethnicgroup": 116, "Pakistani": 115, "White": 105, "WhiteandAsian": 114, "WhiteandBlackAfrican": 115, "WhiteandBlackCaribbean": 132}}}, "eventGenders": {"availability": true, "availabilityCount": 1597, "distribution": {"genders": {"female": 1271, "male": 1233}}}}], "id": "CINECA_synthetic_cohort_UK1", "inclusionCriteria": {"ageRange": {"end": {"iso8601duration": "P65Y"}, "start": {"iso8601duration": "P18Y"}}, "genders": [{"id": "NCIT:C16576", "label": "female"}, {"id": "NCIT:C20197", "label": "male"}], "locations": [{"id": "GAZ:00150372", "label": "UK"}]}, "name": "CINECA synthetic cohort UK1"}]
1+
[
2+
{
3+
"datasetId": "CINECA_synthetic_cohort_EUROPE_UK1",
4+
"cohortType": "study-defined",
5+
"collectionEvents": [
6+
{
7+
"eventDiseases": {
8+
"availability": true,
9+
"availabilityCount": 1705,
10+
"distribution": {
11+
"diseases": {
12+
"acutebronchitis": 121,
13+
"agranulocytosis": 111,
14+
"asthma": 134,
15+
"bipolaraffectivedisorder": 134,
16+
"cardiomyopathy": 133,
17+
"dentalcaries": 139,
18+
"eatingdisorders": 134,
19+
"fibrosisandcirrhosisofliver": 132,
20+
"gastro-oesophagealrefluxdisease": 140,
21+
"haemorrhoids": 127,
22+
"influenzaduetocertainidentifiedinfluenzavirus": 135,
23+
"insulin-dependentdiabetesmellitus": 165,
24+
"irondeficiencyanaemia": 142,
25+
"multiplesclerosis": 125,
26+
"obesity": 136,
27+
"sarcoidosis": 136,
28+
"schizophrenia": 138,
29+
"thyroiditis": 141,
30+
"varicoseveinsoflowerextremities": 139
31+
}
32+
}
33+
},
34+
"eventEthnicities": {
35+
"availability": true,
36+
"availabilityCount": 2287,
37+
"distribution": {
38+
"ethnicities": {
39+
"African": 119,
40+
"AnyotherAsianbackground": 120,
41+
"AnyotherBlackbackground": 104,
42+
"Anyothermixedbackground": 92,
43+
"Anyotherwhitebackground": 114,
44+
"AsianorAsianBritish": 125,
45+
"Bangladeshi": 96,
46+
"BlackorBlackBritish": 131,
47+
"British": 114,
48+
"Caribbean": 127,
49+
"Chinese": 100,
50+
"Indian": 110,
51+
"Irish": 111,
52+
"Mixed": 127,
53+
"Otherethnicgroup": 116,
54+
"Pakistani": 115,
55+
"White": 105,
56+
"WhiteandAsian": 114,
57+
"WhiteandBlackAfrican": 115,
58+
"WhiteandBlackCaribbean": 132
59+
}
60+
}
61+
},
62+
"eventGenders": {
63+
"availability": true,
64+
"availabilityCount": 1597,
65+
"distribution": {
66+
"genders": {
67+
"female": 1271,
68+
"male": 1233
69+
}
70+
}
71+
},
72+
"eventLocations": {
73+
"availability": true,
74+
"availabilityCount": 1597,
75+
"distribution": {
76+
"locations": {
77+
"England": 322,
78+
"NorthernIreland": 317,
79+
"RepublicofIreland": 311,
80+
"Scotland": 308,
81+
"Wales": 339
82+
}
83+
}
84+
}
85+
}
86+
],
87+
"id": "CINECA_synthetic_cohort_UK1",
88+
"inclusionCriteria": {
89+
"ageRange": {
90+
"end": {
91+
"iso8601duration": "P65Y"
92+
},
93+
"start": {
94+
"iso8601duration": "P18Y"
95+
}
96+
},
97+
"genders": [
98+
{
99+
"id": "NCIT:C16576",
100+
"label": "female"
101+
},
102+
{
103+
"id": "NCIT:C20197",
104+
"label": "male"
105+
}
106+
],
107+
"locations": [
108+
{
109+
"id": "GAZ:00150372",
110+
"label": "UK"
111+
}
112+
]
113+
},
114+
"name": "CINECA synthetic cohort UK1"
115+
}
116+
]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
[
2+
{
3+
"type": "alphanumeric",
4+
"id": "libraryStrategy",
5+
"scopes": [
6+
"run"
7+
]
8+
},
9+
{
10+
"type": "alphanumeric",
11+
"id": "molecularAttributes.geneIds",
12+
"scopes": [
13+
"genomicVariation"
14+
]
15+
},
16+
{
17+
"type": "alphanumeric",
18+
"id": "diseases.ageOfOnset.iso8601duration",
19+
"scopes": [
20+
"individual"
21+
]
22+
}
23+
]

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/genomicVariations.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/individuals.json

Lines changed: 183766 additions & 1 deletion
Large diffs are not rendered by default.

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/runs.json

Lines changed: 47578 additions & 1 deletion
Large diffs are not rendered by default.

ri-tools/output_docs/CINECA_synthetic_cohort_EUROPE_UK1/targets.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)