Skip to content

Commit 96778de

Browse files
committed
enhancements to the different pubchem rest tools
1 parent 603e64f commit 96778de

7 files changed

+125
-127
lines changed

rest_tool/readfile.py

+17-15
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,25 @@
11
#!/usr/bin/env python
22

33
import io
4-
import urllib2, urllib, httplib
5-
def getListFromFile(file):
4+
import urllib2
5+
import urllib
6+
import httplib
7+
8+
def getListFromFile( infile ):
69
idlist=[]
7-
for line in file:
8-
if int(line):
9-
idlist.append(line.strip())
10+
for line in infile:
11+
line = line.strip()
12+
if line.isdigit():
13+
idlist.append( line )
1014
return idlist
1115

16+
def getListString( args ):
17+
if args.id_type_ff == "file":
18+
list_string = ",".join( getListFromFile(open(args.id_value, "r")) )
19+
else:
20+
list_string = args.id_value.strip().replace("__cr____cn__", ",")
21+
return list_string
22+
1223
def getresult(url):
1324
try:
1425
connection = urllib2.urlopen(url)
@@ -17,17 +28,8 @@ def getresult(url):
1728
else:
1829
return connection.read().rstrip()
1930

20-
def getListString(args):
21-
if args.id_type_ff == "file":
22-
#build comma list
23-
list_string=",".join(getListFromFile(open(args.id_value,"r")))
24-
else:
25-
print (args.id_value)
26-
list_string=args.id_value.strip().replace("__cr____cn__", ",")
27-
return list_string
28-
2931
def store_result_get(url, outfile):
30-
data=getresult(url)
32+
data = getresult(url)
3133
outfile.write(data)
3234
outfile.close()
3335

rest_tool/rest_tool.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import sys, os
44
import argparse
5-
65
import readfile
76

87
txt_output=["cids", "aids", "sids", "synonyms" ]
@@ -33,7 +32,7 @@ def main(args):
3332
url+="xml"
3433
if args.operation in check_for_id_type and not args.id_type is None:
3534
url+="?"+args.operation+"_type="+args.id_type
36-
print(url)
35+
print('The constructed REST URL is: %s' % url)
3736
data=readfile.getresult(url)
3837
outfile=args.outfile
3938
outfile.write(data)

rest_tool/rest_tool.xml

+20-17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
<tool id="rest_tool" name="Fetch Data from pubchem" version="0.1.0">
2-
<description>Fetch pubchem data</description>
1+
<tool id="pubchem_rest_tool" name="Fetch Data from PubChem" version="0.1.0">
2+
<description>(Compounds, Substances and Assay)</description>
33
<version_command>echo "0.1.0"</version_command>
44
<macros>
55
<import>rest_tool_macros.xml</import>
@@ -122,7 +122,6 @@
122122
#end if
123123
#end if
124124
</command>
125-
126125
<inputs>
127126
<conditional name="choose_action">
128127
<param name="action" multiple="false" type="select" label="Choose Use Case">
@@ -132,21 +131,20 @@
132131
<option value="advanced" selected="True">Advanced</option>
133132
</param>
134133
<when value="search">
135-
<param name="input_type_search" multiple="false" type="select" label="Search for">
136-
<expand macro="acs_options" />
137-
</param>
138-
<param name="input_type" multiple="false" type="select" label="Search By">
139-
<option value="name">Name</option>
140-
</param>
141-
<param format="txt" name="search_name" type="text" label="Enter the Name" />
134+
<param name="input_type_search" multiple="false" type="select" label="Search for">
135+
<expand macro="acs_options" />
136+
</param>
137+
<param name="input_type" multiple="false" type="select" label="Search By">
138+
<option value="name">Name</option>
139+
</param>
140+
<param format="txt" name="search_name" type="text" label="Enter the Name" />
142141
</when>
143142
<when value="specific_data">
144143
<expand macro="field_or_file" />
145144
<conditional name="choose_acs">
146145
<param name="input_type" multiple="false" type="select" label="Input Type">
147146
<expand macro="acs_options" />
148147
</param>
149-
150148
<when value="assay">
151149
<expand macro="conditional_operation_assay" />
152150
</when>
@@ -172,7 +170,6 @@
172170
<when value="activity">
173171
<param format="txt" name="activity" type="text" label="Enter the activity" />
174172
</when>
175-
176173
<when value="target">
177174
<param name="target_identifier_type" multiple="false" type="select" label="Choose target identifier">
178175
<option value="gi">GI</option>
@@ -261,18 +258,24 @@
261258
</test>
262259
</tests>
263260
<help>
261+
264262
**What it does**
265263

266-
This tool fetches data from pubchem. Pubchem offers a very mighty search tool called rest interface. See https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html.
264+
This tool fetches data from PubChem via the PubChem REST API.
265+
More information is available at:
267266

268-
and https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST_Tutorial.html
267+
- https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html
268+
- https://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST_Tutorial.html
269269

270270
This tool simplifies the use of this interface by offering the user almost every possibility in every step.
271271

272-
The input consists mainly of the following: Input IDs Operation Options
272+
The input consists mainly of the following inputs:
273+
274+
- Input IDs
275+
- Operation
276+
- Options
273277

274-
The tool has three simple use cases and one more complex use case (choose 'advanced')
275278

276-
The outputformat will be, if possible tabular separated files. If not possible, you will get txt or xml files.
279+
The output format will be tab-separated files where possible. Otherwise, you will get txt or xml files for further processing.
277280
</help>
278281
</tool>

rest_tool/rest_tool_advanced.py

+44-40
Original file line numberDiff line numberDiff line change
@@ -5,69 +5,78 @@
55

66
import readfile
77
import rest_tool_functions
8-
#dictionary for the output format
98

10-
dict_output={"cids" :"xml", "aids" : "xml", "sids" : "xml", "description": "xml", "summary" : "xml", "record" : "csv", "classification": "xml", "targets" : "txt", "xrefs" : "txt", "synonyms" : "txt", "property": "csv", "doseresponse" : "csv" }
11-
12-
#alles andere ist xml
9+
#dictionary for the allowed output formats
10+
dict_output={"cids" :"xml",
11+
"aids" : "xml",
12+
"sids" : "xml",
13+
"description": "xml",
14+
"summary" : "xml",
15+
"record" : "csv",
16+
"classification": "xml",
17+
"targets" : "txt",
18+
"xrefs" : "txt",
19+
"synonyms" : "txt",
20+
"property": "csv",
21+
"doseresponse" : "csv"
22+
}
1323
check_for_id_type=["cids", "aids", "sids"]
1424

1525
dic_key_value_type={"assay": "AID", "compound" : "CID", "substance": "SID" }
1626
dic_key_value_operation={"aids": "AID", "cids" : "CID", "sids": "SID" }
1727

1828
post_id_types=["inchi", "sdf", "smiles"]
19-
2029
id_dict={"compound": "cid", "assay": "aid", "substance" : "sid" }
2130

22-
23-
def main(args):
24-
url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/"+args.type+"/"
25-
url+=args.id_type+"/"
26-
# check if we are post then skip this part otherwise put the ids in the url
31+
def main( args ):
32+
url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/%s/%s/" % (args.type, args.id_type)
33+
# check if we are POST then skip this part otherwise insert the ids into the url
2734
if not args.id_type in post_id_types:
28-
if args.id_type ==id_dict[args.type]:
29-
url+=readfile.getListString(args)+"/"
35+
if args.id_type == id_dict[args.type]:
36+
url += readfile.getListString(args) + "/"
3037
else:
31-
url+=args.id_value+"/"
38+
url += args.id_value + "/"
39+
40+
url += args.operation + "/"
41+
if args.operation in ["target", "property", "xrefs"]:
42+
url += args.operation_value + "/"
3243

33-
url+=args.operation+"/"
34-
if args.operation == "target" or args.operation == "property" or args.operation == "xrefs":
35-
url+=args.operation_value+"/"
36-
37-
create_dict_tsv=False
44+
create_dict_tsv = False
3845
if args.operation == "xrefs":
3946
if "," in args.operation_value:
40-
url+="xml"
47+
url += "xml"
4148
else:
42-
url+="txt"
49+
url += "txt"
4350
else:
4451
if args.operation in check_for_id_type:
4552
# dont create dictionary if they are the same
4653
if dic_key_value_type[args.type] == dic_key_value_operation[args.operation]:
47-
url+="txt"
54+
url += "txt"
4855
else:
49-
url+="xml"
50-
create_dict_tsv=True
56+
url += "xml"
57+
create_dict_tsv = True
5158
else:
52-
url+=dict_output[args.operation]
59+
url += dict_output[args.operation]
5360
if args.operation in check_for_id_type and args.id_type not in post_id_types:
54-
url+="?%s_type=%s" % (args.operation, args.ids_operation_type)
55-
print(url)
61+
url += "?%s_type=%s" % (args.operation, args.ids_operation_type)
62+
print('The constructed REST URL is: %s' % url)
5663

5764
if args.id_type in post_id_types:
58-
postfile=open(args.id_value,"r")
59-
post_value=postfile.read()
60-
post_dict={args.id_type : post_value}
61-
print(post_dict)
65+
postfile = open(args.id_value,"r")
66+
post_value = postfile.read()
67+
post_dict = {args.id_type : post_value}
68+
#print(post_dict)
6269
readfile.store_result_post(url, post_dict, args.outfile)
63-
# check if have to create a tsv file
70+
# check if we have to create a tsv file
6471
elif create_dict_tsv == True:
65-
key=dic_key_value_type[args.type]
66-
value=dic_key_value_operation[args.operation]
67-
dic=rest_tool_functions.get_dict_key_value(url, key, value)
68-
rest_tool_functions.write_to_sf(dic, args.outfile, "\t")
72+
key = dic_key_value_type[args.type]
73+
value = dic_key_value_operation[args.operation]
74+
dic = rest_tool_functions.get_dict_key_value( url, key, value )
75+
rest_tool_functions.write_to_sf( dic, args.outfile, "\t" )
6976
else:
7077
readfile.store_result_get(url, args.outfile)
78+
79+
7180
if __name__ == "__main__":
7281
parser = argparse.ArgumentParser()
7382
parser.add_argument('--type', type=str, required=True,
@@ -95,10 +104,5 @@ def main(args):
95104
parser.add_argument('--outfile', type=argparse.FileType('w'), required=True,
96105
help="Specify the output file")
97106

98-
99-
if len(sys.argv) < 8:
100-
print "Too few arguments..."
101-
parser.print_help()
102-
exit(1)
103107
args = parser.parse_args()
104108
main( args )

rest_tool/rest_tool_assays_with_cids_given_target.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,15 @@
55
import argparse
66
import readfile
77

8-
#get every aid as a list
9-
#returns a dictionary with aid as key and as value the list of cids
10-
def getAllAssayIDs():
11-
url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/type/all/aids/TXT"
12-
data=readfile.getresult(url)
13-
aidlist=readfile.getListFromString(data)
14-
return aidlist
15-
16-
178
def getIDofLine(line):
189
arr=line.split(">")
1910
if len(arr) > 1:
20-
aid=arr[1].split("<")[0]
11+
aid = arr[1].split("<")[0]
2112
return aid
2213
else:
2314
return "-1"
24-
15+
16+
2517
#get xml of all aids with cids for an activity
2618
def getAllCidsForAssayActivity(activity):
2719
url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/activity/"+activity+"/aids/txt?list_return=listkey"

rest_tool/rest_tool_comp_for_assay.py

+12-14
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,29 @@
66
import readfile
77
import rest_tool_functions
88

9-
10-
#get the cids for bioassay aid
9+
"""
10+
Get all CIDs belonging to the given bioassay IDs (AIDs)
11+
"""
1112

12-
1313
def main(args):
1414
if args.aid_file is None:
15-
aidlist=args.aid.split(",")
15+
aidlist = args.aid.split(",")
1616
else:
17-
aidlist=readfile.getListFromFile(args.aid_file)
18-
aidliststring=",".join(aidlist)
19-
url="http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/"+aidliststring+"/cids/xml"
20-
print(url)
21-
dic=rest_tool_functions.get_dict_key_value(url, "AID", "CID")
17+
aidlist = readfile.getListFromFile(args.aid_file)
18+
aidliststring = ",".join(aidlist)
19+
url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/assay/aid/"+aidliststring+"/cids/xml"
20+
print('The constructed REST URL is: %s' % url)
21+
dic = rest_tool_functions.get_dict_key_value(url, "AID", "CID")
2222
rest_tool_functions.write_to_sf(dic, args.outfile, "\t")
23-
23+
24+
2425
if __name__ == "__main__":
2526
parser = argparse.ArgumentParser()
2627
parser.add_argument('--aid', type=str, dest="aid", help="AIDs of the BioAssay")
2728
parser.add_argument('--aid-file', dest="aid_file", type=argparse.FileType('r'),
2829
help="Specify a file with a list of aids, one per line")
2930
parser.add_argument('--outfile', type=argparse.FileType('w'),
3031
help="Specify output file")
31-
if len(sys.argv) < 2:
32-
print "Too few arguments..."
33-
parser.print_help()
34-
exit(1)
32+
3533
args = parser.parse_args()
3634
main( args )

0 commit comments

Comments
 (0)