-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_tapioca.py
165 lines (133 loc) · 6.42 KB
/
run_tapioca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import argparse
import datetime
import os
import sys

import pandas as pd

import tapioca as tp
# Error messages printed when input validation fails. Each template takes the
# offending file path through %-formatting.
INVALID_FILETYPE_MSG = "\n Error: Invalid file format. %s must be a .csv file."
INVALID_PATH_MSG = "\n Error: Invalid file path/name. Path %s does not exist."
INVALID_CONTENT_MSG = (
    "\n Error: Invalid data content/structure. %s must be reformatted to fit expected structure."
    " See example input file on the Tapioca GitHub (https://github.com/FunctionLab/tapioca)"
)
INVALID_NORM_CONTENT_MSG = (
    "\n Error: Invalid data content/structure. %s must be reformatted to fit expected structure."
    " See example normalized input file on the Tapioca GitHub"
    " (https://github.com/FunctionLab/tapioca)"
)
def validate_file(file_name, prenorm):
    """Validate an input file, exiting the program on the first failed check.

    The checks run in order: the path exists, the name ends in ``.csv``, and
    the contents match the expected layout. When a check fails, the matching
    error message is printed and the process exits with a non-zero status.

    Args:
        file_name: Path of the file to validate.
        prenorm: True when the file holds pre-normalized data. Selects which
            layout is checked and which content error message is printed.

    Returns:
        None when every check passes.

    Raises:
        SystemExit: If any check fails.
    """
    if not valid_path(file_name):
        print(INVALID_PATH_MSG % (file_name,))
        sys.exit(1)
    if not valid_filetype(file_name):
        print(INVALID_FILETYPE_MSG % (file_name,))
        sys.exit(1)
    if not validate_file_structure(file_name, prenorm):
        # Pre-normalized input must point the user at the normalized example
        # file; raw input points at the regular example file.
        message = INVALID_NORM_CONTENT_MSG if prenorm else INVALID_CONTENT_MSG
        print(message % (file_name,))
        sys.exit(1)
    return
def valid_filetype(file_name):
    """Return True when *file_name* ends with the ``.csv`` suffix (case-sensitive)."""
    required_suffix = '.csv'
    return file_name.endswith(required_suffix)
def valid_path(path):
    """Return True when *path* exists on the filesystem, False otherwise."""
    path_exists = os.path.exists(path)
    return path_exists
def _missing_label_message(label, location):
    """Build the 'label not found' error text for *label* expected at *location*."""
    return ('\n Error: %s label not found in input file. Make sure %s is typed exactly as seen here and '
            'that the %s label is in the %s' % (label, label, label, location))


def validate_file_structure(path,prenorm):
    """Check that the csv at *path* has the labels the selected layout requires.

    Every problem found is printed, so the user sees all of them in one run.

    Args:
        path: Path of the csv file to inspect.
        prenorm: True to check the pre-normalized layout (``accession``,
            ``condition`` and ``replicate`` header columns followed by numeric
            data-point headers). False to check the raw layout (a ``condition``
            header whose column holds ``replicate`` and ``accession`` in its
            second and third data rows).

    Returns:
        True when the file matches the expected layout, False otherwise
        (including when the file is empty or cannot be parsed as csv).
    """
    try:
        table = pd.read_csv(path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        # An empty or malformed csv is a structure failure, not a crash.
        print('\n Error: could not parse input file as csv: %s' % err)
        return False

    cols = list(table.columns)
    pass_flag = True

    if prenorm:
        expected_headers = (
            ('accession', 'first row of the first column of the file'),
            ('condition', 'first row of the second column of the file'),
            ('replicate', 'first row of the third column of the file'),
        )
        for label, location in expected_headers:
            if label not in cols:
                print(_missing_label_message(label, location))
                pass_flag = False
        # Everything after the three label columns must be a curve data point.
        # NOTE(review): every '.' is stripped before the digit test, so a
        # header such as '1.2.3' also passes — confirm whether that is wanted.
        for col in cols[3:]:
            if not col.replace('.', '').isnumeric():
                print('Error: all curve data points (ex. temperatures) should be a number, '
                      + str(col) + ' is not a number.')
                pass_flag = False
        return pass_flag

    if 'condition' not in cols:
        print(_missing_label_message('condition', 'first row of the first column of the file'))
        # The row labels live in the condition column; without it the
        # remaining checks cannot be evaluated.
        return False

    row_labels = list(table['condition'])
    # Index 1 / 2 of the data rows are the third / fourth rows of the file
    # (the header is the first row). Guard the length so short files fail
    # validation rather than raising IndexError.
    if len(row_labels) < 2 or row_labels[1] != 'replicate':
        print(_missing_label_message('replicate', 'third row of the first column of the file'))
        pass_flag = False
    if len(row_labels) < 3 or row_labels[2] != 'accession':
        print(_missing_label_message('accession', 'fourth row of the first column'))
        pass_flag = False
    return pass_flag
def main(argv=None):
    """Parse command-line options, validate the input file, and run Tapioca.

    Args:
        argv: Optional list of argument strings to parse. When None, argparse
            reads the arguments from ``sys.argv``.

    Raises:
        SystemExit: If no input file is given, if the input file fails
            validation, or if argparse rejects the arguments.
    """
    parser = argparse.ArgumentParser(description="A Command Line Interface For Running Tapioca")

    # Each option takes exactly one value. ``nargs=1`` is intentionally not
    # used: it wraps parsed values in a one-element list while leaving the
    # defaults bare, so indexing ``[0]`` fails whenever an option is omitted.
    parser.add_argument("-i", "--input", type=str,
                        metavar="raw_file", default=None,
                        help="The path to the input csv file")
    parser.add_argument("-o", "--output", type=str,
                        metavar="base_save_name", default=None,
                        help="The base save name for the prediction files")
    parser.add_argument("-r", "--ref", type=int,
                        metavar="ref_channel", default=1,
                        help="0 to not perform Reference channel normalization. Default 1")
    parser.add_argument("-p", "--prenorm", type=int,
                        metavar="pre_normalized", default=0,
                        help="Set 1 when inputting pre-normalized data. Default 0")
    parser.add_argument("-c", "--cofrac", type=int,
                        metavar="co_fractionation", default=0,
                        help="Set 1 when inputting cofractionation data. Default 0")
    # NOTE(review): the help text states "Default 1" but the configured
    # default is 0 — confirm which one is intended.
    parser.add_argument("-f", "--fullmodel", type=int,
                        metavar="full_model", default=0,
                        help="Set 0 to use only the base submodel. Default 1")
    parser.add_argument("-t", "--tissue", type=str,
                        metavar="tissue", default=None,
                        help="The path to the tissue-specific functional network you would like to use.")

    args = parser.parse_args(argv)

    # The input file is mandatory; check before building any path from it.
    if args.input is None:
        print('Error: No input file provided.')
        sys.exit(1)

    # Validation looks for the file under ./raw_input/, while run_tapioca is
    # handed the bare name (presumably it resolves the same directory itself
    # — TODO confirm against tapioca.run_tapioca).
    input_check = './raw_input/' + args.input

    # The 0/1 integer flags become booleans for run_tapioca.
    ref_channel = bool(args.ref)
    pre_normalized = bool(args.prenorm)
    co_fractionation = bool(args.cofrac)
    full_model = bool(args.fullmodel)

    validate_file(input_check, pre_normalized)

    # Without an explicit save name, fall back to a YYYYMMDDHHMMSS timestamp.
    if args.output is None:
        base_save_name = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    else:
        base_save_name = args.output

    tissue = '' if args.tissue is None else args.tissue

    tp.run_tapioca(
        input_file=args.input,
        ref_channel=ref_channel,
        pre_normalized=pre_normalized,
        co_fractionation=co_fractionation,
        tissue=tissue,
        base_save_name=base_save_name,
        full_model=full_model
    )
if __name__ == "__main__":
    # Run the command-line interface only when this file is executed as a
    # script, not when it is imported as a module.
    main()