Skip to content

Commit a99c586

Browse files
authored
Merge pull request #4 from EMBL-EBI-TSI/testing
Merge testing into master for 0.2 release
2 parents dbdf45e + ee659e5 commit a99c586

File tree

9 files changed

+437
-235
lines changed

9 files changed

+437
-235
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
*.swp
2+
*.pyc

cloudbuild_testing.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
steps:
2+
- name: 'gcr.io/cloud-builders/docker'
3+
args: ['build', '-t', 'eu.gcr.io/tes-wes/taskmaster:testing', '-f', 'dockerfiles/taskmaster/Dockerfile', '.']
4+
- name: 'gcr.io/cloud-builders/docker'
5+
args: ['build', '-t', 'eu.gcr.io/tes-wes/filer:testing', '-f', 'dockerfiles/filer/Dockerfile', '.']
6+
images: ['eu.gcr.io/tes-wes/taskmaster:testing', 'eu.gcr.io/tes-wes/filer:testing']

dockerfiles/taskmaster/Dockerfile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@ FROM gliderlabs/alpine
33
RUN apk add --no-cache python py-pip curl openssl
44
RUN pip install kubernetes
55

6-
WORKDIR /root
6+
RUN adduser -S taskmaster
7+
8+
USER taskmaster
9+
10+
WORKDIR /home/taskmaster
711
COPY scripts/taskmaster.py .
12+
COPY scripts/job.py .
13+
COPY scripts/pvc.py .
14+
COPY scripts/filer_class.py .
815

9-
ENTRYPOINT ["/root/taskmaster.py"]
10-
#CMD /root/taskmaster.py
16+
ENTRYPOINT ["/home/taskmaster/taskmaster.py"]
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
{
2+
"outputs": [
3+
{
4+
"url": "ftp://ftp-private.ebi.ac.uk/upload/mouselist.txt",
5+
"path": "/tmp/vol1/mouselist.txt",
6+
"type": "FILE"
7+
},
8+
{
9+
"url": "ftp://ftp-private.ebi.ac.uk/upload/mouse_out",
10+
"path": "/tmp/vol2/mouse",
11+
"type": "DIRECTORY"
12+
}
13+
],
14+
"inputs": [
15+
{
16+
"url": "ftp://ftp-private.ebi.ac.uk/upload/input.txt",
17+
"path": "/tmp/vol1/in.txt",
18+
"type": "FILE"
19+
},
20+
{
21+
"url": "ftp://ftp-private.ebi.ac.uk/upload/mouse",
22+
"path": "/tmp/vol2/mouse",
23+
"type": "DIRECTORY"
24+
}
25+
],
26+
"volumes": [
27+
"/tmp/vol1",
28+
"/tmp/vol2"
29+
],
30+
"executors": [
31+
{
32+
"apiVersion": "batch/v1",
33+
"kind": "Job",
34+
"metadata": {
35+
"annotations": {
36+
"tes-task-name": "Hello Input"
37+
},
38+
"labels": {
39+
"job-type": "executor",
40+
"taskmaster-name": "task-7d5c53f4",
41+
"executor-no": "0"
42+
},
43+
"name": "task-7d5c53f4-ex-00"
44+
},
45+
"spec": {
46+
"template": {
47+
"metadata": {
48+
"name": "task-7d5c53f4-ex-00"
49+
},
50+
"spec": {
51+
"containers": [
52+
{
53+
"command": [
54+
"cat",
55+
"/tmp/vol1/in.txt"
56+
],
57+
"image": "alpine",
58+
"name": "task-7d5c53f4-ex-00",
59+
"resources": {}
60+
}
61+
],
62+
"restartPolicy": "Never"
63+
}
64+
}
65+
}
66+
},
67+
{
68+
"apiVersion": "batch/v1",
69+
"kind": "Job",
70+
"metadata": {
71+
"annotations": {
72+
"tes-task-name": "Hello Input"
73+
},
74+
"labels": {
75+
"job-type": "executor",
76+
"taskmaster-name": "task-7d5c53f4",
77+
"executor-no": "1"
78+
},
79+
"name": "task-7d5c53f4-ex-01"
80+
},
81+
"spec": {
82+
"template": {
83+
"metadata": {
84+
"name": "task-7d5c53f4-ex-01"
85+
},
86+
"spec": {
87+
"containers": [
88+
{
89+
"command": [
90+
"sh", "-c",
91+
"find /tmp/vol2 > /tmp/vol1/mouselist.txt"
92+
],
93+
"image": "alpine",
94+
"name": "task-7d5c53f4-ex-01",
95+
"resources": {}
96+
}
97+
],
98+
"restartPolicy": "Never"
99+
}
100+
}
101+
}
102+
}
103+
],
104+
"resources": {
105+
"disk_gb": 0.1
106+
}
107+
}

scripts/filer.py

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99
import re
1010
import os
1111
import distutils.dir_util
12-
import requests
12+
import time
13+
import logging
14+
import traceback
15+
1316

1417
debug = True
1518

1619
def download_ftp_file(source, target, ftp):
20+
logging.debug('downloading ftp file: '+source+' target: '+target)
1721
basedir = os.path.dirname(target)
1822
distutils.dir_util.mkpath(basedir)
1923

@@ -22,20 +26,36 @@ def download_ftp_file(source, target, ftp):
2226

2327
def process_upload_dir(source, target, ftp):
    """Recursively upload the local directory *source* beneath remote dir *target*.

    A directory named after the last component of *source* is created
    inside *target* on the FTP server; every regular file is sent with
    ``STOR`` and every sub-directory is handled by a recursive call.

    :param source: local directory path to upload
    :param target: remote parent directory (without a leading '/')
    :param ftp: connected client exposing ``pwd``, ``cwd``, ``mkd`` and
        ``storbinary`` (an ``ftplib.FTP`` instance in this script)
    :return: 0 on success, 1 if the remote parent directory cannot be
        entered or a nested directory upload failed
    """
    basename = os.path.basename(source)
    remote_dir = target + '/' + basename
    logging.debug('processing upload dir src: '+source+' target: '+target)
    logging.debug('dir basename: '+basename)

    # Probe for the parent directory by changing into it, then restore the
    # previous working directory.
    wd = ftp.pwd()
    try:
        ftp.cwd('/'+target)
    except ftplib.all_errors:
        logging.error('Cannot stat parent dir: /'+target)
        return 1
    ftp.cwd(wd)

    try:
        logging.debug('trying to create dir: ' + '/'+remote_dir)
        ftp.mkd('/'+remote_dir)
    except ftplib.error_perm:
        # NOTE(review): any permanent error is treated as "already exists";
        # a real permission problem will only surface at the first STOR.
        logging.debug('Directory exists, overwriting')

    # NOTE(review): cwd/mkd use absolute paths while STOR uses a path relative
    # to the working directory - confirm the login directory is '/'.
    for f in os.listdir(source):
        path = source+'/'+f
        if os.path.isdir(path):
            # No trailing slash here, so nested remote paths do not contain
            # '//'. Propagate failures instead of silently dropping them.
            if process_upload_dir(path, remote_dir, ftp):
                return 1
        elif os.path.isfile(path):
            logging.debug('Trying to upload file: '+path+' to dest: '+remote_dir+'/'+f)
            # storbinary needs a binary-mode file object; the context
            # manager closes the handle even if the transfer raises.
            with open(path, 'rb') as fh:
                ftp.storbinary("STOR "+remote_dir+'/'+f, fh)
    return 0
3755

3856
def process_ftp_dir(source, target, ftp):
57+
logging.debug('processing ftp dir: '+source+' target: '+target)
58+
pwd = ftp.pwd()
3959
ftp.cwd('/'+source)
4060

4161
ls = []
@@ -53,11 +73,14 @@ def process_ftp_dir(source, target, ftp):
5373
else:
5474
download_ftp_file(name, target+'/'+name, ftp)
5575

76+
ftp.cwd(pwd)
77+
5678
def process_ftp_file(ftype, afile):
5779
p = re.compile('[a-z]+://([-a-z.]+)/(.*)')
5880
ftp_baseurl = p.match(afile['url']).group(1)
5981
ftp_path = p.match(afile['url']).group(2)
6082

83+
logging.debug('Connecting to FTP: '+ftp_baseurl)
6184
ftp = FTP(ftp_baseurl)
6285
if os.environ.get('TESK_FTP_USERNAME') is not None:
6386
try:
@@ -84,22 +107,32 @@ def process_ftp_file(ftype, afile):
84107
elif afile['type'] == 'DIRECTORY':
85108
return process_upload_dir(afile['path'], ftp_path, ftp)
86109
else:
87-
print('Unknown file type: '+afile['type'])
110+
logging.error('Unknown file type: '+afile['type'])
88111
return 1
89112
else:
90-
print('Unknown file action: ' + ftype)
113+
logging.error('Unknown file action: ' + ftype)
91114
return 1
92115

93116
def process_http_file(ftype, afile):
94117
if ftype == 'inputs':
95118
r = requests.get(afile['url'])
119+
120+
if r.status_code != 200:
121+
logging.error('Got status code: '+str(r.status_code))
122+
return 1
123+
96124
fp = open(afile['path'], 'wb')
97125
fp.write(r.content)
98126
fp.close
99127
return 0
100128
elif ftype == 'outputs':
101129
fp = open(afile['path'], 'r')
102130
r = requests.put(afile['url'], data=fp.read())
131+
132+
if r.status_code != 200 or r.status_code != 201:
133+
logging.error('Got status code: '+str(r.status_code))
134+
return 1
135+
103136
fp.close
104137
return 0
105138
else:
@@ -109,7 +142,7 @@ def process_http_file(ftype, afile):
109142
def filefromcontent(afile):
110143
content = afile.get('content')
111144
if content is None:
112-
print('Incorrect file spec format, no content or url specified', file=sys.stderr)
145+
logging.error('Incorrect file spec format, no content or url specified')
113146
return 1
114147

115148
fh = open(afile['path'], 'w')
@@ -125,7 +158,7 @@ def process_file(ftype, afile):
125158

126159
p = re.compile('([a-z]+)://')
127160
protocol = p.match(url).group(1)
128-
debug('protocol is: '+protocol)
161+
logging.debug('protocol is: '+protocol)
129162

130163
if protocol == 'ftp':
131164
return process_ftp_file(ftype, afile)
@@ -140,6 +173,8 @@ def debug(msg):
140173
print(msg, file=sys.stderr)
141174

142175
def main(argv):
176+
logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', level=logging.DEBUG)
177+
logging.debug('Starting filer...')
143178
parser = argparse.ArgumentParser(description='Filer script for down- and uploading files')
144179
parser.add_argument('filetype', help='filetype to handle, either \'inputs\' or \'outputs\' ')
145180
parser.add_argument('data', help='file description data, see docs for structure')
@@ -148,14 +183,15 @@ def main(argv):
148183
data = json.loads(args.data)
149184

150185
for afile in data[args.filetype]:
151-
debug('processing file: '+afile['path'])
186+
logging.debug('processing file: '+afile['path'])
152187
if process_file(args.filetype, afile):
153-
print('something went wrong', file=sys.stderr)
188+
logging.error('something went wrong')
154189
return 1
155190
# TODO a bit more detailed reporting
156191
else:
157-
debug('Processed file: ' + afile['path'])
192+
logging.debug('Processed file: ' + afile['path'])
158193

159194
return 0
195+
160196
if __name__ == "__main__":
161-
main(sys.argv)
197+
sys.exit(main(sys.argv))

scripts/filer_class.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import json
2+
3+
class Filer:
    """Builds the Kubernetes ``batch/v1`` Job manifest that runs the filer image.

    The manifest is kept as a plain dict in ``self.spec`` and is mutated in
    place by the ``set_*`` methods and by ``get_spec``.
    """

    def __init__(self, name, data, filer_version='v0.5', debug=False):
        """Create the base manifest.

        :param name: Job name, stored in the manifest metadata
        :param data: JSON-serialisable object handed to the container through
            the ``JSON_INPUT`` environment variable
        :param filer_version: tag appended to the filer image reference
        :param debug: when true the image pull policy is ``Always`` instead
            of ``IfNotPresent``
        """
        self.name = name

        filer_container = {
            "name": "filer",
            "image": "eu.gcr.io/tes-wes/filer:" + filer_version,
            "args": [],
            "env": [{"name": "JSON_INPUT", "value": json.dumps(data)}],
            "volumeMounts": [],
            "imagePullPolicy": "Always" if debug else "IfNotPresent",
        }
        pod_spec = {
            "containers": [filer_container],
            "volumes": [],
            "restartPolicy": "Never",
        }
        self.spec = {
            "kind": "Job",
            "apiVersion": "batch/v1",
            "metadata": {"name": name},
            "spec": {
                "template": {
                    "metadata": {"name": "tesk-filer"},
                    "spec": pod_spec,
                },
            },
        }

    def set_ftp(self, user, pw):
        """Append the FTP credential variables to the container environment."""
        filer_container = self.spec['spec']['template']['spec']['containers'][0]
        filer_container['env'].extend([
            {"name": "TESK_FTP_USERNAME", "value": user},
            {"name": "TESK_FTP_PASSWORD", "value": pw},
        ])

    def set_volume_mounts(self, pvc):
        """Mount the claim described by *pvc* (reads ``volume_mounts`` and ``name``)."""
        pod_spec = self.spec['spec']['template']['spec']
        claim_volume = {
            "name": "task-volume",
            "persistentVolumeClaim": {"claimName": pvc.name},
        }
        pod_spec['containers'][0]['volumeMounts'] = pvc.volume_mounts
        pod_spec['volumes'] = [claim_volume]

    def get_spec(self, mode):
        """Return the manifest with container args set to ``[mode, "$(JSON_INPUT)"]``.

        The pod template name is overwritten with the Job name as well.
        """
        template = self.spec['spec']['template']
        template['spec']['containers'][0]['args'] = [mode, "$(JSON_INPUT)"]
        template['metadata']['name'] = self.name
        return self.spec

scripts/job.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from kubernetes import client, config
2+
import logging
3+
import time
4+
5+
class Job:
    """Thin wrapper around one Kubernetes ``batch/v1`` Job.

    Holds the manifest (``body``), creates it through ``BatchV1Api`` and
    polls it until it leaves the 'Running' state.
    """

    def __init__(self, body, name='task-job', namespace='default'):
        """Store the manifest and force its ``metadata.name`` to *name*.

        :param body: Job manifest dict; mutated in place (name is overwritten)
        :param name: name used to create, read and delete the Job
        :param namespace: namespace the Job lives in
        """
        self.name = name
        self.namespace = namespace
        self.status = 'Initialized'
        self.bv1 = client.BatchV1Api()
        self.body = body
        self.body['metadata']['name'] = self.name

    def run_to_completion(self, poll_interval, check_cancelled):
        """Create the Job and block until it is no longer 'Running'.

        :param poll_interval: seconds to sleep between status polls
        :param check_cancelled: zero-argument callable; a truthy result
            deletes the Job and ends the wait
        :return: 'Cancelled', or the final value from ``get_status``
        """
        logging.debug(self.body)
        self.bv1.create_namespaced_job(self.namespace, self.body)
        status = self.get_status()
        while status == 'Running':
            if check_cancelled():
                self.delete()
                return 'Cancelled'

            time.sleep(poll_interval)

            status = self.get_status()

        return status

    def get_status(self):
        """Read the Job from the API and map its conditions to a status string.

        :return: 'Running' while the Job reports no conditions, 'Complete'
            or 'Failed' when a condition of that type has status "True",
            otherwise 'Error'. The value is also stored in ``self.status``.
        """
        job = self.bv1.read_namespaced_job(self.name, self.namespace)
        conditions = job.status.conditions

        if not conditions:
            # No conditions reported yet (None or empty): still in progress.
            self.status = 'Running'
            return self.status

        # Condition status is the *string* "True"/"False"/"Unknown", so a
        # plain truthiness test would also accept "False". All conditions
        # are scanned because the terminal one is not guaranteed to be first.
        active = {cond.type for cond in conditions if str(cond.status) == 'True'}

        if 'Complete' in active:
            self.status = 'Complete'
        elif 'Failed' in active:
            self.status = 'Failed'
        else:
            self.status = 'Error'

        return self.status

    def delete(self):
        """Delete the Job using default delete options."""
        self.bv1.delete_namespaced_job(self.name, self.namespace, client.V1DeleteOptions())

0 commit comments

Comments
 (0)