Skip to content

Commit 33afb42

Browse files
authored
Add change dbs dataset file status fix #5204 (#5241)
* add setdataset.py for #5204 * refactor and add setfiles * add Content-type arg to HTTPRequests * setdataset to use contentType * rename commands to setdatasetstatus setfilestatus * add autocomplete * list of LFNs not supported yet * some pylint and pep8 * add logging for setfilestatus * more HTTPRequest,CRABRest,getDBSRest to new RestInterfaces.py * do not pass version to REST clients, it is set in HTTPRequests * fix use of version and UserAgent * simply make userAgent=CRABClient/__version__ the default * cleanup use of Content-type * warn users that setdataset status does not change file status
1 parent 0bc1ad0 commit 33afb42

File tree

9 files changed

+371
-34
lines changed

9 files changed

+371
-34
lines changed

etc/crab-bash-completion.sh

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ _UseCrab ()
2323
"")
2424
case "$cur" in
2525
"")
26-
COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy' -- $cur) )
26+
COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy setdatasetstatus setfilestatus' -- $cur) )
2727
;;
2828
-*)
2929
COMPREPLY=( $(compgen -W '--version --help -h --quiet --debug' -- $cur) )
3030
;;
3131
*)
32-
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy' -- $cur) )
32+
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername checkdataset submit getoutput resubmit kill uploadlog remake report preparelocal createmyproxy setdatasetstatus setfilestatus' -- $cur) )
3333
;;
3434
esac
3535
;;
@@ -284,9 +284,29 @@ _UseCrab ()
284284
esac
285285
;;
286286

287+
"setdatasetstatus")
288+
case "$cur" in
289+
-*)
290+
COMPREPLY=( $(compgen -W '--help -h --status --dataset' -- $cur) )
291+
;;
292+
*)
293+
COMPREPLY=( $(compgen -f $cur) )
294+
esac
295+
;;
296+
297+
"setfilestatus")
298+
case "$cur" in
299+
-*)
300+
COMPREPLY=( $(compgen -W '--help -h --status --dataset --files' -- $cur) )
301+
;;
302+
*)
303+
COMPREPLY=( $(compgen -f $cur) )
304+
esac
305+
;;
306+
287307

288308
*)
289-
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername submit getoutput resubmit kill uploadlog remake report preparelocal' -- $cur) )
309+
COMPREPLY=( $(compgen -W 'status tasks proceed checkwrite getlog checkusername submit getoutput resubmit kill uploadlog remake report preparelocal setdatasetstatus setfilestatus' -- $cur) )
290310
;;
291311
esac
292312

src/python/CRABClient/CRABOptParser.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from optparse import OptionParser
22

33
from CRABClient import __version__ as client_version
4-
from CRABClient.ClientUtilities import getAvailCommands
54
from ServerUtilities import SERVICE_INSTANCES
65

76

src/python/CRABClient/ClientMapping.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@
135135
'checkusername' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
136136
'checkwrite' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': True, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': True, 'requiresLocalCache': False},
137137
'checkdataset' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': True, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
138+
'setdatasetstatus' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
139+
'setfilestatus' : {'acceptsArguments': False, 'requiresREST': False, 'requiresRucio': False, 'requiresDirOption': False, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': False},
138140
'getlog' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': False, 'requiresDirOption': True, 'useCache': True, 'requiresProxyVOOptions': True, 'requiresLocalCache': True },
139141
'getoutput' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': True, 'requiresDirOption': True, 'useCache': True, 'requiresProxyVOOptions': True, 'requiresLocalCache': True },
140142
'kill' : {'acceptsArguments': False, 'requiresREST': True, 'requiresRucio': False, 'requiresDirOption': True, 'useCache': False, 'requiresProxyVOOptions': False, 'requiresLocalCache': True },

src/python/CRABClient/ClientUtilities.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,7 @@ def uploadlogfile(logger, proxyfilename, taskname=None, logfilename=None, logpat
230230
# so it needs its own REST server instantiation
231231
restClass = CRABClient.Emulator.getEmulator('rest')
232232
crabserver = restClass(hostname=serverurl, localcert=proxyfilename, localkey=proxyfilename,
233-
retry=2, logger=logger, verbose=False, version=__version__,
234-
userAgent='CRABClient')
233+
retry=2, logger=logger, verbose=False)
235234
crabserver.setDbInstance(instance)
236235
cacheurl = server_info(crabserver=crabserver, subresource='backendurls')['cacheSSL']
237236

@@ -646,7 +645,6 @@ def validateSubmitOptions(options, args):
646645
#Since server_info class needs SubCommand, and SubCommand needs server_info for
647646
#delegating the proxy then we are screwed
648647
#If anyone has a better solution please go on, otherwise live with that one :) :)
649-
from CRABClient import __version__
650648

651649
def server_info(crabserver=None, subresource=None):
652650
"""

src/python/CRABClient/Commands/SubCommand.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -362,18 +362,16 @@ def __init__(self, logger, cmdargs=None, disable_interspersed_args=False):
362362
# this is usually the first time that a call to the server is made, so where Emulator('rest') is initialized
363363
# arguments to Emulator('rest') call must match those for HTTPRequest.__init__ in RESTInteractions.py
364364
#server = CRABClient.Emulator.getEmulator('rest')(url=serverurl, localcert=proxyfilename, localkey=proxyfilename,
365-
# version=__version__, retry=2, logger=logger)
365+
# retry=2, logger=logger)
366366
if self.cmdconf['requiresREST']:
367367
crabRest = CRABClient.Emulator.getEmulator('rest')
368368
self.crabserver = crabRest(hostname=self.serverurl, localcert=self.proxyfilename, localkey=self.proxyfilename,
369-
retry=2, logger=self.logger, verbose=False, version=__version__,
370-
userAgent='CRABClient')
369+
retry=2, logger=self.logger, verbose=False)
371370
self.crabserver.setDbInstance(self.instance)
372371
# prepare also a test crabserver instance which will send tarballs to S3
373372
self.s3tester = crabRest(hostname='cmsweb-testbed.cern.ch',
374373
localcert=self.proxyfilename, localkey=self.proxyfilename,
375-
retry=0, logger=self.logger, verbose=False, version=__version__,
376-
userAgent='CRABClient')
374+
retry=0, logger=self.logger, verbose=False)
377375
self.s3tester.setDbInstance('preprod')
378376
self.handleMyProxy()
379377

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# pylint: disable=consider-using-f-string, unspecified-encoding
2+
"""
3+
allow users to (in)validate their own DBS USER datasets
4+
"""
5+
6+
import sys
7+
import json
8+
9+
from CRABClient.Commands.SubCommand import SubCommand
10+
from CRABClient.ClientExceptions import MissingOptionException, ConfigurationException, CommandFailedException
11+
from CRABClient.ClientUtilities import colors
12+
from CRABClient.RestInterfaces import getDbsREST
13+
14+
if sys.version_info >= (3, 0):
15+
from urllib.parse import urlencode # pylint: disable=E0611
16+
if sys.version_info < (3, 0):
17+
from urllib import urlencode
18+
19+
20+
class setdatasetstatus(SubCommand):
    """
    Set the status (dataset_access_type) of a USER dataset in DBS, by default in prod/phys03.

    Only the dataset status is changed: the status of the files inside the
    dataset is left untouched, and the --recursive option is accepted but
    not implemented yet.
    Meant to replace https://github.com/dmwm/DBS/blob/master/Client/utils/DataOpsScripts/DBS3SetDatasetStatus.py
    and to work wherever CRAB is supported, i.e. with both python2 and python3
    """

    name = 'setdatasetstatus'

    def __init__(self, logger, cmdargs=None):
        SubCommand.__init__(self, logger, cmdargs)

    def __call__(self):
        """
        Look up the dataset in DBS, set its new status and log the outcome.

        Returns {'commandStatus': 'SUCCESS'} when the change went through.
        Raises ConfigurationException if the dataset lookup returns nothing and
        CommandFailedException if the PUT does not answer 200/OK.
        """
        instance = self.options.instance
        dataset = self.options.dataset
        status = self.options.status
        recursive = self.options.recursive
        self.logger.debug('instance = %s', instance)
        self.logger.debug('dataset = %s', dataset)
        self.logger.debug('status = %s', status)
        self.logger.debug('recursive = %s', recursive)

        if recursive:
            self.logger.warning("ATTENTION: recursive option is not implemented yet. Ignoring it")

        # from DBS instance, to DBS REST services
        dbsReader, dbsWriter = getDbsREST(instance=instance, logger=self.logger,
                                          cert=self.proxyfilename, key=self.proxyfilename)

        self.logger.info("looking up Dataset %s in DBS %s" % (dataset, instance))
        # dataset_access_type='*' so that the dataset is found whatever its current status
        datasetStatusQuery = {'dataset': dataset, 'dataset_access_type': '*', 'detail': True}
        ds, rc, _ = dbsReader.get(uri="datasets", data=urlencode(datasetStatusQuery))
        self.logger.debug('exitcode= %s', rc)
        if not ds:
            # carry the explanation in the exception, like the other failures of this command do
            raise ConfigurationException("ERROR: dataset %s not found in DBS" % dataset)
        self.logger.info("Dataset status in DBS is %s" % ds[0]['dataset_access_type'])
        self.logger.info("Will set it to %s" % status)

        # PUT wants the payload in JSON format
        jdata = json.dumps({'dataset': dataset, 'dataset_access_type': status})
        out, rc, reason = dbsWriter.put(uri='datasets', data=jdata)
        if rc != 200 or reason != 'OK':
            raise CommandFailedException("Dataset status change failed: %s" % out)
        self.logger.info("Dataset status changed successfully")

        # read the status back so that the user sees what DBS holds now
        ds, rc, _ = dbsReader.get(uri="datasets", data=urlencode(datasetStatusQuery))
        self.logger.debug('exitcode= %s', rc)
        if ds:
            self.logger.info("Dataset status in DBS now is %s" % ds[0]['dataset_access_type'])
        else:
            # the change itself succeeded: do not crash on an empty verification reply
            self.logger.warning("Could not read back the dataset status from DBS")

        self.logger.info("NOTE: status of files inside the dataset has NOT been changed")

        return {'commandStatus': 'SUCCESS'}

    def setOptions(self):
        """
        __setOptions__

        This allows to set specific command options
        """
        self.parser.add_option('--instance', dest='instance', default='prod/phys03',
                               help=("DBS instance. e.g. prod/phys03 (default) or int/phys03."
                                     " Use at your own risk."
                                     " Unless you really know what you are doing, stay with the default")
                               )
        self.parser.add_option('--dataset', dest='dataset', default=None,
                               help='dataset name')
        self.parser.add_option('--status', dest='status', default=None,
                               help="New status of the dataset: VALID/INVALID/DELETED/DEPRECATED",
                               choices=['VALID', 'INVALID', 'DELETED', 'DEPRECATED']
                               )
        self.parser.add_option('--recursive', dest='recursive', default=False, action="store_true",
                               help=("Apply status to children datasets and sets all files status in those"
                                     " to VALID if status=VALID, INVALID otherwise")
                               )

    def validateOptions(self):
        """
        Check that --dataset and --status were given and that --instance looks sane.

        Raises MissingOptionException for a missing mandatory option and
        ConfigurationException for a malformed --instance value.
        """
        SubCommand.validateOptions(self)

        if self.options.dataset is None:
            msg = "%sError%s: Please specify the dataset to check." % (colors.RED, colors.NORMAL)
            msg += " Use the --dataset option."
            ex = MissingOptionException(msg)
            ex.missingOption = "dataset"
            raise ex
        if self.options.status is None:
            msg = "%sError%s: Please specify the new dataset status." % (colors.RED, colors.NORMAL)
            msg += " Use the --status option."
            ex = MissingOptionException(msg)
            ex.missingOption = "status"
            raise ex
        # minimal sanity check: accept "server/db" or a full https URL
        instance = self.options.instance
        isUrl = instance.startswith('https://')
        tooManyParts = len(instance.split('/')) > 2
        if '/' not in instance or (tooManyParts and not isUrl):
            msg = "Bad instance value %s. " % instance
            msg += "Use either server/db format or full URL"
            raise ConfigurationException(msg)
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# pylint: disable=consider-using-f-string, unspecified-encoding
2+
"""
3+
allow users to (in)validate some files in their USER datasets in phys03
4+
"""
5+
6+
import json
7+
8+
from CRABClient.Commands.SubCommand import SubCommand
9+
from CRABClient.ClientExceptions import MissingOptionException, ConfigurationException, CommandFailedException
10+
from CRABClient.ClientUtilities import colors
11+
from CRABClient.RestInterfaces import getDbsREST
12+
13+
14+
class setfilestatus(SubCommand):
    """
    Set the status (VALID/INVALID) of files in a USER dataset in DBS, by default in prod/phys03.

    Acts either on one file given with --files or on all the files of the
    dataset given with --dataset (the two options are mutually exclusive).
    A list of LFNs is parsed but not supported yet.
    Meant to work wherever CRAB is supported, i.e. with both python2 and python3
    """

    name = 'setfilestatus'

    def __init__(self, logger, cmdargs=None):
        SubCommand.__init__(self, logger, cmdargs)

    def __call__(self):
        """
        Change the status of the requested file(s) in DBS and log the dataset file tally
        before and after the change.

        Returns {'commandStatus': 'SUCCESS'} when the change went through.
        Raises ConfigurationException when no LFN is specified or the LFN is not
        found in DBS, NotImplementedError for a list of LFNs and
        CommandFailedException if the PUT does not answer 200/OK.
        """
        # initialize, and validate args
        instance = self.options.instance
        dataset = self.options.dataset
        files = self.options.files
        status = self.options.status
        self.logger.debug('instance = %s', instance)
        self.logger.debug('dataset = %s', dataset)
        self.logger.debug('files = %s', files)
        self.logger.debug('status = %s', status)

        # DBS stores file validity as 1/0
        statusToSet = 1 if status == 'VALID' else 0

        filesToChange = None
        if files:
            # files and dataset options are mutually exclusive
            dataset = None
            # did the user specify the name of a file containing a list of LFN's ?
            try:
                with open(files, 'r') as f:
                    lfns = [line.strip() for line in f]
            except IOError:
                # no. Assume we have a comma separated list of LFN's (a single LFN is also OK)
                filesToChange = files.strip(",").strip()
            else:
                # blank lines in the file must not turn into empty LFN's
                filesToChange = ','.join(lfn for lfn in lfns if lfn)
            if not filesToChange:
                raise ConfigurationException("ERROR: no LFN found in --files value %s" % files)
            if ',' in filesToChange:
                raise NotImplementedError('list of LFNs is not supported yet')

        # from DBS instance, to DBS REST services
        dbsReader, dbsWriter = getDbsREST(instance=instance, logger=self.logger,
                                          cert=self.proxyfilename, key=self.proxyfilename)
        # we will need the dataset name
        if dataset:
            datasetName = dataset
        else:
            # get it from DBS, using the first LFN
            lfn = filesToChange.split(',')[0]
            query = {'logical_file_name': lfn}
            out, _, _ = dbsReader.get(uri='datasets', data=query)
            if not out:
                # carry the explanation in the exception, like the other failures of this command do
                raise ConfigurationException("ERROR: file %s not found in DBS" % lfn)
            datasetName = out[0]['dataset']
            self.logger.info('LFN to be changed belongs to dataset %s' % datasetName)

        # when acting on a list of LFN's, can't print status of all files before/after
        # best we can do is to print the number of valid/invalid file in the dataset
        # before/after.

        self.logFilesTally(dataset=datasetName, dbs=dbsReader)

        if dataset:
            data = {'dataset': dataset, 'is_file_valid': statusToSet}
        else:
            data = {'logical_file_name': filesToChange, 'is_file_valid': statusToSet}
        jdata = json.dumps(data)  # PUT requires data in JSON format
        out, rc, reason = dbsWriter.put(uri='files', data=jdata)
        if rc != 200 or reason != 'OK':
            raise CommandFailedException("File(s) status change failed: %s" % out)
        self.logger.info("File(s) status changed successfully")

        self.logFilesTally(dataset=datasetName, dbs=dbsReader)

        return {'commandStatus': 'SUCCESS'}

    def logFilesTally(self, dataset=None, dbs=None):
        """ prints total/valid/invalid files in dataset """
        # validFileOnly=1 lists the valid files only, validFileOnly=0 lists them all
        query = {'dataset': dataset, 'validFileOnly': 1}
        out, _, _ = dbs.get(uri='files', data=query)
        valid = len(out)
        query = {'dataset': dataset, 'validFileOnly': 0}
        out, _, _ = dbs.get(uri='files', data=query)
        total = len(out)
        invalid = total - valid
        self.logger.info("Dataset file count total/valid/invalid = %d/%d/%d" % (total, valid, invalid))

    def setOptions(self):
        """
        __setOptions__

        This allows to set specific command options
        """
        self.parser.add_option('-i', '--instance', dest='instance', default='prod/phys03',
                               help='DBS instance. e.g. prod/phys03 (default) or int/phys03'
                               )
        self.parser.add_option('-d', '--dataset', dest='dataset', default=None,
                               help=('Will apply status to all files in this dataset.'
                                     ' Use either --files or --dataset'),
                               metavar='<dataset_name>')
        self.parser.add_option('-s', '--status', dest='status', default=None,
                               help='New status of the file(s): VALID/INVALID',
                               choices=['VALID', 'INVALID']
                               )
        self.parser.add_option('-f', '--files', dest='files', default=None,
                               help=('List of files to be validated/invalidated.'
                                     ' Can be either a simple LFN or a file containing LFNs or'
                                     ' a comma separated list of LFNs. Use either --files or --dataset'),
                               metavar="<lfn1[,..,lfnx] or filename>")

    def validateOptions(self):
        """
        Check that exactly one of --files/--dataset was given, together with --status.

        Raises MissingOptionException for a missing mandatory option and
        ConfigurationException when both --files and --dataset are used.
        """
        SubCommand.validateOptions(self)

        if not self.options.files and not self.options.dataset:
            msg = "%sError%s: Please specify the files to change." % (colors.RED, colors.NORMAL)
            msg += " Use either the --files or the --dataset option."
            ex = MissingOptionException(msg)
            ex.missingOption = "files"
            raise ex
        if self.options.files and self.options.dataset:
            msg = "%sError%s: You can not use both --files and --dataset at same time" % (colors.RED, colors.NORMAL)
            raise ConfigurationException(msg)
        if self.options.status is None:
            msg = "%sError%s: Please specify the new file(s) status." % (colors.RED, colors.NORMAL)
            msg += " Use the --status option."
            ex = MissingOptionException(msg)
            ex.missingOption = "status"
            raise ex

src/python/CRABClient/Emulator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ def setEmulator(name, value):
2424
overrideDict[name] = value
2525

2626
def getDefaults():
27-
import CRABClient.CrabRestInterface
28-
return {'rest' : CRABClient.CrabRestInterface.CRABRest,
27+
import CRABClient.RestInterfaces
28+
return {'rest' : CRABClient.RestInterfaces.CRABRest,
2929
'ufc' : 'dummy_ufc'}

0 commit comments

Comments
 (0)