@@ -291,19 +291,14 @@ def site_filter(self, data, doc):
             return
 
         # Get output dict for this site
-        site = i.get("MachineAttrGLIDEIN_ResourceName0", "UNKNOWN") or "UNKNOWN"
+        site = i.get("LastRemoteHost", "UNKNOWN") or "UNKNOWN"
+        site = site.split("@")[-1]
         o = data["Site"][site]
 
         # Add custom attrs to the list of attrs
         filter_attrs = DEFAULT_FILTER_ATTRS.copy()
         filter_attrs = filter_attrs + ["User"]
 
-        # Count number of DAGNode Jobs
-        if i.get("DAGNodeName") is not None and i.get("JobUniverse") != 12:
-            o["_NumDAGNodes"].append(1)
-        else:
-            o["_NumDAGNodes"].append(0)
-
         # Count number of history ads (i.e. number of unique job ids)
         o["_NumJobs"].append(1)
 
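Note on the new site derivation: LastRemoteHost in a history ad is a slot name of the form "slot@hostname", so taking the last "@"-separated field recovers the execute host, whereas the old MachineAttrGLIDEIN_ResourceName0 grouped by glidein resource name. A minimal sketch of the behavior, using a hypothetical ad value:

    # Hypothetical history ad; only the attribute used by site_filter is shown.
    i = {"LastRemoteHost": "slot1_3@node2045.example.edu"}
    site = i.get("LastRemoteHost", "UNKNOWN") or "UNKNOWN"
    site = site.split("@")[-1]  # -> "node2045.example.edu"
    # The fallback survives too: "UNKNOWN".split("@")[-1] is still "UNKNOWN".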
@@ -316,6 +311,7 @@ def get_filters(self):
         filters = [
             self.schedd_filter,
             self.user_filter,
+            self.site_filter,
         ]
         return filters
 
@@ -328,7 +324,7 @@ def add_custom_columns(self, agg):
             columns[5] = "Num Users"
         if agg == "Site":
             columns[5] = "Num Users"
-            rm_columns = [30, 45, 50, 70, 80, 90, 300, 305, 310, 320, 330, 340, 350, 370, 380, 390]
+            rm_columns = [30, 35, 45, 50, 70, 80, 90, 180, 181, 190, 191, 300, 303, 305, 307, 310, 320, 330, 340, 350, 390]
             [columns.pop(key) for key in rm_columns]
         return columns
 
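For context, columns maps an integer sort key to a column name, so popping the listed keys drops those columns from the Site table; the list comprehension is used purely for its pop side effects. A toy illustration (the key/name pairs here are hypothetical; the real ones come from the base class):

    columns = {30: "Avg MB Sent", 35: "Max MB Sent", 50: "% Short Jobs"}
    rm_columns = [30, 35]
    [columns.pop(key) for key in rm_columns]
    # columns is now {50: "% Short Jobs"}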
@@ -341,90 +337,6 @@ def merge_filtered_data(self, data, agg):
         return rows
 
 
-    def compute_site_custom_columns(self, data, agg, agg_name):
-
-        # Output dictionary
-        row = {}
-
-        # Compute goodput and total CPU hours columns
-        goodput_cpu_time = []
-        for (goodput_time, cpus) in zip(
-                data["CommittedTime"],
-                data["RequestCpus"]):
-            if None in [goodput_time, cpus]:
-                goodput_cpu_time.append(None)
-            else:
-                goodput_cpu_time.append(goodput_time * cpus)
-
-        # Short jobs are jobs that ran for < 1 minute
-        is_short_job = []
-        for (goodput_time, record_date, start_date) in zip(
-                data["CommittedTime"],
-                data["RecordTime"],
-                data["JobCurrentStartDate"]):
-            if (goodput_time is not None) and (goodput_time > 0):
-                is_short_job.append(goodput_time < 60)
-            elif None in (record_date, start_date):
-                is_short_job.append(None)
-            else:
-                is_short_job.append((record_date - start_date) < 60)
-
-        # "Long" (i.e. "normal") jobs ran >= 1 minute
-        # We only want to use these when computing percentiles,
-        # so filter out short jobs and removed jobs,
-        # and sort them so we can easily grab the percentiles later
-        long_times_sorted = []
-        for (is_short, goodput_time) in zip(
-                is_short_job,
-                data["CommittedTime"]):
-            if (is_short == False):
-                long_times_sorted.append(goodput_time)
-        long_times_sorted = self.clean(long_times_sorted)
-        long_times_sorted.sort()
-
-        # Compute columns
-        row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
-        row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
-        row["Avg MB Sent"] = stats.mean(self.clean(data["BytesSent"], allow_empty_list=False)) / 1e6
-        row["Max MB Sent"] = max(self.clean(data["BytesSent"], allow_empty_list=False)) / 1e6
-        row["Avg MB Recv"] = stats.mean(self.clean(data["BytesRecvd"], allow_empty_list=False)) / 1e6
-        row["Max MB Recv"] = max(self.clean(data["BytesRecvd"], allow_empty_list=False)) / 1e6
-        row["Num Short Jobs"] = sum(self.clean(is_short_job))
-        row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
-        row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
-        row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
-        row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
-        row["Num Users"] = len(set(data["User"]))
-
-        if row["Num Uniq Job Ids"] > 0:
-            row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
-        else:
-            row["% Short Jobs"] = 0
-
-        # Compute time percentiles and stats
-        if len(long_times_sorted) > 0:
-            row["Min Hrs"] = long_times_sorted[0] / 3600
-            row["25% Hrs"] = long_times_sorted[len(long_times_sorted)//4] / 3600
-            row["Med Hrs"] = stats.median(long_times_sorted) / 3600
-            row["75% Hrs"] = long_times_sorted[3*len(long_times_sorted)//4] / 3600
-            row["95% Hrs"] = long_times_sorted[int(0.95*len(long_times_sorted))] / 3600
-            row["Max Hrs"] = long_times_sorted[-1] / 3600
-            row["Mean Hrs"] = stats.mean(long_times_sorted) / 3600
-        else:
-            for col in [f"{x} Hrs" for x in ["Min", "25%", "Med", "75%", "95%", "Max", "Mean"]]:
-                row[col] = 0
-
-        if len(long_times_sorted) > 1:
-            row["Std Hrs"] = stats.stdev(long_times_sorted) / 3600
-        else:
-            # There is no variance if there is only one value
-            row["Std Hrs"] = 0
-
-        # Compute mode for Project and Schedd columns in the Users table
-        row["Num Users"] = len(set(data["User"]))
-
-        return row
-
     def compute_custom_columns(self, data, agg, agg_name):
 
         if agg == "Site":
@@ -587,3 +499,87 @@ def compute_custom_columns(self, data, agg, agg_name):
         row["Num Users"] = len(set(data["User"]))
 
         return row
+
+
+    def compute_site_custom_columns(self, data, agg, agg_name):
+
+        # Output dictionary
+        row = {}
+
+        # Compute goodput and total CPU/GPU hours columns
+        goodput_cpu_time = []
+        goodput_gpu_time = []
+        for (goodput_time, cpus, gpus) in zip(
+                data["CommittedTime"],
+                data["RequestCpus"],
+                data["RequestGpus"]):
+            if None in [goodput_time, cpus, gpus]:
+                goodput_cpu_time.append(None)
+                goodput_gpu_time.append(None)
+            else:
+                goodput_cpu_time.append(goodput_time * cpus)
+                goodput_gpu_time.append(goodput_time * gpus)
+
+        # Short jobs are jobs that ran for < 1 minute
+        is_short_job = []
+        for (goodput_time, record_date, start_date) in zip(
+                data["CommittedTime"],
+                data["RecordTime"],
+                data["JobCurrentStartDate"]):
+            if (goodput_time is not None) and (goodput_time > 0):
+                is_short_job.append(goodput_time < 60)
+            elif None in (record_date, start_date):
+                is_short_job.append(None)
+            else:
+                is_short_job.append((record_date - start_date) < 60)
+
+        # "Long" (i.e. "normal") jobs ran >= 1 minute
+        # We only want to use these when computing percentiles,
+        # so filter out short jobs and removed jobs,
+        # and sort them so we can easily grab the percentiles later
+        long_times_sorted = []
+        for (is_short, goodput_time) in zip(
+                is_short_job,
+                data["CommittedTime"]):
+            if (is_short == False):
+                long_times_sorted.append(goodput_time)
+        long_times_sorted = self.clean(long_times_sorted)
+        long_times_sorted.sort()
+
+        # Compute columns
+        row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
+        row["All GPU Hours"] = sum(self.clean(goodput_gpu_time)) / 3600
+        row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
+        row["Num Short Jobs"] = sum(self.clean(is_short_job))
+        row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
+        row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
+        row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
+        row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
+        row["Max Rqst Gpus"] = max(self.clean(data["RequestGpus"], allow_empty_list=False))
+        row["Num Users"] = len(set(data["User"]))
+
+        if row["Num Uniq Job Ids"] > 0:
+            row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
+        else:
+            row["% Short Jobs"] = 0
+
+        # Compute time percentiles and stats
+        if len(long_times_sorted) > 0:
+            row["Min Hrs"] = long_times_sorted[0] / 3600
+            row["25% Hrs"] = long_times_sorted[len(long_times_sorted)//4] / 3600
+            row["Med Hrs"] = stats.median(long_times_sorted) / 3600
+            row["75% Hrs"] = long_times_sorted[3*len(long_times_sorted)//4] / 3600
+            row["95% Hrs"] = long_times_sorted[int(0.95*len(long_times_sorted))] / 3600
+            row["Max Hrs"] = long_times_sorted[-1] / 3600
+            row["Mean Hrs"] = stats.mean(long_times_sorted) / 3600
+        else:
+            for col in [f"{x} Hrs" for x in ["Min", "25%", "Med", "75%", "95%", "Max", "Mean"]]:
+                row[col] = 0
+
+        if len(long_times_sorted) > 1:
+            row["Std Hrs"] = stats.stdev(long_times_sorted) / 3600
+        else:
+            # There is no variance if there is only one value
+            row["Std Hrs"] = 0
+
+        return row
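A note on the relocated method, assuming stats is the standard statistics module (as the mean/median/stdev calls suggest): the percentile columns come from plain index arithmetic on the sorted durations rather than interpolation. A standalone sketch with made-up CommittedTime values:

    import statistics as stats

    long_times_sorted = sorted([120, 300, 600, 3600, 7200])  # hypothetical durations (seconds)
    n = len(long_times_sorted)
    p25 = long_times_sorted[n // 4] / 3600          # index 1 -> 300 s
    p75 = long_times_sorted[3 * n // 4] / 3600      # index 3 -> 3600 s
    p95 = long_times_sorted[int(0.95 * n)] / 3600   # index 4 -> 7200 s
    med = stats.median(long_times_sorted) / 3600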