Skip to content

Commit 4095013

Browse files
authored
Add Site report for GPUs (#27)
1 parent 1440029 commit 4095013

File tree

1 file changed

+88
-92
lines changed

1 file changed

+88
-92
lines changed

accounting/filters/ChtcScheddGpuFilter.py

Lines changed: 88 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -291,19 +291,14 @@ def site_filter(self, data, doc):
291291
return
292292

293293
# Get output dict for this site
294-
site = i.get("MachineAttrGLIDEIN_ResourceName0", "UNKNOWN") or "UNKNOWN"
294+
site = i.get("LastRemoteHost", "UNKNOWN") or "UNKNOWN"
295+
site = site.split("@")[-1]
295296
o = data["Site"][site]
296297

297298
# Add custom attrs to the list of attrs
298299
filter_attrs = DEFAULT_FILTER_ATTRS.copy()
299300
filter_attrs = filter_attrs + ["User"]
300301

301-
# Count number of DAGNode Jobs
302-
if i.get("DAGNodeName") is not None and i.get("JobUniverse")!=12:
303-
o["_NumDAGNodes"].append(1)
304-
else:
305-
o["_NumDAGNodes"].append(0)
306-
307302
# Count number of history ads (i.e. number of unique job ids)
308303
o["_NumJobs"].append(1)
309304

@@ -316,6 +311,7 @@ def get_filters(self):
316311
filters = [
317312
self.schedd_filter,
318313
self.user_filter,
314+
self.site_filter,
319315
]
320316
return filters
321317

@@ -328,7 +324,7 @@ def add_custom_columns(self, agg):
328324
columns[5] = "Num Users"
329325
if agg == "Site":
330326
columns[5] = "Num Users"
331-
rm_columns = [30,45,50,70,80,90,300,305,310,320,330,340,350,370,380,390]
327+
rm_columns = [30,35,45,50,70,80,90,180,181,190,191,300,303,305,307,310,320,330,340,350,390]
332328
[columns.pop(key) for key in rm_columns]
333329
return columns
334330

@@ -341,90 +337,6 @@ def merge_filtered_data(self, data, agg):
341337
return rows
342338

343339

344-
def compute_site_custom_columns(self, data, agg, agg_name):
    """Build the summary row for one site in the Site report.

    data:     dict of column-name -> index-aligned lists of per-job values
              collected by the filters.
    agg:      aggregation level name (unused; kept for interface symmetry).
    agg_name: name of the aggregate being summarized (unused).
    Returns a dict mapping report column names to computed values.
    """

    # CPU-weighted goodput seconds; None marks jobs with missing input
    # so self.clean() can discard them later.
    cpu_goodput = [
        None if None in (t, c) else t * c
        for t, c in zip(data["CommittedTime"], data["RequestCpus"])
    ]

    # Flag jobs that ran for under a minute; None means "unknown".
    short_flags = []
    for t, rec, start in zip(
            data["CommittedTime"],
            data["RecordTime"],
            data["JobCurrentStartDate"]):
        if t is not None and t > 0:
            short_flags.append(t < 60)
        elif rec is None or start is None:
            short_flags.append(None)
        else:
            short_flags.append((rec - start) < 60)

    # "Long" (i.e. "normal") jobs ran >= 1 minute. Only these feed the
    # percentile columns, so drop short/unknown jobs and sort ascending.
    # "flag is False" (not "not flag") so None (unknown) is excluded.
    long_runtimes = self.clean(
        [t for flag, t in zip(short_flags, data["CommittedTime"]) if flag is False]
    )
    long_runtimes.sort()

    n_jobs = sum(data["_NumJobs"])
    n_short = sum(self.clean(short_flags))

    # Scalar summary columns
    row = {
        "All CPU Hours": sum(self.clean(cpu_goodput)) / 3600,
        "Num Uniq Job Ids": n_jobs,
        "Avg MB Sent": stats.mean(self.clean(data["BytesSent"], allow_empty_list=False)) / 1e6,
        "Max MB Sent": max(self.clean(data["BytesSent"], allow_empty_list=False)) / 1e6,
        "Avg MB Recv": stats.mean(self.clean(data["BytesRecvd"], allow_empty_list=False)) / 1e6,
        "Max MB Recv": max(self.clean(data["BytesRecvd"], allow_empty_list=False)) / 1e6,
        "Num Short Jobs": n_short,
        "Max Rqst Mem MB": max(self.clean(data['RequestMemory'], allow_empty_list=False)),
        "Med Used Mem MB": stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False)),
        "Max Used Mem MB": max(self.clean(data["MemoryUsage"], allow_empty_list=False)),
        "Max Rqst Cpus": max(self.clean(data["RequestCpus"], allow_empty_list=False)),
        "Num Users": len(set(data["User"])),
    }

    # Guard against division by zero for an empty site
    row["% Short Jobs"] = 100 * n_short / n_jobs if n_jobs > 0 else 0

    # Runtime percentiles and stats over the sorted long-job runtimes
    count = len(long_runtimes)
    if count > 0:
        row["Min Hrs"] = long_runtimes[0] / 3600
        row["25% Hrs"] = long_runtimes[count // 4] / 3600
        row["Med Hrs"] = stats.median(long_runtimes) / 3600
        row["75% Hrs"] = long_runtimes[3 * count // 4] / 3600
        row["95% Hrs"] = long_runtimes[int(0.95 * count)] / 3600
        row["Max Hrs"] = long_runtimes[-1] / 3600
        row["Mean Hrs"] = stats.mean(long_runtimes) / 3600
    else:
        for label in ("Min", "25%", "Med", "75%", "95%", "Max", "Mean"):
            row[f"{label} Hrs"] = 0

    # stdev needs at least two samples; one value has no variance
    row["Std Hrs"] = stats.stdev(long_runtimes) / 3600 if count > 1 else 0

    # The original assigned "Num Users" a second time here; kept for an
    # identical result (insertion order and value are unchanged).
    row["Num Users"] = len(set(data["User"]))

    return row
427-
428340
def compute_custom_columns(self, data, agg, agg_name):
429341

430342
if agg == "Site":
@@ -587,3 +499,87 @@ def compute_custom_columns(self, data, agg, agg_name):
587499
row["Num Users"] = len(set(data["User"]))
588500

589501
return row
502+
503+
504+
def compute_site_custom_columns(self, data, agg, agg_name):
    """Compute the per-site summary row for the GPU Site report.

    data:     dict of column-name -> index-aligned lists of per-job values
              collected by the filters.
    agg:      aggregation level name (unused; kept for interface symmetry
              with compute_custom_columns).
    agg_name: name of the aggregate (the site) being summarized (unused).
    Returns a dict mapping report column names to computed values.
    """

    # Output dictionary
    row = {}

    # Compute goodput and total CPU/GPU hours columns.
    # Entries are None when any input is missing so self.clean() can
    # drop them later.
    goodput_cpu_time = []
    goodput_gpu_time = []
    for (goodput_time, cpus, gpus) in zip(
            data["CommittedTime"],
            data["RequestCpus"],
            data["RequestGpus"]):
        if None in [goodput_time, cpus, gpus]:
            goodput_cpu_time.append(None)
            goodput_gpu_time.append(None)
        else:
            goodput_cpu_time.append(goodput_time * cpus)
            # BUG FIX: was "goodput_time * cpus", which made
            # "All GPU Hours" a duplicate of the CPU-hours total.
            goodput_gpu_time.append(goodput_time * gpus)

    # Short jobs are jobs that ran for < 1 minute
    is_short_job = []
    for (goodput_time, record_date, start_date) in zip(
            data["CommittedTime"],
            data["RecordTime"],
            data["JobCurrentStartDate"]):
        if (goodput_time is not None) and (goodput_time > 0):
            is_short_job.append(goodput_time < 60)
        elif None in (record_date, start_date):
            is_short_job.append(None)
        else:
            is_short_job.append((record_date - start_date) < 60)

    # "Long" (i.e. "normal") jobs ran >= 1 minute
    # We only want to use these when computing percentiles,
    # so filter out short jobs and removed jobs,
    # and sort them so we can easily grab the percentiles later
    long_times_sorted = []
    for (is_short, goodput_time) in zip(
            is_short_job,
            data["CommittedTime"]):
        # "is False" (not "not is_short") so None (unknown) is excluded
        if is_short is False:
            long_times_sorted.append(goodput_time)
    long_times_sorted = self.clean(long_times_sorted)
    long_times_sorted.sort()

    # Compute columns
    row["All CPU Hours"] = sum(self.clean(goodput_cpu_time)) / 3600
    row["All GPU Hours"] = sum(self.clean(goodput_gpu_time)) / 3600
    row["Num Uniq Job Ids"] = sum(data['_NumJobs'])
    row["Num Short Jobs"] = sum(self.clean(is_short_job))
    row["Max Rqst Mem MB"] = max(self.clean(data['RequestMemory'], allow_empty_list=False))
    row["Med Used Mem MB"] = stats.median(self.clean(data["MemoryUsage"], allow_empty_list=False))
    row["Max Used Mem MB"] = max(self.clean(data["MemoryUsage"], allow_empty_list=False))
    row["Max Rqst Cpus"] = max(self.clean(data["RequestCpus"], allow_empty_list=False))
    row["Max Rqst Gpus"] = max(self.clean(data["RequestGpus"], allow_empty_list=False))
    row["Num Users"] = len(set(data["User"]))

    # Guard against division by zero for an empty site
    if row["Num Uniq Job Ids"] > 0:
        row["% Short Jobs"] = 100 * row["Num Short Jobs"] / row["Num Uniq Job Ids"]
    else:
        row["% Short Jobs"] = 0

    # Compute time percentiles and stats over sorted long-job runtimes
    if len(long_times_sorted) > 0:
        row["Min Hrs"] = long_times_sorted[0] / 3600
        row["25% Hrs"] = long_times_sorted[len(long_times_sorted)//4] / 3600
        row["Med Hrs"] = stats.median(long_times_sorted) / 3600
        row["75% Hrs"] = long_times_sorted[3*len(long_times_sorted)//4] / 3600
        row["95% Hrs"] = long_times_sorted[int(0.95*len(long_times_sorted))] / 3600
        row["Max Hrs"] = long_times_sorted[-1] / 3600
        row["Mean Hrs"] = stats.mean(long_times_sorted) / 3600
    else:
        for col in [f"{x} Hrs" for x in ["Min", "25%", "Med", "75%", "95%", "Max", "Mean"]]:
            row[col] = 0

    # stdev needs at least two samples; there is no variance of one value
    if len(long_times_sorted) > 1:
        row["Std Hrs"] = stats.stdev(long_times_sorted) / 3600
    else:
        row["Std Hrs"] = 0

    return row

0 commit comments

Comments
 (0)