Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions pycbc/libutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,112 @@ def __getattribute__(self, attr):
""".format(fun, attr, lib, lib, lib)
raise ImportError(inspect.cleandoc(msg))
return no_module(library_name)



def get_lscpu_caches():
    """Fetch CPU cache sizes, associativities and line sizes on Linux.

    Cache sizes and associativities are parsed from the ``ALL-SIZE`` and
    ``WAYS`` columns of ``lscpu --caches``. Cache line sizes are read from
    ``/sys/devices/system/cpu/cpu0/cache/index<N>/coherency_line_size``.

    Returns
    -------
    dict
        Cache information keyed with getconf-style names, as produced by
        ``convert_to_getconf_conven`` (e.g. ``LEVEL2_CACHE_SIZE`` in bytes,
        ``LEVEL2_CACHE_ASSOC``), plus ``LEVEL1_DCACHE_LINESIZE``,
        ``LEVEL2_CACHE_LINESIZE`` and ``LEVEL3_CACHE_LINESIZE`` as ints.
        The ``lscpu`` header row is passed through unchanged under the
        key ``'NAME'``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``lscpu --caches`` exits with a non-zero status.
    OSError
        If ``lscpu`` cannot be executed or a sysfs line-size file cannot
        be read.
    ValueError
        If the ``lscpu`` output does not have the expected columns, or a
        size / associativity / line-size value cannot be parsed.
    """
    # Local import so this function does not depend on the module header.
    import subprocess

    def _read_line_size(index):
        """Return the coherency line size of cpu0's cache ``index<index>``."""
        path = "/sys/devices/system/cpu/cpu0/cache/index{}/coherency_line_size"
        # Read sysfs directly instead of spawning a `cat` process.
        with open(path.format(index)) as sysfs_file:
            return int(sysfs_file.read())

    # Run the command and capture stdout
    result = subprocess.run(
        ["lscpu", "--caches"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True
    )

    # NOTE(review): assumes index0 is L1d, index2 is L2 and index3 is L3,
    # as the hard-coded paths in the original did -- confirm on target hosts.
    l1d_cline_size = _read_line_size(0)
    l2_cline_size = _read_line_size(2)
    l3_cline_size = _read_line_size(3)

    # First pass: keep the raw ALL-SIZE / WAYS strings for every row.
    cache_dict = {}
    for line in result.stdout.splitlines():
        parts = line.split()
        if not parts:
            # Tolerate blank lines in the command output.
            continue
        # Ensure 4 columns; raise explicitly because asserts vanish under -O.
        if len(parts) < 4:
            raise ValueError(
                "lscpu --caches must atleast return the first four columns"
            )
        cache_dict[parts[0]] = [parts[2], parts[3]]

    # Ensure the correct columns have been retrieved before converting.
    if cache_dict.get('NAME') != ['ALL-SIZE', 'WAYS']:
        raise ValueError("Ensure the correct columns are retrieved")

    # Second pass: convert size strings to bytes and ways to int; the
    # header row is kept as-is.
    caches_dict_bytes = {}
    for key, val in cache_dict.items():
        if key == 'NAME':
            caches_dict_bytes[key] = val
        else:
            caches_dict_bytes[key] = [get_in_bytes(val[0]), int(val[1])]

    gcaches_dict = convert_to_getconf_conven(caches_dict_bytes)
    gcaches_dict["LEVEL1_DCACHE_LINESIZE"] = l1d_cline_size
    gcaches_dict["LEVEL2_CACHE_LINESIZE"] = l2_cline_size
    gcaches_dict["LEVEL3_CACHE_LINESIZE"] = l3_cline_size

    return gcaches_dict

def get_in_bytes(size_str):
    """Convert a cache size string such as ``'32K'`` to an int byte count.

    Parameters
    ----------
    size_str : str
        A number optionally followed by a one-letter binary unit suffix:
        ``K`` (KiB), ``M`` (MiB), ``G`` (GiB) or ``T`` (TiB). A string
        without a suffix is taken to be a byte count. Fractional values
        such as ``'1.5M'`` are accepted.

    Returns
    -------
    int
        The size in bytes (fractional byte counts are truncated).

    Raises
    ------
    ValueError
        If the string is empty or its numeric part cannot be parsed.
    """
    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }

    text = size_str.strip()
    if not text:
        raise ValueError("Cannot convert an empty cache size string to bytes")

    suffix = text[-1].upper()
    if suffix in multipliers:
        num_str = text[:-1]
        factor = multipliers[suffix]
    else:
        # No recognised unit suffix: treat the whole string as bytes.
        num_str = text
        factor = 1

    try:
        # Exact integer arithmetic for the common case, e.g. '32K'.
        return int(num_str) * factor
    except ValueError:
        pass

    try:
        # Fractional sizes such as '1.5M'.
        return int(float(num_str) * factor)
    except ValueError:
        raise ValueError(
            "Cannot convert cache size {!r} to bytes".format(size_str)
        ) from None


def convert_to_getconf_conven(cache_dict):
    """Rename lscpu cache entries to getconf-style keys.

    Each of the entries ``'L1d'``, ``'L1i'``, ``'L2'`` and ``'L3'`` is
    split into two items: ``<PREFIX>_SIZE`` holding element 0 of its
    value and ``<PREFIX>_ASSOC`` holding element 1. Every other entry is
    copied across unchanged.

    Parameters
    ----------
    cache_dict : dict
        Mapping from lscpu cache name to an indexable value whose first
        two elements are the size and the associativity.

    Returns
    -------
    dict
        A new dictionary using the getconf naming convention.
    """
    # lscpu cache name -> (size key, associativity key)
    getconf_names = {
        'L1d': ('LEVEL1_DCACHE_SIZE', 'LEVEL1_DCACHE_ASSOC'),
        'L1i': ('LEVEL1_ICACHE_SIZE', 'LEVEL1_ICACHE_ASSOC'),
        'L2': ('LEVEL2_CACHE_SIZE', 'LEVEL2_CACHE_ASSOC'),
        'L3': ('LEVEL3_CACHE_SIZE', 'LEVEL3_CACHE_ASSOC'),
    }

    renamed = {}
    for name, info in cache_dict.items():
        targets = getconf_names.get(name)
        if targets is None:
            # Not a known cache level: pass the entry through untouched.
            renamed[name] = info
            continue
        size_key, assoc_key = targets
        renamed[size_key] = info[0]
        renamed[assoc_key] = info[1]

    return renamed
135 changes: 92 additions & 43 deletions pycbc/opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,45 +21,104 @@
import os, sys
import logging
from collections import OrderedDict
from pycbc.libutils import get_lscpu_caches

logger = logging.getLogger('pycbc.opt')

caches_backend = 'lscpu'
# Work around different Python versions to get runtime
# info on hardware cache sizes
_USE_SUBPROCESS = False
HAVE_GETCONF = False
if os.environ.get("LEVEL2_CACHE_SIZE", None) or os.environ.get("NO_GETCONF", None):
HAVE_GETCONF = False
elif sys.platform == 'darwin':
# Mac has getconf, but we can do nothing useful with it
HAVE_GETCONF = False
else:
import subprocess
_USE_SUBPROCESS = True
HAVE_GETCONF = True
# nix OS?
nix=False

if sys.platform=='darwin' or sys.platform=='linux':
nix=True

#print(f"opt: OS is nix? {nix}")

if os.environ.get("LEVEL2_CACHE_SIZE", None):
if not nix:
# If Windows, get L2 cache size from env
# Ignore other vars
LEVEL2_CACHE_SIZE = int(os.environ["LEVEL2_CACHE_SIZE"])
logger.info("opt: using LEVEL2_CACHE_SIZE %d from environment",
LEVEL2_CACHE_SIZE)
elif HAVE_GETCONF:
else:
# darwin or linux
# If python3, subprocess can be used
try:
import subprocess
_USE_SUBPROCESS = True
except ModuleNotFoundError:
import commands
_USE_SUBPROCESS = False

#print(f"subprocess module found? {_USE_SUBPROCESS}")
if _USE_SUBPROCESS:
def getconf(confvar):
return int(subprocess.check_output(['getconf', confvar]))
# Get cache sizes from lscpu, linesize from getconf
# getconf does not return the correct cache size
# on modern CPUs
if sys.platform!='darwin':
# Flag for later
LEVEL2_CACHE_LINESIZE=None

def getconf(confvar):
""" getconf for cache line sizes """
return int(subprocess.check_output(['getconf', confvar]))

if caches_backend=='lscpu':
""" lscpu for caches and their assoc """
cache_info = get_lscpu_caches()

def get_lscpu_val(confvar, caches_info=cache_info):
""" lscpu overload of getval """
return caches_info[confvar]

getval = get_lscpu_val

elif caches_backend=='getconf':
# getconf overload of getval
getval = getconf
caches_info=None
else:
raise KeyError(f"Unknown cache backend {caches_backend}")

else:
def getconf(confvar):
""" getconf overload, but with older commands module """
retlist = commands.getstatusoutput('getconf ' + confvar)
return int(retlist[1])

LEVEL1_DCACHE_SIZE = getconf('LEVEL1_DCACHE_SIZE')
LEVEL1_DCACHE_ASSOC = getconf('LEVEL1_DCACHE_ASSOC')
LEVEL1_DCACHE_LINESIZE = getconf('LEVEL1_DCACHE_LINESIZE')
LEVEL2_CACHE_SIZE = getconf('LEVEL2_CACHE_SIZE')
LEVEL2_CACHE_ASSOC = getconf('LEVEL2_CACHE_ASSOC')
LEVEL2_CACHE_LINESIZE = getconf('LEVEL2_CACHE_LINESIZE')
LEVEL3_CACHE_SIZE = getconf('LEVEL3_CACHE_SIZE')
LEVEL3_CACHE_ASSOC = getconf('LEVEL3_CACHE_ASSOC')
LEVEL3_CACHE_LINESIZE = getconf('LEVEL3_CACHE_LINESIZE')

getval=getconf

if sys.platform!='darwin':
LEVEL1_DCACHE_SIZE = getval('LEVEL1_DCACHE_SIZE')
LEVEL2_CACHE_SIZE = getval('LEVEL2_CACHE_SIZE')
LEVEL3_CACHE_SIZE = getval('LEVEL3_CACHE_SIZE')

LEVEL1_DCACHE_ASSOC = getval('LEVEL1_DCACHE_ASSOC')
LEVEL2_CACHE_ASSOC = getval('LEVEL2_CACHE_ASSOC')
LEVEL3_CACHE_ASSOC = getval('LEVEL3_CACHE_ASSOC')

# getconf can also report cache line sizes,
# but it fails to report the line size for L3
LEVEL1_DCACHE_LINESIZE = getval('LEVEL1_DCACHE_LINESIZE')
LEVEL2_CACHE_LINESIZE = getval('LEVEL2_CACHE_LINESIZE')
LEVEL3_CACHE_LINESIZE = getval('LEVEL3_CACHE_LINESIZE')
else:
# Get the cache line size from sysctl.
# On Apple M-series chips, different cache levels can have different line sizes!
# Also, different core types (P vs E) can have different cache sizes!
# Cache associativities are usually not exposed!
# So only the system-reported line size is read here and used as the L2 line size.
LEVEL2_CACHE_LINESIZE=int(subprocess.check_output(['sysctl', '-n', 'hw.cachelinesize']))

# Left here for testing.
#print("Cache sizes")
#print(LEVEL1_DCACHE_SIZE,LEVEL2_CACHE_SIZE, LEVEL3_CACHE_SIZE)
#print("Cache assoc")
#print(LEVEL1_DCACHE_ASSOC, LEVEL2_CACHE_ASSOC, LEVEL3_CACHE_ASSOC)
#print("Cache linesizes")
#print(LEVEL1_DCACHE_LINESIZE, LEVEL2_CACHE_LINESIZE, LEVEL3_CACHE_LINESIZE)


def insert_optimization_option_group(parser):
Expand Down Expand Up @@ -101,27 +160,19 @@ def verify_optimization_options(opt, parser):

if opt.cpu_affinity_from_env is not None:
if opt.cpu_affinity is not None:
logger.error(
"Both --cpu_affinity_from_env and --cpu_affinity specified"
)
logging.error("Both --cpu_affinity_from_env and --cpu_affinity specified")
sys.exit(1)

requested_cpus = os.environ.get(opt.cpu_affinity_from_env)

if requested_cpus is None:
logger.error(
"CPU affinity requested from environment variable %s "
"but this variable is not defined",
opt.cpu_affinity_from_env
)
logging.error("CPU affinity requested from environment variable %s "
"but this variable is not defined" % opt.cpu_affinity_from_env)
sys.exit(1)

if requested_cpus == '':
logger.error(
"CPU affinity requested from environment variable %s "
"but this variable is empty",
opt.cpu_affinity_from_env
)
logging.error("CPU affinity requested from environment variable %s "
"but this variable is empty" % opt.cpu_affinity_from_env)
sys.exit(1)

if requested_cpus is None:
Expand All @@ -132,13 +183,11 @@ def verify_optimization_options(opt, parser):
retcode = os.system(command)

if retcode != 0:
logger.error(
'taskset command <%s> failed with return code %d',
command, retcode
)
logging.error('taskset command <%s> failed with return code %d' % \
(command, retcode))
sys.exit(1)

logger.info("Pinned to CPUs %s ", requested_cpus)
logging.info("Pinned to CPUs %s " % requested_cpus)

class LimitedSizeDict(OrderedDict):
""" Fixed sized dict for FIFO caching"""
Expand Down
Loading