Skip to content

Commit

Permalink
[WIP] Append f to function calls in single-precision mode
Browse files Browse the repository at this point in the history
Not sure if this makes sense? Are CUDA math functions without the `f` suffix
overloaded for float (i.e., do they call the float version for float input)?

This needs to be extended for the other function implementations as
well.

See #234
  • Loading branch information
denisalevi committed Aug 12, 2021
1 parent 99a7ce2 commit 4232a6c
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions brian2cuda/cuda_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,14 @@ def __init__(self, *args, **kwds):
# These are used in _add_user_function to format the function code
if prefs.devices.cuda_standalone.default_functions_integral_convertion == np.float64:
self.default_func_type = 'double'
self.default_func_suffix = ''
self.other_func_type = 'float'
self.other_func_suffix = 'f'
else: # np.float32
self.default_func_type = 'float'
self.default_func_suffix = 'f'
self.other_func_type = 'double'
self.other_func_suffix = ''
# set clip function to either use all float or all double arguments
# see #51 for details
if prefs['core.default_float_dtype'] == np.float64:
Expand Down Expand Up @@ -634,7 +638,9 @@ def _add_user_function(self, varname, variable):
# `DEFAULT_FUNCTIONS['cos']` would match independent of the function name.
if varname in functions_C99:
funccode = funccode.format(default_type=self.default_func_type,
other_type=self.other_func_type)
default_f=self.default_func_suffix,
other_type=self.other_func_type,
other_f=self.other_func_suffix)
elif varname in ['clip', 'exprel']:
funccode = funccode.format(float_dtype=self.float_dtype)
###
Expand Down Expand Up @@ -819,15 +825,18 @@ class CUDAAtomicsCodeGenerator(CUDACodeGenerator):
{{default_type}} _brian_{func}(T value)
{{{{
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
return {func}(({{default_type}})value);
// CUDA math functions are only overloaded for floating point types. Hence,
// here we cast integral types to floating point types.
return {func}{{default_f}}(({{default_type}})value);
#else
return {func}(value);
// Host functions are already overloaded for integral types
return {func}{{default_f}}(value);
#endif
}}}}
inline __host__ __device__
{{other_type}} _brian_{func}({{other_type}} value)
{{{{
return {func}(value);
return {func}{{other_f}}(value);
}}}}
'''.format(func=func_cuda)
# {default_type} and {other_type} will be formatted in CUDACodeGenerator.determine_keywords()
Expand Down

0 comments on commit 4232a6c

Please sign in to comment.