Skip to content

Commit 4232a6c

Browse files
committed
[WIP] Append f to function calls in single-precision mode
Not sure if this makes sense. Are CUDA math functions without the `f` suffix overloaded for float (i.e. do they call the float version for float input)? This needs to be extended to the other function implementations as well. See #234
1 parent 99a7ce2 commit 4232a6c

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

brian2cuda/cuda_generator.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,14 @@ def __init__(self, *args, **kwds):
237237
# These are used in _add_user_function to format the function code
238238
if prefs.devices.cuda_standalone.default_functions_integral_convertion == np.float64:
239239
self.default_func_type = 'double'
240+
self.default_func_suffix = ''
240241
self.other_func_type = 'float'
242+
self.other_func_suffix = 'f'
241243
else: # np.float32
242244
self.default_func_type = 'float'
245+
self.default_func_suffix = 'f'
243246
self.other_func_type = 'double'
247+
self.other_func_suffix = ''
244248
# set clip function to either use all float or all double arguments
245249
# see #51 for details
246250
if prefs['core.default_float_dtype'] == np.float64:
@@ -634,7 +638,9 @@ def _add_user_function(self, varname, variable):
634638
# `DEFAULT_FUNCTIONS['cos']` would match independent of the function name.
635639
if varname in functions_C99:
636640
funccode = funccode.format(default_type=self.default_func_type,
637-
other_type=self.other_func_type)
641+
default_f=self.default_func_suffix,
642+
other_type=self.other_func_type,
643+
other_f=self.other_func_suffix)
638644
elif varname in ['clip', 'exprel']:
639645
funccode = funccode.format(float_dtype=self.float_dtype)
640646
###
@@ -819,15 +825,18 @@ class CUDAAtomicsCodeGenerator(CUDACodeGenerator):
819825
{{default_type}} _brian_{func}(T value)
820826
{{{{
821827
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
822-
return {func}(({{default_type}})value);
828+
// CUDA math functions are only overloaded for floating point types. Hence,
829+
// here we cast integral types to floating point types.
830+
return {func}{{default_f}}(({{default_type}})value);
823831
#else
824-
return {func}(value);
832+
// Host functions are already overloaded for integral types
833+
return {func}{{default_f}}(value);
825834
#endif
826835
}}}}
827836
inline __host__ __device__
828837
{{other_type}} _brian_{func}({{other_type}} value)
829838
{{{{
830-
return {func}(value);
839+
return {func}{{other_f}}(value);
831840
}}}}
832841
'''.format(func=func_cuda)
833842
# {default_type} and {other_type} will be formatted in CUDACodeGenerator.determine_keywords()

0 commit comments

Comments
 (0)