Merge pull request #17 from fjarri/dynamic-mem
Rename `local_mem` to `cu_dynamic_local_mem`
fjarri authored Jul 27, 2024
2 parents dbf2ea1 + 9cfcebf commit 7b7addd
Showing 7 changed files with 51 additions and 14 deletions.
13 changes: 13 additions & 0 deletions docs/history.rst
@@ -2,6 +2,19 @@ Version history
===============


0.5.0 (unreleased)
------------------

Changed
^^^^^^^

* ``local_mem`` keyword parameter of kernel calls renamed to ``cu_dynamic_local_mem``. (PR_17_)


.. _PR_17: https://github.com/fjarri/grunnur/pull/17



0.4.0 (25 Jul 2024)
-------------------

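For users of the library, the change is a keyword rename at the call site. Below is a minimal sketch, assuming ``context`` is a grunnur ``Context`` bound to a CUDA device (its creation is omitted); the kernel name ``scale`` and the sizes are illustrative, not part of this PR.

import numpy
from grunnur import Array, Program, Queue

# Toy CUDA kernel with a dynamically sized shared memory array.
src = """
extern "C" __global__ void scale(int *dest)
{
    extern __shared__ int temp[];  // sized at launch time
    int i = threadIdx.x;
    temp[i] = i;
    __syncthreads();
    dest[i] = temp[i] * 2;
}
"""

program = Program([context.device], src)
queue = Queue(context.device)
dest = Array.empty(context.device, [128], numpy.int32)

# Before this PR:
#   program.kernel.scale(queue, [128], [128], dest, local_mem=128 * 4)
# After this PR (CUDA only; must be left at 0 for OpenCL):
program.kernel.scale(queue, [128], [128], dest, cu_dynamic_local_mem=128 * 4)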
2 changes: 1 addition & 1 deletion grunnur/adapter_base.py
@@ -404,6 +404,6 @@ def __call__(
self,
queue_adapter: QueueAdapter,
*args: BufferAdapter | numpy.generic,
local_mem: int = 0,
cu_dynamic_local_mem: int = 0,
) -> Any:
pass
4 changes: 2 additions & 2 deletions grunnur/adapter_cuda.py
@@ -701,7 +701,7 @@ def __call__(
self,
queue_adapter: QueueAdapter,
*args: BufferAdapter | numpy.generic,
local_mem: int = 0,
cu_dynamic_local_mem: int = 0,
) -> None:
# Will be checked in the upper levels
assert isinstance(queue_adapter, CuQueueAdapter) # noqa: S101
@@ -724,5 +724,5 @@ def __call__(
grid=self._grid,
block=self._block,
stream=queue_adapter._pycuda_stream, # noqa: SLF001
shared=local_mem,
shared=cu_dynamic_local_mem,
)
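On the CUDA backend the new keyword is forwarded unchanged to PyCUDA's ``shared=`` launch argument, which sets the size of the dynamically allocated ``__shared__`` region. A standalone PyCUDA sketch of the same mechanism (not grunnur code; the kernel and sizes are illustrative):

import numpy
import pycuda.autoinit  # noqa: F401  (creates a default context)
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void fill(int *dest)
{
    extern __shared__ int temp[];  // sized by the `shared=` launch argument
    int i = threadIdx.x;
    temp[i] = i;
    __syncthreads();
    dest[i] = temp[i];
}
""")

fill = mod.get_function("fill")
dest = numpy.zeros(128, numpy.int32)
# `shared` is the number of bytes of dynamic shared memory --
# the value grunnur now takes as `cu_dynamic_local_mem`.
fill(cuda.Out(dest), block=(128, 1, 1), grid=(1, 1), shared=128 * 4)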
10 changes: 6 additions & 4 deletions grunnur/adapter_opencl.py
@@ -632,11 +632,13 @@ def __call__(
self,
queue_adapter: QueueAdapter,
*args: BufferAdapter | numpy.generic,
local_mem: int = 0,
cu_dynamic_local_mem: int = 0,
) -> pyopencl.Event:
# Local memory size is passed via regular kernel arguments in OpenCL.
# Should be checked in `PreparedKernel`.
assert local_mem == 0 # noqa: S101
if cu_dynamic_local_mem != 0:
raise ValueError(
"`cu_dynamic_local_mem` must be zero for OpenCL kernels; "
"dynamic local memory allocation is not supported"
)

# We have to keep the signature more general because of the base class,
# but the upper levels will ensure this is the case.
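The comment above refers to the OpenCL convention where the size of a ``__local`` buffer is supplied as an ordinary kernel argument rather than a launch parameter, which is why a non-zero ``cu_dynamic_local_mem`` is rejected here. A standalone PyOpenCL sketch of that convention (illustrative, not part of this PR):

import numpy
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

prg = cl.Program(ctx, """
__kernel void fill(__global int *dest, __local int *temp)
{
    int i = get_local_id(0);
    temp[i] = i;
    barrier(CLK_LOCAL_MEM_FENCE);
    dest[i] = temp[i];
}
""").build()

dest = numpy.zeros(128, numpy.int32)
dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, dest.nbytes)

# The __local buffer is sized here, at argument-passing time.
prg.fill(queue, (128,), (128,), dest_buf, cl.LocalMemory(128 * 4))
cl.enqueue_copy(queue, dest, dest_buf)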
16 changes: 11 additions & 5 deletions grunnur/program.py
@@ -314,7 +314,7 @@ def __call__(
self,
queue: Queue | MultiQueue,
*args: MultiArray | Array | Buffer | numpy.generic,
local_mem: int = 0,
cu_dynamic_local_mem: int = 0,
) -> Any:
"""
Enqueues the kernel on the devices in the given queue.
@@ -332,8 +332,10 @@ def __call__(
If an argument is an integer-keyed ``dict``, its values corresponding to the
device indices the kernel is executed on will be passed as kernel arguments.
:param cu_dynamic_local_mem: **CUDA only.** The size of dynamically allocated local
(shared in CUDA terms) memory, in bytes. That is, the size of
``extern __shared__`` arrays in CUDA kernels.
:param args: kernel arguments.
:param kwds: backend-specific keyword parameters.
:returns: a list of ``Event`` objects for enqueued kernels in case of PyOpenCL.
"""
if isinstance(queue, Queue):
Expand All @@ -357,7 +359,11 @@ def __call__(
single_queue = queue.queues[device]

pkernel = self._prepared_kernel_adapters[device]
ret_val = pkernel(single_queue._queue_adapter, *kernel_args, local_mem=local_mem) # noqa: SLF001
ret_val = pkernel(
single_queue._queue_adapter, # noqa: SLF001
*kernel_args,
cu_dynamic_local_mem=cu_dynamic_local_mem,
)
ret_vals.append(ret_val)

return ret_vals
@@ -455,11 +461,11 @@ def __call__(
global_size: Sequence[int] | Mapping[BoundDevice, Sequence[int]],
local_size: Sequence[int] | None | Mapping[BoundDevice, Sequence[int] | None] = None,
*args: MultiArray | Array | Buffer | numpy.generic,
local_mem: int = 0,
cu_dynamic_local_mem: int = 0,
) -> Any:
"""
A shortcut for :py:meth:`Kernel.prepare` and subsequent :py:meth:`PreparedKernel.__call__`.
See their doc entries for details.
"""
pkernel = self.prepare(global_size, local_size)
return pkernel(queue, *args, local_mem=local_mem)
return pkernel(queue, *args, cu_dynamic_local_mem=cu_dynamic_local_mem)
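As the docstring above says, ``Kernel.__call__`` is just ``prepare()`` followed by ``PreparedKernel.__call__``, so the keyword can be passed either way. A sketch reusing the illustrative names from the earlier example:

# One-shot call:
program.kernel.scale(queue, [128], [128], dest, cu_dynamic_local_mem=128 * 4)

# Prepare once, launch repeatedly:
pkernel = program.kernel.scale.prepare([128], [128])
pkernel(queue, dest, cu_dynamic_local_mem=128 * 4)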
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "grunnur"
version = "0.4.0"
version = "0.5.0.dev"
description = "Uniform API for PyOpenCL and PyCUDA."
authors = [
{name = "Bogdan Opanchuk", email = "[email protected]"},
18 changes: 17 additions & 1 deletion tests/test_program.py
@@ -87,7 +87,7 @@ def test_compile(mock_or_real_context, no_prelude):
assert (res == ref).all()

# Explicit local_size
res2_dev = Array.from_host(queue, a) # Array.empty(queue, length, numpy.int32)
res2_dev = Array.empty(context.device, [length], numpy.int32)
program.kernel.multiply(queue, [length], [length // 2], res2_dev, a_dev, b_dev, c)
res2 = res2_dev.get(queue)
if not mocked:
@@ -518,3 +518,19 @@ def test_builtin_globals(mock_backend_pycuda):

assert "max_total_local_size = 1024" in program.sources[context.devices[0]].source
assert "max_total_local_size = 512" in program.sources[context.devices[1]].source


def test_cu_dynamic_local_mem(mock_context):
src = MockDefTemplate(kernels=[MockKernel("test", [numpy.int32])])
program = Program([mock_context.device], src)
queue = Queue(mock_context.device)

if mock_context.api.id == opencl_api_id():
message = (
"`cu_dynamic_local_mem` must be zero for OpenCL kernels; "
"dynamic local memory allocation is not supported"
)
with pytest.raises(ValueError, match=message):
program.kernel.test(queue, [100], [100], numpy.int32(1), cu_dynamic_local_mem=100)
else:
program.kernel.test(queue, [100], [100], numpy.int32(1), cu_dynamic_local_mem=100)
