ROCm · mawad-amd · Jan 19, 2026 · Jan 19, 2026 · Jan 19, 2026 · Jan 19, 2026
@@ -15,30 +15,19 @@
         "--device=/dev/dri",
         "--cap-add=SYS_PTRACE",
         "--group-add=video",
+        "--group-add=render",
         "--security-opt=seccomp=unconfined",
         "--shm-size=16G",
         "--ipc=host",
         "--ulimit=memlock=-1",
         "--ulimit=stack=67108864"
     ],
-    "features": {
-        "ghcr.io/devcontainers/features/common-utils:2": {
-            "installZsh": true,
-            "installOhMyZsh": true,
-            "upgradePackages": false,
-            "username": "automatic",
-            "uid": "automatic",
-            "gid": "automatic",
-            "configureZshAsDefaultShell": false
-        }
-    },
     "mounts": [
         "source=${localEnv:HOME}/.ssh/ssh-agent.sock,target=/tmp/ssh-agent.sock,type=bind"
     ],
     "remoteEnv": {
         "SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
     },
-    "remoteUser": "vscode",
-    "postStartCommand": "bash -lc 'set -e; if ! getent group video >/dev/null; then sudo groupadd -r video || true; fi; if ! getent group render >/dev/null; then sudo groupadd -r render || true; fi; sudo usermod -aG video,render vscode || true'",
+    "remoteUser": "root",
-    "remoteUser": "root",
+    "remoteUser": "vscode",
-    "remoteUser": "root",
+    "remoteUser": "vscode",
     "updateRemoteUserUID": true
 }
@@ -14,15 +14,26 @@ SOCK="${HOME}/.ssh/ssh-agent.sock"
 
 mkdir -p "${HOME}/.ssh"
 
+# Check if socket exists and is working
 if [[ -S "${SOCK}" ]]; then
-  exit 0
+  # Verify the agent is responsive and has keys
+  if SSH_AUTH_SOCK="${SOCK}" ssh-add -l >/dev/null 2>&1; then
+    # Agent is running and has keys loaded, we're done
+    exit 0
+  fi
 fi
 
+# Either the socket doesn't exist, the agent is unreachable, or it has no keys loaded
+# Remove stale socket if present
 rm -f "${SOCK}"
+
+# Start a new ssh-agent
 ssh-agent -a "${SOCK}" -t 8h >/dev/null
 
+# Try to load the default RSA key, if present
 if [[ -f "${HOME}/.ssh/id_rsa" ]]; then
   SSH_AUTH_SOCK="${SOCK}" ssh-add "${HOME}/.ssh/id_rsa" >/dev/null 2>&1 || true
 fi
 
+# List loaded keys (for verification, ignores errors)
 SSH_AUTH_SOCK="${SOCK}" ssh-add -l >/dev/null 2>&1 || true
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: MIT
-# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
 
 .venv/
 .pip-cache/
@@ -18,6 +18,7 @@ __pycache__/
 *.log
 *.txt
 !requirements.txt
+!CMakeLists.txt
 slurm_logs*/
 redo.sh
 
@@ -46,4 +47,8 @@ __pycache__/
 *.pyzw
 *.pyzwz
 
-!.devcontainer/devcontainer.json
+!.devcontainer/devcontainer.json
+
+DLPack-tensor-example
+
+gpucore.*
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+
+cmake_minimum_required(VERSION 3.21)
+project(iris_vmem LANGUAGES CXX HIP)
+
+# CPM for dependency management
+include(cmake/CPM.cmake)
+
+# Find required packages
+find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
+find_package(hip REQUIRED)
+
+# Use CPM to fetch pybind11
+CPMAddPackage(
+  NAME pybind11
+  GITHUB_REPOSITORY pybind/pybind11
+  GIT_TAG v2.11.1
+)
+
+# Use CPM to fetch fmt.
+# fmt is linked into the _iris_vmem Python extension, which is a shared module.
+# Unless BUILD_SHARED_LIBS is set, fmt is built as a static library, so it must be
+# compiled as position-independent code or the final link can fail. The option is
+# passed through CPM so it only affects the fmt subproject.
+CPMAddPackage(
+  NAME fmt
+  GITHUB_REPOSITORY fmtlib/fmt
+  GIT_TAG 10.2.1
+  OPTIONS "CMAKE_POSITION_INDEPENDENT_CODE ON"
+)
+
+# Create Python module
+pybind11_add_module(_iris_vmem csrc/src/bindings.cpp)
+
+# Set include directories
+target_include_directories(_iris_vmem PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/csrc/include
+    ${Python3_INCLUDE_DIRS}
+)
+
+# Link libraries
+target_link_libraries(_iris_vmem PRIVATE
+    hip::host
+    fmt::fmt
+)
+
+# Set C++ standard
+set_target_properties(_iris_vmem PROPERTIES
+    CXX_STANDARD 20
+    CXX_STANDARD_REQUIRED ON
+    POSITION_INDEPENDENT_CODE ON
+)
+
+# Install target: the extension module is placed in the `iris` directory
+# relative to the install prefix.
+install(TARGETS _iris_vmem
+    LIBRARY DESTINATION iris
+)
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: MIT
+#
+# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors
+
+# Bootstrap for CPM.cmake: pins a release and its SHA256, picks where to store the
+# downloaded script, fetches it, and includes it.
+set(CPM_DOWNLOAD_VERSION 0.42.0)
+set(CPM_HASH_SUM "2020b4fc42dba44817983e06342e682ecfc3d2f484a581f11cc5731fbe4dce8a")
+
+# Storage location, in order of precedence: the CPM_SOURCE_CACHE CMake variable,
+# the CPM_SOURCE_CACHE environment variable, then the build directory.
+if(CPM_SOURCE_CACHE)
+  set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+elseif(DEFINED ENV{CPM_SOURCE_CACHE})
+  set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+else()
+  set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+endif()
+
+# Expand relative path. This is important if the provided path contains a tilde (~)
+get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
+
+# Fetch the pinned release; EXPECTED_HASH makes CMake check the file against the
+# SHA256 recorded in CPM_HASH_SUM.
+file(DOWNLOAD
+     https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
+     ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
+)
+
+# Load the downloaded script into the including project.
+include(${CPM_DOWNLOAD_LOCATION})
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
+
+#pragma once
+
+#include <Python.h>
+#include <hip/hip_runtime.h>
+#include <memory_resource>
+#include <stdexcept>
+#include <string>
+
+namespace iris {
+namespace gpu_array {
+
+// Structure to hold GPU Array Interface data (__cuda_array_interface__ compatible).
+//
+// The destructor releases `shape` and `strides` with delete[], so both must either be
+// null or point to arrays allocated with new[]. `data` is not released here.
+// All members have defaults so that a default-constructed instance can be destroyed
+// safely (previously the destructor would delete[] indeterminate pointers).
+// NOTE(review): the implicit copy operations copy the raw pointers, so copying an
+// instance would lead to a double delete[] — handle instances by pointer/reference.
+struct GpuArrayInterface {
+    void* data = nullptr;        // Device pointer
+    int64_t* shape = nullptr;    // Shape array
+    int64_t* strides = nullptr;  // Strides array (in bytes)
+    int ndim = 0;                // Number of dimensions
+    std::string typestr;         // Data type string (e.g., "<f4" for float32)
+    int version = 3;             // Interface version (3)
+
+    // Cleanup. delete[] on a null pointer is a no-op, so no null checks are needed.
+    ~GpuArrayInterface() {
+        delete[] shape;
+        delete[] strides;
+    }
+};
+
+// Context for managing lifetime of GPU array interface objects.
+//
+// The destructor deletes `interface`; `allocator` and `data_ptr` are not released here.
+// All members have defaults so that a default-constructed context can be destroyed
+// safely (previously the destructor would delete an indeterminate pointer).
+// NOTE(review): the implicit copy operations copy the raw `interface` pointer, so
+// copying a context would lead to a double delete — handle instances by pointer.
+struct GpuArrayContext {
+    std::pmr::memory_resource* allocator = nullptr;  // Pointer to allocator
+    void* data_ptr = nullptr;                         // Device pointer
+    size_t size = 0;                                  // Allocation size in bytes
+    GpuArrayInterface* interface = nullptr;           // Interface struct
+
+    // delete on a null pointer is a no-op, so no null check is needed.
+    ~GpuArrayContext() {
+        delete interface;
+    }
+};
+
+// Map a dtype name to its GPU Array Interface typestr.
+// A typestr is <endianness><type><size>, where
+//   endianness: < (little), > (big), | (not applicable)
+//   type:       i (int), u (uint), f (float), c (complex)
+//   size:       bytes per element
+// Throws std::runtime_error for any dtype name not listed in the table below.
+inline std::string dtype_to_typestr(const std::string& dtype_str) {
+    struct Mapping {
+        const char* dtype;
+        const char* typestr;
+    };
+    static constexpr Mapping kMappings[] = {
+        {"int32", "<i4"},
+        {"int64", "<i8"},
+        {"float32", "<f4"},
+        {"float64", "<f8"},
+        {"uint32", "<u4"},
+        {"uint64", "<u8"},
+    };
+
+    for (const Mapping& mapping : kMappings) {
+        if (dtype_str == mapping.dtype) {
+            return mapping.typestr;
+        }
+    }
+    throw std::runtime_error("Unsupported dtype: " + dtype_str);
+}
+
+// Return the size in bytes of one element of the given dtype name.
+// Throws std::runtime_error for dtype names other than the six handled below.
+inline size_t get_element_size(const std::string& dtype_str) {
+    static constexpr const char* kFourByte[] = {"int32", "uint32", "float32"};
+    static constexpr const char* kEightByte[] = {"int64", "uint64", "float64"};
+
+    for (const char* name : kFourByte) {
+        if (dtype_str == name) {
+            return 4;
+        }
+    }
+    for (const char* name : kEightByte) {
+        if (dtype_str == name) {
+            return 8;
+        }
+    }
+    throw std::runtime_error("Unknown dtype: " + dtype_str);
+}
+
+// Fill `strides` with row-major strides (in bytes) for the given `shape`.
+// The last axis gets `element_size`; each earlier axis gets the stride of the axis
+// after it multiplied by that axis' extent. Nothing is written when ndim <= 0.
+inline void compute_strides(int64_t* strides, const int64_t* shape, int ndim, size_t element_size) {
+    int64_t running = static_cast<int64_t>(element_size);
+    int axis = ndim;
+    // Walk from the innermost axis outwards.
+    while (axis-- > 0) {
+        strides[axis] = running;
+        running *= shape[axis];
+    }
+}
+
+// Create Python dict for __cuda_array_interface__.
+//
+// Parameters:
+//   data_ptr - pointer exported as an integer address in the "data" tuple
+//   shape    - array of `ndim` extents, exported as the "shape" tuple
+//   ndim     - number of dimensions (must be >= 0)
+//   typestr  - exported unchanged under "typestr"
+//   readonly - second element of the "data" tuple
+//
+// Returns a new reference to the dict. "version" is 3; "strides" and "descr" are None.
+// Throws std::runtime_error if the arguments are invalid or if any Python object
+// cannot be created or stored; the partially built dict is released before throwing.
+// This calls the CPython API, so the caller must hold the GIL.
+inline PyObject* create_gpu_array_interface_dict(
+    void* data_ptr,
+    const int64_t* shape,
+    int ndim,
+    const std::string& typestr,
+    bool readonly = false
+) {
+    if (ndim < 0 || (ndim > 0 && shape == nullptr)) {
+        throw std::runtime_error("Invalid shape for GPU array interface");
+    }
+
+    PyObject* dict = PyDict_New();
+    if (!dict) {
+        throw std::runtime_error("Failed to create dict");
+    }
+
+    // Stores `value` under `key` and always consumes the caller's reference to `value`
+    // (PyDict_SetItemString itself does not steal it). A null `value`, i.e. a failed
+    // allocation, or a failed insert releases `dict` and throws.
+    const auto store = [dict](const char* key, PyObject* value) {
+        const bool ok = value != nullptr && PyDict_SetItemString(dict, key, value) == 0;
+        Py_XDECREF(value);
+        if (!ok) {
+            Py_DECREF(dict);
+            throw std::runtime_error(
+                std::string("Failed to set '") + key + "' in GPU array interface dict");
+        }
+    };
+
+    // shape: tuple of ints
+    PyObject* shape_tuple = PyTuple_New(ndim);
+    for (int i = 0; shape_tuple != nullptr && i < ndim; ++i) {
+        PyObject* extent = PyLong_FromLongLong(shape[i]);
+        if (extent == nullptr) {
+            // Dropping the tuple ends the loop; store() below reports the failure.
+            Py_CLEAR(shape_tuple);
+        } else {
+            PyTuple_SetItem(shape_tuple, i, extent);  // steals `extent`
+        }
+    }
+    store("shape", shape_tuple);
+
+    // typestr: string
+    store("typestr", PyUnicode_FromString(typestr.c_str()));
+
+    // data: tuple (pointer, readonly)
+    PyObject* address = PyLong_FromVoidPtr(data_ptr);
+    PyObject* data_tuple = (address != nullptr) ? PyTuple_New(2) : nullptr;
+    if (data_tuple != nullptr) {
+        PyTuple_SetItem(data_tuple, 0, address);  // steals `address`
+        PyTuple_SetItem(data_tuple, 1, PyBool_FromLong(readonly ? 1 : 0));
+    } else {
+        Py_XDECREF(address);
+    }
+    store("data", data_tuple);
+
+    // version: int (should be 3)
+    store("version", PyLong_FromLong(3));
+
+    // strides: None for C-contiguous (optional, can compute if needed).
+    // The extra reference taken here is the one store() consumes, so the refcount of
+    // None stays balanced.
+    Py_INCREF(Py_None);
+    store("strides", Py_None);
+
+    // descr: None for simple types (optional)
+    Py_INCREF(Py_None);
+    store("descr", Py_None);
+
+    return dict;
+}
+
+}  // namespace gpu_array
+}  // namespace iris