Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,19 @@
"--device=/dev/dri",
"--cap-add=SYS_PTRACE",
"--group-add=video",
"--group-add=render",
"--security-opt=seccomp=unconfined",
"--shm-size=16G",
"--ipc=host",
"--ulimit=memlock=-1",
"--ulimit=stack=67108864"
],
"features": {
"ghcr.io/devcontainers/features/common-utils:2": {
"installZsh": true,
"installOhMyZsh": true,
"upgradePackages": false,
"username": "automatic",
"uid": "automatic",
"gid": "automatic",
"configureZshAsDefaultShell": false
}
},
"mounts": [
"source=${localEnv:HOME}/.ssh/ssh-agent.sock,target=/tmp/ssh-agent.sock,type=bind"
],
"remoteEnv": {
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
},
"remoteUser": "vscode",
"postStartCommand": "bash -lc 'set -e; if ! getent group video >/dev/null; then sudo groupadd -r video || true; fi; if ! getent group render >/dev/null; then sudo groupadd -r render || true; fi; sudo usermod -aG video,render vscode || true'",
"remoteUser": "root",
Copy link

Copilot AI Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The remoteUser has been changed from "vscode" to "root". Running containers as root is a security risk and goes against container best practices. Unless there's a specific requirement for root access, consider using a non-root user with appropriate permissions instead.

Suggested change
"remoteUser": "root",
"remoteUser": "vscode",

Copilot uses AI. Check for mistakes.
"updateRemoteUserUID": true
}
13 changes: 12 additions & 1 deletion .devcontainer/ensure-ssh-agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,26 @@ SOCK="${HOME}/.ssh/ssh-agent.sock"

mkdir -p "${HOME}/.ssh"

# Check if socket exists and is working
if [[ -S "${SOCK}" ]]; then
exit 0
# Verify the agent is responsive and has keys
if SSH_AUTH_SOCK="${SOCK}" ssh-add -l >/dev/null 2>&1; then
# Agent is running and has keys loaded, we're done
exit 0
fi
fi

# Either socket doesn't exist, or agent has no keys
# Remove stale socket if present
rm -f "${SOCK}"

# Start a new ssh-agent
ssh-agent -a "${SOCK}" -t 8h >/dev/null

# Try to load default keys
if [[ -f "${HOME}/.ssh/id_rsa" ]]; then
SSH_AUTH_SOCK="${SOCK}" ssh-add "${HOME}/.ssh/id_rsa" >/dev/null 2>&1 || true
fi

# List loaded keys (for verification, ignores errors)
SSH_AUTH_SOCK="${SOCK}" ssh-add -l >/dev/null 2>&1 || true
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.

.venv/
.pip-cache/
Expand All @@ -18,6 +18,7 @@ __pycache__/
*.log
*.txt
!requirements.txt
!CMakeLists.txt
slurm_logs*/
redo.sh

Expand Down Expand Up @@ -46,4 +47,8 @@ __pycache__/
*.pyzw
*.pyzwz

!.devcontainer/devcontainer.json
!.devcontainer/devcontainer.json

DLPack-tensor-example

gpucore.*
53 changes: 53 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.

cmake_minimum_required(VERSION 3.21)
# Enables both C++ and HIP as project languages.
# NOTE(review): the only source listed below is a .cpp file linked against
# hip::host; confirm the HIP language is actually needed at configure time.
project(iris_vmem LANGUAGES CXX HIP)

# CPM for dependency management
# (cmake/CPM.cmake is resolved relative to this directory.)
include(cmake/CPM.cmake)

# Find required packages
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
find_package(hip REQUIRED)

# Use CPM to fetch pybind11
# Pinned to a release tag so the build is reproducible.
CPMAddPackage(
NAME pybind11
GITHUB_REPOSITORY pybind/pybind11
GIT_TAG v2.11.1
)

# Use CPM to fetch fmt
# Pinned to a release tag so the build is reproducible.
CPMAddPackage(
NAME fmt
GITHUB_REPOSITORY fmtlib/fmt
GIT_TAG 10.2.1
)

# Create Python module
# The extension module is named _iris_vmem and is built from a single
# translation unit.
pybind11_add_module(_iris_vmem csrc/src/bindings.cpp)

# Set include directories
# NOTE(review): pybind11_add_module presumably already propagates the Python
# include directories; confirm whether Python3_INCLUDE_DIRS is still required.
target_include_directories(_iris_vmem PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/csrc/include
${Python3_INCLUDE_DIRS}
)

# Link libraries
target_link_libraries(_iris_vmem PRIVATE
hip::host
fmt::fmt
)

# Set C++ standard
# C++20 is mandatory (CXX_STANDARD_REQUIRED ON), so CMake will not fall back
# to an older standard.
set_target_properties(_iris_vmem PROPERTIES
CXX_STANDARD 20
CXX_STANDARD_REQUIRED ON
POSITION_INDEPENDENT_CODE ON
)

# Install target
# The module is installed into an "iris" directory; the relative destination
# is interpreted against the install prefix.
install(TARGETS _iris_vmem
LIBRARY DESTINATION iris
)
24 changes: 24 additions & 0 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: MIT
#
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors

# Pinned CPM.cmake release and the SHA256 digest the downloaded script must match.
set(CPM_DOWNLOAD_VERSION 0.42.0)
set(CPM_HASH_SUM "2020b4fc42dba44817983e06342e682ecfc3d2f484a581f11cc5731fbe4dce8a")

# Pick where the script is stored: the CPM_SOURCE_CACHE CMake variable takes
# precedence, then the CPM_SOURCE_CACHE environment variable, otherwise the
# build tree is used.
if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

# Fetch the release asset for the pinned version; EXPECTED_HASH makes CMake
# verify the file against CPM_HASH_SUM.
file(DOWNLOAD
https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
)

# Load the downloaded script so CPMAddPackage and friends become available.
include(${CPM_DOWNLOAD_LOCATION})
124 changes: 124 additions & 0 deletions csrc/include/gpu_array.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <Python.h>
#include <hip/hip_runtime.h>
#include <memory_resource>
#include <stdexcept>
#include <string>

namespace iris {
namespace gpu_array {

// Structure to hold GPU Array Interface data (__cuda_array_interface__ compatible).
//
// Ownership: `shape` and `strides` are released with delete[] by the
// destructor, so each must be either null or an array obtained from new[].
// `data` is never freed by this struct.
//
// Every member has a default initializer so that a default-constructed
// instance is safe to destroy (previously the destructor could run delete[]
// on indeterminate pointers). The struct deliberately remains an aggregate.
//
// NOTE(review): copy operations are still the implicit ones; copying an
// instance whose shape/strides are non-null leads to a double delete[].
// Hold instances by pointer, or null out the source after a manual transfer.
struct GpuArrayInterface {
    void* data = nullptr;        // Device pointer
    int64_t* shape = nullptr;    // Shape array (owned, new[])
    int64_t* strides = nullptr;  // Strides array in bytes (owned, new[])
    int ndim = 0;                // Number of dimensions
    std::string typestr;         // Data type string (e.g., "<f4" for float32)
    int version = 0;             // Interface version (3 for the current spec); zero until assigned

    // Cleanup: delete[] on a null pointer is a no-op, so no guards are needed.
    ~GpuArrayInterface() {
        delete[] shape;
        delete[] strides;
    }
};

// Context for managing lifetime of GPU array interface objects.
//
// Ownership: `interface` is released with delete by the destructor, so it
// must be either null or a single object obtained from new. `allocator` and
// `data_ptr` are not released here.
//
// Every member has a default initializer so that a default-constructed
// context is safe to destroy (previously the destructor could run delete on
// an indeterminate pointer). The struct deliberately remains an aggregate.
//
// NOTE(review): copy operations are still the implicit ones; copying a
// context with a non-null `interface` leads to a double delete.
struct GpuArrayContext {
    std::pmr::memory_resource* allocator = nullptr;  // Pointer to allocator (not owned here)
    void* data_ptr = nullptr;                        // Device pointer (not freed here)
    size_t size = 0;                                 // Allocation size in bytes
    GpuArrayInterface* interface = nullptr;          // Interface struct (owned, new)

    // delete on a null pointer is a no-op, so no guard is needed.
    ~GpuArrayContext() {
        delete interface;
    }
};

// Map a dtype name onto its GPU Array Interface typestr.
//
// A typestr is laid out as <endianness><type><size>:
//   endianness: '<' little, '>' big, '|' not applicable
//   type:       i (int), u (uint), f (float), c (complex)
//   size:       bytes per element
//
// Recognised names are int32, int64, float32, float64, uint32 and uint64;
// all are reported as little-endian. Any other name raises
// std::runtime_error.
inline std::string dtype_to_typestr(const std::string& dtype_str) {
    struct Mapping {
        const char* dtype;
        const char* typestr;
    };
    static constexpr Mapping kMappings[] = {
        {"int32", "<i4"},
        {"int64", "<i8"},
        {"float32", "<f4"},
        {"float64", "<f8"},
        {"uint32", "<u4"},
        {"uint64", "<u8"},
    };

    for (const Mapping& entry : kMappings) {
        if (dtype_str == entry.dtype) {
            return entry.typestr;
        }
    }
    throw std::runtime_error("Unsupported dtype: " + dtype_str);
}

// Return the size in bytes of one element of the named dtype.
//
// int32/uint32/float32 are 4 bytes, int64/uint64/float64 are 8 bytes.
// Any other name raises std::runtime_error.
inline size_t get_element_size(const std::string& dtype_str) {
    struct SizeEntry {
        const char* dtype;
        size_t bytes;
    };
    static constexpr SizeEntry kSizes[] = {
        {"int32", 4},  {"uint32", 4}, {"float32", 4},
        {"int64", 8},  {"uint64", 8}, {"float64", 8},
    };

    for (const SizeEntry& entry : kSizes) {
        if (dtype_str == entry.dtype) {
            return entry.bytes;
        }
    }
    throw std::runtime_error("Unknown dtype: " + dtype_str);
}

// Fill `strides` with row-major (C-order) strides expressed in bytes.
//
// The last axis gets `element_size`; each earlier axis gets the stride of the
// axis after it multiplied by that axis' extent. Both `strides` and `shape`
// must hold at least `ndim` entries. Nothing is written when ndim <= 0.
inline void compute_strides(int64_t* strides, const int64_t* shape, int ndim, size_t element_size) {
    int64_t bytes_per_step = static_cast<int64_t>(element_size);
    int axis = ndim;
    // Walk from the innermost axis outwards.
    while (axis-- > 0) {
        strides[axis] = bytes_per_step;
        bytes_per_step *= shape[axis];
    }
}

// Create Python dict for __cuda_array_interface__.
//
// Parameters:
//   data_ptr  device pointer, exported as an integer in the "data" tuple
//   shape     array of `ndim` extents
//   ndim      number of dimensions
//   typestr   interface type string (e.g. "<f4")
//   readonly  second element of the "data" tuple
//
// Returns a new reference to a dict with the keys "shape", "typestr", "data",
// "version" (always 3), "strides" (None) and "descr" (None).
//
// Throws std::runtime_error if any Python object cannot be created or stored;
// the partially built dict is released before throwing.
// NOTE(review): a Python exception may still be set when this throws; confirm
// the binding layer clears or replaces it.
//
// The calling thread is expected to hold the GIL, as for any CPython API use.
inline PyObject* create_gpu_array_interface_dict(
    void* data_ptr,
    const int64_t* shape,
    int ndim,
    const std::string& typestr,
    bool readonly = false
) {
    PyObject* dict = PyDict_New();
    if (!dict) {
        throw std::runtime_error("Failed to create dict");
    }

    // Stores `value` under `key` and consumes the caller's reference to
    // `value` (PyDict_SetItemString takes its own reference). A null `value`
    // is treated as a failed construction. On failure the dict is released so
    // nothing leaks across the throw.
    const auto put = [dict](const char* key, PyObject* value) {
        const bool stored = value != nullptr && PyDict_SetItemString(dict, key, value) == 0;
        Py_XDECREF(value);
        if (!stored) {
            Py_DECREF(dict);
            throw std::runtime_error(
                std::string("Failed to set '") + key + "' in GPU array interface dict");
        }
    };

    // shape: tuple of ints
    PyObject* shape_tuple = PyTuple_New(ndim);
    if (shape_tuple) {
        for (int i = 0; i < ndim; ++i) {
            PyObject* extent = PyLong_FromLongLong(shape[i]);
            // PyTuple_SetItem steals `extent`, including when it fails.
            if (!extent || PyTuple_SetItem(shape_tuple, i, extent) != 0) {
                Py_CLEAR(shape_tuple);
                break;
            }
        }
    }
    put("shape", shape_tuple);

    // typestr: string
    put("typestr", PyUnicode_FromString(typestr.c_str()));

    // data: tuple (pointer, readonly)
    PyObject* data_tuple = PyTuple_New(2);
    if (data_tuple) {
        PyObject* address = PyLong_FromVoidPtr(data_ptr);
        if (!address
            || PyTuple_SetItem(data_tuple, 0, address) != 0
            || PyTuple_SetItem(data_tuple, 1, PyBool_FromLong(readonly ? 1 : 0)) != 0) {
            Py_CLEAR(data_tuple);
        }
    }
    put("data", data_tuple);

    // version: int (should be 3)
    put("version", PyLong_FromLong(3));

    // strides: None for C-contiguous (optional, can compute if needed).
    // The INCREF is balanced by the reference `put` consumes; the original
    // code leaked it because PyDict_SetItemString does not steal.
    Py_INCREF(Py_None);
    put("strides", Py_None);

    // descr: None for simple types (optional)
    Py_INCREF(Py_None);
    put("descr", Py_None);

    return dict;
}

} // namespace gpu_array
} // namespace iris
Loading
Loading