Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Buildkite pipeline for GPU CI: runs the package test suite on the JuliaGPU
# agent pool, covering CUDA and AMDGPU backends, each on the current stable
# Julia ("1") and the LTS release ("1.10").
# NOTE(review): indentation reconstructed — the original was flattened by
# extraction; structure follows the standard JuliaCI plugin layout.
env:
  # Encrypted Codecov upload token; decrypted by the Buildkite agent.
  SECRET_CODECOV_TOKEN: "EEXB5DS9rR3VXck1NzJougBwxy+3bGKAX9sq1hTwe+rvftmQzdnpy3MlJXLUXQXnBvjezhHZpt07nlG1p9Pi39bnUIddPJHJVVbtqjiGbVuAjVno2tcm8cvi/mYDPoJw7hs8G36IVDb3wklO9wAiO7vwO2br8LQOHMNZBTCUfkb30aT3e/yBnb2QiwNspKCvcd7XYpsmMy78Egdg219sfZ783fG/H7VHv0YzZThj+IAUhm8ftsPURHRmHk28wSdFGzwI2CX8nEx4LgtDhqa+JH84YajIiwWaFymfkw6phpSF3KQNlR53qRWUDD6hClhOizmYyQuZZ8TO5gnNDsrGLg==;U2FsdGVkX1/pfvZY/FJSU7D+DE+6I18s5BSfa63C+31RoDKiHqENegG4whXuxZ5a6YE0XegF8jOretp+E7FiyQ=="

steps:
  - label: "Julia v1 -- CUDA"
    plugins:
      - JuliaCI/julia#v1:
          version: "1"
      - JuliaCI/julia-test#v1: ~
      - JuliaCI/julia-coverage#v1:
          dirs:
            - src
            - ext
    agents:
      queue: "juliagpu"
      cuda: "*"
    # Allow commits tagged [skip tests] to bypass the GPU runs.
    if: build.message !~ /\[skip tests\]/
    timeout_in_minutes: 30

  - label: "Julia LTS -- CUDA"
    plugins:
      - JuliaCI/julia#v1:
          version: "1.10" # "lts" isn't valid
      - JuliaCI/julia-test#v1: ~
      - JuliaCI/julia-coverage#v1:
          dirs:
            - src
            - ext
    agents:
      queue: "juliagpu"
      cuda: "*"
    if: build.message !~ /\[skip tests\]/
    timeout_in_minutes: 30

  - label: "Julia v1 -- AMDGPU"
    plugins:
      - JuliaCI/julia#v1:
          version: "1"
      - JuliaCI/julia-test#v1: ~
      - JuliaCI/julia-coverage#v1:
          dirs:
            - src
            - ext
    agents:
      queue: "juliagpu"
      rocm: "*"
      rocmgpu: "*"
    if: build.message !~ /\[skip tests\]/
    timeout_in_minutes: 30

  - label: "Julia LTS -- AMDGPU"
    plugins:
      - JuliaCI/julia#v1:
          version: "1.10" # "lts" isn't valid
      - JuliaCI/julia-test#v1: ~
      - JuliaCI/julia-coverage#v1:
          dirs:
            - src
            - ext
    agents:
      queue: "juliagpu"
      rocm: "*"
      rocmgpu: "*"
    if: build.message !~ /\[skip tests\]/
    timeout_in_minutes: 30
4 changes: 1 addition & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: CI
on:
push:
branches:
- 'master'
- 'main'
- 'release-'
tags: '*'
Expand All @@ -21,7 +20,6 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6' # previous LTS release
- 'lts' # current LTS release
- '1' # current stable release
os:
Expand All @@ -45,4 +43,4 @@ jobs:
- uses: codecov/codecov-action@v5
with:
file: lcov.info


22 changes: 19 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,26 +1,42 @@
# Project manifest for Strided.jl.
# NOTE(review): the diff rendering kept both pre- and post-change lines
# (duplicate authors/version, two StridedViews compat entries, two [targets]
# lines); this is the reconstructed merged state.
name = "Strided"
uuid = "5e0ebb24-38b0-5f93-81fe-25c709ecae67"
version = "2.3.2"
authors = ["Lukas Devos <[email protected]>", "Maarten Van Damme <[email protected]>", "Jutho Haegeman <[email protected]>"]

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
StridedViews = "4db3bf67-4bd7-4b4e-b153-31dc3fb37143"
TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"

# Optional GPU backends; loading one activates the matching extension below.
[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"

[extensions]
StridedAMDGPUExt = "AMDGPU"
StridedGPUArraysExt = "GPUArrays"
StridedCUDAExt = "CUDA"

[compat]
AMDGPU = "2"
Aqua = "0.8"
CUDA = "5"
GPUArrays = "11.4.1"
LinearAlgebra = "1.6"
Random = "1.6"
StridedViews = "0.4.5"
Test = "1.6"
TupleTools = "1.6"
julia = "1.6"

[extras]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Random", "Aqua", "AMDGPU", "CUDA", "GPUArrays"]
16 changes: 16 additions & 0 deletions ext/StridedAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
    StridedAMDGPUExt

Package extension loaded when AMDGPU.jl is available.  Routes `copy!` between
two `ROCArray`-backed `StridedView`s through the GPUArrays broadcasting
machinery instead of the CPU kernels in Strided.
"""
module StridedAMDGPUExt

using Strided, StridedViews, AMDGPU
using AMDGPU: Adapt
using AMDGPU: GPUArrays

# The wrapper functions a StridedView can carry (its `F` type parameter).
const ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)}

# copy! for two ROCArray-backed StridedViews: lower the copy to a broadcast of
# `identity` over `src` and hand it to GPUArrays' copy kernel.
# NOTE(review): `GPUArrays._copyto!` is a non-exported internal — confirm it is
# a stable enough entry point across GPUArrays versions.
function Base.copy!(dst::StridedView{TD, ND, TAD, FD}, src::StridedView{TS, NS, TAS, FS}) where {TD <: Number, ND, TAD <: ROCArray{TD}, FD <: ALL_FS, TS <: Number, NS, TAS <: ROCArray{TS}, FS <: ALL_FS}
# Broadcast style is derived from the parent array type of the source.
bc_style = Base.Broadcast.BroadcastStyle(TAS)
bc = Base.Broadcast.Broadcasted(bc_style, identity, (src,), axes(dst))
GPUArrays._copyto!(dst, bc)
return dst
end

end
16 changes: 16 additions & 0 deletions ext/StridedCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
    StridedCUDAExt

Package extension loaded when CUDA.jl is available.  Redirects `copy!`
between two `CuArray`-backed `StridedView`s into the GPUArrays broadcast
copy kernel, so the copy runs on the device.
"""
module StridedCUDAExt

using Strided, StridedViews, CUDA
using CUDA: Adapt, KernelAdaptor
using CUDA: GPUArrays

# Wrapper functions that a StridedView may apply to its parent data.
const ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)}

# Express the copy as a broadcast of `identity` over the source view and let
# GPUArrays execute it on the GPU.
function Base.copy!(dst::StridedView{T1, N1, A1, F1},
                    src::StridedView{T2, N2, A2, F2}) where {T1 <: Number, N1,
                                                             A1 <: CuArray{T1},
                                                             F1 <: ALL_FS,
                                                             T2 <: Number, N2,
                                                             A2 <: CuArray{T2},
                                                             F2 <: ALL_FS}
    style = Base.Broadcast.BroadcastStyle(A2)
    plan = Base.Broadcast.Broadcasted(style, identity, (src,), axes(dst))
    GPUArrays._copyto!(dst, plan)
    return dst
end

end
15 changes: 15 additions & 0 deletions ext/StridedGPUArraysExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
    StridedGPUArraysExt

Package extension loaded when GPUArrays.jl is available.  Provides the
backend-agnostic glue for GPU-backed `StridedView`s: a KernelAbstractions
backend query and a broadcast style.
"""
module StridedGPUArraysExt

using Strided, GPUArrays
using GPUArrays: Adapt, KernelAbstractions

# Wrapper functions a StridedView may carry.  Declared `const` for a
# type-stable global binding, consistent with StridedAMDGPUExt and
# StridedCUDAExt (was a plain global).
const ALL_FS = Union{typeof(adjoint), typeof(conj), typeof(identity), typeof(transpose)}

# A GPU-backed StridedView runs kernels on the backend of its parent array.
KernelAbstractions.get_backend(sv::StridedView{T, N, TA}) where {T, N, TA <: AnyGPUArray{T}} = KernelAbstractions.get_backend(parent(sv))

# Give a GPU-backed StridedView the broadcast style of its parent array type,
# re-parametrized to the view's dimensionality.
# NOTE(review): this adds the *value* method `BroadcastStyle(x)`; Base's
# broadcast machinery usually queries `BroadcastStyle(::Type)` — confirm the
# intended callers invoke this on instances.
function Base.Broadcast.BroadcastStyle(gpu_sv::StridedView{T, N, TA}) where {T, N, TA <: AnyGPUArray{T}}
    raw_style = Base.Broadcast.BroadcastStyle(TA)
    return typeof(raw_style)(Val(N)) # sets the dimensionality correctly
end

end
18 changes: 18 additions & 0 deletions test/amd.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# copy! tests for ROCArray-backed StridedViews: for every element type,
# wrapper-function pair (f1, f2) and matrix size (including empty matrices),
# the StridedView copy must agree with copying the plain ROCArray.
# NOTE(review): review-comment chrome interleaved in the scraped source was
# removed; code reconstructed from the surviving lines.
for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
    @testset "Copy with ROCStridedView: $T, $f1, $f2" for f2 in (identity, conj, adjoint, transpose), f1 in (identity, conj, transpose, adjoint)
        for m1 in (0, 16, 32), m2 in (0, 16, 32)
            if iszero(m1 * m2)
                # Empty case: allocate uninitialized (presumably to avoid
                # randn on zero-size arrays — TODO confirm).
                A1 = AMDGPU.ROCMatrix{T}(undef, (m1, m2))
            else
                A1 = ROCMatrix(randn(T, (m1, m2)))
            end
            A2 = similar(A1)
            A1c = copy(A1)
            A2c = copy(A2)
            B1 = f1(StridedView(A1c))
            B2 = f2(StridedView(A2c))
            # Skip wrapper pairs that produce incompatible shapes.
            axes(f1(A1)) == axes(f2(A2)) || continue
            @test collect(ROCMatrix(copy!(f2(A2), f1(A1)))) == AMDGPU.Adapt.adapt(Vector{T}, copy!(B2, B1))
        end
    end
end
14 changes: 14 additions & 0 deletions test/cuda.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# copy! tests for CuArray-backed StridedViews: every wrapper-function pair
# (f1, f2) and matrix size is checked against copying the plain CuMatrix.
for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
    @testset "Copy with CuStridedView: $T, $f1, $f2" for f2 in (identity, conj, adjoint, transpose), f1 in (identity, conj, transpose, adjoint)
        for m1 in (0, 16, 32), m2 in (0, 16, 32)
            src = CUDA.randn(T, (m1, m2))
            dst = similar(src)
            # Independent copies so the StridedView path cannot alias the
            # plain-array reference path.
            sv_src = f1(StridedView(copy(src)))
            sv_dst = f2(StridedView(copy(dst)))
            # Skip wrapper pairs whose shapes are incompatible.
            if axes(f1(src)) != axes(f2(dst))
                continue
            end
            @test collect(CuMatrix(copy!(f2(dst), f1(src)))) == CUDA.Adapt.adapt(Vector{T}, copy!(sv_dst, sv_src))
        end
    end
end
43 changes: 28 additions & 15 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,38 @@ using LinearAlgebra
using Random
using Strided
using Strided: StridedView
using Aqua
using AMDGPU, CUDA, GPUArrays

Random.seed!(1234)

# On Buildkite (the GPU CI, see .buildkite/pipeline.yml) only the GPU tests
# run; everywhere else the full CPU suite plus Aqua runs.
# NOTE(review): the diff rendering duplicated the pre-change lines of this
# section; this is the reconstructed merged state.
is_buildkite = get(ENV, "BUILDKITE", "false") == "true"

if !is_buildkite
    println("Base.Threads.nthreads() = $(Base.Threads.nthreads())")

    println("Running tests single-threaded:")
    Strided.disable_threads()
    include("othertests.jl")
    include("blasmultests.jl")

    println("Running tests multi-threaded:")
    Strided.enable_threads()
    Strided.set_num_threads(Base.Threads.nthreads() + 1)
    include("othertests.jl")
    include("blasmultests.jl")

    # Exercise the threaded mul! path separately.
    Strided.enable_threaded_mul()
    include("blasmultests.jl")
    Strided.disable_threaded_mul()

    Aqua.test_all(Strided; piracies = false)
end

# GPU test files are only included when the corresponding backend is usable.
if CUDA.functional()
    include("cuda.jl")
end

if AMDGPU.functional()
    include("amd.jl")
end
Loading