Skip to content

[libspirv][ptx-nvidiacl] Change __clc__group_scratch size to 32 x i128 #18431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: sycl
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions libclc/libspirv/lib/ptx-nvidiacl/group/collectives_helpers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,60 @@
; 128 bits per warp is sufficient for all fundamental data types and complex types.
; Reducing storage for small data types or increasing it for user-defined types
; will likely require an additional pass to track group algorithm usage.
;
; The two declarations below are the removed and added sides of the diff:
; [128 x i64] is 128 * 8 = 1024 bytes, [32 x i128] is 32 * 16 = 512 bytes.
; A single module can only contain one definition of @__clc__group_scratch.
@__clc__group_scratch = internal addrspace(3) global [128 x i64] undef, align 1
@__clc__group_scratch = internal addrspace(3) global [32 x i128] undef, align 1

; Returns the address of element 0 of @__clc__group_scratch as an
; i8 addrspace(3) pointer. Takes no arguments.
define i8 addrspace(3)* @__clc__get_group_scratch_bool() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to i8 addrspace(3)*
ret i8 addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as an
; i8 addrspace(3) pointer. Takes no arguments.
define i8 addrspace(3)* @__clc__get_group_scratch_char() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to i8 addrspace(3)*
ret i8 addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as an
; i16 addrspace(3) pointer. Takes no arguments.
define i16 addrspace(3)* @__clc__get_group_scratch_short() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to i16 addrspace(3)*
ret i16 addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as an
; i32 addrspace(3) pointer. Takes no arguments.
define i32 addrspace(3)* @__clc__get_group_scratch_int() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to i32 addrspace(3)*
ret i32 addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as an
; i64 addrspace(3) pointer. Takes no arguments.
define i64 addrspace(3)* @__clc__get_group_scratch_long() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; As written, the source and destination types of this cast are identical.
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to i64 addrspace(3)*
ret i64 addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as a
; half addrspace(3) pointer. Takes no arguments.
define half addrspace(3)* @__clc__get_group_scratch_half() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to half addrspace(3)*
ret half addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as a
; float addrspace(3) pointer. Takes no arguments.
define float addrspace(3)* @__clc__get_group_scratch_float() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to float addrspace(3)*
ret float addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as a
; double addrspace(3) pointer. Takes no arguments.
define double addrspace(3)* @__clc__get_group_scratch_double() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to double addrspace(3)*
ret double addrspace(3)* %cast
}
Expand All @@ -77,21 +77,21 @@ entry:

; Returns the address of element 0 of @__clc__group_scratch as a
; %complex_half addrspace(3) pointer. Takes no arguments.
; %complex_half is declared outside the lines shown in this diff hunk.
define %complex_half addrspace(3)* @__clc__get_group_scratch_complex_half() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to %complex_half addrspace(3)*
ret %complex_half addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as a
; %complex_float addrspace(3) pointer. Takes no arguments.
; %complex_float is declared outside the lines shown in this diff hunk.
define %complex_float addrspace(3)* @__clc__get_group_scratch_complex_float() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to %complex_float addrspace(3)*
ret %complex_float addrspace(3)* %cast
}

; Returns the address of element 0 of @__clc__group_scratch as a
; %complex_double addrspace(3) pointer. Takes no arguments.
; %complex_double is declared outside the lines shown in this diff hunk.
define %complex_double addrspace(3)* @__clc__get_group_scratch_complex_double() nounwind alwaysinline {
entry:
; The two %ptr lines are the removed ([128 x i64]) and added ([32 x i128])
; sides of the diff; valid IR keeps exactly one definition of %ptr.
%ptr = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
%ptr = getelementptr inbounds [32 x i128], [32 x i128] addrspace(3)* @__clc__group_scratch, i64 0, i64 0
; NOTE(review): the operand type still names i64 although the added GEP
; indexes an array of i128 -- confirm this spelling is intended.
%cast = bitcast i64 addrspace(3)* %ptr to %complex_double addrspace(3)*
ret %complex_double addrspace(3)* %cast
}
Loading