|
| 1 | +// REQUIRES: opencl-aot, ocloc, target-spir |
| 2 | + |
| 3 | +// RUN: %if any-device-is-gpu %{ %{run-aux} %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %s -o %t1.out %} |
| 4 | +// RUN: %if gpu %{ %{run} %t1.out %} |
| 5 | + |
| 6 | +// RUN: %if any-device-is-cpu %{ %{run-aux} %clangxx -fsycl -fsycl-targets=spir64_x86_64 %s -o %t2.out %} |
| 7 | +// RUN: %if cpu %{ %{run} %t2.out %} |
| 8 | +// |
| 9 | +// Permute the order in which specialization constants are |
| 10 | +// written into the input kernel_bundle to expose alignment |
| 11 | +// issues when loading/storing the value of a composite specialization constant |
| 12 | +// from the specialization constants buffer. |
| 13 | + |
| 14 | +// Currently not supported on GPU because of the bug in IGC. |
| 15 | +// UNSUPPORTED: gpu |
| 16 | +// UNSUPPORTED-TRACKER: GSD-12237 |
| 17 | + |
| 18 | +#include <sycl/detail/core.hpp> |
| 19 | +#include <sycl/kernel_bundle.hpp> |
| 20 | +#include <sycl/specialization_id.hpp> |
| 21 | + |
| 22 | +#include <algorithm> |
| 23 | +#include <array> |
| 24 | +#include <iostream> |
| 25 | +#include <vector> |
| 26 | + |
| 27 | +struct Composite { |
| 28 | + float f; |
| 29 | + int i; |
| 30 | + char c; |
| 31 | + Composite() = delete; |
| 32 | + constexpr Composite(float fVal, int iVal, char cVal) |
| 33 | + : f(fVal), i(iVal), c(cVal) {} |
| 34 | + constexpr Composite(const Composite &) = default; |
| 35 | + bool operator==(const Composite &other) const { |
| 36 | + return f == other.f && i == other.i && c == other.c; |
| 37 | + } |
| 38 | +}; |
| 39 | + |
| 40 | +class TestAlignment; |
| 41 | + |
| 42 | +// Define several 1-byte spec constants and one Composite spec constant. |
| 43 | +// We permute the order in which we call set_specialization_constant on the |
| 44 | +// input kernel_bundle to exercise ordering effects. |
| 45 | +constexpr sycl::specialization_id<char> char_spec0('a'); |
| 46 | +constexpr sycl::specialization_id<char> char_spec1('b'); |
| 47 | +constexpr sycl::specialization_id<char> char_spec2('c'); |
| 48 | +constexpr sycl::specialization_id<Composite> composite_spec(Composite{3.14f, 42, |
| 49 | + 'X'}); |
| 50 | + |
| 51 | +int main() { |
| 52 | + sycl::queue q; |
| 53 | + char char_out0 = 0, char_out1 = 0, char_out2 = 0; |
| 54 | + Composite comp_out{0.0f, 0, 0}; |
| 55 | + |
| 56 | + char char_vals[3] = {'U', 'V', 'W'}; |
| 57 | + Composite set_comp{2.71f, 99, 'Y'}; |
| 58 | + |
| 59 | + // We will permute these 4 spec "items": indices 0..2 -> char specs, |
| 60 | + // index 3 -> composite spec. |
| 61 | + std::vector<int> items = {0, 1, 2, 3}; |
| 62 | + |
| 63 | + // Record whether any permutation exposed a mismatch |
| 64 | + bool all_pass = true; |
| 65 | + int perm_index = 0; |
| 66 | + |
| 67 | + // Try all permutations (24) |
| 68 | + do { |
| 69 | + ++perm_index; |
| 70 | + { |
| 71 | + sycl::buffer<char, 1> b0(&char_out0, sycl::range<1>(1)); |
| 72 | + sycl::buffer<char, 1> b1(&char_out1, sycl::range<1>(1)); |
| 73 | + sycl::buffer<char, 1> b2(&char_out2, sycl::range<1>(1)); |
| 74 | + sycl::buffer<Composite, 1> bc(&comp_out, sycl::range<1>(1)); |
| 75 | + |
| 76 | + q.submit([&](sycl::handler &cgh) { |
| 77 | + // Apply set_specialization_constant in the order defined by this |
| 78 | + // permutation. |
| 79 | + for (int idx : items) { |
| 80 | + switch (idx) { |
| 81 | + case 0: |
| 82 | + cgh.set_specialization_constant<char_spec0>(char_vals[0]); |
| 83 | + break; |
| 84 | + case 1: |
| 85 | + cgh.set_specialization_constant<char_spec1>(char_vals[1]); |
| 86 | + break; |
| 87 | + case 2: |
| 88 | + cgh.set_specialization_constant<char_spec2>(char_vals[2]); |
| 89 | + break; |
| 90 | + case 3: |
| 91 | + cgh.set_specialization_constant<composite_spec>(set_comp); |
| 92 | + break; |
| 93 | + default: |
| 94 | + break; |
| 95 | + } |
| 96 | + } |
| 97 | + |
| 98 | + auto acc0 = b0.template get_access<sycl::access::mode::write>(cgh); |
| 99 | + auto acc1 = b1.template get_access<sycl::access::mode::write>(cgh); |
| 100 | + auto acc2 = b2.template get_access<sycl::access::mode::write>(cgh); |
| 101 | + auto accc = bc.template get_access<sycl::access::mode::write>(cgh); |
| 102 | + |
| 103 | + cgh.single_task<TestAlignment>([=](sycl::kernel_handler kh) { |
| 104 | + acc0[0] = kh.get_specialization_constant<char_spec0>(); |
| 105 | + acc1[0] = kh.get_specialization_constant<char_spec1>(); |
| 106 | + acc2[0] = kh.get_specialization_constant<char_spec2>(); |
| 107 | + accc[0] = kh.get_specialization_constant<composite_spec>(); |
| 108 | + }); |
| 109 | + }); |
| 110 | + q.wait_and_throw(); |
| 111 | + } |
| 112 | + |
| 113 | + // Validate results |
| 114 | + bool fail = (char_out0 != char_vals[0]) || (char_out1 != char_vals[1]) || |
| 115 | + (char_out2 != char_vals[2]) || !(comp_out == set_comp); |
| 116 | + |
| 117 | + if (fail) { |
| 118 | + all_pass = false; |
| 119 | + std::cerr << "FAIL: permutation " << perm_index |
| 120 | + << " produced wrong values" << std::endl; |
| 121 | + std::cerr << " permutation order: "; |
| 122 | + for (int x : items) |
| 123 | + std::cerr << x << ' '; |
| 124 | + std::cerr << std::endl; |
| 125 | + std::cerr << " char_outs: " << char_out0 << ' ' << char_out1 << ' ' |
| 126 | + << char_out2 << std::endl; |
| 127 | + std::cerr << " comp_out: f=" << comp_out.f << " i=" << comp_out.i |
| 128 | + << " c='" << comp_out.c << std::endl; |
| 129 | + break; |
| 130 | + } |
| 131 | + |
| 132 | + } while (std::next_permutation(items.begin(), items.end())); |
| 133 | + |
| 134 | + if (all_pass) { |
| 135 | + std::cout << "PASS: all permutations produced expected values" << std::endl; |
| 136 | + return 0; |
| 137 | + } |
| 138 | + std::cerr << "Some permutation failed - this likely indicates a bug" |
| 139 | + "sensitive to ordering and placement of specialization" |
| 140 | + "constants values in the specialization constants" |
| 141 | + "buffer, like incorrect alignment assumptions when" |
| 142 | + "loading/storing the values, missing padding etc." |
| 143 | + << std::endl; |
| 144 | + return 1; |
| 145 | +} |
0 commit comments