Skip to content

Commit f8fdee8

Browse files
committed
implement emult on dense vectors for OpenCL backend
1 parent 2a02b08 commit f8fdee8

File tree

4 files changed

+205
-0
lines changed

4 files changed

+205
-0
lines changed

src/opencl/cl_algo_registry.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include <opencl/cl_v_count_mf.hpp>
3939
#include <opencl/cl_v_eadd.hpp>
4040
#include <opencl/cl_v_eadd_fdb.hpp>
41+
#include <opencl/cl_v_emult.hpp>
4142
#include <opencl/cl_v_map.hpp>
4243
#include <opencl/cl_v_reduce.hpp>
4344
#include <opencl/cl_vxm.hpp>
@@ -99,6 +100,11 @@ namespace spla {
99100
g_registry->add(MAKE_KEY_CL_0("m_extract_row", INT), std::make_shared<Algo_m_extract_row_cl<T_INT>>());
100101
g_registry->add(MAKE_KEY_CL_0("m_extract_row", UINT), std::make_shared<Algo_m_extract_row_cl<T_UINT>>());
101102
g_registry->add(MAKE_KEY_CL_0("m_extract_row", FLOAT), std::make_shared<Algo_m_extract_row_cl<T_FLOAT>>());
103+
104+
// algorthm v_emult
105+
g_registry->add(MAKE_KEY_CL_0("v_emult", INT), std::make_shared<Algo_v_emult_cl<T_INT>>());
106+
g_registry->add(MAKE_KEY_CL_0("v_emult", UINT), std::make_shared<Algo_v_emult_cl<T_UINT>>());
107+
g_registry->add(MAKE_KEY_CL_0("v_emult", FLOAT), std::make_shared<Algo_v_emult_cl<T_FLOAT>>());
102108
}
103109

104110
}// namespace spla

src/opencl/cl_v_emult.hpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/**********************************************************************************/
2+
/* This file is part of spla project */
3+
/* https://github.com/JetBrains-Research/spla */
4+
/**********************************************************************************/
5+
/* MIT License */
6+
/* */
7+
/* Copyright (c) 2025 SparseLinearAlgebra */
8+
/* */
9+
/* Permission is hereby granted, free of charge, to any person obtaining a copy */
10+
/* of this software and associated documentation files (the "Software"), to deal */
11+
/* in the Software without restriction, including without limitation the rights */
12+
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
13+
/* copies of the Software, and to permit persons to whom the Software is */
14+
/* furnished to do so, subject to the following conditions: */
15+
/* */
16+
/* The above copyright notice and this permission notice shall be included in all */
17+
/* copies or substantial portions of the Software. */
18+
/* */
19+
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
20+
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
21+
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
22+
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
23+
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
24+
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
25+
/* SOFTWARE. */
26+
/**********************************************************************************/
27+
28+
#ifndef SPLA_CL_VECTOR_EMULT_HPP
29+
#define SPLA_CL_VECTOR_EMULT_HPP
30+
31+
#include <schedule/schedule_tasks.hpp>
32+
33+
#include <core/dispatcher.hpp>
34+
#include <core/registry.hpp>
35+
#include <core/top.hpp>
36+
#include <core/tscalar.hpp>
37+
#include <core/ttype.hpp>
38+
#include <core/tvector.hpp>
39+
40+
#include <opencl/cl_counter.hpp>
41+
#include <opencl/cl_fill.hpp>
42+
#include <opencl/cl_formats.hpp>
43+
#include <opencl/generated/auto_vector_emult.hpp>
44+
45+
namespace spla {
46+
47+
template<typename T>
48+
class Algo_v_emult_cl final : public RegistryAlgo {
49+
public:
50+
~Algo_v_emult_cl() override = default;
51+
52+
std::string get_name() override {
53+
return "v_emult";
54+
}
55+
56+
std::string get_description() override {
57+
return "parallel vector element-wise mult on opencl device";
58+
}
59+
60+
Status execute(const DispatchContext& ctx) override {
61+
auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
62+
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
63+
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
64+
65+
if (u->is_valid(FormatVector::AccDense) && v->is_valid(FormatVector::AccDense)) {
66+
return execute_dn2dn(ctx);
67+
}
68+
69+
return execute_dn2dn(ctx);
70+
}
71+
72+
private:
73+
Status execute_dn2dn(const DispatchContext& ctx) {
74+
TIME_PROFILE_SCOPE("cl/vector_emult_dn2dn");
75+
76+
auto t = ctx.task.template cast_safe<ScheduleTask_v_emult>();
77+
ref_ptr<TVector<T>> r = t->r.template cast_safe<TVector<T>>();
78+
ref_ptr<TVector<T>> u = t->u.template cast_safe<TVector<T>>();
79+
ref_ptr<TVector<T>> v = t->v.template cast_safe<TVector<T>>();
80+
ref_ptr<TOpBinary<T, T, T>> op = t->op.template cast_safe<TOpBinary<T, T, T>>();
81+
82+
std::shared_ptr<CLProgram> program;
83+
if (!ensure_kernel(op, program)) return Status::CompilationError;
84+
85+
r->validate_wd(FormatVector::AccDense);
86+
u->validate_rw(FormatVector::AccDense);
87+
v->validate_rw(FormatVector::AccDense);
88+
89+
auto* p_cl_r = r->template get<CLDenseVec<T>>();
90+
const auto* p_cl_u = u->template get<CLDenseVec<T>>();
91+
const auto* p_cl_v = v->template get<CLDenseVec<T>>();
92+
auto* p_cl_acc = get_acc_cl();
93+
auto& queue = p_cl_acc->get_queue_default();
94+
95+
const uint n = r->get_n_rows();
96+
97+
auto kernel = program->make_kernel("dense_to_dense");
98+
kernel.setArg(0, p_cl_r->Ax);
99+
kernel.setArg(1, p_cl_u->Ax);
100+
kernel.setArg(2, p_cl_v->Ax);
101+
kernel.setArg(3, n);
102+
kernel.setArg(4, r->get_fill_value());
103+
104+
cl::NDRange global(p_cl_acc->get_default_wgs() * div_up_clamp(n, p_cl_acc->get_default_wgs(), 1u, 1024u));
105+
cl::NDRange local(p_cl_acc->get_default_wgs());
106+
queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local);
107+
108+
return Status::Ok;
109+
}
110+
111+
bool ensure_kernel(const ref_ptr<TOpBinary<T, T, T>>& op, std::shared_ptr<CLProgram>& program) {
112+
CLProgramBuilder program_builder;
113+
program_builder
114+
.set_name("vector_emult")
115+
.add_type("TYPE", get_ttype<T>().template as<Type>())
116+
.add_op("OP_BINARY", op.template as<OpBinary>())
117+
.set_source(source_vector_emult)
118+
.acquire();
119+
120+
program = program_builder.get_program();
121+
122+
return true;
123+
}
124+
};
125+
126+
}// namespace spla
127+
128+
#endif
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
////////////////////////////////////////////////////////////////////
2+
// Copyright (c) 2021 - 2025 SparseLinearAlgebra
3+
// Autogenerated file, do not modify
4+
////////////////////////////////////////////////////////////////////
5+
6+
#pragma once
7+
8+
static const char source_vector_emult[] = R"(
9+
10+
11+
12+
__kernel void dense_to_dense(__global TYPE* g_rx,
13+
__global const TYPE* g_ux,
14+
__global const TYPE* g_vx,
15+
const uint n,
16+
const TYPE fill_value) {
17+
const uint gid = get_global_id(0);
18+
const uint gsize = get_global_size(0);
19+
20+
for (uint i = gid; i < n; i += gsize) {
21+
TYPE u = g_ux[i];
22+
TYPE v = g_vx[i];
23+
g_rx[i] = u != fill_value && v != fill_value ? OP_BINARY(u, v) : fill_value;
24+
}
25+
}
26+
27+
)";

src/opencl/kernels/vector_emult.cl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
2+
/**********************************************************************************/
3+
/* This file is part of spla project */
4+
/* https://github.com/SparseLinearAlgebra/spla */
5+
/**********************************************************************************/
6+
/* MIT License */
7+
/* */
8+
/* Copyright (c) 2023 SparseLinearAlgebra */
9+
/* */
10+
/* Permission is hereby granted, free of charge, to any person obtaining a copy */
11+
/* of this software and associated documentation files (the "Software"), to deal */
12+
/* in the Software without restriction, including without limitation the rights */
13+
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
14+
/* copies of the Software, and to permit persons to whom the Software is */
15+
/* furnished to do so, subject to the following conditions: */
16+
/* */
17+
/* The above copyright notice and this permission notice shall be included in all */
18+
/* copies or substantial portions of the Software. */
19+
/* */
20+
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
21+
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
22+
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
23+
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
24+
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
25+
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
26+
/* SOFTWARE. */
27+
/**********************************************************************************/
28+
29+
#include "common_def.cl"
30+
31+
__kernel void dense_to_dense(__global TYPE* g_rx,
32+
__global const TYPE* g_ux,
33+
__global const TYPE* g_vx,
34+
const uint n,
35+
const TYPE fill_value) {
36+
const uint gid = get_global_id(0);
37+
const uint gsize = get_global_size(0);
38+
39+
for (uint i = gid; i < n; i += gsize) {
40+
TYPE u = g_ux[i];
41+
TYPE v = g_vx[i];
42+
g_rx[i] = u != fill_value && v != fill_value ? OP_BINARY(u, v) : fill_value;
43+
}
44+
}

0 commit comments

Comments
 (0)