Skip to content

Support coo_matrix in coo_symmetrize and coo_remove_scalar #2662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: branch-25.08
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions cpp/include/raft/sparse/linalg/detail/symmetrize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <raft/core/device_coo_matrix.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/sparse/convert/csr.cuh>
#include <raft/sparse/coo.hpp>
Expand Down Expand Up @@ -148,6 +149,17 @@ void coo_symmetrize(COO<T, IdxT, nnz_t>* in,

out->allocate(in->nnz * 2, in->n_rows, in->n_cols, true, stream);

std::cout << "in->n_rows: " << in->n_rows << std::endl;
std::cout << "in->nnz: " << in->nnz << std::endl;

// raft::print_device_vector("in->rows", in->rows(), in->nnz, std::cout);
// raft::print_device_vector("in->cols", in->cols(), in->nnz, std::cout);
// raft::print_device_vector("in->vals", in->vals(), in->nnz, std::cout);

// raft::print_device_vector("out->rows", out->rows(), out->nnz, std::cout);
// raft::print_device_vector("out->cols", out->cols(), out->nnz, std::cout);
// raft::print_device_vector("out->vals", out->vals(), out->nnz, std::cout);

coo_symmetrize_kernel<TPB_X, T><<<grid, blk, 0, stream>>>(in_row_ind.data(),
in->rows(),
in->cols(),
Expand All @@ -161,6 +173,81 @@ void coo_symmetrize(COO<T, IdxT, nnz_t>* in,
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

/**
 * @brief Takes a COO matrix view which may not be symmetric and symmetrizes
 * it, running a custom reduction function against each value and its
 * transposed value.
 *
 * NOTE(review): `out` must be pre-allocated by the caller with capacity for
 * 2 * in.nnz entries (the legacy COO overload allocates `in->nnz * 2`
 * itself) — consider taking a matrix ref and calling initialize_sparsity()
 * here instead; TODO confirm and rename from `_mytest` before merging.
 *
 * @param in: input COO matrix view; rows are assumed to be sorted
 *            (required by convert::sorted_coo_to_csr)
 * @param out: output symmetrized COO matrix view
 * @param reduction_op: two-argument reduction functor applied to each value
 *                      and its transposed counterpart
 * @param stream: CUDA stream on which to order all work
 */
template <int TPB_X = 128, typename T, typename IdxT, typename nnz_t, typename Lambda>
void coo_symmetrize_mytest(raft::device_coo_matrix_view<T, IdxT, IdxT, nnz_t> in,
                           raft::device_coo_matrix_view<T, IdxT, IdxT, nnz_t> out,
                           Lambda reduction_op,  // two-argument reducer
                           cudaStream_t stream)
{
  auto in_structure  = in.structure_view();
  auto out_structure = out.structure_view();

  auto in_n_rows = in_structure.get_n_rows();
  auto in_nnz    = in_structure.get_nnz();

  auto in_rows = in_structure.get_rows().data();
  auto in_cols = in_structure.get_cols().data();
  auto in_vals = in.get_elements().data();

  auto out_rows = out_structure.get_rows().data();
  auto out_cols = out_structure.get_cols().data();
  auto out_vals = out.get_elements().data();

  // One thread per input row; each thread walks its row's entries.
  dim3 grid(raft::ceildiv(in_n_rows, TPB_X), 1, 1);
  dim3 blk(TPB_X, 1, 1);

  // Build CSR-style row offsets from the sorted COO row array so the kernel
  // can locate each row's span.
  rmm::device_uvector<nnz_t> in_row_ind(in_n_rows, stream);
  convert::sorted_coo_to_csr(in_rows, in_nnz, in_row_ind.data(), in_n_rows, stream);

  coo_symmetrize_kernel<TPB_X, T><<<grid, blk, 0, stream>>>(in_row_ind.data(),
                                                            in_rows,
                                                            in_cols,
                                                            in_vals,
                                                            out_rows,
                                                            out_cols,
                                                            out_vals,
                                                            in_n_rows,
                                                            in_nnz,
                                                            reduction_op);

  // Surface launch-configuration errors without clearing the sticky error.
  RAFT_CUDA_TRY(cudaPeekAtLastError());
}

/**
* @brief Find how much space needed in each row.
* We look through all datapoints and increment the count for each row.
Expand Down
95 changes: 95 additions & 0 deletions cpp/include/raft/sparse/op/detail/filter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <raft/core/device_coo_matrix.hpp>
#include <raft/sparse/coo.hpp>
#include <raft/sparse/detail/cusparse_wrappers.h>
#include <raft/sparse/detail/utils.h>
Expand Down Expand Up @@ -151,6 +152,7 @@ void coo_remove_scalar(COO<T, idx_t, nnz_t>* in,
T scalar,
cudaStream_t stream)
{
std::cout << "original remove scalar-1" << std::endl;
rmm::device_uvector<nnz_t> row_count_nz(in->n_rows, stream);
rmm::device_uvector<nnz_t> row_count(in->n_rows, stream);

Expand All @@ -166,10 +168,20 @@ void coo_remove_scalar(COO<T, idx_t, nnz_t>* in,
in->rows(), in->vals(), in->nnz, scalar, (unsigned long long int*)row_count_nz.data(), stream);
RAFT_CUDA_TRY(cudaPeekAtLastError());

std::cout << "original remove scalar" << std::endl;
std::cout << "in->n_rows: " << in->n_rows << std::endl;

// raft::print_device_vector("original row_count_nz", row_count_nz.data(), in->n_rows, std::cout);

thrust::device_ptr<nnz_t> d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data());
nnz_t out_nnz =
thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in->n_rows);

// std::cout << "original out_nnz: " << out_nnz << std::endl;

// raft::print_device_vector("row_count_nz", row_count_nz.data(), in->n_rows, std::cout);
// raft::print_device_vector("row_count", row_count.data(), in->n_rows, std::cout);

out->allocate(out_nnz, in->n_rows, in->n_cols, false, stream);

coo_remove_scalar<TPB_X, T, idx_t, nnz_t>(in->rows(),
Expand All @@ -187,6 +199,89 @@ void coo_remove_scalar(COO<T, idx_t, nnz_t>* in,
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

/**
 * @brief Removes the values matching a particular scalar from a COO formatted
 * sparse matrix.
 *
 * NOTE(review): `out` is taken as an owning matrix (not a view) because its
 * sparsity is only known after counting survivors — initialize_sparsity()
 * cannot be called on a view. TODO: rename from `_mytest` before merging.
 *
 * @param in: input COO matrix view; rows are assumed sorted by row
 * @param out: output COO matrix; its sparsity is initialized here once the
 *             surviving nnz count is computed
 * @param scalar: scalar value whose entries are removed
 * @param stream: CUDA stream on which to order all work
 */
template <int TPB_X, typename T, typename idx_t, typename nnz_t>
void coo_remove_scalar_mytest(raft::device_coo_matrix_view<T, idx_t, idx_t, nnz_t> in,
                              raft::device_coo_matrix<T, idx_t, idx_t, nnz_t>& out,
                              T scalar,
                              cudaStream_t stream)
{
  auto in_structure = in.structure_view();

  auto in_n_rows = in_structure.get_n_rows();
  auto in_nnz    = in_structure.get_nnz();

  auto in_rows = in_structure.get_rows().data();
  auto in_cols = in_structure.get_cols().data();
  auto in_vals = in.get_elements().data();

  // row_count    = total entries per row
  // row_count_nz = entries per row that survive the scalar filter
  rmm::device_uvector<nnz_t> row_count_nz(in_n_rows, stream);
  rmm::device_uvector<nnz_t> row_count(in_n_rows, stream);

  RAFT_CUDA_TRY(cudaMemsetAsync(
    row_count_nz.data(), 0, static_cast<nnz_t>(in_n_rows) * sizeof(nnz_t), stream));
  RAFT_CUDA_TRY(cudaMemsetAsync(
    row_count.data(), 0, static_cast<nnz_t>(in_n_rows) * sizeof(nnz_t), stream));

  linalg::coo_degree(in_rows, in_nnz, row_count.data(), stream);
  RAFT_CUDA_TRY(cudaPeekAtLastError());

  linalg::coo_degree_scalar(in_rows, in_vals, in_nnz, scalar, (nnz_t*)row_count_nz.data(), stream);
  RAFT_CUDA_TRY(cudaPeekAtLastError());

  // Total surviving entries across all rows; thrust::reduce returns the
  // result to the host, implicitly synchronizing the stream.
  thrust::device_ptr<nnz_t> d_row_count_nz = thrust::device_pointer_cast(row_count_nz.data());
  nnz_t out_nnz =
    thrust::reduce(rmm::exec_policy(stream), d_row_count_nz, d_row_count_nz + in_n_rows);

  // Allocate the output now that the surviving nnz is known.
  out.initialize_sparsity(out_nnz);

  auto out_structure = out.structure_view();
  auto out_rows      = out_structure.get_rows().data();
  auto out_cols      = out_structure.get_cols().data();
  auto out_vals      = out.get_elements().data();

  coo_remove_scalar<TPB_X, T, idx_t, nnz_t>(in_rows,
                                            in_cols,
                                            in_vals,
                                            in_nnz,
                                            out_rows,
                                            out_cols,
                                            out_vals,
                                            row_count_nz.data(),
                                            row_count.data(),
                                            scalar,
                                            in_n_rows,
                                            stream);

  // Surface launch-configuration errors without clearing the sticky error.
  RAFT_CUDA_TRY(cudaPeekAtLastError());
}

/**
* @brief Removes zeros from a COO formatted sparse matrix.
*
Expand Down