-
Notifications
You must be signed in to change notification settings - Fork 29
Open
Labels
bugSomething isn't workingSomething isn't working
Description
#include "random"
#include "hierarchical_kv/merlin_hashtable.cuh"
using K = uint64_t;
using S = uint64_t;
using V = float;
using MerlinHashTable = nv::merlin::HashTable<K, V, S>;
using TableOption = nv::merlin::HashTableOptions;
using namespace nv::merlin;
using namespace std;
#define CUCO_CUDA_TRY(...) \
GET_CUCO_CUDA_TRY_MACRO(__VA_ARGS__, CUCO_CUDA_TRY_2, CUCO_CUDA_TRY_1) \
(__VA_ARGS__)
#define GET_CUCO_CUDA_TRY_MACRO(_1, _2, NAME, ...) NAME
#define CUCO_CUDA_TRY_2(_call, _exception_type) \
do { \
cudaError_t const error = (_call); \
if (cudaSuccess != error) { \
cudaGetLastError(); \
throw _exception_type{std::string{"ERROR at "} + __FILE__ + ":" + \
CUCO_STRINGIFY(__LINE__) + ": " + \
cudaGetErrorName(error) + " " + \
cudaGetErrorString(error)}; \
} \
} while (0);
#define CUCO_CUDA_TRY_1(_call) CUCO_CUDA_TRY_2(_call, cuco::cuda_error)
__device__ void select() {
}
void test_merlin(bool block = true) {
TableOption options;
options.init_capacity = 128 * 1024 * 1024UL;
options.max_capacity = 128 * 1024 * 1024UL;
options.dim = 8;
options.max_hbm_for_vectors = nv::merlin::GB(30);
options.evict_strategy = EvictStrategy::kLru;
// MerlinHashTable table;
std::shared_ptr<MerlinHashTable> table = std::make_shared<MerlinHashTable>();
table->init(options);
size_t total_key_length = 1ul << 27;
size_t total_key_range = 1ul << 27;
size_t total_key_per_op = 1ul << 20;
int dim_size = 8;
K* h_keys;
V* h_values;
K* d_keys;
V* d_values;
K* h_cold_keys;
K* d_cold_keys;
K* d_input_keys;
V* d_output_values;
bool* d_founds;
K* d_evicted_keys;
V* d_evicted_values;
S* d_evicted_scores;
h_keys = static_cast<K*>(std::malloc(total_key_length * sizeof(K)));
h_values = static_cast<V*>(std::malloc(total_key_length * dim_size * sizeof(V)));
h_cold_keys = static_cast<K*>(std::malloc(total_key_length * sizeof(K)));
CUCO_CUDA_TRY(cudaMalloc(&d_keys, total_key_length * sizeof(K)));
CUCO_CUDA_TRY(cudaMalloc(&d_values, total_key_length * dim_size * sizeof(V)));
CUCO_CUDA_TRY(cudaMalloc(&d_cold_keys, total_key_length * sizeof(K)));
CUCO_CUDA_TRY(cudaMalloc(&d_input_keys, total_key_per_op * sizeof(K)));
CUCO_CUDA_TRY(cudaMalloc(&d_output_values, total_key_per_op * dim_size * sizeof(V)));
CUCO_CUDA_TRY(cudaMalloc(&d_founds, total_key_per_op * sizeof(bool)));
CUCO_CUDA_TRY(cudaMemset(d_output_values, 0, total_key_per_op * dim_size * sizeof(V)));
CUCO_CUDA_TRY(cudaMemset(d_founds, 0, total_key_per_op * sizeof(bool)));
CUCO_CUDA_TRY(cudaMalloc(&d_evicted_keys, total_key_per_op * sizeof(K)));
CUCO_CUDA_TRY(cudaMalloc(&d_evicted_values, total_key_per_op * dim_size * sizeof(V)));
CUCO_CUDA_TRY(cudaMalloc(&d_evicted_scores, total_key_per_op * sizeof(S)));
CUCO_CUDA_TRY(cudaMemset(d_evicted_keys, 0, total_key_per_op * sizeof(K)));
CUCO_CUDA_TRY(cudaMemset(d_evicted_values, 0, total_key_per_op * dim_size * sizeof(V)));
CUCO_CUDA_TRY(cudaMemset(d_evicted_scores, 0, total_key_per_op * sizeof(S)));
std::random_device dev;
std::mt19937 mt(dev());
std::uniform_int_distribution<uint64_t> dist(0, 1ul << 50);
for (int i = 0; i < total_key_length; i++) {
h_keys[i] = dist(mt) % total_key_range;
h_cold_keys[i] = dist(mt);
for (int j = 0; j < dim_size; j++) {
h_values[i * dim_size + j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);;
}
}
cudaMemcpy(d_keys, h_keys, total_key_length * sizeof(K), cudaMemcpyHostToDevice);
cudaMemcpy(d_cold_keys, h_cold_keys, total_key_length * sizeof(K), cudaMemcpyHostToDevice);
cudaMemcpy(d_values, h_values, total_key_length * dim_size * sizeof(V), cudaMemcpyHostToDevice);
cudaStream_t stream;
cudaStreamCreate(&stream);
cudaStreamSynchronize(stream);
cout << "start" << endl;
int count = 0;
for (;;) {
size_t start_index = dist(mt) % (total_key_length - total_key_per_op);
cudaMemcpyAsync(d_input_keys, d_keys + start_index, total_key_per_op * sizeof(K),
cudaMemcpyDeviceToDevice, stream);
cudaMemcpyAsync(d_input_keys, d_cold_keys + start_index, total_key_per_op / 10 * sizeof(K),
cudaMemcpyDeviceToDevice, stream);
table->find(total_key_per_op, d_input_keys, d_output_values, d_founds, nullptr, stream);
start_index = dist(mt) % (total_key_length - total_key_per_op);
cudaMemcpyAsync(d_input_keys, d_keys + start_index, total_key_per_op * sizeof(K),
cudaMemcpyDeviceToDevice, stream);
cudaMemcpyAsync(d_input_keys, d_cold_keys + start_index, total_key_per_op / 10 * sizeof(K),
cudaMemcpyDeviceToDevice, stream);
table->insert_and_evict(total_key_per_op, d_input_keys, d_values + start_index * dim_size,
nullptr, d_evicted_keys, d_evicted_values, d_evicted_scores, stream);
cudaStreamSynchronize(stream);
count++;
if (count % 100 == 0) {
}
cout << "find and insert_and_evict: " << count << endl;
}
}
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working