Skip to content

Commit ddc9181

Browse files
Siqiao Chen, facebook-github-bot
Siqiao Chen
authored and committed
improve write performance by ~10x (#4277)
Summary: X-link: facebookresearch/FBGEMM#1354 We used [] to get a pointer to the i-th entry in weights, which is very slow. In this diff, we use raw pointer access to get the i-th pointer. This improves write performance by ~10x and brings write performance to the same scale as read performance. Reviewed By: q10 Differential Revision: D76073692
1 parent cf90aac commit ddc9181

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache.h

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -232,10 +232,12 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
232232
CHECK(indices.is_contiguous());
233233
CHECK(weights.is_contiguous());
234234
CHECK_EQ(indices.size(0), weights.size(0));
235+
int64_t stride = weights.size(1);
236+
auto indices_data_ptr = indices.data_ptr<index_t>();
237+
auto weights_data_ptr = weights.data_ptr<weight_type>();
235238
{
236239
auto wlmap = kv_store_.by(shard_id).wlock();
237240
auto* pool = kv_store_.pool_by(shard_id);
238-
auto indices_data_ptr = indices.data_ptr<index_t>();
239241
for (auto index_iter = indexes.begin();
240242
index_iter != indexes.end();
241243
index_iter++) {
@@ -258,11 +260,8 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
258260
auto* data_ptr =
259261
StoreValueUtils::data_ptr<weight_type>(block);
260262
std::copy(
261-
weights[id_index]
262-
.template data_ptr<weight_type>(),
263-
weights[id_index]
264-
.template data_ptr<weight_type>() +
265-
weights[id_index].numel(),
263+
weights_data_ptr + id_index * stride,
264+
weights_data_ptr + (id_index + 1) * stride,
266265
data_ptr);
267266
}
268267
}

0 commit comments

Comments
 (0)