Commit bb1cbc2

small changes for kvzch (#4073)
Summary:
Pull Request resolved: #4073
X-link: facebookresearch/FBGEMM#1157

Change set:
1. Introduce two new types (KVZCHParams and BackendType) to better integrate KVZCH into SSD TBE.
2. Add a virtual indicator to PartiallyMaterializedTensor so that when checkpoint and publish see a PMT, they can tell whether it backs a normal SSD embedding table or a KV ZCH embedding table.
3. Rename the hash modes to chunk-based and interleave-based.
4. Change the id and bucket tensors from 1D to 2D.

Differential Revision: D74137570
1 parent d17c6d9 commit bb1cbc2

8 files changed: +69, -20 lines

fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_common.py

Lines changed: 28 additions & 1 deletion

@@ -11,7 +11,7 @@

 import enum
 from dataclasses import dataclass
-from typing import List, NamedTuple
+from typing import List, NamedTuple, Tuple

 import torch
 from torch import Tensor
@@ -49,6 +49,33 @@ def from_str(cls, key: str):
         raise ValueError(f"Cannot parse value into EmbeddingLocation: {key}")


+class KVZCHParams(NamedTuple):
+    # global bucket id start and global bucket id end offsets for each logical table,
+    # where start offset is inclusive and end offset is exclusive
+    bucket_offsets: List[Tuple[int, int]] = []
+    # bucket size for each logical table; the value indicates the
+    # corresponding input space for each bucket id, e.g. 2^50 / total_num_buckets
+    bucket_sizes: List[int] = []
+
+
+class BackendType(enum.IntEnum):
+    SSD = 0
+    DRAM = 1
+    PS = 2
+
+    @classmethod
+    # pyre-ignore[3]
+    def from_str(cls, key: str):
+        lookup = {
+            "ssd": BackendType.SSD,
+            "dram": BackendType.DRAM,
+        }
+        if key in lookup:
+            return lookup[key]
+        else:
+            raise ValueError(f"Cannot parse value into BackendType: {key}")
+
+
 class CacheAlgorithm(enum.Enum):
     LRU = 0
     LFU = 1
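
As an illustration (not part of the diff), here is a minimal usage sketch of the two new types; the values are made up, and the import path is assumed from the file's location:

    from fbgemm_gpu.split_table_batched_embeddings_ops_common import (
        BackendType,
        KVZCHParams,
    )

    # Two logical tables, each owning a half-open range [start, end) of
    # global bucket ids; each bucket covers input_space / total_num_buckets ids.
    params = KVZCHParams(
        bucket_offsets=[(0, 4), (4, 8)],
        bucket_sizes=[2**50 // 8, 2**50 // 8],
    )

    assert BackendType.from_str("ssd") == BackendType.SSD
    assert BackendType.from_str("dram") == BackendType.DRAM
    # Note: "ps" is not in the lookup map, so from_str("ps") raises a
    # ValueError even though BackendType.PS exists.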

fbgemm_gpu/fbgemm_gpu/tbe/ssd/utils/partially_materialized_tensor.py

Lines changed: 13 additions & 1 deletion

@@ -33,7 +33,7 @@ class PartiallyMaterializedTensor:
     or use `full_tensor()` to get the full tensor (this could OOM).
     """

-    def __init__(self, wrapped) -> None:
+    def __init__(self, wrapped, is_virtual: bool = False) -> None:
         """
         Ensure caller loads the module before creating this object.

@@ -48,6 +48,7 @@ def __init__(self, wrapped) -> None:
             wrapped: torch.classes.fbgemm.KVTensorWrapper
         """
         self._wrapped = wrapped
+        self._is_virtual = is_virtual
         self._requires_grad = False

     @property
@@ -57,6 +58,17 @@ def wrapped(self):
         """
         return self._wrapped

+    @property
+    def is_virtual(self):
+        """
+        Indicate whether this PMT is a virtual tensor.
+        This indicator is needed by checkpoint and publish: they must know
+        whether a PMT backs a KVZCH table or a normal embedding table, and
+        for KVZCH they need to call all-gather to recalculate the correct
+        metadata of the ShardedTensor.
+        """
+        return self._is_virtual
+
     @classmethod
     def __torch_function__(cls, func, types, args=(), kwargs=None):
         if kwargs is None:
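
A sketch of how a checkpoint/publish path could consume the new flag (hypothetical caller code; `wrapped` stands for a torch.classes.fbgemm.KVTensorWrapper instance, as the constructor docstring requires):

    pmt = PartiallyMaterializedTensor(wrapped, is_virtual=True)

    if pmt.is_virtual:
        # KVZCH table: all-gather to recalculate the ShardedTensor metadata
        # before checkpoint/publish consumes it.
        rebuild_sharded_tensor_metadata(pmt)  # hypothetical helper
    else:
        # Normal SSD embedding table: existing path, metadata is already correct.
        pass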

fbgemm_gpu/include/fbgemm_gpu/split_embeddings_cache/kv_db_cpp_utils.h

Lines changed: 1 addition & 1 deletion

@@ -46,7 +46,7 @@ inline size_t hash_shard(int64_t id, size_t num_shards) {
 ///
 /// @param unordered_indices unordered ids, the id here might be
 /// original(unlinearized) id
-/// @param hash_mode 0 for hash by mod, 1 for hash by interleave
+/// @param hash_mode 0 for chunk-based hashing, 1 for interleave-based hashing
 /// @param bucket_start global bucket id, the start of the bucket range
 /// @param bucket_end global bucket id, the end of the bucket range
 /// @param bucket_size an optional, virtual size(input space, e.g. 2^50) of a

fbgemm_gpu/src/split_embeddings_cache/kv_db_cpp_utils.cpp

Lines changed: 13 additions & 8 deletions

@@ -23,10 +23,10 @@ int64_t _get_bucket_id(
     std::optional<int64_t> total_num_buckets = std::nullopt) {
   if (hash_mode == 0) {
     CHECK(bucket_size.has_value());
-    // hash by mod
+    // chunk-based hashing
     return id / bucket_size.value();
   } else {
-    // hash by interleave
+    // interleave-based hashing
     CHECK(total_num_buckets.has_value());
     return id % total_num_buckets.value();
   }
@@ -42,7 +42,7 @@ std::tuple<at::Tensor, at::Tensor> get_bucket_sorted_indices_and_bucket_tensor(
   TORCH_CHECK(unordered_indices.is_contiguous());
   TORCH_CHECK(
       hash_mode == 0 || hash_mode == 1,
-      "only support hash by mod and interleaved for now");
+      "only support hash by chunk-based or interleaved-based hashing for now");
   TORCH_CHECK(
       bucket_start <= bucket_end,
       "bucket_start:",
@@ -73,11 +73,16 @@ std::tuple<at::Tensor, at::Tensor> get_bucket_sorted_indices_and_bucket_tensor(
   for (int64_t i = 0; i < num_indices; ++i) {
     auto global_bucket_id = _get_bucket_id(
         indices_data_ptr[i], hash_mode, bucket_size, total_num_buckets);
-    CHECK(global_bucket_id >= bucket_start && global_bucket_id < bucket_end)
-        << "indices: " << indices_data_ptr[i]
-        << " bucket id: " << global_bucket_id
-        << " must fall into the range between:" << bucket_start << " and "
-        << bucket_end;
+    TORCH_CHECK(
+        global_bucket_id >= bucket_start && global_bucket_id < bucket_end,
+        "indices: ",
+        indices_data_ptr[i],
+        " bucket id: ",
+        global_bucket_id,
+        " must fall into the range between:",
+        bucket_start,
+        " and ",
+        bucket_end);
     if (bucket_id_to_cnt.find(global_bucket_id) == bucket_id_to_cnt.end()) {
       bucket_id_to_cnt[global_bucket_id] = 0;
     }
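
For intuition, the two hash modes in _get_bucket_id reduce to the following Python sketch (illustrative only, not the actual binding):

    def get_bucket_id(idx: int, hash_mode: int, bucket_size: int, total_num_buckets: int) -> int:
        if hash_mode == 0:
            # chunk-based: contiguous chunks of ids share a bucket, e.g. with
            # bucket_size=4, ids 0-3 -> bucket 0 and ids 4-7 -> bucket 1
            return idx // bucket_size
        else:
            # interleave-based: consecutive ids round-robin across buckets, e.g.
            # with total_num_buckets=4, ids 0,4,8 -> bucket 0 and ids 1,5,9 -> bucket 1
            return idx % total_num_buckets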

fbgemm_gpu/src/ssd_split_embeddings_cache/embedding_rocksdb_wrapper.h

Lines changed: 7 additions & 2 deletions

@@ -86,9 +86,14 @@ class EmbeddingRocksDBWrapper : public torch::jit::CustomClassHolder {
       int64_t start_id,
       int64_t end_id,
       int64_t id_offset,
-      c10::intrusive_ptr<EmbeddingSnapshotHandleWrapper> snapshot_handle) {
+      std::optional<c10::intrusive_ptr<EmbeddingSnapshotHandleWrapper>>
+          snapshot_handle) {
     return impl_->get_keys_in_range_by_snapshot(
-        start_id, end_id, id_offset, snapshot_handle->handle);
+        start_id,
+        end_id,
+        id_offset,
+        snapshot_handle.has_value() ? snapshot_handle.value()->handle
+                                    : nullptr);
   }

   void toggle_compaction(bool enable) {
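
Since snapshot_handle is now optional, a Python caller can presumably pass None when no snapshot exists, and the wrapper forwards a null handle (a sketch, assuming the TorchScript binding maps None to std::nullopt):

    # With a snapshot handle (previous behavior):
    ids = ssd_db.get_keys_in_range_by_snapshot(start_id, end_id, id_offset, snapshot)
    # Without one; impl_ receives a nullptr handle:
    ids = ssd_db.get_keys_in_range_by_snapshot(start_id, end_id, id_offset, None)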

fbgemm_gpu/src/ssd_split_embeddings_cache/kv_tensor_wrapper.h

Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ class KVTensorWrapper : public torch::jit::CustomClassHolder {
       int64_t dtype,
       int64_t row_offset,
       std::optional<c10::intrusive_ptr<EmbeddingSnapshotHandleWrapper>>
-          snapshot_handle);
+          snapshot_handle = std::nullopt);

   at::Tensor narrow(int64_t dim, int64_t start, int64_t length);

fbgemm_gpu/src/ssd_split_embeddings_cache/ssd_table_batched_embeddings.h

Lines changed: 1 addition & 1 deletion

@@ -547,7 +547,7 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB {
   }

   at::Tensor returned_keys = at::empty(
-      total_num, at::TensorOptions().device(at::kCPU).dtype(at::kLong));
+      {total_num, 1}, at::TensorOptions().device(at::kCPU).dtype(at::kLong));
   auto key_ptr = returned_keys.data_ptr<int64_t>();
   int64_t offset = 0;
   for (const auto& keys : keys_in_db_shards) {
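
Because returned_keys is now allocated as {total_num, 1}, callers that assumed a 1-D key tensor must flatten it, which is exactly what the test change below does with .view(-1). A small sketch (assuming the wrapper call shown earlier):

    keys = ssd_db.get_keys_in_range_by_snapshot(start_id, end_id, 0, snapshot)
    assert keys.dim() == 2 and keys.size(1) == 1  # shape [total_num, 1]
    flat_keys, _ = torch.sort(keys.view(-1))      # flatten before comparing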

fbgemm_gpu/test/tbe/ssd/ssd_l2_cache_test.py

Lines changed: 5 additions & 5 deletions

@@ -269,7 +269,6 @@ def test_rocksdb_get_discrete_ids(
         mixed: bool,
         weights_precision: SparseType,
     ) -> None:
-        weights_precision: SparseType = SparseType.FP32
         emb, Es, Ds, max_D = self.generate_fbgemm_ssd_tbe(
             T, D, log_E, weights_precision, mixed, False, 8
         )
@@ -306,7 +305,7 @@ def test_rocksdb_get_discrete_ids(
            start_id + offset, end_id + offset, offset, snapshot
        )
        ids_in_range_ordered, _ = torch.sort(ids_in_range)
-       id_tensor_ordered, _ = torch.sort(id_tensor)
+       id_tensor_ordered, _ = torch.sort(id_tensor.view(-1))

        assert torch.equal(ids_in_range_ordered, id_tensor_ordered)

@@ -377,7 +376,8 @@ def test_get_bucket_sorted_indices(
        else:
            # test failure
            with self.assertRaisesRegex(
-               RuntimeError, "only support hash by mod and interleaved for now"
+               RuntimeError,
+               "only support hash by chunk-based or interleaved-based hashing for now",
            ):
                torch.ops.fbgemm.get_bucket_sorted_indices_and_bucket_tensor(
                    indices,
@@ -400,7 +400,7 @@ def test_get_bucket_sorted_indices(
            last_bucket_id = cur_bucket_id
            # Calculate expected tensor output
            expected_bucket_tensor = torch.zeros(
-               bucket_end - bucket_start, 1, dtype=torch.int64
+               bucket_end - bucket_start, dtype=torch.int64
            )
            for index in indices:
                self.assertTrue(hash_mode >= 0 and hash_mode <= 1)
@@ -412,4 +412,4 @@ def test_get_bucket_sorted_indices(
                expected_bucket_tensor[bucket_id - bucket_start] += 1

        # Compare actual and expected tensor outputs
-       self.assertTrue(torch.equal(bucket_t, expected_bucket_tensor))
+       self.assertTrue(torch.equal(bucket_t.view(-1), expected_bucket_tensor))
