1 parent 4df8edc commit 1b98450
sharktank/sharktank/examples/export_paged_llm_v1.py
@@ -163,9 +163,7 @@ def size_per_device(
 
 def setup_cache(model, shard_count):
     if model.config.kv_cache_type == "paged":
-        cache_state = model.cache.allocate(
-            page_count=hp.context_length // llama_config.block_seq_stride
-        )
+        cache_state = model.cache.allocate(page_count=args.device_block_count)
         page_dim = torch.export.Dim("page", max=args.device_block_count)
 
         pipeline_parallelism_size = len(cache_state)
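
For context on the change: before this commit, the paged KV cache was sized from the model's maximum context length (one page per block_seq_stride tokens of a single full-length sequence), while after it the page count comes directly from the --device-block-count flag, matching the max already given to torch.export.Dim on the next line. Below is a minimal sketch of that arithmetic, assuming illustrative stand-in values; the names mirror the identifiers in the diff (hp, llama_config, args), but the concrete numbers and the argparse wiring here are hypothetical, not taken from export_paged_llm_v1.py.

import argparse

# Stand-ins for hp.context_length and llama_config.block_seq_stride
# (values are illustrative assumptions, not from the real config).
context_length = 4096
block_seq_stride = 16

# Old behavior: derive the page count from the maximum context length.
derived_page_count = context_length // block_seq_stride  # 256 pages

# New behavior: take the page count straight from a CLI flag, so the
# device cache can be sized independently of context_length.
parser = argparse.ArgumentParser()
parser.add_argument("--device-block-count", type=int, default=256)
args = parser.parse_args([])  # empty argv for this sketch

print(derived_page_count)       # 256
print(args.device_block_count)  # 256

One likely motivation for the change: allocating from args.device_block_count keeps the cache allocation and the exported dynamic dimension ("page", max=args.device_block_count) governed by the same value, so the allocated page count can no longer drift from the export-time bound.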