Skip to content

Commit 1b98450

Browse files
committed
Allocate correctly
1 parent 4df8edc commit 1b98450

File tree

1 file changed

+1
-3
lines changed

1 file changed

+1
-3
lines changed

sharktank/sharktank/examples/export_paged_llm_v1.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -163,9 +163,7 @@ def size_per_device(
163 163

164 164
def setup_cache(model, shard_count):
165 165
if model.config.kv_cache_type == "paged":
166-
cache_state = model.cache.allocate(
167-
page_count=hp.context_length // llama_config.block_seq_stride
168-
)
166+
cache_state = model.cache.allocate(page_count=args.device_block_count)
169 167
page_dim = torch.export.Dim("page", max=args.device_block_count)
170 168

171 169
pipeline_parallelism_size = len(cache_state)

0 commit comments

Comments (0)