1 parent 4df8edc commit 1b98450
sharktank/sharktank/examples/export_paged_llm_v1.py
@@ -163,9 +163,7 @@ def size_per_device(
 
 def setup_cache(model, shard_count):
     if model.config.kv_cache_type == "paged":
-        cache_state = model.cache.allocate(
-            page_count=hp.context_length // llama_config.block_seq_stride
-        )
+        cache_state = model.cache.allocate(page_count=args.device_block_count)
         page_dim = torch.export.Dim("page", max=args.device_block_count)
 
         pipeline_parallelism_size = len(cache_state)
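
For context on the change: before this commit, the paged KV cache was sized from the model's maximum context length (one page per block_seq_stride tokens of a single full-length sequence), while after it the page count comes directly from the --device-block-count flag, matching the max already given to torch.export.Dim on the next line. Below is a minimal sketch of that arithmetic, assuming illustrative stand-in values; the names mirror the identifiers in the diff (hp, llama_config, args), but the concrete numbers and the argparse wiring here are hypothetical, not taken from export_paged_llm_v1.py.

import argparse

# Stand-ins for hp.context_length and llama_config.block_seq_stride
# (values are illustrative assumptions, not from the real config).
context_length = 4096
block_seq_stride = 16

# Old behavior: derive the page count from the maximum context length.
derived_page_count = context_length // block_seq_stride  # 256 pages

# New behavior: take the page count straight from a CLI flag, so the
# device cache can be sized independently of context_length.
parser = argparse.ArgumentParser()
parser.add_argument("--device-block-count", type=int, default=256)
args = parser.parse_args([])  # empty argv for this sketch

print(derived_page_count)       # 256
print(args.device_block_count)  # 256

One likely motivation for the change: allocating from args.device_block_count keeps the cache allocation and the exported dynamic dimension ("page", max=args.device_block_count) governed by the same value, so the allocated page count can no longer drift from the export-time bound.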