Skip to content

Commit 25090b0

Browse files
0.23.2 release - fix export for nemo models
1 parent 5c9390c commit 25090b0

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

modelopt/torch/export/layer_utils.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -957,7 +957,7 @@ def build_stacked_experts(
957957
"""Builds the experts_weight_1 and experts_weight_2 configs for the experts."""
958958
# Resmooth all experts
959959
preprocess_linear_fusion(
960-
[expert_getter(experts, i, linear_names[2]) for i in range(num_experts)],
960+
[expert_getter(experts, i, linear_names[1]) for i in range(num_experts)],
961961
resmooth_only=True,
962962
)
963963

modelopt/torch/export/tensorrt_llm_utils.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,9 @@ def convert_to_tensorrt_llm_config(
415415
layernorm_type_map = {i.name: i.value for i in LayerNormType}
416416
layernorm_position_map = {i.name: i.value for i in LayerNormPositionType}
417417

418-
if decoder_type == "mpt":
418+
if decoder_type in ["gpt", "gemma", "llama"]:
419+
pass
420+
elif decoder_type == "mpt":
419421
config.update(
420422
{
421423
"clip_qkv": first_attention_config.clip_qkv,
@@ -506,7 +508,7 @@ def convert_to_tensorrt_llm_config(
506508
config["vision_output_dim"] = vision_output_dim if vision_output_dim != 0 else 7680
507509
else:
508510
raise NotImplementedError(
509-
f"Cannot export tensorrt_llm checkpoint for model {config_architecture}. "
511+
f"Cannot export tensorrt_llm checkpoint for model {decoder_type}: {config_architecture}. "
510512
"It's not supported by TensorRT-LLM yet."
511513
)
512514

0 commit comments

Comments
 (0)