Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-13 22:18:36 +08:00)
[https://nvbugs/5419069][fix] Fix the mismatched layer name components. (#6417)
Signed-off-by: Yukun He <23156053+hyukn@users.noreply.github.com>
This commit is contained in:
parent ac23f4a80d
commit 90856bf97d
@ -171,6 +171,30 @@ def smooth_qwen2_model(model, scales, alpha, qwen_qkv_para, qwen_smoother):
|
||||
scales[layer_name]["w"] = module.mlp.down_proj.weight.abs().max(
|
||||
dim=1)[0]
|
||||
|
||||
scales_keys_to_rename = [
|
||||
key for key in scales.keys() if 'language_model.' in key
|
||||
]
|
||||
|
||||
qwen_qkv_para_keys_to_rename = [
|
||||
key for key in qwen_qkv_para.keys() if 'language_model.' in key
|
||||
]
|
||||
|
||||
qwen_smoother_keys_to_rename = [
|
||||
key for key in qwen_smoother.keys() if 'language_model.' in key
|
||||
]
|
||||
|
||||
for key in scales_keys_to_rename:
|
||||
scales[key.replace('language_model.', '')] = scales[key]
|
||||
del scales[key]
|
||||
|
||||
for key in qwen_qkv_para_keys_to_rename:
|
||||
qwen_qkv_para[key.replace('language_model.', '')] = qwen_qkv_para[key]
|
||||
del qwen_qkv_para[key]
|
||||
|
||||
for key in qwen_smoother_keys_to_rename:
|
||||
qwen_smoother[key.replace('language_model.', '')] = qwen_smoother[key]
|
||||
del qwen_smoother[key]
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def capture_activation_range(model,
|
||||
|
||||
@ -427,7 +427,6 @@ examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float
|
||||
examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5141288)
|
||||
examples/test_qwen.py::test_llm_qwen_7b_int8_kv_1node_1gpus[qwen2_vl_7b_instruct-enable_gemm_plugin-enable_weight_only] SKIP (https://nvbugs/5419067)
|
||||
examples/test_qwen.py::test_llm_qwen_awq_single_gpu_summary[qwen2_vl_7b_instruct-nb:4] SKIP (https://nvbugs/5419068)
|
||||
examples/test_qwen.py::test_llm_qwen_smooth_quant_single_gpu_summary[qwen2_vl_7b_instruct-enable_ptpc-nb:4] SKIP (https://nvbugs/5419069)
|
||||
examples/test_recurrentgemma.py::test_llm_recurrentgemma_1gpu[use_cpp_session-recurrentgemma-2b-use_paged_cache-fp8-float16-enable_attn_plugin-enable_gemm_plugin] SKIP (https://nvbugs/5419070)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity] SKIP (https://nvbugs/5421989)
|
||||
examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-RobertaForSequenceClassification-bert/twitter-roberta-base-emotion] SKIP (https://nvbugs/5421989)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user