From 8a963fc10ee005b3f425d14bcd427eef5c57f157 Mon Sep 17 00:00:00 2001 From: David Friehs Date: Sun, 7 Jun 2026 21:41:39 +0200 Subject: [PATCH] convert : fix conversion for Mistral-Medium-3.5-128B (#24268) Mistral explicitly sets `moe` and `llama_4_scaling` to `null` in params.json, breaking `key in dict` checks during conversion. Replace with `dict.get(key) is not None` where this matters. Fixes `convert_hf_to_gguf.py --mistral-format Mistral-Medium-3.5-128B` --- conversion/mistral.py | 5 +++-- convert_hf_to_gguf.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/conversion/mistral.py b/conversion/mistral.py index 7a7d6e0393..aec22ca387 100644 --- a/conversion/mistral.py +++ b/conversion/mistral.py @@ -105,8 +105,9 @@ class MistralModel(LlamaModel): gguf_writer.add_rope_scaling_yarn_log_mul(mscale_all_dim) gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"]) - if "llama_4_scaling" in hparams: - gguf_writer.add_attn_temperature_scale(hparams["llama_4_scaling"]["beta"]) + llama_4_scaling = hparams.get("llama_4_scaling") + if llama_4_scaling is not None: + gguf_writer.add_attn_temperature_scale(llama_4_scaling["beta"]) class MistralMoeModel(DeepseekV2Model): diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index cd19eebdfa..a6192c039a 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -238,7 +238,7 @@ def main() -> None: assert hparams.get("vision_encoder") is not None, "This model does not support multimodal" from conversion.pixtral import PixtralModel model_class = PixtralModel - elif "moe" in hparams: + elif hparams.get("moe") is not None: from conversion.mistral import MistralMoeModel model_class = MistralMoeModel else: