44from collections .abc import Generator
55
66import gguf
7+ import regex as re
78import torch
89import torch .nn as nn
910from huggingface_hub import hf_hub_download
@@ -94,6 +95,7 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
9495 hasattr (config , "vision_config" ) and config .vision_config is not None
9596 )
9697 gguf_to_hf_name_map = {}
98+ sideload_params : list [re .Pattern ] = []
9799 # hack: ggufs have a different name than transformers
98100 if model_type == "cohere" :
99101 model_type = "command-r"
@@ -118,6 +120,12 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
118120 gguf_to_hf_name_map [f"blk.{ idx } .ffn_up_exps.weight" ] = (
119121 f"model.layers.{ idx } .mlp.experts.0.up_proj.weight"
120122 )
123+ sideload_params .append (
124+ re .compile (
125+ f"model\\ .layers\\ .{ idx } "
126+ r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
127+ )
128+ )
121129 if model_type in ("qwen2_moe" , "qwen3_moe" ):
122130 model_type = model_type .replace ("_" , "" )
123131 # GGUF layer map assumes that we will have a merged expert weights
@@ -132,6 +140,12 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
132140 gguf_to_hf_name_map [f"blk.{ idx } .ffn_up_exps.weight" ] = (
133141 f"model.layers.{ idx } .mlp.experts.0.up_proj.weight"
134142 )
143+ sideload_params .append (
144+ re .compile (
145+ f"model\\ .layers\\ .{ idx } "
146+ r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
147+ )
148+ )
135149
136150 arch = None
137151 for key , value in gguf .MODEL_ARCH_NAMES .items ():
@@ -241,7 +255,15 @@ def find_hf_name_in_tensor_map(hf_name: str) -> str | None:
241255 # Parameter not in manual overrides either
242256 unmapped_params .append (hf_name )
243257
244- # All parameters must be mapped: both vision/projector and backbone
258+ # All parameters must be mapped (both vision/projector and backbone), except
259+ # those matching a sideload_params pattern, which are initialized by other means.
260+ if unmapped_params :
261+ unmapped_params = list (
262+ filter (
263+ lambda x : not any (re .fullmatch (p , x ) for p in sideload_params ),
264+ unmapped_params ,
265+ )
266+ )
245267 if unmapped_params :
246268 raise RuntimeError (
247269 f"Failed to map GGUF parameters "
0 commit comments