Commit 58d5b3f

a4lg and Isotr0py authored
[Model][Quantization] Restore MoE + GGUF models support (incl. Qwen3 MoE) by allowing Sideload Parameters (#30116)
Signed-off-by: Tsukasa OI <[email protected]>
Co-authored-by: Isotr0py <[email protected]>
1 parent c2e1987 commit 58d5b3f
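
How the "sideload parameters" mechanism works: GGUF checkpoints store MoE expert weights as one merged tensor per layer (e.g. blk.N.ffn_up_exps.weight), so the per-expert HF parameter names can never be mapped from the GGUF file, and the loader's unmapped-parameter check made such models fail to load. This commit records a regex per layer for those per-expert weights and exempts matching names from the check. Below is a minimal sketch of that filtering, distilled from the diff; num_layers and the example tensor names are illustrative, not vLLM's actual loader code.

    import regex as re

    num_layers = 2  # illustrative; the loader uses config.num_hidden_layers

    # One pattern per layer covering every per-expert projection weight,
    # built the same way as in the diff below.
    sideload_params: list[re.Pattern] = [
        re.compile(
            f"model\\.layers\\.{idx}"
            r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
        )
        for idx in range(num_layers)
    ]

    # Names the GGUF-to-HF map could not resolve (example values).
    unmapped_params = [
        "model.layers.0.mlp.experts.7.up_proj.weight",    # sideloaded: OK
        "model.layers.1.mlp.experts.3.down_proj.weight",  # sideloaded: OK
        "model.layers.0.self_attn.q_proj.weight",         # genuinely unmapped
    ]

    # Drop names fully matched by a sideload pattern; anything left is a
    # real mapping failure and would still raise the loader's RuntimeError.
    unmapped_params = [
        name
        for name in unmapped_params
        if not any(re.fullmatch(p, name) for p in sideload_params)
    ]
    assert unmapped_params == ["model.layers.0.self_attn.q_proj.weight"]

The actual change builds these patterns while constructing gguf_to_hf_name_map (the merged tensor itself is mapped to expert 0's name) and applies the filter just before raising on unmapped parameters.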

File tree

vllm/model_executor/layers/quantization/gguf.py
vllm/model_executor/model_loader/gguf_loader.py

2 files changed: 24 additions, 1 deletion


vllm/model_executor/layers/quantization/gguf.py
1 addition, 0 deletions

@@ -82,6 +82,7 @@ def get_quant_method(
                 return UnquantizedEmbeddingMethod()
             return GGUFEmbeddingMethod(self)
         elif isinstance(layer, FusedMoE):
+            # TODO: Select UnquantizedFusedMoEMethod on unquantized layers.
             return GGUFMoEMethod(self, layer.moe_config)
         return None

vllm/model_executor/model_loader/gguf_loader.py
23 additions, 1 deletion

@@ -4,6 +4,7 @@
 from collections.abc import Generator
 
 import gguf
+import regex as re
 import torch
 import torch.nn as nn
 from huggingface_hub import hf_hub_download
@@ -94,6 +95,7 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
             hasattr(config, "vision_config") and config.vision_config is not None
         )
         gguf_to_hf_name_map = {}
+        sideload_params: list[re.Pattern] = []
         # hack: ggufs have a different name than transformers
         if model_type == "cohere":
             model_type = "command-r"
@@ -118,6 +120,12 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
                 gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = (
                     f"model.layers.{idx}.mlp.experts.0.up_proj.weight"
                 )
+                sideload_params.append(
+                    re.compile(
+                        f"model\\.layers\\.{idx}"
+                        r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
+                    )
+                )
         if model_type in ("qwen2_moe", "qwen3_moe"):
             model_type = model_type.replace("_", "")
             # GGUF layer map assumes that we will have a merged expert weights
@@ -132,6 +140,12 @@ def _get_gguf_weights_map(self, model_config: ModelConfig):
                 gguf_to_hf_name_map[f"blk.{idx}.ffn_up_exps.weight"] = (
                     f"model.layers.{idx}.mlp.experts.0.up_proj.weight"
                 )
+                sideload_params.append(
+                    re.compile(
+                        f"model\\.layers\\.{idx}"
+                        r"\.mlp\.experts\.[0-9]+\.(gate|up|down)_proj\.weight"
+                    )
+                )
 
         arch = None
         for key, value in gguf.MODEL_ARCH_NAMES.items():
@@ -241,7 +255,15 @@ def find_hf_name_in_tensor_map(hf_name: str) -> str | None:
                 # Parameter not in manual overrides either
                 unmapped_params.append(hf_name)
 
-        # All parameters must be mapped: both vision/projector and backbone
+        # All parameters (except those initialized by other means) must be mapped:
+        # both vision/projector and backbone
+        if unmapped_params:
+            unmapped_params = list(
+                filter(
+                    lambda x: not any(re.fullmatch(p, x) for p in sideload_params),
+                    unmapped_params,
+                )
+            )
         if unmapped_params:
             raise RuntimeError(
                 f"Failed to map GGUF parameters "
