@@ -852,25 +852,17 @@ def prepare_fsdp(model, accelerator, evaluation_mode: bool = True):
852852 return model
853853
854854
855- def patch_vllm_moe_model_weight_loader (model ):
856- """
857- Patch vLLM MoE model to add weight_loader attribute to expert weights.
855+ _moe_model_registry_cache = None
858856
859- This is a workaround for a bug in vLLM 0.8.2 where MoE weights (w13_weight, w2_weight)
860- don't have the weight_loader attribute, causing AttributeError during weight loading.
861- Code adapted from verl/verl/utils/vllm/patch.py
862857
863- Args:
864- model: The vLLM model to patch.
865- """
866- import importlib
858+ def _get_moe_model_registry ():
867859
868- # Check if already patched (idempotent)
869- if getattr (model , '_swift_moe_weight_loader_patched' , False ):
870- return
860+ global _moe_model_registry_cache
861+ if _moe_model_registry_cache is not None :
862+ return _moe_model_registry_cache
863+
864+ import importlib
871865
872- # MoE model configurations: (module_path, class_names, mlp_attr)
873- # mlp_attr specifies the attribute name for the MoE layer in each model
874866 moe_model_configs = [
875867 ('vllm.model_executor.models.deepseek_v2' , ('DeepseekV2ForCausalLM' , 'DeepseekV3ForCausalLM' ), 'mlp' ),
876868 ('vllm.model_executor.models.mixtral' , ('MixtralForCausalLM' , ), 'block_sparse_moe' ),
@@ -881,7 +873,6 @@ def patch_vllm_moe_model_weight_loader(model):
881873 ('vllm.model_executor.models.kimi_vl' , ('KimiVLForConditionalGeneration' , ), 'mlp' ),
882874 ]
883875
884- # Build supported models list and MLP attribute mapping
885876 supported_moe_models = []
886877 mlp_attr_mapping = {}
887878
@@ -893,10 +884,32 @@ def patch_vllm_moe_model_weight_loader(model):
893884 model_class = getattr (module , class_name )
894885 supported_moe_models .append (model_class )
895886 mlp_attr_mapping [model_class ] = mlp_attr
896- except (ImportError , AttributeError ):
887+ except (ImportError , AttributeError , RuntimeError ):
897888 pass
898889
899- # Early return if no MoE models are supported
890+ _moe_model_registry_cache = (supported_moe_models , mlp_attr_mapping )
891+ return _moe_model_registry_cache
892+
893+
894+ def patch_vllm_moe_model_weight_loader (model ):
895+ """
896+ Patch vLLM MoE model to add weight_loader attribute to expert weights.
897+
898+ This is a workaround for a bug in vLLM 0.8.2 where MoE weights (w13_weight, w2_weight)
899+ don't have the weight_loader attribute, causing AttributeError during weight loading.
900+ Code adapted from verl/verl/utils/vllm/patch.py
901+
902+ Args:
903+ model: The vLLM model to patch.
904+ """
905+ # Check if already patched (idempotent).
906+ # Note: the flag can be lost when vLLM sleep/wake_up recreates the model
907+ # object, so the expensive import step is cached in _get_moe_model_registry.
908+ if getattr (model , '_swift_moe_weight_loader_patched' , False ):
909+ return
910+
911+ supported_moe_models , mlp_attr_mapping = _get_moe_model_registry ()
912+
900913 if not supported_moe_models :
901914 return
902915
0 commit comments