Merge branch 'main' into release/4.1

Jintao-Huang · Jintao-Huang · commit 4738d9e3dced · 2026-04-07T15:06:01.000+08:00
diff --git a/swift/rlhf_trainers/grpo_trainer.py b/swift/rlhf_trainers/grpo_trainer.py
@@ -2627,7 +2627,9 @@ def _prepare_model_inputs(self, inputs: 'DataType') -> Dict[str, Any]:
             k: v
             for k, v in inputs.items() if k not in [
                 'logits_to_keep', 'completion_mask', 'ref_per_token_logps', 'advantages', 'old_per_token_logps',
-                'truncated_mask', 'seq_lengths', 'num_items_in_batch', 'rollout_per_token_logps'
+                'truncated_mask', 'seq_lengths', 'num_items_in_batch', 'rollout_per_token_logps', 'rollout_logprobs',
+                'is_truncated', 'add_eos', 'response_token_ids', 'prompt_id', 'rollout_is_weights', 'finish_reason',
+                'request_id'
             ]
         }
 
diff --git a/swift/rlhf_trainers/utils.py b/swift/rlhf_trainers/utils.py
@@ -852,25 +852,17 @@ def prepare_fsdp(model, accelerator, evaluation_mode: bool = True):
     return model
 
 
-def patch_vllm_moe_model_weight_loader(model):
-    """
-    Patch vLLM MoE model to add weight_loader attribute to expert weights.
+_moe_model_registry_cache = None
 
-    This is a workaround for a bug in vLLM 0.8.2 where MoE weights (w13_weight, w2_weight)
-    don't have the weight_loader attribute, causing AttributeError during weight loading.
-    Code adapted from verl/verl/utils/vllm/patch.py
 
-    Args:
-        model: The vLLM model to patch.
-    """
-    import importlib
+def _get_moe_model_registry():
 
-    # Check if already patched (idempotent)
-    if getattr(model, '_swift_moe_weight_loader_patched', False):
-        return
+    global _moe_model_registry_cache
+    if _moe_model_registry_cache is not None:
+        return _moe_model_registry_cache
+
+    import importlib
 
-    # MoE model configurations: (module_path, class_names, mlp_attr)
-    # mlp_attr specifies the attribute name for the MoE layer in each model
     moe_model_configs = [
         ('vllm.model_executor.models.deepseek_v2', ('DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM'), 'mlp'),
         ('vllm.model_executor.models.mixtral', ('MixtralForCausalLM', ), 'block_sparse_moe'),
@@ -881,7 +873,6 @@ def patch_vllm_moe_model_weight_loader(model):
         ('vllm.model_executor.models.kimi_vl', ('KimiVLForConditionalGeneration', ), 'mlp'),
     ]
 
-    # Build supported models list and MLP attribute mapping
     supported_moe_models = []
     mlp_attr_mapping = {}
 
@@ -893,10 +884,32 @@ def patch_vllm_moe_model_weight_loader(model):
                     model_class = getattr(module, class_name)
                     supported_moe_models.append(model_class)
                     mlp_attr_mapping[model_class] = mlp_attr
-        except (ImportError, AttributeError):
+        except (ImportError, AttributeError, RuntimeError):
             pass
 
-    # Early return if no MoE models are supported
+    _moe_model_registry_cache = (supported_moe_models, mlp_attr_mapping)
+    return _moe_model_registry_cache
+
+
+def patch_vllm_moe_model_weight_loader(model):
+    """
+    Patch vLLM MoE model to add weight_loader attribute to expert weights.
+
+    This is a workaround for a bug in vLLM 0.8.2 where MoE weights (w13_weight, w2_weight)
+    don't have the weight_loader attribute, causing AttributeError during weight loading.
+    Code adapted from verl/verl/utils/vllm/patch.py
+
+    Args:
+        model: The vLLM model to patch.
+    """
+    # Check if already patched (idempotent).
+    # Note: the flag can be lost when vLLM sleep/wake_up recreates the model
+    # object, so the expensive import step is cached in _get_moe_model_registry.
+    if getattr(model, '_swift_moe_weight_loader_patched', False):
+        return
+
+    supported_moe_models, mlp_attr_mapping = _get_moe_model_registry()
+
     if not supported_moe_models:
         return
 
diff --git a/swift/template/templates/gemma.py b/swift/template/templates/gemma.py
@@ -242,7 +242,7 @@ def _data_collator_mm_data(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]:
 register_template(GemmaTemplateMeta(MLLMTemplateType.gemma3n, template_cls=Gemma3nTemplate))
 
 
-class Gemma4Template(Gemma3Template):
+class Gemma4Template(Template):
     placeholder_tokens = ['<|image|>', '<|audio|>', '<|video|>']
 
     def replace_tag(self, media_type: Literal['image', 'video', 'audio'], index: int,
@@ -267,19 +267,23 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
             return_tensors='pt',
             add_special_tokens=False,
         )
-        splited_tokens = iter(self._split_list(media_inputs['input_ids'][0].tolist(), split_token))
+        splited_tokens = self._split_list(media_inputs['input_ids'][0].tolist(), split_token)
         media_inputs.pop('input_ids')
         media_inputs.pop('attention_mask')
         input_ids = encoded['input_ids']
         labels = encoded['labels']
         loss_scale = encoded.get('loss_scale', None)
 
-        def _get_new_tokens(i):
-            return next(splited_tokens)
-
         idx_list = []
         for key in ['image', 'video', 'audio']:
             idx_list += findall(input_ids, getattr(self.config, f'{key}_token_id'))
+        sorted_order = sorted(range(len(idx_list)), key=lambda i: idx_list[i])
+        idx_list = [idx_list[i] for i in sorted_order]
+        splited_tokens = [splited_tokens[i] for i in sorted_order]
+
+        def _get_new_tokens(i):
+            return splited_tokens[i]
+
         if idx_list:
             input_ids, labels, loss_scale = self._extend_tokens(input_ids, labels, loss_scale, idx_list,
                                                                 _get_new_tokens)

Original file line number	Diff line number	Diff line change
`@@ -2627,7 +2627,9 @@ def _prepare_model_inputs(self, inputs: 'DataType') -> Dict[str, Any]:`
`2627`	`2627`	`k: v`
`2628`	`2628`	`for k, v in inputs.items() if k not in [`
`2629`	`2629`	`'logits_to_keep', 'completion_mask', 'ref_per_token_logps', 'advantages', 'old_per_token_logps',`
`2630`		`- 'truncated_mask', 'seq_lengths', 'num_items_in_batch', 'rollout_per_token_logps'`
	`2630`	`+ 'truncated_mask', 'seq_lengths', 'num_items_in_batch', 'rollout_per_token_logps', 'rollout_logprobs',`
	`2631`	`+ 'is_truncated', 'add_eos', 'response_token_ids', 'prompt_id', 'rollout_is_weights', 'finish_reason',`
	`2632`	`+ 'request_id'`
`2631`	`2633`	`]`
`2632`	`2634`	`}`
`2633`	`2635`