microsoft
/

Phi-4-multimodal-instruct

@@ -1932,6 +1932,15 @@ class Phi4MMModel(Phi4MMPreTrainedModel):
                 )
         return causal_mask
 class Phi4MMForCausalLM(Phi4MMPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
@@ -2134,6 +2143,9 @@ class Phi4MMForCausalLM(Phi4MMPreTrainedModel, GenerationMixin):
         hidden_states = outputs[0]
         # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
         logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :])
         loss = None

                 )
         return causal_mask
+    def prepare_inputs_for_generation():
+        """
+        Placeholder for the `prepare_inputs_for_generation` method.
+
+        A method of this name is part of `GenerationMixin`; this stub is added to
+        the `Phi4MMModel` class to prevent the model from breaking due to the
+        AttributeError (presumably raised when the attribute is looked up on a
+        model that does not define it -- confirm against the caller).
+
+        The body is empty: the stub prepares nothing and returns None.
+
+        NOTE(review): the signature declares no `self`, so invoking this on an
+        instance raises a TypeError; as written only attribute lookup is safe.
+        Consider accepting `self, *args, **kwargs` if any caller actually calls it.
+        """
+        pass
 class Phi4MMForCausalLM(Phi4MMPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
         hidden_states = outputs[0]
         # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
+        if num_logits_to_keep is None:
+            num_logits_to_keep = hidden_states.size(1)
         logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :])
         loss = None

processing_phi4mm.py CHANGED Viewed

@@ -506,7 +506,7 @@ class Phi4MMProcessor(ProcessorMixin):
     image_processor_class = "AutoImageProcessor"  # Phi4MMImageProcessor will be registered later
     audio_processor_class = "AutoFeatureExtractor"  # Phi4MMAudioFeatureExtractor will be registered later
-    def __init__(self, image_processor, audio_processor, tokenizer):
         self.image_processor = image_processor
         self.audio_processor = audio_processor
         self.tokenizer = tokenizer

     image_processor_class = "AutoImageProcessor"  # Phi4MMImageProcessor will be registered later
     audio_processor_class = "AutoFeatureExtractor"  # Phi4MMAudioFeatureExtractor will be registered later
+    def __init__(self, image_processor, audio_processor, tokenizer, **kwargs):
         self.image_processor = image_processor
         self.audio_processor = audio_processor
         self.tokenizer = tokenizer