update llama_action model
modeling_llama_action.py (+4 -14)
@@ -200,29 +200,19 @@ class LlamaActionForCausalLM(LlamaForCausalLM):
             past_key_values=None,
             attention_mask=None,
             use_cache=None,
-
-            prefix="",
-            total=0,
+            progress_bar=None,
             **kwargs):
         batch_size = input_ids.size(0)
         seq_length = input_ids.size(1)
         n_frames = seq_length // self.num_image_patches
         attention_mask_length = n_frames * (self.num_image_patches + self.num_action_embeddings)
-        if
-
-            pbar = tqdm(total=total - len(input_ids[0]), desc=prefix, leave=False)
-            postfix = f"Frame [{n_frames + 1}/{total // self.num_image_patches}]"
-            pbar.set_postfix_str(postfix)
-        else:
-            pbar.update()
+        if progress_bar is not None:
+            progress_bar.update()

         if seq_length % self.num_image_patches != 0:
             n_last_frame_tokens = seq_length % self.num_image_patches
             attention_mask_length += n_last_frame_tokens
-
-            if show_progress:
-                postfix = f"Frame [{n_frames + 1}/{total // self.num_image_patches}]"
-                pbar.set_postfix_str(postfix)
+
         attention_mask = torch.ones((batch_size, attention_mask_length), device=input_ids.device, dtype=torch.long)
         # cut decoder_input_ids if past_key_values is used
         if past_key_values is not None and len(past_key_values) > 0:
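In short, the commit removes the tqdm bookkeeping (the prefix and total arguments, the pbar creation and the show_progress checks) from the modeling code: the caller now owns the progress bar and passes it in as progress_bar, and the model only calls progress_bar.update() once per generation step. A minimal usage sketch, assuming this method is the model's prepare_inputs_for_generation and that generate() forwards extra keyword arguments to it; the frame count and variable names below are illustrative, not part of the repo:

    from tqdm import tqdm

    # Illustrative setup: `model` is a loaded LlamaActionForCausalLM and
    # `input_ids` holds the image-patch tokens of the conditioning frames.
    frames_to_generate = 4
    tokens_per_frame = model.num_image_patches
    max_new_tokens = frames_to_generate * tokens_per_frame

    # The caller owns the bar; the model only advances it by one step each
    # time prepare_inputs_for_generation runs during decoding.
    with tqdm(total=max_new_tokens, desc="generating") as pbar:
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            progress_bar=pbar,  # passed through as a model kwarg
        )

With progress_bar left at its default of None, the new code skips the update entirely, so generation without a bar no longer touches any tqdm state.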