---
base_model:
- Lightricks/LTX-Video
library_name: diffusers
datasets:
- nexar-ai/nexar_collision_prediction
---

# Traffic World Models (LTX-Video, i2v)

This repository hosts the fine-tuned LTX-Video image-to-video (i2v) diffusion model specialized for generating realistic driving footage.

## Model Details

- **Base Model**: LTX-Video
- **Resolution**: 768×512 pixels (adjustable)
- **Frame Length**: 49 frames per generated video (adjustable)
- **Fine-tuning Method**: Low-Rank Adaptation (LoRA)
- **Data Source**: Nexar Collision Prediction dataset videos (1,500 clips)
## Usage Example

```python
import os
import torch
from diffusers.utils import export_to_video, load_image
from stg_ltx_i2v_pipeline import LTXImageToVideoSTGPipeline

def generate_video_from_image(
    image_path,
    prompt,
    output_dir="outputs",
    width=768,
    height=512,
    num_frames=49,
    lora_path="mehmetkeremturkcan/DashcamCrashModels-LTX-I2V",
    lora_weight=1.0,
    prefix="crashmodel, ",
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    stg_mode="STG-A",
    stg_applied_layers_idx=[19],
    stg_scale=1.0,
    do_rescaling=True
):
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load the base LTX-Video i2v pipeline (STG variant)
    pipe = LTXImageToVideoSTGPipeline.from_pretrained(
        "a-r-r-o-w/LTX-Video-0.9.1-diffusers",
        torch_dtype=torch.bfloat16,
        local_files_only=False
    )

    # Apply the fine-tuned LoRA weights
    pipe.load_lora_weights(
        lora_path,
        weight_name="pytorch_lora_weights.safetensors",
        adapter_name="crashmodel"
    )
    pipe.set_adapters("crashmodel", lora_weight)
    pipe.to("cuda")

    # Prepare the conditioning image and the full prompt
    image = load_image(image_path).resize((width, height))
    full_prompt = prefix + prompt if prefix else prompt

    # Build the output filename from the input image name
    basename = os.path.basename(image_path).split('.')[0]
    output_filename = f"{basename}_i2v.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # Generate the video
    print(f"Generating video with prompt: {full_prompt}")
    video = pipe(
        image=image,
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_frames=num_frames,
        num_inference_steps=50,
        decode_timestep=0.03,
        decode_noise_scale=0.025,
        generator=None,
        stg_mode=stg_mode,
        stg_applied_layers_idx=stg_applied_layers_idx,
        stg_scale=stg_scale,
        do_rescaling=do_rescaling
    ).frames[0]

    # Export the video
    export_to_video(video, output_path, fps=24)
    print(f"Video saved to: {output_path}")
    return output_path

# Illustrative call: the image path and prompt below are placeholders, not
# files shipped with this repository. Point image_path at the first frame
# of a driving clip.
generate_video_from_image(
    image_path="dashcam_frame.png",
    prompt="A dashcam video of a car driving down a city street."
)
```
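
If the custom `stg_ltx_i2v_pipeline` module is not available, the LoRA can also be loaded into the stock `diffusers` `LTXImageToVideoPipeline`. The sketch below assumes a recent `diffusers` release with LTX-Video support and drops the STG-specific arguments (`stg_mode`, `stg_applied_layers_idx`, `stg_scale`, `do_rescaling`), so outputs will differ from the guided example above; the image path and prompt are placeholders.

```python
# Minimal sketch: same LoRA with the stock diffusers LTXImageToVideoPipeline
# (no SpatioTemporal Guidance). Assumes a diffusers build with LTX-Video support.
import torch
from diffusers import LTXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image

pipe = LTXImageToVideoPipeline.from_pretrained(
    "a-r-r-o-w/LTX-Video-0.9.1-diffusers", torch_dtype=torch.bfloat16
)
pipe.load_lora_weights(
    "mehmetkeremturkcan/DashcamCrashModels-LTX-I2V",
    weight_name="pytorch_lora_weights.safetensors",
    adapter_name="crashmodel"
)
pipe.set_adapters("crashmodel", 1.0)
pipe.to("cuda")

# Placeholder conditioning frame; use the first frame of a driving clip.
image = load_image("dashcam_frame.png").resize((768, 512))
video = pipe(
    image=image,
    prompt="crashmodel, A dashcam video of a car driving down a city street.",
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    width=768,
    height=512,
    num_frames=49,
    num_inference_steps=50,
    decode_timestep=0.03,
    decode_noise_scale=0.025,
).frames[0]
export_to_video(video, "output_i2v.mp4", fps=24)
```

The `crashmodel, ` prefix mirrors the default prompt prefix used in the function above.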