Create README.md
Browse files
README.md
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model:
|
| 3 |
+
- Lightricks/LTX-Video
|
| 4 |
+
library_name: diffusers
|
| 5 |
+
datasets:
|
| 6 |
+
- nexar-ai/nexar_collision_prediction
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Traffic World Models (LTX-Video, i2v)
|
| 11 |
+
|
| 12 |
+
This repository hosts a fine-tuned LTX-Video image-to-video (i2v) diffusion model specialized for generating realistic driving footage.
|
| 13 |
+
|
| 14 |
+
## Model Details
|
| 15 |
+
|
| 16 |
+
- **Base Model**: LTX-Video
|
| 17 |
+
- **Resolution**: 768×512 pixels (Adjustable)
|
| 18 |
+
- **Frame Length**: 49 frames per generated video (Adjustable)
|
| 19 |
+
- **Fine-tuning Method**: Low-Rank Adaptation (LoRA)
|
| 20 |
+
- **Data Source**: Nexar Collision Prediction dataset videos (1,500 clips)
|
| 21 |
+
|
| 22 |
+
## Usage Example
|
| 23 |
+
|
| 24 |
+
```python
|
| 25 |
+
import os
|
| 26 |
+
import argparse
|
| 27 |
+
import torch
|
| 28 |
+
from diffusers.utils import export_to_video, load_image
|
| 29 |
+
from stg_ltx_i2v_pipeline import LTXImageToVideoSTGPipeline
|
| 30 |
+
|
| 31 |
+
def generate_video_from_image(
    image_path,
    prompt,
    output_dir="outputs",
    width=768,
    height=512,
    num_frames=49,
    lora_path="mehmetkeremturkcan/DashcamCrashModels-LTX-I2V",
    lora_weight=1.0,
    prefix="crashmodel, ",
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    stg_mode="STG-A",
    stg_applied_layers_idx=None,
    stg_scale=1.0,
    do_rescaling=True,
):
    """Generate a video from one conditioning image and save it as an MP4.

    Loads the ``a-r-r-o-w/LTX-Video-0.9.1-diffusers`` pipeline in bfloat16,
    attaches the LoRA weights found at ``lora_path``, moves the pipeline to
    ``"cuda"``, runs generation, and writes the result to
    ``<output_dir>/<image file stem>_i2v.mp4`` at 24 fps.

    Args:
        image_path: Image to condition on; passed to ``load_image`` and then
            resized to ``(width, height)``.
        prompt: Text prompt for the generation.
        output_dir: Directory for the output file; created if missing.
        width: Output width, also used to resize the input image.
        height: Output height, also used to resize the input image.
        num_frames: Number of frames requested from the pipeline.
        lora_path: Location of the LoRA weights; the file
            ``pytorch_lora_weights.safetensors`` is loaded from it.
        lora_weight: Weight given to the LoRA adapter via ``set_adapters``.
        prefix: Text prepended to ``prompt``; skipped when empty or ``None``.
        negative_prompt: Negative prompt forwarded to the pipeline.
        stg_mode: Forwarded unchanged to the pipeline call.
        stg_applied_layers_idx: Forwarded to the pipeline call; ``None``
            (the default) means ``[19]``.
        stg_scale: Forwarded unchanged to the pipeline call.
        do_rescaling: Forwarded unchanged to the pipeline call.

    Returns:
        The path of the written video file.
    """
    # Resolve the default here rather than in the signature so that a single
    # list object is not shared (and possibly mutated) across calls.
    if stg_applied_layers_idx is None:
        stg_applied_layers_idx = [19]

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Load the base model.
    pipe = LTXImageToVideoSTGPipeline.from_pretrained(
        "a-r-r-o-w/LTX-Video-0.9.1-diffusers",
        torch_dtype=torch.bfloat16,
        local_files_only=False,
    )

    # Apply the LoRA weights. The adapter name is only a handle shared by the
    # two calls below, so it is defined once to keep them in sync.
    # NOTE(review): the label "suturing" looks unrelated to driving footage;
    # consider renaming it.
    adapter_name = "suturing"
    pipe.load_lora_weights(
        lora_path,
        weight_name="pytorch_lora_weights.safetensors",
        adapter_name=adapter_name,
    )
    pipe.set_adapters(adapter_name, lora_weight)
    pipe.to("cuda")

    # Prepare the image and prompt.
    image = load_image(image_path).resize((width, height))
    full_prompt = prefix + prompt if prefix else prompt

    # Derive the output name from the image file name. splitext strips only
    # the final extension, so "clip.001.png" and "clip.002.png" map to
    # different outputs instead of both collapsing to "clip".
    stem = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, f"{stem}_i2v.mp4")

    # Generate the video.
    print(f"Generating video with prompt: {full_prompt}")
    video = pipe(
        image=image,
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_frames=num_frames,
        num_inference_steps=50,
        decode_timestep=0.03,
        decode_noise_scale=0.025,
        generator=None,
        stg_mode=stg_mode,
        stg_applied_layers_idx=stg_applied_layers_idx,
        stg_scale=stg_scale,
        do_rescaling=do_rescaling,
    ).frames[0]

    # Export the video.
    export_to_video(video, output_path, fps=24)
    print(f"Video saved to: {output_path}")
    return output_path
|
| 94 |
+
|
| 95 |
+
# Example invocation. The image path and prompt below are placeholders:
# point image_path at a dashcam frame and describe the driving scene to
# generate. The default prefix is prepended to the prompt automatically.
generate_video_from_image(
    image_path="path/to/dashcam_frame.png",
    prompt="A dashcam video of a car driving through a busy intersection.",
)
|
| 99 |
+
```
|