File size: 3,157 Bytes
c9c78e3 3776353 c9c78e3 3776353 c9c78e3 3776353 c9c78e3 9e86115 c9c78e3 9e86115 c9c78e3 bf75b65 c9c78e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
---
base_model:
- Lightricks/LTX-Video
library_name: diffusers
datasets:
- nexar-ai/nexar_collision_prediction
---
<p align="center">
<img src="https://keremturkcan.com/images/crashmodel.jpg" />
</p>
# Traffic World Models (LTX-Video, i2v)
This repository hosts a LoRA fine-tune of the LTX-Video image-to-video (i2v) diffusion model, specialized for generating realistic dashcam driving footage.
## Model Details
- **Base Model**: LTX-Video
- **Resolution**: 768×512 pixels (Adjustable)
- **Frame Length**: 49 frames per generated video (Adjustable, 121 recommended)
- **Fine-tuning Method**: Low-Rank Adaptation (LoRA)
- **Data Source**: Nexar Collision Prediction dataset videos (1,500 clips)
## Usage Example
```python
import os
import argparse
import torch
from diffusers.utils import export_to_video, load_image
from stg_ltx_i2v_pipeline import LTXImageToVideoSTGPipeline
def generate_video_from_image(
    image_path,
    prompt,
    output_dir="outputs",
    width=768,
    height=512,
    num_frames=121,
    lora_path="mehmetkeremturkcan/DashcamCrashModels-LTX-I2V",
    lora_weight=1.0,
    prefix="crashmodel, ",
    negative_prompt="worst quality, inconsistent motion, blurry, jittery, distorted",
    stg_mode="STG-A",
    stg_applied_layers_idx=None,
    stg_scale=1.0,
    do_rescaling=True,
):
    """Generate a video from a starting image and save it as an MP4.

    Loads the ``a-r-r-o-w/LTX-Video-0.9.1-diffusers`` pipeline in bfloat16,
    attaches the LoRA weights from ``lora_path``, moves the pipeline to
    ``"cuda"``, runs 50 inference steps, and exports the first returned
    frame sequence at 24 fps.

    NOTE: the pipeline and LoRA weights are loaded on every call.

    Args:
        image_path: Path (or anything ``load_image`` accepts) of the
            conditioning image. It is resized to ``(width, height)``.
        prompt: Text prompt describing the desired footage.
        output_dir: Directory the MP4 is written to; created if missing.
        width: Output width in pixels, also used to resize the input image.
        height: Output height in pixels, also used to resize the input image.
        num_frames: Number of frames requested from the pipeline.
        lora_path: Repository id or path passed to ``load_lora_weights``.
        lora_weight: Adapter weight passed to ``set_adapters``.
        prefix: String prepended to ``prompt``; skipped when empty or None.
        negative_prompt: Negative prompt forwarded to the pipeline.
        stg_mode: Forwarded unchanged to the pipeline.
        stg_applied_layers_idx: Forwarded to the pipeline; defaults to
            ``[19]`` when None.
        stg_scale: Forwarded unchanged to the pipeline.
        do_rescaling: Forwarded unchanged to the pipeline.

    Returns:
        The path of the written video, ``<output_dir>/<image stem>_i2v.mp4``.
    """
    # A fresh list per call: a list literal in the signature would be one
    # shared object that any in-place modification would leak across calls.
    if stg_applied_layers_idx is None:
        stg_applied_layers_idx = [19]

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # Load the base model.
    pipe = LTXImageToVideoSTGPipeline.from_pretrained(
        "a-r-r-o-w/LTX-Video-0.9.1-diffusers",
        torch_dtype=torch.bfloat16,
        local_files_only=False,
    )

    # Attach the LoRA weights and set their strength.
    pipe.load_lora_weights(
        lora_path,
        weight_name="pytorch_lora_weights.safetensors",
        adapter_name="crashmodel",
    )
    pipe.set_adapters("crashmodel", lora_weight)
    pipe.to("cuda")

    # Prepare the conditioning image and the final prompt.
    image = load_image(image_path).resize((width, height))
    full_prompt = prefix + prompt if prefix else prompt

    # Strip only the final extension so dotted names such as
    # "frame.0001.png" and "frame.0002.png" keep distinct output files.
    basename = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, f"{basename}_i2v.mp4")

    # Generate the video.
    print(f"Generating video with prompt: {full_prompt}")
    video = pipe(
        image=image,
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_frames=num_frames,
        num_inference_steps=50,
        decode_timestep=0.03,
        decode_noise_scale=0.025,
        generator=None,
        stg_mode=stg_mode,
        stg_applied_layers_idx=stg_applied_layers_idx,
        stg_scale=stg_scale,
        do_rescaling=do_rescaling,
    ).frames[0]

    # Export the video.
    export_to_video(video, output_path, fps=24)
    print(f"Video saved to: {output_path}")
    return output_path
# Example invocation. Put "near-miss" in the prompt for crash footage, or
# "regular driving" for regular driving footage.
generate_video_from_image(
    image_path="my_starting_image.png",
    prompt="Dashcam footage of a near-miss in a Highway environment with Snow weather and Dark lighting conditions.",
)
``` |