Instructions to use linyq/kiwi-edit-5b-reference-only-diffusers with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use linyq/kiwi-edit-5b-reference-only-diffusers with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image, export_to_video

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("linyq/kiwi-edit-5b-reference-only-diffusers", dtype=torch.bfloat16, device_map="cuda")
pipe.to("cuda")

prompt = "A man with short gray hair plays a red electric guitar."
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)

output = pipe(image=image, prompt=prompt).frames[0]
export_to_video(output, "output.mp4")
```
- Notebooks
- Google Colab
- Kaggle
import torch
import torch.nn as nn

from diffusers import ConfigMixin, ModelMixin
from diffusers.configuration_utils import register_to_config
class ConditionalEmbedder(ModelMixin, ConfigMixin):
    """
    Patchifies VAE-encoded conditions (source video or reference image)
    into the DiT hidden dimension space via a Conv3d layer.

    Args:
        in_dim: Number of input (VAE latent) channels.
        dim: DiT hidden dimension the patches are projected into.
        patch_size: (t, h, w) patch size; stride equals kernel size, so
            patches are non-overlapping. Any sequence of 3 ints is accepted.
        zero_init: If True, zero-initialize the projection so the condition
            branch contributes nothing at the start of training.
        ref_pad_first: Flag stored for use by callers (not read in this
            class); presumably controls where reference frames are padded —
            TODO confirm against the pipeline that consumes it.
    """

    # NOTE(review): register_to_config was imported but not applied in the
    # original; a ConfigMixin subclass needs it for config save/load to work.
    @register_to_config
    def __init__(
        self,
        in_dim: int = 48,
        dim: int = 3072,
        # Immutable tuple default: the original used a mutable list default
        # ([1, 2, 2]), which is a shared-state pitfall in Python.
        patch_size: tuple = (1, 2, 2),
        zero_init: bool = True,
        ref_pad_first: bool = False,
    ):
        super().__init__()
        # Stride == kernel size makes this a non-overlapping patchify op.
        kernel_size = tuple(patch_size)
        self.patch_embedding = nn.Conv3d(
            in_dim, dim, kernel_size=kernel_size, stride=kernel_size
        )
        self.ref_pad_first = ref_pad_first
        if zero_init:
            # Start as a no-op so the conditioning pathway is learned from zero.
            nn.init.zeros_(self.patch_embedding.weight)
            nn.init.zeros_(self.patch_embedding.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project a latent volume (B, in_dim, T, H, W) to patch embeddings
        of shape (B, dim, T/pt, H/ph, W/pw)."""
        return self.patch_embedding(x)