kromic's picture
Upload 5 files
296901d verified
raw
history blame
1.58 kB
import torch
from diffusers import StableDiffusionPipeline, UNet2DConditionModel, AutoencoderKL, DDPMScheduler
from transformers import CLIPTextModel, CLIPImageProcessor, AutoTokenizer
# Checkpoint locations, named once so each loader below reads from one place.
FINETUNED_VAE_DIR = "./Model/finetuned_vae_v1_150_epoch_9"
FINETUNED_UNET_DIR = "./Model/finetuned_crosswalk_model_v1_150_epoch_9"
SCHEDULER_REPO = "CompVis/stable-diffusion-v1-4"
CLIP_REPO = "openai/clip-vit-large-patch14"

# Fine-tuned weights come from local directories; the scheduler configuration
# is read from the "scheduler" subfolder of the Stable Diffusion v1-4 repo.
vae = AutoencoderKL.from_pretrained(FINETUNED_VAE_DIR)
unet = UNet2DConditionModel.from_pretrained(FINETUNED_UNET_DIR)
scheduler = DDPMScheduler.from_pretrained(SCHEDULER_REPO, subfolder="scheduler")

# Tokenizer, text encoder and image processor are all loaded from the same
# CLIP checkpoint.
tokenizer = AutoTokenizer.from_pretrained(CLIP_REPO)
text_encoder = CLIPTextModel.from_pretrained(CLIP_REPO)
feature_extractor = CLIPImageProcessor.from_pretrained(CLIP_REPO)
# Assemble the Stable Diffusion pipeline from the components loaded above.
# No safety checker is attached (safety_checker=None).
pipeline_components = {
    "vae": vae,
    "text_encoder": text_encoder,
    "tokenizer": tokenizer,
    "unet": unet,
    "scheduler": scheduler,
    "feature_extractor": feature_extractor,
    "safety_checker": None,
}
pipeline = StableDiffusionPipeline(**pipeline_components)
# Pick the GPU when CUDA is available, otherwise fall back to the CPU,
# report the choice, and move the pipeline onto that device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Working with: ", device)
pipeline.to(device)
# Generate an image from a text prompt
prompt = "a crosswalk image"  # Customize your prompt here

# Autocast follows the device selected earlier instead of being hard-coded to
# CUDA: mixed precision is enabled only on the GPU, and on the CPU fallback
# the context is an explicit no-op.
with torch.amp.autocast(device_type=device, enabled=(device == "cuda")):
    image = pipeline(prompt, num_inference_steps=50, guidance_scale=9).images[0]

# Resize once and reuse the result for both saving and displaying.
resized_image = image.resize((640, 360))
resized_image.save("output.png")
resized_image.show()