kromic
/

sd-crosswalk-augmentation

stable-diffusion

data-augmentation

crosswalk-segmentation

Model card Files Files and versions

sd-crosswalk-augmentation / generate.py

kromic's picture

Upload 5 files

296901d verified about 1 month ago

1.58 kB

	import torch
	from diffusers import StableDiffusionPipeline, UNet2DConditionModel, AutoencoderKL, DDPMScheduler
	from transformers import CLIPTextModel, CLIPImageProcessor, AutoTokenizer

	# Load the fine-tuned VAE and UNet from their local checkpoint directories.
	vae = AutoencoderKL.from_pretrained("./Model/finetuned_vae_v1_150_epoch_9")
	unet = UNet2DConditionModel.from_pretrained("./Model/finetuned_crosswalk_model_v1_150_epoch_9")

	# The scheduler configuration is read from the "scheduler" subfolder of the
	# CompVis/stable-diffusion-v1-4 repository.
	scheduler = DDPMScheduler.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="scheduler")

	# Load the CLIP tokenizer, text encoder, and image processor. All three use
	# the same checkpoint id, so it is named once to keep them in sync.
	clip_model_id = "openai/clip-vit-large-patch14"
	tokenizer = AutoTokenizer.from_pretrained(clip_model_id)
	text_encoder = CLIPTextModel.from_pretrained(clip_model_id)
	feature_extractor = CLIPImageProcessor.from_pretrained(clip_model_id)

	# Assemble the Stable Diffusion pipeline from the individual components.
	# No safety checker is attached (safety_checker=None).
	pipeline = StableDiffusionPipeline(
	    vae=vae,
	    text_encoder=text_encoder,
	    tokenizer=tokenizer,
	    unet=unet,
	    scheduler=scheduler,
	    feature_extractor=feature_extractor,
	    safety_checker=None,
	)

	# Move the pipeline to the GPU when one is available, otherwise stay on CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	print("Working with: ", device)
	pipeline.to(device)

	# Generate an image from a text prompt.
	prompt = "a crosswalk image"  # Customize your prompt here
	# Autocast follows the device selected above and is only enabled on CUDA, so
	# the CPU fallback does not request CUDA autocast on a machine without it.
	with torch.amp.autocast(device, enabled=(device == "cuda")):
	    image = pipeline(prompt, num_inference_steps=50, guidance_scale=9).images[0]

	# Resize once to 640x360, then save the result to disk and display it.
	resized_image = image.resize((640, 360))
	resized_image.save("output.png")
	resized_image.show()