### Getting Ready

In [None]:
#!pip install datasets
#!pip uninstall -y diffusers
!git clone https://github.com/huggingface/diffusers.git
!pip install git+https://github.com/huggingface/diffusers.git
#!pip install --upgrade transformers accelerate safetensors torch torchvision

In [None]:
# Mount Google Drive so the prepared data and trained weights can be stored there.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


In [None]:
# Add the trigger word to every caption, export the dataset in "image folder"
# format, and build the training command line.

import os
import json
import shlex
from datasets import load_dataset
from accelerate.utils import write_basic_config
from huggingface_hub import create_repo, upload_folder

# --- 2. Configuration ---
# This is where you set all the important parameters for the training job.

# Model and Dataset Parameters
base_model_id = "runwayml/stable-diffusion-v1-5"
dataset_name = "iresidentevil/pepe_the_frog"  # The original dataset
text_column = "prompt"
image_column = "image"
trigger_word = "pepe_style_frog"  # The trigger word we decided on

# Training Parameters
# NOTE: the directory and repo names below still say "sdxl-turbo", but the base
# model configured above is Stable Diffusion v1.5.
output_dir = "/content/drive/MyDrive/pepe-lora-sdxl-turbo_2"  # Where the trained LoRA will be saved
resolution = 512  # Training resolution passed to the script. Higher resolutions need more VRAM.
learning_rate = 1e-4
train_batch_size = 1  # Keep this at 1 for a small dataset to see each image.
gradient_accumulation_steps = 4
max_train_steps = 500  # A good starting point for a small dataset. Adjust as needed.
checkpointing_steps = 100  # Save a checkpoint every 100 steps.

# LoRA Specific Parameters
lora_rank = 16  # Rank (dimension) of the LoRA. 16 is a good balance.

# Hugging Face Hub Parameters
hf_hub_repo_id = "your-username/pepe-lora-sdxl-turbo"  # Change to your Hub username and desired repo name
push_to_hub = True  # Set to True to automatically upload your LoRA to the Hub


# --- 3. Prepare Dataset in "Image Folder" format ---
# This section creates a local folder with images and a metadata.jsonl file,
# which is the format expected by the training script.

print("Loading original dataset...")
dataset = load_dataset(dataset_name, split="train")


image_folder_path = "/content/drive/MyDrive/pepe-data"
os.makedirs(image_folder_path, exist_ok=True)
print(f"Created directory for prepared data: {image_folder_path}")

metadata_file_path = os.path.join(image_folder_path, "metadata.jsonl")

# An explicit encoding keeps the metadata file independent of the platform default.
with open(metadata_file_path, "w", encoding="utf-8") as f:
    for i, example in enumerate(dataset):
        # Get image and caption
        image = example[image_column]
        caption = example[text_column]

        # Add the trigger word
        full_caption = f"{trigger_word} {caption}"

        # Save the image
        image_filename = f"image_{i}.png"
        image.save(os.path.join(image_folder_path, image_filename))

        # Write the metadata entry: one JSON object per line, linking the
        # image file to its caption.
        metadata_entry = {
            "file_name": image_filename,
            text_column: full_caption,
        }
        f.write(json.dumps(metadata_entry) + "\n")

print(f"Dataset prepared and saved in 'image folder' format at: {image_folder_path}")


# --- 4. Set up the Training Command ---
# This command points to the image folder prepared above.
write_basic_config()

# Each list item is exactly one argument. No quotes are embedded here; shell
# quoting is applied once, when the printable string is built below.
command = [
    "accelerate", "launch",
    "train_text_to_image_lora.py",
    f"--pretrained_model_name_or_path={base_model_id}",
    f"--train_data_dir={image_folder_path}",
    f"--caption_column={text_column}",
    f"--image_column={image_column}",
    "--dataloader_num_workers=8",
    f"--resolution={resolution}", "--center_crop", "--random_flip",
    f"--train_batch_size={train_batch_size}",
    f"--gradient_accumulation_steps={gradient_accumulation_steps}",
    f"--max_train_steps={max_train_steps}",
    f"--learning_rate={learning_rate}",
    "--lr_scheduler=constant",
    "--lr_warmup_steps=0",
    f"--output_dir={output_dir}",
    f"--rank={lora_rank}",
    f"--validation_prompt={trigger_word} a sad frog in a blue hoodie, cartoon style",
    f"--checkpointing_steps={checkpointing_steps}",
    "--checkpoints_total_limit=3",
]

if push_to_hub:
    if hf_hub_repo_id.startswith("your-username/"):
        # The default value is a placeholder; flag it before it reaches the command.
        print("WARNING: hf_hub_repo_id still contains the placeholder 'your-username'. "
              "Set it to your own Hub namespace or set push_to_hub = False.")
    command.extend(["--push_to_hub", f"--hub_model_id={hf_hub_repo_id}"])

# Quote every argument so that values containing spaces (the validation prompt,
# or a path with spaces) survive being pasted into a shell.
training_command_str = " ".join(shlex.quote(arg) for arg in command)


# --- 5. Execute the Training ---
print("\n" + "="*80)
print(" TRAINING COMMAND")
print("="*80)
print("The following command will be executed in your terminal:")
print(training_command_str)
print("\n" + "="*80)
print("To start training, copy the command above and paste it into your terminal.")
print("Make sure you are in the correct environment where the diffusers examples are located.")
print("You may need to clone the diffusers repo first: git clone https://github.com/huggingface/diffusers.git")
print("CORRECTED PATH: Then navigate to: cd diffusers/examples/text_to_image")
print("="*80)



In [None]:
import os
import sys
import datasets
import diffusers
import huggingface_hub
import requests
import torch
from dotenv import load_dotenv
from huggingface_hub import HfApi
from IPython.display import display

We'll print out the version numbers of the critical packages to help with future reproducibility.

In [None]:
# Report the interpreter and the versions of the key libraries.
print("Platform:", sys.platform)
print("Python version:", sys.version)
print("---")
# Labels are kept exactly as written (some end with a space, one does not).
for label, module in (
    ("datasets version: ", datasets),
    ("diffusers version: ", diffusers),
    ("huggingface_hub version: ", huggingface_hub),
    ("torch version:", torch),
):
    print(label, module.__version__)

Let's check if a GPU is available. If not, this notebook will take a long time to run!

In [None]:
# Use CUDA with half precision when a GPU is present; otherwise CPU with float32.
has_gpu = torch.cuda.is_available()
device = "cuda" if has_gpu else "cpu"
dtype = torch.float16 if has_gpu else torch.float32

print(f"Using {device} device with {dtype} data type.")

### Load Stable Diffusion

In [None]:

# Load the base text-to-image pipeline in the selected dtype and move it to the
# selected device.
MODEL_NAME = "runwayml/stable-diffusion-v1-5"

pipeline = diffusers.AutoPipelineForText2Image.from_pretrained(MODEL_NAME, torch_dtype=dtype)
pipeline.to(device)

pipeline_class = type(pipeline)
print(pipeline_class)

Test the base model

In [None]:
# Generate from the base model with a single prompt and display every result.
base_prompts = ["pepe the frog rolling eyes"] * 1
images = pipeline(base_prompts).images

for generated in images:
    display(generated)

In [None]:
# Create a dataset builder for the dataset and print the name it reports.
# Alternative dataset, kept for reference:
#DATASET_NAME = "worldquant-university/maya-dataset-v1"
DATASET_NAME = "iresidentevil/pepe_the_frog"
data_builder = datasets.load_dataset_builder(path=DATASET_NAME)

builder_dataset_name = data_builder.dataset_name
print(builder_dataset_name)

In [None]:
# Print the `features` entry of the builder's dataset info.
dataset_features = data_builder.info.features
print(dataset_features)

In [None]:
# Print the `splits` entry of the builder's dataset info.
dataset_splits = data_builder.info.splits
print(dataset_splits)

In [None]:
# Load the dataset by name and print the resulting object.
data = datasets.load_dataset(path=DATASET_NAME)

print(data)

In [None]:
# Preview only the first few images: slicing rows before selecting the column
# avoids materialising and printing every image in the split.
data["train"][:5]["image"]

In [None]:
# The values are PIL images, so they will be displayed
# automatically by Jupyter.
# Index the row first and the column second: this fetches only the requested
# image instead of going through the whole image column to pick one item.
data["train"][3]["image"]

In [None]:
# Look up the caption column of the training split by its column name.
train_split = data["train"]
train_split["prompt"]

### LoRA Fine-tuning

In [None]:
%cd diffusers/examples/text_to_image

!accelerate launch train_text_to_image_lora.py \
 --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \
 --train_data_dir=image_folder_path \
 --caption_column="prompt" \
 --image_column="image" \
 --resolution=512 --center_crop --random_flip \
 --train_batch_size=1 \
 --gradient_accumulation_steps=4 \
 --max_train_steps=2000 \
 --learning_rate=1e-4 \
 --lr_scheduler="cosine" \
 --lr_warmup_steps=0 \
 --output_dir=output_dir \
 --rank=16 \
 --validation_prompt="pepe_style_frog, a high-quality, detailed image of pepe the frog smiling and holding a cup of coffee at sunrise" \
 --seed=42 \
 --mixed_precision="fp16" \
 --checkpointing_steps=150

### Load LoRA Weights

In [None]:
# Load the trained LoRA weights file from the training output directory into
# the already-loaded pipeline.
lora_weight_file = "pytorch_lora_weights.safetensors"
pipeline.load_lora_weights(output_dir, weight_name=lora_weight_file)
# Clear the pipeline's safety_checker attribute (presumably disables output
# filtering; confirm against the diffusers pipeline documentation).
pipeline.safety_checker = None

In [None]:
# Generate with the LoRA-augmented pipeline using the trigger word, then
# display each result. The prompt previously misspelled "tortoise" as "tortile".
lora_prompts = ["pepe_style_frog making fun of rabbit that racing a tortoise"]
images = pipeline(lora_prompts).images

for im in images:
    display(im)