Qwen-Image EliGen Precise Region Control Model - E-commerce Poster
Model Introduction
This model is jointly developed and open-sourced by the DiffSynth-Studio team from ModelScope and the Taotian Experience Design team.
Built on Qwen-Image, this model is designed specifically for e-commerce poster generation and supports precise regional layout control. Implemented as a LoRA, it lets users control the position and shape of each entity in the poster by providing a text description together with a corresponding region mask for each one. The model was trained with the DiffSynth-Studio framework: starting from DiffSynth-Studio/Qwen-Image-EliGen-V2, it was further fine-tuned on poster image data, which significantly strengthens its control over poster layouts.
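As a minimal sketch of the input contract (inferred from the inference code below, which loads white-on-black mask images at the output resolution): each entity gets a short text prompt plus a mask whose white pixels mark where that entity should appear. A hypothetical single mask could be drawn like this:
from PIL import Image, ImageDraw

# Sketch only: a white-on-black region mask at the target poster resolution.
# The rectangle coordinates are arbitrary placeholders, not example data.
mask = Image.new("RGB", (1280, 784), (0, 0, 0))
ImageDraw.Draw(mask).rectangle((80, 80, 600, 360), fill=(255, 255, 255))
mask.save("entity_0_mask.png")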
Result Demonstration
Inference Code
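Install DiffSynth-Studio from source, then run the Python script below.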
git clone https://github.com/modelscope/DiffSynth-Studio.git  
cd DiffSynth-Studio
pip install -e .
from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
import torch
from PIL import Image, ImageDraw, ImageFont
from modelscope import dataset_snapshot_download, snapshot_download
import random
def visualize_masks(image, masks, mask_prompts, output_path, font_size=35, use_random_colors=False):
    # Create a blank image for overlays
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
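    # Preset semi-transparent RGBA colors; the eight base colors are repeated
    # so that up to sixteen masks can each get a distinct overlay color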
    colors = [
        (165, 238, 173, 80),
        (76, 102, 221, 80),
        (221, 160, 77, 80),
        (204, 93, 71, 80),
        (145, 187, 149, 80),
        (134, 141, 172, 80),
        (157, 137, 109, 80),
        (153, 104, 95, 80),
        (165, 238, 173, 80),
        (76, 102, 221, 80),
        (221, 160, 77, 80),
        (204, 93, 71, 80),
        (145, 187, 149, 80),
        (134, 141, 172, 80),
        (157, 137, 109, 80),
        (153, 104, 95, 80),
    ]
    # Generate random colors for each mask
    if use_random_colors:
        colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255), 80) for _ in range(len(masks))]
    # Font settings
    try:
        font = ImageFont.truetype("wqy-zenhei.ttc", font_size)  # Adjust as needed
    except IOError:
        font = ImageFont.load_default(font_size)
    # Overlay each mask onto the overlay image
    for mask, mask_prompt, color in zip(masks, mask_prompts, colors):
        # Convert mask to RGBA mode
        mask_rgba = mask.convert('RGBA')
        mask_data = mask_rgba.getdata()
        new_data = [(color if item[:3] == (255, 255, 255) else (0, 0, 0, 0)) for item in mask_data]
        mask_rgba.putdata(new_data)
        # Draw the mask prompt text on the mask
        draw = ImageDraw.Draw(mask_rgba)
        mask_bbox = mask.getbbox()  # Get the bounding box of the mask
        text_position = (mask_bbox[0] + 10, mask_bbox[1] + 10)  # Adjust text position based on mask position
        draw.text(text_position, mask_prompt, fill=(255, 255, 255, 255), font=font)
        # Alpha composite the overlay with this mask
        overlay = Image.alpha_composite(overlay, mask_rgba)
    # Composite the overlay onto the original image
    result = Image.alpha_composite(image.convert('RGBA'), overlay)
    # Save the resulting image
    result.save(output_path)
    return result
def example(pipe, seeds, example_id, global_prompt, entity_prompts, height=784, width=1280):
    dataset_snapshot_download(
        dataset_id="DiffSynth-Studio/examples_in_diffsynth",
        local_dir="./",
        allow_file_pattern=f"data/examples/eligen/poster/example_{example_id}/*.png",
    )
    masks = [
        Image.open(f"./data/examples/eligen/poster/example_{example_id}/{i}.png").convert('RGB').resize((width, height))
        for i in range(len(entity_prompts))
    ]
    negative_prompt = "grid, regular grid, blur, low resolution, low quality, distortion, malformed, incorrect anatomy, deformed hands, deformed body, deformed face, deformed hair, deformed eyes, deformed mouth"
    for seed in seeds:
        # Generate the poster with entity-level regional control
        image = pipe(
            prompt=global_prompt,
            cfg_scale=4.0,
            negative_prompt=negative_prompt,
            num_inference_steps=40,
            seed=seed,
            height=height,
            width=width,
            eligen_entity_prompts=entity_prompts,
            eligen_entity_masks=masks,
        )
        image.save(f"eligen_poster_example_{example_id}_{seed}.png")
        # Render the entity masks on a black canvas for reference
        image = Image.new("RGB", (width, height), (0, 0, 0))
        visualize_masks(image, masks, entity_prompts, f"eligen_poster_example_{example_id}_mask_{seed}.png")
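# Load the base Qwen-Image pipeline in bfloat16 on the GPU; the three
# ModelConfig entries below fetch the DiT (transformer), text encoder, and VAE weights.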
pipe = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="tokenizer/"),
)
snapshot_download(
    "DiffSynth-Studio/Qwen-Image-EliGen-Poster",
    local_dir="models/DiffSynth-Studio/Qwen-Image-EliGen-Poster",
    allow_file_pattern="model.safetensors",
)
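# Attach the downloaded EliGen-Poster LoRA weights to the pipeline's DiT.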
pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-EliGen-Poster/model.safetensors")
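# The global prompt describes the full poster; each entity prompt describes one
# masked region, in the same order as the mask images loaded in example().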
global_prompt = "A poster with a soft pink-purple background. On the left side, there is large pink-purple text reading \"Qwen-Image EliGen-Poster\", and inside a pink-purple oval frame, small white text reads: \"Image Generation Model with Precise Region Control\". On the right side, a little rabbit is unwrapping a gift, with a cartoon-style baby dragon standing beside it, wearing a mini firework launcher on its head. The background is dotted with some white clouds. The overall style is cute and cartoonish, conveying a festive and surprising theme."
entity_prompts = ["pink-purple text \"Qwen-Image EliGen-Poster\"", "small white text inside a pink-purple oval frame: \"Image Generation Model with Precise Region Control\"", "a little rabbit unwrapping a gift, with a cartoon-style baby dragon wearing a mini firework launcher standing beside it"]
seeds = [42]
example(pipe, seeds, 1, global_prompt, entity_prompts)
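The example above downloads ready-made masks from the examples dataset. To design a custom layout, any white-on-black image at the output resolution can serve as an entity mask. The following is an illustrative sketch that continues the script above (the coordinates and file names are placeholders, not part of the released example data):
# Illustrative sketch: hand-drawn region masks for a custom three-entity layout.
# White pixels mark where each entity should appear; everything else stays black.
layout = [
    (60, 60, 560, 260),     # region for the title text
    (60, 300, 560, 420),    # region for the subtitle
    (700, 160, 1220, 720),  # region for the product/mascot
]
custom_masks = []
for left, top, right, bottom in layout:
    m = Image.new("RGB", (1280, 784), (0, 0, 0))
    ImageDraw.Draw(m).rectangle((left, top, right, bottom), fill=(255, 255, 255))
    custom_masks.append(m)

# Generate with the same pipeline call as in example(), pairing each mask
# one-to-one with its entity prompt.
image = pipe(
    prompt=global_prompt,
    cfg_scale=4.0,
    negative_prompt="blur, low resolution, low quality, distortion",
    num_inference_steps=40,
    seed=42,
    height=784,
    width=1280,
    eligen_entity_prompts=entity_prompts,
    eligen_entity_masks=custom_masks,
)
image.save("eligen_poster_custom_layout.png")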
Citation
If you find our work helpful, please consider citing our paper:
@article{zhang2025eligen,
  title={EliGen: Entity-Level Controlled Image Generation with Regional Attention},
  author={Zhang, Hong and Duan, Zhongjie and Wang, Xingjun and Chen, Yingda and Zhang, Yu},
  journal={arXiv preprint arXiv:2501.01097},
  year={2025}
}