---
license: apache-2.0
---
# Qwen-Image EliGen Precise Region Control Model - E-commerce Poster

![](./title_image.png)

## Model Introduction

This model is jointly developed and open-sourced by the DiffSynth-Studio team from ModelScope and the Taotian Experience Design team. Built upon [Qwen-Image](https://www.modelscope.cn/models/Qwen/Qwen-Image), this model is specifically designed for e-commerce poster generation and supports precise regional layout control. The model uses a LoRA architecture: by providing text descriptions along with corresponding region masks, users can flexibly control the position and shape of each entity in the poster.

The model is trained using the [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) framework. It starts from [DiffSynth-Studio/Qwen-Image-EliGen-V2](https://www.modelscope.cn/models/DiffSynth-Studio/Qwen-Image-EliGen-V2) and is further fine-tuned on poster image data, which significantly enhances its ability to control poster layouts.

## Result Demonstration

|Entity Control Condition|Generated Image|
|-|-|
|![mask1](./assets/1_mask.png)|![image1](./assets/1.png)|
|![mask2](./assets/2_mask.png)|![image2](./assets/2.png)|
|![mask3](./assets/3_mask.png)|![image3](./assets/3.png)|

## Inference Code

```shell
git clone https://github.com/modelscope/DiffSynth-Studio.git
cd DiffSynth-Studio
pip install -e .
```

```python
from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
import torch
from PIL import Image, ImageDraw, ImageFont
from modelscope import dataset_snapshot_download, snapshot_download
import random


def visualize_masks(image, masks, mask_prompts, output_path, font_size=35, use_random_colors=False):
    """Overlay tinted entity masks and their prompt labels on an image, then save it.

    Args:
        image: Background PIL image; it is converted to RGBA before compositing.
        masks: PIL mask images. Pixels that are pure white (255, 255, 255) are
            treated as the entity region. Each mask must have the same size as
            ``image`` because the layers are combined with ``Image.alpha_composite``.
        mask_prompts: One text label per mask, drawn near the top-left corner of
            the mask's bounding box.
        output_path: Path the composited visualization is saved to.
        font_size: Font size used for the labels.
        use_random_colors: If True, draw a random color per mask instead of
            using the fixed palette.

    Returns:
        The composited RGBA image (also written to ``output_path``).
    """
    # Create a blank image for overlays
    overlay = Image.new('RGBA', image.size, (0, 0, 0, 0))
    palette = [
        (165, 238, 173, 80),
        (76, 102, 221, 80),
        (221, 160, 77, 80),
        (204, 93, 71, 80),
        (145, 187, 149, 80),
        (134, 141, 172, 80),
        (157, 137, 109, 80),
        (153, 104, 95, 80),
    ]
    if use_random_colors:
        # Generate random colors for each mask
        colors = [
            (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255), 80)
            for _ in range(len(masks))
        ]
    else:
        # Cycle through the palette so no mask is dropped by zip() below,
        # however many masks are passed in.
        colors = [palette[i % len(palette)] for i in range(len(masks))]

    # Font settings
    try:
        font = ImageFont.truetype("wqy-zenhei.ttc", font_size)  # Adjust as needed
    except IOError:
        try:
            font = ImageFont.load_default(font_size)
        except TypeError:
            # Older Pillow releases do not accept a size for the default font.
            font = ImageFont.load_default()

    # Overlay each mask onto the overlay image
    for mask, mask_prompt, color in zip(masks, mask_prompts, colors):
        # Convert mask to RGBA mode and tint the white region with the mask color
        mask_rgba = mask.convert('RGBA')
        mask_data = mask_rgba.getdata()
        new_data = [(color if item[:3] == (255, 255, 255) else (0, 0, 0, 0)) for item in mask_data]
        mask_rgba.putdata(new_data)

        # Draw the mask prompt text on the mask
        mask_bbox = mask.getbbox()  # Bounding box of the mask; None if the mask is empty
        if mask_bbox is not None:
            draw = ImageDraw.Draw(mask_rgba)
            text_position = (mask_bbox[0] + 10, mask_bbox[1] + 10)  # Offset the label from the box corner
            draw.text(text_position, mask_prompt, fill=(255, 255, 255, 255), font=font)

        # Alpha composite the overlay with this mask
        overlay = Image.alpha_composite(overlay, mask_rgba)

    # Composite the overlay onto the original image
    result = Image.alpha_composite(image.convert('RGBA'), overlay)

    # Save or display the resulting image
    result.save(output_path)

    return result


def example(pipe, seeds, example_id, global_prompt, entity_prompts, height=784, width=1280):
    """Download the example masks, generate one poster per seed, and save the results.

    For every seed this writes ``eligen_poster_example_{example_id}_{seed}.png``
    (the generated image) and ``eligen_poster_example_{example_id}_mask_{seed}.png``
    (the masks visualized on a black canvas) to the current directory.

    Args:
        pipe: The image pipeline; it is called with the prompts and masks and
            must return an image object that has a ``save`` method.
        seeds: Iterable of random seeds; one image is generated per seed.
        example_id: Identifier of the example folder in the downloaded dataset.
        global_prompt: Prompt describing the whole poster.
        entity_prompts: One prompt per entity; the mask for entity ``i`` is read
            from ``{i}.png`` in the example folder.
        height: Output image height; masks are resized to match.
        width: Output image width; masks are resized to match.
    """
    dataset_snapshot_download(
        dataset_id="DiffSynth-Studio/examples_in_diffsynth",
        local_dir="./",
        allow_file_pattern=f"data/examples/eligen/poster/example_{example_id}/*.png"
    )
    masks = [
        Image.open(f"./data/examples/eligen/poster/example_{example_id}/{i}.png").convert('RGB').resize((width, height))
        for i in range(len(entity_prompts))
    ]
    negative_prompt = "grid, regular grid, blur, low resolution, low quality, distortion, malformed, incorrect anatomy, deformed hands, deformed body, deformed face, deformed hair, deformed eyes, deformed mouth"
    for seed in seeds:
        # generate image
        image = pipe(
            prompt=global_prompt,
            cfg_scale=4.0,
            negative_prompt=negative_prompt,
            num_inference_steps=40,
            seed=seed,
            height=height,
            width=width,
            eligen_entity_prompts=entity_prompts,
            eligen_entity_masks=masks,
        )
        image.save(f"eligen_poster_example_{example_id}_{seed}.png")
        # Visualize the masks on a black canvas of the same size
        canvas = Image.new("RGB", (width, height), (0, 0, 0))
        visualize_masks(canvas, masks, entity_prompts, f"eligen_poster_example_{example_id}_mask_{seed}.png")


# Load the base Qwen-Image pipeline
pipe = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
        ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="tokenizer/"),
)
# Download the EliGen-Poster LoRA weights and attach them to the DiT
snapshot_download(
    "DiffSynth-Studio/Qwen-Image-EliGen-Poster",
    local_dir="models/DiffSynth-Studio/Qwen-Image-EliGen-Poster",
    allow_file_pattern="model.safetensors",
)
pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-EliGen-Poster/model.safetensors")

global_prompt = "A poster with a soft pink-purple background. On the left side, there is large pink-purple text reading \"Qwen-Image EliGen-Poster\", and inside a pink-purple oval frame, small white text reads: \"Image Generation Model with Precise Region Control\". On the right side, a little rabbit is unwrapping a gift, with a cartoon-style baby dragon standing beside it, wearing a mini firework launcher on its head. The background is dotted with some white clouds. The overall style is cute and cartoonish, conveying a festive and surprising theme."
entity_prompts = [
    "pink-purple text \"Qwen-Image EliGen-Poster\"",
    "small white text inside a pink-purple oval frame: \"Image Generation Model with Precise Region Control\"",
    "a little rabbit unwrapping a gift, with a cartoon-style baby dragon wearing a mini firework launcher standing beside it",
]
seeds = [42]
example(pipe, seeds, 1, global_prompt, entity_prompts)
```

## Citation

If you find our work helpful, please consider citing our paper:

```
@article{zhang2025eligen,
  title={Eligen: Entity-level controlled image generation with regional attention},
  author={Zhang, Hong and Duan, Zhongjie and Wang, Xingjun and Chen, Yingda and Zhang, Yu},
  journal={arXiv preprint arXiv:2501.01097},
  year={2025}
}
```