Update pipeline.py
pipeline.py CHANGED (+149 -138)
@@ -1,10 +1,9 @@
 import inspect
 import re
-from typing import Callable, List, Optional, Union
 import PIL
 import numpy as np
 import torch
-
+from typing import Callable, List, Optional, Union
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 from diffusers.configuration_utils import FrozenDict
@@ -17,7 +16,8 @@ from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
-re_attention = re.compile(r"""
+re_attention = re.compile(
+    r"""
 \\\(|
 \\\)|
 \\\[|
@@ -31,7 +31,9 @@ re_attention = re.compile(r"""
 ]|
 [^\\()\[\]:]+|
 :
-""", re.X)
+""",
+    re.X,
+)
 
 
 def parse_prompt_attention(text):
@@ -84,17 +86,17 @@ def parse_prompt_attention(text):
         text = m.group(0)
         weight = m.group(1)
 
-        if text.startswith('\\'):
+        if text.startswith("\\"):
             res.append([text[1:], 1.0])
-        elif text == '(':
+        elif text == "(":
             round_brackets.append(len(res))
-        elif text == '[':
+        elif text == "[":
             square_brackets.append(len(res))
         elif weight is not None and len(round_brackets) > 0:
             multiply_range(round_brackets.pop(), float(weight))
-        elif text == ')' and len(round_brackets) > 0:
+        elif text == ")" and len(round_brackets) > 0:
             multiply_range(round_brackets.pop(), round_bracket_multiplier)
-        elif text == ']' and len(square_brackets) > 0:
+        elif text == "]" and len(square_brackets) > 0:
             multiply_range(square_brackets.pop(), square_bracket_multiplier)
         else:
             res.append([text, 1.0])
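For orientation while reading this hunk: `parse_prompt_attention` turns the bracket syntax into (text, weight) pairs. A minimal sketch, assuming the round/square bracket multipliers defined earlier in this file (1.1 and 1/1.1); exact segment boundaries may differ slightly:

```python
# Hedged sketch of parse_prompt_attention's output; the multipliers 1.1 and
# 1/1.1 are assumed from the definitions earlier in this file.
print(parse_prompt_attention("a (red) ball"))
# e.g. [['a ', 1.0], ['red', 1.1], [' ball', 1.0]]
print(parse_prompt_attention("(masterpiece:1.2), [blurry]"))
# e.g. [['masterpiece', 1.2], [', ', 1.0], ['blurry', 0.909]]  (0.909 ~ 1/1.1)
```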
@@ -120,11 +122,7 @@ def parse_prompt_attention(text):
     return res
 
 
-def get_prompts_with_weights(
-    pipe: DiffusionPipeline,
-    prompt: List[str],
-    max_length: int
-):
+def get_prompts_with_weights(pipe: DiffusionPipeline, prompt: List[str], max_length: int):
     r"""
     Tokenize a list of prompts and return its tokens with weights of each token.
 
@@ -158,9 +156,7 @@ def get_prompts_with_weights(
     return tokens, weights
 
 
-def pad_tokens_and_weights(tokens, weights, max_length, bos, eos,
-                           no_boseos_middle=True,
-                           chunk_length=77):
+def pad_tokens_and_weights(tokens, weights, max_length, bos, eos, no_boseos_middle=True, chunk_length=77):
     r"""
     Pad the tokens (with starting and ending tokens) and weights (with 1.0) to max_length.
     """
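A quick sanity check of the padding arithmetic in this function, as a sketch; the ids 49406/49407 are CLIP's usual bos/eos and everything else is illustrative:

```python
# Three content tokens padded to max_length=77: [bos] + tokens + [eos] * 73.
tokens = [[320, 1215, 2368]]
weights = [[1.0, 1.1, 1.0]]
tokens, weights = pad_tokens_and_weights(tokens, weights, max_length=77, bos=49406, eos=49407)
assert len(tokens[0]) == 77 and len(weights[0]) == 77
assert weights[0][2] == 1.1  # original per-token weights are preserved in place
```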
@@ -169,27 +165,24 @@ def pad_tokens_and_weights(tokens, weights, max_length, bos, eos,
     for i in range(len(tokens)):
         tokens[i] = [bos] + tokens[i] + [eos] * (max_length - 1 - len(tokens[i]))
         if no_boseos_middle:
-            weights[i] = [1.] + weights[i] + [1.] * (max_length - 1 - len(weights[i]))
+            weights[i] = [1.0] + weights[i] + [1.0] * (max_length - 1 - len(weights[i]))
         else:
             w = []
             if len(weights[i]) == 0:
-                w = [1.] * weights_length
+                w = [1.0] * weights_length
             else:
                 for j in range((len(weights[i]) - 1) // chunk_length + 1):
-                    w.append(1.)  # weight for starting token in this chunk
-                    w += weights[i][j * chunk_length: min(len(weights[i]), (j + 1) * chunk_length)]
-                    w.append(1.)  # weight for ending token in this chunk
-                w += [1.] * (weights_length - len(w))
+                    w.append(1.0)  # weight for starting token in this chunk
+                    w += weights[i][j * chunk_length : min(len(weights[i]), (j + 1) * chunk_length)]
+                    w.append(1.0)  # weight for ending token in this chunk
+                w += [1.0] * (weights_length - len(w))
             weights[i] = w[:]
 
     return tokens, weights
 
 
 def get_unweighted_text_embeddings(
-    pipe: DiffusionPipeline,
-    text_input: torch.Tensor,
-    chunk_length: int,
-    no_boseos_middle: Optional[bool] = True
+    pipe: DiffusionPipeline, text_input: torch.Tensor, chunk_length: int, no_boseos_middle: Optional[bool] = True
 ):
     """
     When the length of tokens is a multiple of the capacity of the text encoder,
@@ -200,7 +193,7 @@ def get_unweighted_text_embeddings(
     text_embeddings = []
     for i in range(max_embeddings_multiples):
         # extract the i-th chunk
-        text_input_chunk = text_input[:, i * (chunk_length - 2):(i + 1) * (chunk_length - 2) + 2].clone()
+        text_input_chunk = text_input[:, i * (chunk_length - 2) : (i + 1) * (chunk_length - 2) + 2].clone()
 
         # cover the head and the tail by the starting and the ending tokens
         text_input_chunk[:, 0] = text_input[0, 0]
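The re-spaced slice above is the chunking window: consecutive windows overlap by two positions, which the next two context lines overwrite with the bos/eos ids. A sketch of the arithmetic:

```python
# Window bounds for chunk_length=77 (CLIP) and three chunks.
chunk_length = 77
for i in range(3):
    lo = i * (chunk_length - 2)
    hi = (i + 1) * (chunk_length - 2) + 2
    print(lo, hi)  # (0, 77), (75, 152), (150, 227)
```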
@@ -226,14 +219,14 @@
 
 
 def get_weighted_text_embeddings(
     pipe: DiffusionPipeline,
     prompt: Union[str, List[str]],
     uncond_prompt: Optional[Union[str, List[str]]] = None,
     max_embeddings_multiples: Optional[int] = 1,
     no_boseos_middle: Optional[bool] = False,
     skip_parsing: Optional[bool] = False,
     skip_weighting: Optional[bool] = False,
-    **kwargs
+    **kwargs,
 ):
     r"""
     Prompts can be assigned with local weights using brackets. For example,
@@ -271,46 +264,64 @@ def get_weighted_text_embeddings(
             uncond_prompt = [uncond_prompt]
         uncond_tokens, uncond_weights = get_prompts_with_weights(pipe, uncond_prompt, max_length - 2)
     else:
-        prompt_tokens = [
-            token[1:-1] for token in pipe.tokenizer(prompt, max_length=max_length, truncation=True).input_ids]
-        prompt_weights = [[1.] * len(token) for token in prompt_tokens]
+        prompt_tokens = [
+            token[1:-1] for token in pipe.tokenizer(prompt, max_length=max_length, truncation=True).input_ids
+        ]
+        prompt_weights = [[1.0] * len(token) for token in prompt_tokens]
         if uncond_prompt is not None:
             if isinstance(uncond_prompt, str):
                 uncond_prompt = [uncond_prompt]
-            uncond_tokens = [
-                token[1:-1] for token in pipe.tokenizer(uncond_prompt, max_length=max_length, truncation=True).input_ids]
-            uncond_weights = [[1.] * len(token) for token in uncond_tokens]
+            uncond_tokens = [
+                token[1:-1]
+                for token in pipe.tokenizer(uncond_prompt, max_length=max_length, truncation=True).input_ids
+            ]
+            uncond_weights = [[1.0] * len(token) for token in uncond_tokens]
 
     # round up the longest length of tokens to a multiple of (model_max_length - 2)
     max_length = max([len(token) for token in prompt_tokens])
     if uncond_prompt is not None:
         max_length = max(max_length, max([len(token) for token in uncond_tokens]))
 
-    max_embeddings_multiples = min(
-        max_embeddings_multiples, (max_length - 1) // (pipe.tokenizer.model_max_length - 2) + 1)
+    max_embeddings_multiples = min(
+        max_embeddings_multiples, (max_length - 1) // (pipe.tokenizer.model_max_length - 2) + 1
+    )
     max_embeddings_multiples = max(1, max_embeddings_multiples)
     max_length = (pipe.tokenizer.model_max_length - 2) * max_embeddings_multiples + 2
 
     # pad the length of tokens and weights
     bos = pipe.tokenizer.bos_token_id
     eos = pipe.tokenizer.eos_token_id
-    prompt_tokens, prompt_weights = pad_tokens_and_weights(
-        prompt_tokens, prompt_weights, max_length, bos, eos,
-        no_boseos_middle=no_boseos_middle, chunk_length=pipe.tokenizer.model_max_length)
+    prompt_tokens, prompt_weights = pad_tokens_and_weights(
+        prompt_tokens,
+        prompt_weights,
+        max_length,
+        bos,
+        eos,
+        no_boseos_middle=no_boseos_middle,
+        chunk_length=pipe.tokenizer.model_max_length,
+    )
     prompt_tokens = torch.tensor(prompt_tokens, dtype=torch.long, device=pipe.device)
     if uncond_prompt is not None:
-        uncond_tokens, uncond_weights = pad_tokens_and_weights(
-            uncond_tokens, uncond_weights, max_length, bos, eos,
-            no_boseos_middle=no_boseos_middle, chunk_length=pipe.tokenizer.model_max_length)
+        uncond_tokens, uncond_weights = pad_tokens_and_weights(
+            uncond_tokens,
+            uncond_weights,
+            max_length,
+            bos,
+            eos,
+            no_boseos_middle=no_boseos_middle,
+            chunk_length=pipe.tokenizer.model_max_length,
+        )
     uncond_tokens = torch.tensor(uncond_tokens, dtype=torch.long, device=pipe.device)
 
     # get the embeddings
-    text_embeddings = get_unweighted_text_embeddings(
-        pipe, prompt_tokens, pipe.tokenizer.model_max_length, no_boseos_middle=no_boseos_middle)
+    text_embeddings = get_unweighted_text_embeddings(
+        pipe, prompt_tokens, pipe.tokenizer.model_max_length, no_boseos_middle=no_boseos_middle
+    )
     prompt_weights = torch.tensor(prompt_weights, dtype=text_embeddings.dtype, device=pipe.device)
     if uncond_prompt is not None:
-        uncond_embeddings = get_unweighted_text_embeddings(
-            pipe, uncond_tokens, pipe.tokenizer.model_max_length, no_boseos_middle=no_boseos_middle)
+        uncond_embeddings = get_unweighted_text_embeddings(
+            pipe, uncond_tokens, pipe.tokenizer.model_max_length, no_boseos_middle=no_boseos_middle
+        )
     uncond_weights = torch.tensor(uncond_weights, dtype=uncond_embeddings.dtype, device=pipe.device)
 
     # assign weights to the prompts and normalize in the sense of mean
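For orientation, this helper is driven from `__call__` as shown further down in the diff. A sketch, where `pipe` is a hypothetical loaded instance of this pipeline:

```python
# Sketch of the typical driving call (mirrors the call site in __call__).
text_embeddings, uncond_embeddings = get_weighted_text_embeddings(
    pipe=pipe,  # a loaded StableDiffusionLongPromptWeightingPipeline
    prompt="a (detailed:1.2) portrait, best quality",
    uncond_prompt="lowres, bad anatomy",
    max_embeddings_multiples=3,
)
```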
@@ -382,14 +393,14 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
     """
 
     def __init__(
         self,
         vae: AutoencoderKL,
         text_encoder: CLIPTextModel,
         tokenizer: CLIPTokenizer,
         unet: UNet2DConditionModel,
         scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler],
         safety_checker: StableDiffusionSafetyChecker,
-        feature_extractor: CLIPFeatureExtractor
+        feature_extractor: CLIPFeatureExtractor,
     ):
         super().__init__()
 
@@ -456,26 +467,26 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
 
     @torch.no_grad()
     def __call__(
         self,
         prompt: Union[str, List[str]],
         negative_prompt: Optional[Union[str, List[str]]] = None,
         init_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
         mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
         height: int = 512,
         width: int = 512,
         num_inference_steps: int = 50,
         guidance_scale: float = 7.5,
         strength: float = 0.8,
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
         latents: Optional[torch.FloatTensor] = None,
         max_embeddings_multiples: Optional[int] = 3,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: Optional[int] = 1,
-        **kwargs
+        **kwargs,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
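The single signature above serves all three generation modes; which path runs depends on whether `init_image` and `mask_image` are given, as the `text2img`/`img2img`/`inpaint` wrappers further down confirm. A sketch, not part of the diff, where `init` and `mask` are caller-prepared PIL images:

```python
out = pipe("a portrait")                                    # text-to-image
out = pipe("a portrait", init_image=init)                   # image-to-image
out = pipe("a portrait", init_image=init, mask_image=mask)  # inpainting
image = out.images[0]
```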
@@ -563,7 +574,7 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
 
         if (callback_steps is None) or (
-                callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+            callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
         ):
             raise ValueError(
                 f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
@@ -588,12 +599,12 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
                 " the batch size of `prompt`."
             )
 
         text_embeddings, uncond_embeddings = get_weighted_text_embeddings(
             pipe=self,
             prompt=prompt,
             uncond_prompt=negative_prompt if do_classifier_free_guidance else None,
             max_embeddings_multiples=max_embeddings_multiples,
-            **kwargs
+            **kwargs,
         )
         bs_embed, seq_len, _ = text_embeddings.shape
         text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1)
@@ -742,23 +753,23 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
         return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
 
     def text2img(
         self,
         prompt: Union[str, List[str]],
         negative_prompt: Optional[Union[str, List[str]]] = None,
         height: int = 512,
         width: int = 512,
         num_inference_steps: int = 50,
         guidance_scale: float = 7.5,
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
         latents: Optional[torch.FloatTensor] = None,
         max_embeddings_multiples: Optional[int] = 3,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: Optional[int] = 1,
-        **kwargs
+        **kwargs,
     ):
         r"""
         Function for text-to-image generation.
@@ -830,26 +841,26 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
             return_dict=return_dict,
             callback=callback,
             callback_steps=callback_steps,
-            **kwargs
+            **kwargs,
         )
 
     def img2img(
         self,
         init_image: Union[torch.FloatTensor, PIL.Image.Image],
         prompt: Union[str, List[str]],
         negative_prompt: Optional[Union[str, List[str]]] = None,
         strength: float = 0.8,
         num_inference_steps: Optional[int] = 50,
         guidance_scale: Optional[float] = 7.5,
         num_images_per_prompt: Optional[int] = 1,
         eta: Optional[float] = 0.0,
         generator: Optional[torch.Generator] = None,
         max_embeddings_multiples: Optional[int] = 3,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: Optional[int] = 1,
-        **kwargs
+        **kwargs,
     ):
         r"""
         Function for image-to-image generation.
@@ -921,27 +932,27 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
             return_dict=return_dict,
             callback=callback,
             callback_steps=callback_steps,
-            **kwargs
+            **kwargs,
         )
 
     def inpaint(
         self,
         init_image: Union[torch.FloatTensor, PIL.Image.Image],
         mask_image: Union[torch.FloatTensor, PIL.Image.Image],
         prompt: Union[str, List[str]],
         negative_prompt: Optional[Union[str, List[str]]] = None,
         strength: float = 0.8,
         num_inference_steps: Optional[int] = 50,
         guidance_scale: Optional[float] = 7.5,
         num_images_per_prompt: Optional[int] = 1,
         eta: Optional[float] = 0.0,
         generator: Optional[torch.Generator] = None,
         max_embeddings_multiples: Optional[int] = 3,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: Optional[int] = 1,
-        **kwargs
+        **kwargs,
     ):
         r"""
         Function for inpaint.
@@ -1018,5 +1029,5 @@ class StableDiffusionLongPromptWeightingPipeline(DiffusionPipeline):
             return_dict=return_dict,
             callback=callback,
             callback_steps=callback_steps,
-            **kwargs
+            **kwargs,
         )
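End-to-end usage sketch for the file this commit touches. The repo id is a placeholder, and loading through `custom_pipeline` assumes this `pipeline.py` is consumed the way diffusers loads community pipeline modules:

```python
import torch
from diffusers import DiffusionPipeline

# Hedged sketch: "your/stable-diffusion-model" is a placeholder repo id, and
# "lpw_stable_diffusion" assumes the community-pipeline loading path; a local
# path to this pipeline.py should work the same way.
pipe = DiffusionPipeline.from_pretrained(
    "your/stable-diffusion-model",
    custom_pipeline="lpw_stable_diffusion",
    torch_dtype=torch.float16,
).to("cuda")

image = pipe.text2img(
    "a (detailed:1.3) watercolor landscape, [people]",
    negative_prompt="lowres, watermark",
    max_embeddings_multiples=3,
).images[0]
image.save("out.png")
```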