Spaces:

radames
/

UserControllableLT-Latent-Transformer

Runtime error

App Files Files Community

radames committed on May 22, 2023

Commit

ea476a5

1 Parent(s): 0585438

gradio app code

Browse files

Files changed (4) hide show

.gitignore +4 -0
interface/app.py +151 -0
interface/model_loader.py +242 -0
requirements.txt +8 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+__pycache__
+venv
+pretrained_models/
+pretrained_models.tar.gz

interface/app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import gradio as gr
+from .model_loader import Model
+from PIL import Image
+import cv2
+import io
+# models fron pretrained/latent_transformer folder
+models_files = {
+    "anime": "pretrained_models/latent_transformer/anime.pt",
+    "car": "pretrained_models/latent_transformer/car.pt",
+    "cat": "pretrained_models/latent_transformer/cat.pt",
+    "church": "pretrained_models/latent_transformer/church.pt",
+    "ffhq": "pretrained_models/latent_transformer/ffhq.pt",
+}
+models = {name: Model(path) for name, path in models_files.items()}
+def cv_to_pil(img):
+    return Image.fromarray(cv2.cvtColor(img.astype("uint8"), cv2.COLOR_BGR2RGB))
+def random_sample(model_name: str):
+    model = models[model_name]
+    img, latents = model.random_sample()
+    pil_img = cv_to_pil(img)
+    return pil_img, model_name, latents
+def zoom(dx, dy, dz, model_state, latents_state):
+    model = models[model_state]
+    dx = dx
+    dy = dy
+    dz = dz
+    sx = 100
+    sy = 100
+    stop_points = []
+    img, latents_state = model.zoom(
+        latents_state, dz, sxsy=[sx, sy], stop_points=stop_points
+    )  # dz, sxsy=[sx, sy], stop_points=stop_points)
+    pil_img = cv_to_pil(img)
+    return pil_img, latents_state
+def translate(dx, dy, dz, model_state, latents_state):
+    model = models[model_state]
+    dx = dx
+    dy = dy
+    dz = dz
+    sx = 128
+    sy = 128
+    stop_points = []
+    zi = False
+    zo = False
+    img, latents_state = model.translate(
+        latents_state,
+        [dx, dy],
+        sxsy=[sx, sy],
+        stop_points=stop_points,
+        zoom_in=zi,
+        zoom_out=zo,
+    )
+    pil_img = cv_to_pil(img)
+    return pil_img, latents_state
+def change_style(image: Image.Image, model_state, latents_state):
+    model = models[model_state]
+    img, latents_state = model.change_style(latents_state)
+    pil_img = cv_to_pil(img)
+    return pil_img, latents_state
+def reset(model_state, latents_state):
+    model = models[model_state]
+    img, latents_state = model.reset(latents_state)
+    pil_img = cv_to_pil(img)
+    return pil_img, latents_state
+with gr.Blocks() as block:
+    model_state = gr.State(value="cat")
+    latents_state = gr.State({})
+    gr.Markdown("# UserControllableLT: User controllable latent transformer")
+    gr.Markdown("## Select model")
+    with gr.Row():
+        with gr.Column():
+            model_name = gr.Dropdown(
+                choices=list(models_files.keys()),
+                label="Select Pretrained Model",
+                value="cat",
+            )
+            with gr.Row():
+                button = gr.Button("Random sample")
+                reset_btn = gr.Button("Reset")
+            dx = gr.Slider(
+                minimum=-128, maximum=128, step_size=0.1, label="dx", value=0.0
+            )
+            dy = gr.Slider(
+                minimum=-128, maximum=128, step_size=0.1, label="dy", value=0.0
+            )
+            dz = gr.Slider(
+                minimum=-128, maximum=128, step_size=0.1, label="dz", value=0.0
+            )
+            with gr.Row():
+                change_style_bt = gr.Button("Change style")
+        with gr.Column():
+            image = gr.Image(type="pil", label="")
+    button.click(
+        random_sample, inputs=[model_name], outputs=[image, model_state, latents_state]
+    )
+    reset_btn.click(
+        reset,
+        inputs=[model_state, latents_state],
+        outputs=[image, latents_state],
+    )
+    change_style_bt.click(
+        change_style,
+        inputs=[image, model_state, latents_state],
+        outputs=[image, latents_state],
+    )
+    dx.change(
+        translate,
+        inputs=[dx, dy, dz, model_state, latents_state],
+        outputs=[image, latents_state],
+        show_progress=False,
+    )
+    dy.change(
+        translate,
+        inputs=[dx, dy, dz, model_state, latents_state],
+        outputs=[image, latents_state],
+        show_progress=False,
+    )
+    dz.change(
+        zoom,
+        inputs=[dx, dy, dz, model_state, latents_state],
+        outputs=[image, latents_state],
+        show_progress=False,
+    )
+block.launch()

interface/model_loader.py ADDED Viewed

	@@ -0,0 +1,242 @@

+import os
+from argparse import Namespace
+import numpy as np
+import torch
+from models.StyleGANControler import StyleGANControler
+class Model:
+    def __init__(
+        self, checkpoint_path, truncation=0.5, use_average_code_as_input=False
+    ):
+        self.truncation = truncation
+        self.use_average_code_as_input = use_average_code_as_input
+        ckpt = torch.load(checkpoint_path, map_location="cpu")
+        opts = ckpt["opts"]
+        opts["checkpoint_path"] = checkpoint_path
+        self.opts = Namespace(**ckpt["opts"])
+        self.net = StyleGANControler(self.opts)
+        self.net.eval()
+        self.net.cuda()
+        self.target_layers = [0, 1, 2, 3, 4, 5]
+    def random_sample(self):
+        z1 = torch.randn(1, 512).to("cuda")
+        x1, w1, f1 = self.net.decoder(
+            [z1],
+            input_is_latent=False,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+            truncation=self.truncation,
+            truncation_latent=self.net.latent_avg[0],
+        )
+        w1_initial = w1.clone()
+        x1 = self.net.face_pool(x1)
+        image = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            image,
+            {
+                "w1": w1.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )  # return latent vector along with the image
+    def latents_to_tensor(self, latents):
+        w1 = latents["w1"]
+        w1_initial = latents["w1_initial"]
+        w1 = torch.tensor(w1).to("cuda")
+        w1_initial = torch.tensor(w1_initial).to("cuda")
+        x1, w1 = self.net.decoder(
+            [w1],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_feature_map=False,
+            return_latents=True,
+            truncation=self.truncation,
+            truncation_latent=self.net.latent_avg[0],
+        )
+        x1, _, f1 = self.net.decoder(
+            [w1_initial],
+            input_is_latent=False,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+            truncation=self.truncation,
+            truncation_latent=self.net.latent_avg[0],
+        )
+        return (w1, w1_initial, f1)
+    def zoom(self, latents, dz, sxsy=[0, 0], stop_points=[]):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        vec_num = abs(dz) / 5
+        dz = 100 * np.sign(dz)
+        x = torch.from_numpy(np.array([[[1.0, 0, dz]]], dtype=np.float32)).cuda()
+        f1 = torch.nn.functional.interpolate(f1, (256, 256))
+        y = f1[:, :, sxsy[1], sxsy[0]].unsqueeze(0)
+        if len(stop_points) > 0:
+            x = torch.cat(
+                [x, torch.zeros(x.shape[0], len(stop_points), x.shape[2]).cuda()], dim=1
+            )
+            tmp = []
+            for sp in stop_points:
+                tmp.append(f1[:, :, sp[1], sp[0]].unsqueeze(1))
+            y = torch.cat([y, torch.cat(tmp, dim=1)], dim=1)
+        if not self.use_average_code_as_input:
+            w_hat = self.net.encoder(
+                w1[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = w_hat
+        else:
+            w_hat = self.net.encoder(
+                self.net.latent_avg.unsqueeze(0)[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = (
+                w1.clone()[:, self.target_layers]
+                + w_hat
+                - self.net.latent_avg.unsqueeze(0)[:, self.target_layers]
+            )
+        x1, _ = self.net.decoder([w1], input_is_latent=True, randomize_noise=False)
+        x1 = self.net.face_pool(x1)
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )  # return latent vector along with the image
+    def translate(
+        self, latents, dxy, sxsy=[0, 0], stop_points=[], zoom_in=False, zoom_out=False
+    ):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        dz = -5.0 if zoom_in else 0.0
+        dz = 5.0 if zoom_out else dz
+        dxyz = np.array([dxy[0], dxy[1], dz], dtype=np.float32)
+        dxy_norm = np.linalg.norm(dxyz[:2], ord=2)
+        dxyz[:2] = dxyz[:2] / dxy_norm
+        vec_num = dxy_norm / 10
+        x = torch.from_numpy(np.array([[dxyz]], dtype=np.float32)).cuda()
+        f1 = torch.nn.functional.interpolate(f1, (256, 256))
+        y = f1[:, :, sxsy[1], sxsy[0]].unsqueeze(0)
+        if len(stop_points) > 0:
+            x = torch.cat(
+                [x, torch.zeros(x.shape[0], len(stop_points), x.shape[2]).cuda()], dim=1
+            )
+            tmp = []
+            for sp in stop_points:
+                tmp.append(f1[:, :, sp[1], sp[0]].unsqueeze(1))
+            y = torch.cat([y, torch.cat(tmp, dim=1)], dim=1)
+        if not self.use_average_code_as_input:
+            w_hat = self.net.encoder(
+                w1[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = w_hat
+        else:
+            w_hat = self.net.encoder(
+                self.net.latent_avg.unsqueeze(0)[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = (
+                w1.clone()[:, self.target_layers]
+                + w_hat
+                - self.net.latent_avg.unsqueeze(0)[:, self.target_layers]
+            )
+        x1, _ = self.net.decoder([w1], input_is_latent=True, randomize_noise=False)
+        x1 = self.net.face_pool(x1)
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )
+    def change_style(self, latents):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        z1 = torch.randn(1, 512).to("cuda")
+        x1, w2 = self.net.decoder(
+            [z1],
+            input_is_latent=False,
+            randomize_noise=False,
+            return_latents=True,
+            truncation=self.truncation,
+            truncation_latent=self.net.latent_avg[0],
+        )
+        w1[:, 6:] = w2.detach()[:, 0]
+        x1, w1_new, f1 = self.net.decoder(
+            [w1],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+        )
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1_new.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )
+    def reset(self, latents):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        x1, w1_new, f1 = self.net.decoder(
+            [w1_initial],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+        )
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1_new.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+flask
+torch
+opencv-python
+Pillow
+einops
+ninja==1.10.2
+einops==0.3.2
+gradio