TorchTransformers-CV-SFT

Sleeping

App Files Files Community

awacke1 committed on Mar 24

Commit

83f7f1b

verified ·

1 Parent(s): 26a04a2

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -122

app.py CHANGED Viewed

@@ -69,6 +69,10 @@ if 'selected_model_type' not in st.session_state:
     st.session_state['selected_model_type'] = "Causal LM"
 if 'selected_model' not in st.session_state:
     st.session_state['selected_model'] = "None"
 @dataclass
 class ModelConfig:
@@ -219,7 +223,11 @@ async def process_ocr(image, output_file):
     status.text("Processing GOT-OCR2_0... (0s)")
     tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
     model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
-    result = model.chat(tokenizer, image, ocr_type='ocr')
     elapsed = int(time.time() - start_time)
     status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
@@ -231,7 +239,10 @@ async def process_image_gen(prompt, output_file):
     start_time = time.time()
     status = st.empty()
     status.text("Processing Image Gen... (0s)")
-    pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
     gen_image = pipeline(prompt, num_inference_steps=20).images[0]
     elapsed = int(time.time() - start_time)
     status.text(f"Image Gen completed in {elapsed}s!")
@@ -239,101 +250,6 @@ async def process_image_gen(prompt, output_file):
     update_gallery()
     return gen_image
-async def process_custom_diffusion(images, output_file, model_name):
-    start_time = time.time()
-    status = st.empty()
-    status.text(f"Training {model_name}... (0s)")
-    unet = TinyUNet()
-    diffusion = TinyDiffusion(unet)
-    diffusion.train(images)
-    gen_image = diffusion.generate()
-    upscaled_image = diffusion.upscale(gen_image, scale_factor=2)
-    elapsed = int(time.time() - start_time)
-    status.text(f"{model_name} completed in {elapsed}s!")
-    upscaled_image.save(output_file)
-    update_gallery()
-    return upscaled_image
-class TinyUNet(nn.Module):
-    def __init__(self, in_channels=3, out_channels=3):
-        super(TinyUNet, self).__init__()
-        self.down1 = nn.Conv2d(in_channels, 32, 3, padding=1)
-        self.down2 = nn.Conv2d(32, 64, 3, padding=1, stride=2)
-        self.mid = nn.Conv2d(64, 128, 3, padding=1)
-        self.up1 = nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1)
-        self.up2 = nn.Conv2d(64 + 32, 32, 3, padding=1)
-        self.out = nn.Conv2d(32, out_channels, 3, padding=1)
-        self.time_embed = nn.Linear(1, 64)
-    def forward(self, x, t):
-        t_embed = F.relu(self.time_embed(t.unsqueeze(-1)))
-        t_embed = t_embed.view(t_embed.size(0), t_embed.size(1), 1, 1)
-        x1 = F.relu(self.down1(x))
-        x2 = F.relu(self.down2(x1))
-        x_mid = F.relu(self.mid(x2)) + t_embed
-        x_up1 = F.relu(self.up1(x_mid))
-        x_up2 = F.relu(self.up2(torch.cat([x_up1, x1], dim=1)))
-        return self.out(x_up2)
-class TinyDiffusion:
-    def __init__(self, model, timesteps=100):
-        self.model = model
-        self.timesteps = timesteps
-        self.beta = torch.linspace(0.0001, 0.02, timesteps)
-        self.alpha = 1 - self.beta
-        self.alpha_cumprod = torch.cumprod(self.alpha, dim=0)
-    def train(self, images, epochs=50):
-        dataset = TinyDiffusionDataset(images)
-        dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
-        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
-        device = torch.device("cpu")
-        self.model.to(device)
-        for epoch in range(epochs):
-            total_loss = 0
-            for x in dataloader:
-                x = x.to(device)
-                t = torch.randint(0, self.timesteps, (x.size(0),), device=device).float()
-                noise = torch.randn_like(x)
-                alpha_t = self.alpha_cumprod[t.long()].view(-1, 1, 1, 1)
-                x_noisy = torch.sqrt(alpha_t) * x + torch.sqrt(1 - alpha_t) * noise
-                pred_noise = self.model(x_noisy, t)
-                loss = F.mse_loss(pred_noise, noise)
-                optimizer.zero_grad()
-                loss.backward()
-                optimizer.step()
-                total_loss += loss.item()
-            logger.info(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader):.4f}")
-        return self
-    def generate(self, size=(64, 64), steps=100):
-        device = torch.device("cpu")
-        x = torch.randn(1, 3, size[0], size[1], device=device)
-        for t in reversed(range(steps)):
-            t_tensor = torch.full((1,), t, device=device, dtype=torch.float32)
-            alpha_t = self.alpha_cumprod[t].view(-1, 1, 1, 1)
-            pred_noise = self.model(x, t_tensor)
-            x = (x - (1 - self.alpha[t]) / torch.sqrt(1 - alpha_t) * pred_noise) / torch.sqrt(self.alpha[t])
-            if t > 0:
-                x += torch.sqrt(self.beta[t]) * torch.randn_like(x)
-        x = torch.clamp(x * 255, 0, 255).byte()
-        return Image.fromarray(x.squeeze(0).permute(1, 2, 0).cpu().numpy())
-    def upscale(self, image, scale_factor=2):
-        img_tensor = torch.tensor(np.array(image.convert("RGB")).transpose(2, 0, 1), dtype=torch.float32).unsqueeze(0) / 255.0
-        upscaled = F.interpolate(img_tensor, scale_factor=scale_factor, mode='bilinear', align_corners=False)
-        upscaled = torch.clamp(upscaled * 255, 0, 255).byte()
-        return Image.fromarray(upscaled.squeeze(0).permute(1, 2, 0).cpu().numpy())
-class TinyDiffusionDataset(Dataset):
-    def __init__(self, images):
-        self.images = [torch.tensor(np.array(img.convert("RGB")).transpose(2, 0, 1), dtype=torch.float32) / 255.0 for img in images]
-    def __len__(self):
-        return len(self.images)
-    def __getitem__(self, idx):
-        return self.images[idx]
 st.title("AI Vision & SFT Titans 🚀")
 # Sidebar
@@ -365,6 +281,8 @@ with cols[1]:
             os.remove(file)
         st.session_state['asset_checkboxes'].clear()
         st.session_state['downloaded_pdfs'].clear()
         st.sidebar.success("All assets vaporized! 💨")
         st.rerun()
@@ -402,6 +320,10 @@ def update_gallery():
                         url_key = next((k for k, v in st.session_state['downloaded_pdfs'].items() if v == file), None)
                         if url_key:
                             del st.session_state['downloaded_pdfs'][url_key]
                     st.sidebar.success(f"Asset {os.path.basename(file)} vaporized! 💨")
                     st.rerun()
 update_gallery()
@@ -418,8 +340,8 @@ with history_container:
     for entry in st.session_state['history'][-gallery_size * 2:]:
         st.write(entry)
-tab1, tab2, tab3, tab4, tab5 = st.tabs([
-    "Camera Snap 📷", "Download PDFs 📥", "Build Titan 🌱", "Test OCR 🔍", "Test Image Gen 🎨"
 ])
 with tab1:
@@ -430,26 +352,36 @@ with tab1:
         cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
         if cam0_img:
             filename = generate_filename("cam0")
             with open(filename, "wb") as f:
                 f.write(cam0_img.getvalue())
             entry = f"Snapshot from Cam 0: {filename}"
             if entry not in st.session_state['history']:
                 st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 0:")] + [entry]
             st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 0: {filename}")
             update_gallery()
     with cols[1]:
         cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
         if cam1_img:
             filename = generate_filename("cam1")
             with open(filename, "wb") as f:
                 f.write(cam1_img.getvalue())
             entry = f"Snapshot from Cam 1: {filename}"
             if entry not in st.session_state['history']:
                 st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 1:")] + [entry]
             st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 1: {filename}")
             update_gallery()
 with tab2:
     st.header("Download PDFs 📥")
@@ -488,6 +420,7 @@ with tab2:
                         entry = f"Downloaded PDF: {output_path}"
                         if entry not in st.session_state['history']:
                             st.session_state['history'].append(entry)
                     else:
                         st.error(f"Failed to nab {url} 😿")
                 else:
@@ -506,33 +439,12 @@ with tab2:
                 snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar gallery.")
 with tab3:
-    st.header("Build Titan 🌱")
-    model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
-    base_model = st.selectbox("Select Tiny Model",
-        ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM" else
-        ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"])
-    model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
-    domain = st.text_input("Target Domain", "general")
-    if st.button("Download Model ⬇️"):
-        config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(name=model_name, base_model=base_model, size="small", domain=domain)
-        builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
-        builder.load_model(base_model, config)
-        builder.save_model(config.model_path)
-        st.session_state['builder'] = builder
-        st.session_state['model_loaded'] = True
-        st.session_state['selected_model_type'] = model_type
-        st.session_state['selected_model'] = config.model_path
-        entry = f"Built {model_type} model: {model_name}"
-        if entry not in st.session_state['history']:
-            st.session_state['history'].append(entry)
-        st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
-        st.rerun()
-with tab4:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
     if all_files:
@@ -597,6 +509,30 @@ with tab4:
     else:
         st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
 with tab5:
     st.header("Test Image Gen 🎨")
     all_files = get_gallery_files()

     st.session_state['selected_model_type'] = "Causal LM"
 if 'selected_model' not in st.session_state:
     st.session_state['selected_model'] = "None"
+if 'cam0_file' not in st.session_state:
+    st.session_state['cam0_file'] = None
+if 'cam1_file' not in st.session_state:
+    st.session_state['cam1_file'] = None
 @dataclass
 class ModelConfig:
     status.text("Processing GOT-OCR2_0... (0s)")
     tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
     model = AutoModel.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True, torch_dtype=torch.float32).to("cpu").eval()
+    # Save image to temporary file since GOT-OCR2_0 expects a file path
+    temp_file = f"temp_{int(time.time())}.png"
+    image.save(temp_file)
+    result = model.chat(tokenizer, temp_file, ocr_type='ocr')
+    os.remove(temp_file)  # Clean up temporary file
     elapsed = int(time.time() - start_time)
     status.text(f"GOT-OCR2_0 completed in {elapsed}s!")
     async with aiofiles.open(output_file, "w") as f:
     start_time = time.time()
     status = st.empty()
     status.text("Processing Image Gen... (0s)")
+    if st.session_state['builder'] and isinstance(st.session_state['builder'], DiffusionBuilder) and st.session_state['builder'].pipeline:
+        pipeline = st.session_state['builder'].pipeline
+    else:
+        pipeline = StableDiffusionPipeline.from_pretrained("OFA-Sys/small-stable-diffusion-v0", torch_dtype=torch.float32).to("cpu")
     gen_image = pipeline(prompt, num_inference_steps=20).images[0]
     elapsed = int(time.time() - start_time)
     status.text(f"Image Gen completed in {elapsed}s!")
     update_gallery()
     return gen_image
 st.title("AI Vision & SFT Titans 🚀")
 # Sidebar
             os.remove(file)
         st.session_state['asset_checkboxes'].clear()
         st.session_state['downloaded_pdfs'].clear()
+        st.session_state['cam0_file'] = None
+        st.session_state['cam1_file'] = None
         st.sidebar.success("All assets vaporized! 💨")
         st.rerun()
                         url_key = next((k for k, v in st.session_state['downloaded_pdfs'].items() if v == file), None)
                         if url_key:
                             del st.session_state['downloaded_pdfs'][url_key]
+                    if file == st.session_state['cam0_file']:
+                        st.session_state['cam0_file'] = None
+                    if file == st.session_state['cam1_file']:
+                        st.session_state['cam1_file'] = None
                     st.sidebar.success(f"Asset {os.path.basename(file)} vaporized! 💨")
                     st.rerun()
 update_gallery()
     for entry in st.session_state['history'][-gallery_size * 2:]:
         st.write(entry)
+tab1, tab2, tab3, tab4 = st.tabs([
+    "Camera Snap 📷", "Download PDFs 📥", "Test OCR 🔍", "Build Titan 🌱"
 ])
 with tab1:
         cam0_img = st.camera_input("Take a picture - Cam 0", key="cam0")
         if cam0_img:
             filename = generate_filename("cam0")
+            if st.session_state['cam0_file'] and os.path.exists(st.session_state['cam0_file']):
+                os.remove(st.session_state['cam0_file'])
             with open(filename, "wb") as f:
                 f.write(cam0_img.getvalue())
+            st.session_state['cam0_file'] = filename
             entry = f"Snapshot from Cam 0: {filename}"
             if entry not in st.session_state['history']:
                 st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 0:")] + [entry]
             st.image(Image.open(filename), caption="Camera 0", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 0: {filename}")
             update_gallery()
+        elif st.session_state['cam0_file'] and os.path.exists(st.session_state['cam0_file']):
+            st.image(Image.open(st.session_state['cam0_file']), caption="Camera 0", use_container_width=True)
     with cols[1]:
         cam1_img = st.camera_input("Take a picture - Cam 1", key="cam1")
         if cam1_img:
             filename = generate_filename("cam1")
+            if st.session_state['cam1_file'] and os.path.exists(st.session_state['cam1_file']):
+                os.remove(st.session_state['cam1_file'])
             with open(filename, "wb") as f:
                 f.write(cam1_img.getvalue())
+            st.session_state['cam1_file'] = filename
             entry = f"Snapshot from Cam 1: {filename}"
             if entry not in st.session_state['history']:
                 st.session_state['history'] = [e for e in st.session_state['history'] if not e.startswith("Snapshot from Cam 1:")] + [entry]
             st.image(Image.open(filename), caption="Camera 1", use_container_width=True)
             logger.info(f"Saved snapshot from Camera 1: {filename}")
             update_gallery()
+        elif st.session_state['cam1_file'] and os.path.exists(st.session_state['cam1_file']):
+            st.image(Image.open(st.session_state['cam1_file']), caption="Camera 1", use_container_width=True)
 with tab2:
     st.header("Download PDFs 📥")
                         entry = f"Downloaded PDF: {output_path}"
                         if entry not in st.session_state['history']:
                             st.session_state['history'].append(entry)
+                        st.session_state['asset_checkboxes'][output_path] = True  # Auto-check the box
                     else:
                         st.error(f"Failed to nab {url} 😿")
                 else:
                 snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
                 for snapshot in snapshots:
                     st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
+                    st.session_state['asset_checkboxes'][snapshot] = True  # Auto-check new snapshots
+            update_gallery()
         else:
             st.warning("No PDFs selected for snapshotting! Check some boxes in the sidebar gallery.")
 with tab3:
     st.header("Test OCR 🔍")
     all_files = get_gallery_files()
     if all_files:
     else:
         st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
+with tab4:
+    st.header("Build Titan 🌱")
+    model_type = st.selectbox("Model Type", ["Causal LM", "Diffusion"], key="build_type")
+    base_model = st.selectbox("Select Tiny Model",
+        ["HuggingFaceTB/SmolLM-135M", "Qwen/Qwen1.5-0.5B-Chat"] if model_type == "Causal LM" else
+        ["OFA-Sys/small-stable-diffusion-v0", "stabilityai/stable-diffusion-2-base"])
+    model_name = st.text_input("Model Name", f"tiny-titan-{int(time.time())}")
+    domain = st.text_input("Target Domain", "general")
+    if st.button("Download Model ⬇️"):
+        config = (ModelConfig if model_type == "Causal LM" else DiffusionConfig)(name=model_name, base_model=base_model, size="small", domain=domain)
+        builder = ModelBuilder() if model_type == "Causal LM" else DiffusionBuilder()
+        builder.load_model(base_model, config)
+        builder.save_model(config.model_path)
+        st.session_state['builder'] = builder
+        st.session_state['model_loaded'] = True
+        st.session_state['selected_model_type'] = model_type
+        st.session_state['selected_model'] = config.model_path
+        entry = f"Built {model_type} model: {model_name}"
+        if entry not in st.session_state['history']:
+            st.session_state['history'].append(entry)
+        st.success(f"Model downloaded and saved to {config.model_path}! 🎉")
+        st.rerun()
+tab5 = st.tabs(["Test Image Gen 🎨"])[0]
 with tab5:
     st.header("Test Image Gen 🎨")
     all_files = get_gallery_files()