Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

App Files Files Community

saakshigupta committed on Apr 10

Commit

cf310a7

verified ·

1 Parent(s): af6361c

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -45

app.py CHANGED Viewed

@@ -533,7 +533,7 @@ def load_llm_model():
             )
             # Load the adapter
-            adapter_id = "saakshigupta/deepfake-explainer-1"
             model = PeftModel.from_pretrained(model, adapter_id)
             # Set to inference mode
@@ -552,50 +552,77 @@ def analyze_image_with_llm(image, gradcam_overlay, face_box, pred_label, confide
     else:
         full_prompt = f"{question}\n\nThe image has been processed with GradCAM and classified as {pred_label} with confidence {confidence:.2f}. Focus on the highlighted regions in red/yellow which show the areas the detection model found suspicious."
-    # Format the message to include both the original image and the GradCAM visualization
-    messages = [
-        {"role": "user", "content": [
-            {"type": "image", "image": image},  # Original image
-            {"type": "image", "image": gradcam_overlay},  # GradCAM overlay
-            {"type": "text", "text": full_prompt}
-        ]}
-    ]
-    # Apply chat template
-    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-    # Process with image
-    inputs = tokenizer(
-        [image, gradcam_overlay],  # Send both images
-        input_text,
-        add_special_tokens=False,
-        return_tensors="pt",
-    ).to(model.device)
-    # Fix cross-attention mask if needed
-    inputs = fix_cross_attention_mask(inputs)
-    # Generate response
-    with st.spinner("Generating detailed analysis... (this may take 15-30 seconds)"):
-        with torch.no_grad():
-            output_ids = model.generate(
-                **inputs,
-                max_new_tokens=max_tokens,
-                use_cache=True,
-                temperature=temperature,
-                top_p=0.9
-            )
-        # Decode the output
-        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        # Try to extract just the model's response (after the prompt)
-        if full_prompt in response:
-            result = response.split(full_prompt)[-1].strip()
-        else:
-            result = response
-        return result
 # Main app
 def main():
@@ -818,7 +845,7 @@ def main():
                 caption_text += f"\n\nGradCAM Analysis:\n{st.session_state.gradcam_caption}"
             # Default question with option to customize
-            default_question = f"This image has been classified as {st.session_state.current_pred_label}. Analyze the key features that led to this classification, focusing on the highlighted areas in the GradCAM visualization. Provide both a technical explanation for experts and a simple explanation for non-technical users."
             # User input for new question
             new_question = st.text_area("Ask a question about the image:", value=default_question if not st.session_state.chat_history else "", height=100)
@@ -902,5 +929,8 @@ def main():
     # Footer
     st.markdown("---")
 if __name__ == "__main__":
     main()

             )
             # Load the adapter
+            adapter_id = "saakshigupta/deepfake-explainer-2"
             model = PeftModel.from_pretrained(model, adapter_id)
             # Set to inference mode
     else:
         full_prompt = f"{question}\n\nThe image has been processed with GradCAM and classified as {pred_label} with confidence {confidence:.2f}. Focus on the highlighted regions in red/yellow which show the areas the detection model found suspicious."
+    try:
+        # Format the message to include all available images
+        message_content = [{"type": "text", "text": full_prompt}]
+        # Add original image
+        message_content.insert(0, {"type": "image", "image": image})
+        # Add GradCAM overlay
+        message_content.insert(1, {"type": "image", "image": gradcam_overlay})
+        # Add comparison image if available
+        if hasattr(st.session_state, 'comparison_image'):
+            message_content.insert(2, {"type": "image", "image": st.session_state.comparison_image})
+        messages = [{"role": "user", "content": message_content}]
+        # Apply chat template
+        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
+        # Create list of images to process
+        image_list = [image, gradcam_overlay]
+        if hasattr(st.session_state, 'comparison_image'):
+            image_list.append(st.session_state.comparison_image)
+        try:
+            # Try with multiple images first
+            inputs = tokenizer(
+                image_list,
+                input_text,
+                add_special_tokens=False,
+                return_tensors="pt",
+            ).to(model.device)
+        except Exception as e:
+            st.warning(f"Multiple image analysis encountered an issue: {str(e)}")
+            st.info("Falling back to single image analysis")
+            # Fallback to single image
+            inputs = tokenizer(
+                image,
+                input_text,
+                add_special_tokens=False,
+                return_tensors="pt",
+            ).to(model.device)
+        # Fix cross-attention mask if needed
+        inputs = fix_cross_attention_mask(inputs)
+        # Generate response
+        with st.spinner("Generating detailed analysis... (this may take 15-30 seconds)"):
+            with torch.no_grad():
+                output_ids = model.generate(
+                    **inputs,
+                    max_new_tokens=max_tokens,
+                    use_cache=True,
+                    temperature=temperature,
+                    top_p=0.9
+                )
+            # Decode the output
+            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            # Try to extract just the model's response (after the prompt)
+            if full_prompt in response:
+                result = response.split(full_prompt)[-1].strip()
+            else:
+                result = response
+            return result
+    except Exception as e:
+        st.error(f"Error during LLM analysis: {str(e)}")
+        return f"Error analyzing image: {str(e)}"
 # Main app
 def main():
                 caption_text += f"\n\nGradCAM Analysis:\n{st.session_state.gradcam_caption}"
             # Default question with option to customize
+            default_question = f"This image has been classified as {{pred_label}}. Analyze all the provided images (original, GradCAM visualization, and comparison) to determine if this is a deepfake. Focus on highlighted areas in the GradCAM visualization. Provide both a technical explanation for experts and a simple explanation for non-technical users."
             # User input for new question
             new_question = st.text_area("Ask a question about the image:", value=default_question if not st.session_state.chat_history else "", height=100)
     # Footer
     st.markdown("---")
+    # Add model version indicator in sidebar
+    st.sidebar.info("Using deepfake-explainer-2 model")
 if __name__ == "__main__":
     main()