Spaces:

reagvis
/

clip-deepfakedetector

Sleeping

App Files Files Community

reagvis committed on Aug 14

Commit

b024686

verified ·

1 Parent(s): f405593

Create app.py

Browse files

Files changed (1) hide show

app.py +181 -0

app.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import torch
+import torch.nn as nn
+from PIL import Image
+import gradio as gr
+import os
+import numpy as np
+from transformers import CLIPModel, CLIPProcessor
+class C2P_CLIP(nn.Module):
+    """CLIP image encoder with a linear classification head.
+
+    Loads a pretrained ``CLIPModel``, removes its text-side components,
+    freezes the vision tower and the visual projection, and attaches a linear
+    layer (``self.model.fc``) that is applied to the L2-normalised image
+    embedding. The head is the only part that is not frozen here.
+
+    Args:
+        name: Checkpoint identifier passed to ``from_pretrained`` for both the
+            model and the processor.
+        num_classes: Number of output values produced by the linear head.
+    """
+
+    def __init__(self, name='openai/clip-vit-large-patch14', num_classes=1):
+        super().__init__()
+        self.model = CLIPModel.from_pretrained(name)
+        # Only the image branch is used, so drop the text-side parameters.
+        del self.model.text_model
+        del self.model.text_projection
+        del self.model.logit_scale
+        self.model.vision_model.requires_grad_(False)
+        self.model.visual_projection.requires_grad_(False)
+        # Size the head from the projection layer that feeds it instead of a
+        # hard-coded 768, so checkpoints with another projection width work.
+        # For the default ViT-L/14 checkpoint this is still 768.
+        embed_dim = self.model.visual_projection.out_features
+        self.model.fc = nn.Linear(embed_dim, num_classes)
+        torch.nn.init.normal_(self.model.fc.weight.data, 0.0, 0.02)
+        # Create processor for image preprocessing
+        self.processor = CLIPProcessor.from_pretrained(name)
+
+    def encode_image(self, img):
+        """Return the projected image features for ``img``.
+
+        Args:
+            img: Tensor forwarded to the vision model as ``pixel_values``
+                (presumably a preprocessed image batch -- confirm with caller).
+
+        Returns:
+            The output of ``visual_projection`` applied to the pooled vision
+            output.
+        """
+        vision_outputs = self.model.vision_model(pixel_values=img)
+        # The vision model may hand back a plain tuple or an output object;
+        # the pooled representation sits at index 1 of the tuple form.
+        if isinstance(vision_outputs, tuple):
+            pooled_output = vision_outputs[1]
+        else:
+            pooled_output = vision_outputs.pooler_output
+        return self.model.visual_projection(pooled_output)
+
+    def forward(self, img):
+        """Return the head's raw outputs for ``img`` (no sigmoid applied)."""
+        image_embeds = self.encode_image(img)
+        # L2-normalise along the feature axis before classification.
+        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
+        return self.model.fc(image_embeds)
+# Checkpoint location and compute device shared by the rest of the module
+model_path = "model/C2P_CLIP_release_20240901.pth"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+# Make sure the local checkpoint folder is present
+os.makedirs("model", exist_ok=True)
+# First run only: fetch the checkpoint and store it at model_path
+if not os.path.exists(model_path):
+    print("Downloading model for the first time...")
+    model_url = "https://www.now61.com/f/95OefW/C2P_CLIP_release_20240901.zip"
+    try:
+        state_dict = torch.hub.load_state_dict_from_url(
+            model_url,
+            model_dir="model",
+            map_location="cpu",
+            progress=True,
+            check_hash=False,
+            file_name="C2P_CLIP_release_20240901.pth",
+        )
+        # Persist the loaded weights so later starts skip the download
+        torch.save(state_dict, model_path)
+    except Exception as err:
+        # Best effort: report the failure and let start-up continue
+        print(f"Error downloading model: {err}")
+# Initialize model
+def load_model():
+    """Build the detector, load the checkpoint from ``model_path`` and return it.
+
+    The checkpoint is loaded non-strictly. Any keys that did not line up are
+    printed so that a partially loaded model is visible in the logs. If the
+    checkpoint cannot be read, the error is printed and the model is still
+    returned, with a warning that its classification head is untrained.
+
+    Returns:
+        The ``C2P_CLIP`` instance, moved to ``device`` and set to eval mode.
+    """
+    print("Loading model...")
+    model = C2P_CLIP(name='openai/clip-vit-large-patch14', num_classes=1)
+    try:
+        # NOTE(review): torch.load unpickles the file, and this checkpoint is
+        # downloaded from an external URL -- confirm the source is trusted.
+        state_dict = torch.load(model_path, map_location=device)
+        load_result = model.load_state_dict(state_dict, strict=False)
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        # Keep the app running, but make the consequence explicit.
+        print("WARNING: checkpoint not loaded; the classification head is "
+              "randomly initialised and predictions are not meaningful.")
+    else:
+        # strict=False hides mismatches, so surface them explicitly.
+        if load_result.missing_keys:
+            print(f"Warning: {len(load_result.missing_keys)} missing key(s) "
+                  f"in checkpoint: {load_result.missing_keys}")
+        if load_result.unexpected_keys:
+            print(f"Warning: {len(load_result.unexpected_keys)} unexpected key(s) "
+                  f"in checkpoint: {load_result.unexpected_keys}")
+        print("Model loaded successfully!")
+    model = model.to(device)
+    model.eval()
+    return model
+# Global model instance
+model = load_model()
+# Reuse the processor that C2P_CLIP already loaded for the same checkpoint
+# instead of running CLIPProcessor.from_pretrained a second time.
+processor = model.processor
+def analyze_image(image):
+    """Classify an image as real or fake and build the three UI outputs.
+
+    Args:
+        image: A PIL image or a NumPy array; ``None`` when nothing was uploaded.
+
+    Returns:
+        A tuple ``(image_with_border, verdict_text, confidence_text)``. When no
+        image is given the first and last items are ``None``. On failure the
+        input image, ``"Error"`` and an error message are returned.
+    """
+    if image is None:
+        return None, "Please upload an image.", None
+    try:
+        # Normalise the input to an RGB PIL image (arrays are wrapped first)
+        pil_source = Image.fromarray(image) if isinstance(image, np.ndarray) else image
+        image = pil_source.convert("RGB")
+        # Preprocess and score without tracking gradients
+        batch = processor(images=image, return_tensors="pt").to(device)
+        with torch.no_grad():
+            logits = model(batch.pixel_values)
+            prediction = logits.sigmoid().item()
+        # Scores above 0.5 are treated as fake
+        is_fake = prediction > 0.5
+        # Frame the picture: red border for fake, green for real
+        border_width = 10
+        border_color = (255, 0, 0) if is_fake else (0, 255, 0)
+        canvas_size = (image.width + 2*border_width, image.height + 2*border_width)
+        bordered_image = Image.new('RGB', canvas_size, border_color)
+        bordered_image.paste(image, (border_width, border_width))
+        # Verdict text plus the confidence of the chosen label
+        if is_fake:
+            result, confidence = "FAKE (AI-generated or manipulated)", prediction
+        else:
+            result, confidence = "REAL (authentic)", 1 - prediction
+        confidence_text = f"Confidence: {confidence:.4f} ({confidence*100:.2f}%)"
+        return bordered_image, result, confidence_text
+    except Exception as exc:
+        import traceback
+        error_msg = f"Error analyzing image: {str(exc)}"
+        traceback.print_exc()
+        return image, "Error", error_msg
+# Texts shown at the top of the Gradio page
+title = "C2P-CLIP Deepfake Detector"
+description = """
+<p style='text-align: center'>
+<b>C2P-CLIP: Deepfake Detection with Enhanced Generalization</b>
+</p>
+This app uses the C2P-CLIP model to detect if an image is real or AI-generated/manipulated.
+<b>How to use:</b>
+1. Upload an image or use one of the examples
+2. The model will analyze and show if it's likely real or fake
+3. A colored border will be added (green = real, red = fake)
+<b>Limitations:</b>
+- The model provides a binary classification (real/fake) without detailed explanation
+- No localization of manipulated regions
+- Performance may vary across different types of manipulations
+"""
+# Sample inputs offered in the UI, one single-item row per image
+examples = [["examples/real1.jpg"], ["examples/fake1.jpg"]]
+# Make sure the examples folder is present
+os.makedirs("examples", exist_ok=True)
+# Wire analyze_image to one image input and three outputs
+interface = gr.Interface(
+    fn=analyze_image,
+    title=title,
+    description=description,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=[
+        gr.Image(type="pil", label="Analyzed Image"),
+        gr.Textbox(label="Result"),
+        gr.Textbox(label="Confidence"),
+    ],
+    # Offer the examples only when every sample file is on disk
+    examples=None if any(not os.path.exists(row[0]) for row in examples) else examples,
+    allow_flagging="never",
+    theme=gr.themes.Soft(),
+)
+# Start the web app when run as a script
+if __name__ == "__main__":
+    interface.launch()