import os

import gradio as gr
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from transformers import CLIPModel, CLIPProcessor


class C2P_CLIP(nn.Module):
    def __init__(self, name='openai/clip-vit-large-patch14', num_classes=1):
        super(C2P_CLIP, self).__init__()
        self.model = CLIPModel.from_pretrained(name)
        # Drop the text tower; only the vision encoder is needed for detection
        del self.model.text_model
        del self.model.text_projection
        del self.model.logit_scale
        # Freeze the CLIP backbone; only the linear head is trainable
        self.model.vision_model.requires_grad_(False)
        self.model.visual_projection.requires_grad_(False)
        self.model.fc = nn.Linear(768, num_classes)
        torch.nn.init.normal_(self.model.fc.weight.data, 0.0, 0.02)
        # Create processor for image preprocessing
        self.processor = CLIPProcessor.from_pretrained(name)

    def encode_image(self, img):
        # Pass only pixel_values, for compatibility across transformers versions
        vision_outputs = self.model.vision_model(pixel_values=img)
        # The vision tower may return a tuple or a model-output object
        if isinstance(vision_outputs, tuple):
            pooled_output = vision_outputs[1]  # pooled_output
        else:
            pooled_output = vision_outputs.pooler_output
        image_features = self.model.visual_projection(pooled_output)
        return image_features

    def forward(self, img):
        image_embeds = self.encode_image(img)
        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
        return self.model.fc(image_embeds)


# Model checkpoint location and compute device
model_path = "model/C2P_CLIP_release_20240901.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create model cache directory if it doesn't exist
os.makedirs("model", exist_ok=True)

# Download the checkpoint if it doesn't exist
if not os.path.exists(model_path):
    print("Downloading model for the first time...")
    model_url = "https://www.now61.com/f/95OefW/C2P_CLIP_release_20240901.zip"
    try:
        state_dict = torch.hub.load_state_dict_from_url(
            model_url,
            map_location="cpu",
            progress=True,
            file_name="C2P_CLIP_release_20240901.pth",
            check_hash=False,
            model_dir="model",
        )
        # Save the state dict for future runs
        torch.save(state_dict, model_path)
    except Exception as e:
        print(f"Error downloading model: {e}")


# Initialize model
def load_model():
    print("Loading model...")
    model = C2P_CLIP(name='openai/clip-vit-large-patch14', num_classes=1)
    try:
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error loading model: {e}")
    model = model.to(device)
    model.eval()
    return model


# Global model and processor instances
model = load_model()
processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')


def analyze_image(image):
    """Process an image and return deepfake detection results."""
    if image is None:
        return None, "Please upload an image.", None
    try:
        # Ensure the input is a PIL image in RGB mode
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image).convert("RGB")
        else:
            image = image.convert("RGB")

        # Preprocess the image with the CLIP processor
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Run inference
        with torch.no_grad():
            prediction = model(inputs.pixel_values).sigmoid().item()

        # Add a colored border based on the prediction: red for fake, green for real
        border_color = (255, 0, 0) if prediction > 0.5 else (0, 255, 0)
        border_width = 10
        bordered_image = Image.new(
            'RGB',
            (image.width + 2 * border_width, image.height + 2 * border_width),
            border_color,
        )
        bordered_image.paste(image, (border_width, border_width))
        # Create result text
        if prediction > 0.5:
            result = "FAKE (AI-generated or manipulated)"
            confidence = prediction
        else:
            result = "REAL (authentic)"
            confidence = 1 - prediction

        confidence_text = f"Confidence: {confidence:.4f} ({confidence*100:.2f}%)"

        return bordered_image, result, confidence_text

    except Exception as e:
        import traceback
        error_msg = f"Error analyzing image: {str(e)}"
        traceback.print_exc()
        return image, "Error", error_msg


# Create Gradio interface
title = "C2P-CLIP Deepfake Detector"
description = """
C2P-CLIP: Deepfake Detection with Enhanced Generalization

This app uses the C2P-CLIP model to detect whether an image is real or AI-generated/manipulated.

How to use:
1. Upload an image or use one of the examples
2. The model will analyze the image and show whether it is likely real or fake
3. A colored border will be added (green = real, red = fake)

Limitations:
- The model provides a binary classification (real/fake) without a detailed explanation
- No localization of manipulated regions
- Performance may vary across different types of manipulations
"""

# Example images
examples = [
    ["examples/real1.jpg"],
    ["examples/fake1.jpg"],
]

# Create example directory if it doesn't exist
os.makedirs("examples", exist_ok=True)

interface = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Analyzed Image"),
        gr.Textbox(label="Result"),
        gr.Textbox(label="Confidence"),
    ],
    title=title,
    description=description,
    examples=examples if all(os.path.exists(ex[0]) for ex in examples) else None,
    allow_flagging="never",
    theme=gr.themes.Soft(),
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
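
# A minimal sketch of alternative launch settings, assuming the app should be reachable
# on a fixed host/port or through a temporary public link; server_name, server_port,
# and share are standard gr.Interface.launch() keyword arguments:
#
#     interface.launch(server_name="0.0.0.0", server_port=7860, share=True)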