import os

import gradio as gr
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from transformers import CLIPModel, CLIPProcessor


class C2P_CLIP(nn.Module):
    def __init__(self, name='openai/clip-vit-large-patch14', num_classes=1):
        super(C2P_CLIP, self).__init__()
        self.model = CLIPModel.from_pretrained(name)
        # Drop the text tower; only the vision encoder is needed for detection
        del self.model.text_model
        del self.model.text_projection
        del self.model.logit_scale
        # Freeze the CLIP backbone; only the linear head is trainable
        self.model.vision_model.requires_grad_(False)
        self.model.visual_projection.requires_grad_(False)
        self.model.fc = nn.Linear(768, num_classes)
        torch.nn.init.normal_(self.model.fc.weight.data, 0.0, 0.02)
        # Create processor for image preprocessing
        self.processor = CLIPProcessor.from_pretrained(name)

    def encode_image(self, img):
        # Pass only pixel_values, for compatibility across transformers versions
        vision_outputs = self.model.vision_model(pixel_values=img)
        # The vision tower may return a tuple or a model-output object
        if isinstance(vision_outputs, tuple):
            pooled_output = vision_outputs[1]  # pooled_output
        else:
            pooled_output = vision_outputs.pooler_output
        image_features = self.model.visual_projection(pooled_output)
        return image_features

    def forward(self, img):
        image_embeds = self.encode_image(img)
        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
        return self.model.fc(image_embeds)


# Model checkpoint location and compute device
model_path = "model/C2P_CLIP_release_20240901.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create model cache directory if it doesn't exist
os.makedirs("model", exist_ok=True)

# Download the checkpoint if it doesn't exist
if not os.path.exists(model_path):
    print("Downloading model for the first time...")
    model_url = "https://www.now61.com/f/95OefW/C2P_CLIP_release_20240901.zip"
    try:
        state_dict = torch.hub.load_state_dict_from_url(
            model_url,
            map_location="cpu",
            progress=True,
            file_name="C2P_CLIP_release_20240901.pth",
            check_hash=False,
            model_dir="model",
        )
        # Save the state dict for future runs
        torch.save(state_dict, model_path)
    except Exception as e:
        print(f"Error downloading model: {e}")


# Initialize model
def load_model():
    print("Loading model...")
    model = C2P_CLIP(name='openai/clip-vit-large-patch14', num_classes=1)
    try:
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error loading model: {e}")
    model = model.to(device)
    model.eval()
    return model


# Global model and processor instances
model = load_model()
processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')


def analyze_image(image):
    """Process an image and return deepfake detection results."""
    if image is None:
        return None, "Please upload an image.", None
    try:
        # Ensure the input is a PIL image in RGB mode
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image).convert("RGB")
        else:
            image = image.convert("RGB")

        # Preprocess the image with the CLIP processor
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Run inference
        with torch.no_grad():
            prediction = model(inputs.pixel_values).sigmoid().item()

        # Add a colored border based on the prediction: red for fake, green for real
        border_color = (255, 0, 0) if prediction > 0.5 else (0, 255, 0)
        border_width = 10
        bordered_image = Image.new(
            'RGB',
            (image.width + 2 * border_width, image.height + 2 * border_width),
            border_color,
        )
        bordered_image.paste(image, (border_width, border_width))
        # Create result text
        if prediction > 0.5:
            result = "FAKE (AI-generated or manipulated)"
            confidence = prediction
        else:
            result = "REAL (authentic)"
            confidence = 1 - prediction

        confidence_text = f"Confidence: {confidence:.4f} ({confidence*100:.2f}%)"

        return bordered_image, result, confidence_text

    except Exception as e:
        import traceback
        error_msg = f"Error analyzing image: {str(e)}"
        traceback.print_exc()
        return image, "Error", error_msg


# Create Gradio interface
title = "C2P-CLIP Deepfake Detector"
description = """
C2P-CLIP: Deepfake Detection with Enhanced Generalization

This app uses the C2P-CLIP model to detect whether an image is real or AI-generated/manipulated.

How to use:
1. Upload an image or use one of the examples
2. The model will analyze the image and show whether it is likely real or fake
3. A colored border will be added (green = real, red = fake)

Limitations:
- The model provides a binary classification (real/fake) without a detailed explanation
- No localization of manipulated regions
- Performance may vary across different types of manipulations
"""

# Example images
examples = [
    ["examples/real1.jpg"],
    ["examples/fake1.jpg"],
]

# Create example directory if it doesn't exist
os.makedirs("examples", exist_ok=True)

interface = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Analyzed Image"),
        gr.Textbox(label="Result"),
        gr.Textbox(label="Confidence"),
    ],
    title=title,
    description=description,
    examples=examples if all(os.path.exists(ex[0]) for ex in examples) else None,
    allow_flagging="never",
    theme=gr.themes.Soft(),
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
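
# A minimal sketch of alternative launch settings, assuming the app should be reachable
# on a fixed host/port or through a temporary public link; server_name, server_port,
# and share are standard gr.Interface.launch() keyword arguments:
#
#     interface.launch(server_name="0.0.0.0", server_port=7860, share=True)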