import os
from functools import lru_cache

import torch
from transformers import BartTokenizer, BartForConditionalGeneration
import gradio as gr

# Single-character cleanup map; str.translate applies it in one C-level pass
# instead of eight chained str.replace calls.
_CLEAN_TABLE = str.maketrans({
    '!': '', '?': '.', '(': '', ')': '', ':': '', '/': '', '\\': '', '\n': ' ',
})


def clean_text(text: str) -> str:
    """Remove unwanted characters from the text."""
    return text.translate(_CLEAN_TABLE)


def chunk_text(article: str, max_chunk_size: int = 500) -> list[str]:
    """Split *article* into chunks of roughly ``max_chunk_size`` words.

    Sentences (split on '. ') are kept intact: a chunk is closed as soon as
    its running word count reaches ``max_chunk_size``. Sentences are rejoined
    with '. ' so the final sentence does not get a doubled period (the old
    implementation appended '. ' to every sentence, yielding e.g. "B..").

    Returns an empty list for blank input.
    """
    if not article.strip():
        return []
    chunks: list[str] = []
    current: list[str] = []
    word_count = 0
    for sentence in article.split('. '):
        current.append(sentence)
        word_count += len(sentence.split())
        if word_count >= max_chunk_size:
            chunks.append('. '.join(current).strip())
            current = []
            word_count = 0
    if current:  # flush the trailing partial chunk
        chunks.append('. '.join(current).strip())
    return chunks


@lru_cache(maxsize=1)
def _load_model() -> tuple[BartTokenizer, BartForConditionalGeneration]:
    """Load and cache the BART-large-CNN tokenizer/model once per process.

    The original code re-downloaded and re-instantiated both on every
    summarize call, which is very slow and memory-churning.
    """
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
    model.eval()  # inference mode (disables dropout)
    return tokenizer, model


def summarize_text(text: str, max_chunk_size: int = 400, max_length: int = 130) -> str:
    """Summarize the input text using a pre-trained model.

    Args:
        text: Raw input text; cleaned and chunked before summarization.
        max_chunk_size: Approximate word budget per chunk.
        max_length: Maximum token length of each chunk's summary.

    Returns:
        Per-chunk summaries joined by newlines, or a prompt message when
        *text* is empty/whitespace.
    """
    if not text.strip():  # Handle empty input
        return "Please provide some text to summarize."

    tokenizer, model = _load_model()
    summaries = []
    for chunk in chunk_text(clean_text(text), max_chunk_size):
        # NOTE: no "summarize: " prefix — that is a T5 convention;
        # facebook/bart-large-cnn was not trained with task prefixes.
        inputs = tokenizer.encode(
            chunk, return_tensors="pt", max_length=1024, truncation=True
        )
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            summary_ids = model.generate(
                inputs,
                max_length=max_length,
                min_length=30,
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True,
            )
        summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
    return "\n".join(summaries).strip()


def load_texts(file_paths: list[str]) -> list[str]:
    """Load text content from a list of file paths.

    Missing files are reported and yield an empty string so the returned
    list stays aligned with *file_paths*.
    """
    texts = []
    for path in file_paths:
        try:
            with open(path, 'r', encoding='utf-8') as file:
                texts.append(file.read())
        except FileNotFoundError:
            print(f"File not found: {path}")
            texts.append("")  # Append an empty string if file is not found
    return texts


def main():
    """Build and launch the Gradio summarization UI."""
    # Resolve example files relative to this script's directory.
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    example_paths = [
        os.path.join(ROOT_DIR, 'texts', 'sample1.txt'),
        os.path.join(ROOT_DIR, 'texts', 'sample2.txt'),
    ]
    example_texts = load_texts(example_paths)

    gr.Interface(
        title="Text Summarizer",
        fn=summarize_text,
        inputs=[
            gr.TextArea(label='Input Text', lines=3, max_lines=7,
                        placeholder="Enter text here...", max_length=5000),
            gr.Slider(50, 500, step=10, value=400, label="Max Chunk Size",
                      info="Choose between 50 and 500"),
            gr.Slider(30, 150, step=10, value=130, label="Max Length of Summary",
                      info="Choose between 30 and 150"),
        ],
        outputs=gr.Textbox(label="Summary"),
        # With three inputs, each example must be a full row of input values,
        # not a bare string; skip examples whose file failed to load.
        examples=[[text, 400, 130] for text in example_texts if text],
        theme="default",
        css=".footer{display:none !important}",
    ).launch(share=True, debug=True)


if __name__ == '__main__':
    main()