import os
from functools import lru_cache

import torch
from transformers import BartTokenizer, BartForConditionalGeneration
import gradio as gr

# Single-character cleanup map; str.translate applies it in one C-level pass
# instead of eight chained str.replace calls.
_CLEAN_TABLE = str.maketrans({
    '!': '', '?': '.', '(': '', ')': '', ':': '', '/': '', '\\': '', '\n': ' ',
})


def clean_text(text: str) -> str:
    """Remove unwanted characters from the text."""
    return text.translate(_CLEAN_TABLE)


def chunk_text(article: str, max_chunk_size: int = 500) -> list[str]:
    """Split *article* into chunks of roughly ``max_chunk_size`` words.

    Sentences (split on '. ') are kept intact: a chunk is closed as soon as
    its running word count reaches ``max_chunk_size``. Sentences are rejoined
    with '. ' so the final sentence does not get a doubled period (the old
    implementation appended '. ' to every sentence, yielding e.g. "B..").

    Returns an empty list for blank input.
    """
    if not article.strip():
        return []
    chunks: list[str] = []
    current: list[str] = []
    word_count = 0
    for sentence in article.split('. '):
        current.append(sentence)
        word_count += len(sentence.split())
        if word_count >= max_chunk_size:
            chunks.append('. '.join(current).strip())
            current = []
            word_count = 0
    if current:  # flush the trailing partial chunk
        chunks.append('. '.join(current).strip())
    return chunks


@lru_cache(maxsize=1)
def _load_model() -> tuple[BartTokenizer, BartForConditionalGeneration]:
    """Load and cache the BART-large-CNN tokenizer/model once per process.

    The original code re-downloaded and re-instantiated both on every
    summarize call, which is very slow and memory-churning.
    """
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
    model.eval()  # inference mode (disables dropout)
    return tokenizer, model


def summarize_text(text: str, max_chunk_size: int = 400, max_length: int = 130) -> str:
    """Summarize the input text using a pre-trained model.

    Args:
        text: Raw input text; cleaned and chunked before summarization.
        max_chunk_size: Approximate word budget per chunk.
        max_length: Maximum token length of each chunk's summary.

    Returns:
        Per-chunk summaries joined by newlines, or a prompt message when
        *text* is empty/whitespace.
    """
    if not text.strip():  # Handle empty input
        return "Please provide some text to summarize."

    tokenizer, model = _load_model()
    summaries = []
    for chunk in chunk_text(clean_text(text), max_chunk_size):
        # NOTE: no "summarize: " prefix — that is a T5 convention;
        # facebook/bart-large-cnn was not trained with task prefixes.
        inputs = tokenizer.encode(
            chunk, return_tensors="pt", max_length=1024, truncation=True
        )
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            summary_ids = model.generate(
                inputs,
                max_length=max_length,
                min_length=30,
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True,
            )
        summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
    return "\n".join(summaries).strip()


def load_texts(file_paths: list[str]) -> list[str]:
    """Load text content from a list of file paths.

    Missing files are reported and yield an empty string so the returned
    list stays aligned with *file_paths*.
    """
    texts = []
    for path in file_paths:
        try:
            with open(path, 'r', encoding='utf-8') as file:
                texts.append(file.read())
        except FileNotFoundError:
            print(f"File not found: {path}")
            texts.append("")  # Append an empty string if file is not found
    return texts


def main():
    """Build and launch the Gradio summarization UI."""
    # Resolve example files relative to this script's directory.
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    example_paths = [
        os.path.join(ROOT_DIR, 'texts', 'sample1.txt'),
        os.path.join(ROOT_DIR, 'texts', 'sample2.txt'),
    ]
    example_texts = load_texts(example_paths)

    gr.Interface(
        title="Text Summarizer",
        fn=summarize_text,
        inputs=[
            gr.TextArea(label='Input Text', lines=3, max_lines=7,
                        placeholder="Enter text here...", max_length=5000),
            gr.Slider(50, 500, step=10, value=400, label="Max Chunk Size",
                      info="Choose between 50 and 500"),
            gr.Slider(30, 150, step=10, value=130, label="Max Length of Summary",
                      info="Choose between 30 and 150"),
        ],
        outputs=gr.Textbox(label="Summary"),
        # With three inputs, each example must be a full row of input values,
        # not a bare string; skip examples whose file failed to load.
        examples=[[text, 400, 130] for text in example_texts if text],
        theme="default",
        css=".footer{display:none !important}",
    ).launch(share=True, debug=True)


if __name__ == '__main__':
    main()