import gradio as gr
import logging
import time
import json
import csv
import io
from transformers import pipeline
from typing import Tuple, Optional, List, Dict
import traceback
from datetime import datetime
import re

# Configure the root logger to emit INFO-and-above records, then create the
# module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LinguisticTranslationApp:
    """English/Siswati translator with per-text statistics and a history log.

    Two ``translation`` pipelines are loaded on construction, one per
    direction. Each successful call to :meth:`translate_text` can be recorded
    in ``translation_history`` and later exported with
    :meth:`export_history_csv`.
    """

    # Upper bound on the number of characters accepted per translation request.
    MAX_INPUT_CHARS = 2000

    # Pipeline key -> (model id, source language code, target language code).
    _MODELS = {
        'en_to_ss': ('dsfsi/en-ss-m2m100-combo', 'en', 'ss'),
        'ss_to_en': ('dsfsi/ss-en-m2m100-combo', 'ss', 'en'),
    }

    # Direction label -> (pipeline key, source name, target name,
    # source language code, target language code).
    _DIRECTIONS = {
        'English → Siswati': ('en_to_ss', 'English', 'Siswati', 'en', 'ss'),
        'Siswati → English': ('ss_to_en', 'Siswati', 'English', 'ss', 'en'),
    }

    # Characters removed from both ends of a token before it is counted as a
    # distinct word, so that "cat." and "cat" are the same vocabulary item.
    _EDGE_PUNCTUATION = '.,;:!?"\'()[]{}“”‘’…'

    def __init__(self):
        """Create empty state and load both translation pipelines."""
        self.translators = {}
        self.translation_history = []
        self.load_models()

    def load_models(self):
        """Load one translation pipeline per direction into ``self.translators``.

        Raises:
            Exception: whatever the pipeline factory raised; it is logged and
                re-raised unchanged so the original traceback is preserved.
        """
        try:
            logger.info("Loading translation models...")
            for key, (model_id, src_lang, tgt_lang) in self._MODELS.items():
                self.translators[key] = pipeline(
                    "translation",
                    model=model_id,
                    src_lang=src_lang,
                    tgt_lang=tgt_lang,
                )
            logger.info("Models loaded successfully!")
        except Exception as e:
            logger.exception("Error loading models: %s", e)
            raise

    def analyze_text_complexity(self, text: str, lang: str) -> Dict:
        """Compute simple surface statistics for ``text``.

        Args:
            text: The text to measure.
            lang: Language code; ``'ss'`` adds the Siswati-specific counters.

        Returns:
            A dict with ``character_count``, ``word_count``,
            ``sentence_count``, ``avg_word_length``, ``avg_sentence_length``,
            ``unique_words`` and ``lexical_diversity``. For ``lang == 'ss'``
            it also has ``potential_agglutination``, ``click_consonants`` and
            ``tone_markers``. Ratios are 0 when the text has no words.
        """
        words = text.split()
        sentences = [part.strip() for part in re.split(r'[.!?]+', text) if part.strip()]
        word_count = len(words)

        # Vocabulary is case-insensitive and ignores punctuation attached to
        # the edges of a token; tokens that were only punctuation are dropped.
        vocabulary = {token.strip(self._EDGE_PUNCTUATION).lower() for token in words}
        vocabulary.discard('')
        unique_count = len(vocabulary)

        # Basic linguistic metrics
        analysis = {
            'character_count': len(text),
            'word_count': word_count,
            'sentence_count': len(sentences),
            'avg_word_length': sum(len(word) for word in words) / word_count if words else 0,
            'avg_sentence_length': word_count / len(sentences) if sentences else 0,
            'unique_words': unique_count,
            'lexical_diversity': unique_count / word_count if words else 0,
        }

        # Language-specific features
        if lang == 'ss':  # Siswati
            # Long tokens are used as a rough proxy for agglutinated forms.
            analysis['potential_agglutination'] = sum(1 for word in words if len(word) > 10)
            # Count the click letters regardless of case (e.g. sentence-initial "C").
            lowered = text.lower()
            analysis['click_consonants'] = sum(lowered.count(click) for click in ('c', 'q', 'x'))
            # Only the combining acute (U+0301) and grave (U+0300) marks are
            # counted; precomposed accented letters are not.
            analysis['tone_markers'] = text.count('\u0301') + text.count('\u0300')

        return analysis

    def translate_text(self, text: str, direction: str, save_to_history: bool = True) -> Tuple[str, str, bool, Dict]:
        """Translate ``text`` and compare source and target statistics.

        Args:
            text: Text to translate; must be non-blank and at most
                ``MAX_INPUT_CHARS`` characters.
            direction: One of the labels in ``_DIRECTIONS``.
            save_to_history: When true, append the result to
                ``translation_history``.

        Returns:
            Tuple[str, str, bool, Dict]: (translated_text, status_message,
            success, analysis). On any validation or runtime failure the
            translation is ``""``, ``success`` is ``False`` and ``analysis``
            is ``{}``; exceptions from the pipeline are logged, not raised.
        """
        if not text or not text.strip():
            return "", "⚠️ Please enter some text to translate", False, {}

        if not direction:
            return "", "⚠️ Please select a translation direction", False, {}

        # Reject unknown labels explicitly instead of silently translating in
        # the Siswati -> English direction.
        if direction not in self._DIRECTIONS:
            return "", f"⚠️ Unsupported translation direction: {direction}", False, {}

        # Input validation
        if len(text) > self.MAX_INPUT_CHARS:
            return "", f"⚠️ Text is too long. Please limit to {self.MAX_INPUT_CHARS} characters.", False, {}

        translator_key, source_lang, target_lang, source_code, target_code = self._DIRECTIONS[direction]

        try:
            # perf_counter is monotonic, so the duration cannot go negative if
            # the wall clock is adjusted mid-translation.
            start_time = time.perf_counter()
            translator = self.translators[translator_key]

            logger.info("Translating from %s to %s", source_lang, target_lang)

            source_analysis = self.analyze_text_complexity(text, source_code)

            result = translator(
                text,
                max_length=512,
                early_stopping=True,
                do_sample=False,
                num_beams=4,  # Better quality for linguistic analysis
            )
            translation = result[0]['translation_text']

            target_analysis = self.analyze_text_complexity(translation, target_code)
            processing_time = time.perf_counter() - start_time

            # One timestamp shared by the analysis and the history entry so
            # the two records of the same translation always agree.
            timestamp = datetime.now().isoformat()

            source_words = source_analysis['word_count']
            analysis = {
                'source': source_analysis,
                'target': target_analysis,
                'translation_ratio': len(translation) / len(text) if text else 0,
                'word_ratio': target_analysis['word_count'] / source_words if source_words else 0,
                'processing_time': processing_time,
                'timestamp': timestamp,
            }

            # Save to history for linguistic research
            if save_to_history:
                self.translation_history.append({
                    'source_text': text,
                    'translated_text': translation,
                    'direction': direction,
                    'source_lang': source_lang,
                    'target_lang': target_lang,
                    'analysis': analysis,
                    'timestamp': timestamp,
                })

            status_msg = f"✅ Translation completed in {processing_time:.2f}s | Word ratio: {analysis['word_ratio']:.2f} | Character ratio: {analysis['translation_ratio']:.2f}"

            logger.info("Translation completed: %.2fs", processing_time)

            return translation, status_msg, True, analysis

        except Exception as e:
            # Boundary handler: report the failure to the caller as a status
            # message and keep the traceback in the log.
            logger.exception("Translation error: %s", e)
            return "", f"❌ Translation failed: {str(e)}", False, {}

    def batch_translate(self, text_list: List[str], direction: str) -> List[Dict]:
        """Translate each non-blank item of ``text_list`` without saving history.

        Returns:
            One dict per translated item with ``index`` (1-based position in
            ``text_list``), ``source``, ``translation``, ``success`` and
            ``analysis``. Blank items are skipped.
        """
        results = []
        for position, text in enumerate(text_list, start=1):
            if not text.strip():
                continue
            translation, _status, success, analysis = self.translate_text(
                text, direction, save_to_history=False
            )
            results.append({
                'index': position,
                'source': text,
                'translation': translation,
                'success': success,
                'analysis': analysis,
            })
        return results

    def export_history_csv(self) -> Optional[str]:
        """Serialize ``translation_history`` to CSV text.

        Returns:
            The CSV document as a string (header row plus one row per history
            entry), or ``None`` when the history is empty.
        """
        if not self.translation_history:
            return None

        output = io.StringIO()
        writer = csv.writer(output)

        # Headers
        writer.writerow([
            'Timestamp', 'Source Language', 'Target Language', 'Source Text',
            'Translation', 'Source Words', 'Target Words', 'Word Ratio',
            'Source Characters', 'Target Characters', 'Character Ratio',
            'Lexical Diversity (Source)', 'Lexical Diversity (Target)',
            'Processing Time (s)',
        ])

        # Data rows
        for entry in self.translation_history:
            analysis = entry['analysis']
            source_stats = analysis['source']
            target_stats = analysis['target']
            writer.writerow([
                entry['timestamp'],
                entry['source_lang'],
                entry['target_lang'],
                entry['source_text'],
                entry['translated_text'],
                source_stats['word_count'],
                target_stats['word_count'],
                analysis['word_ratio'],
                source_stats['character_count'],
                target_stats['character_count'],
                analysis['translation_ratio'],
                source_stats['lexical_diversity'],
                target_stats['lexical_diversity'],
                analysis['processing_time'],
            ])

        return output.getvalue()

# Build the single application instance shared by every event handler below.
# The constructor calls load_models(), so both pipelines are loaded here at
# import time and any loading error propagates out of this statement.
app = LinguisticTranslationApp()

# Stylesheet passed to gr.Blocks(css=...). Classes referenced by the HTML
# built elsewhere in this file: .linguistic-header and .gradient-text (page
# header), .status-success / .status-error (translation status line),
# .comparison-grid, .metric-card and .analysis-metric (analysis panels).
# NOTE(review): #logo and .linguistic-panel are not referenced by any markup
# visible in this file — confirm whether they are still needed.
custom_css = """
#logo {
    display: block;
    margin: 0 auto 20px auto;
}

.linguistic-panel {
    background: linear-gradient(135deg, #f0f9ff 0%, #e0f2fe 100%);
    border: 1px solid #0891b2;
    border-radius: 12px;
    padding: 20px;
    margin: 10px 0;
}

.analysis-metric {
    background: white;
    padding: 10px;
    border-radius: 8px;
    margin: 5px;
    border-left: 4px solid #0891b2;
}

.status-success {
    color: #059669 !important;
    font-weight: 500;
}

.status-error {
    color: #DC2626 !important;
    font-weight: 500;
}

.gradient-text {
    background: linear-gradient(45deg, #059669, #0891b2);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
}

.linguistic-header {
    text-align: center;
    margin-bottom: 30px;
    padding: 20px;
    background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
    border-radius: 16px;
    border: 1px solid #cbd5e1;
}

.comparison-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 15px;
    margin: 15px 0;
}

.metric-card {
    background: white;
    padding: 15px;
    border-radius: 8px;
    border: 1px solid #e2e8f0;
    text-align: center;
}
"""

# Create the Gradio interface
with gr.Blocks(css=custom_css, title="Linguistic Translation Analysis Tool", theme=gr.themes.Soft()) as demo:
    
    # Header section: a single full-width column holding static HTML with the
    # page title and a short description. Styled by the .linguistic-header
    # and .gradient-text rules in custom_css.
    with gr.Row():
        with gr.Column():
            gr.HTML("""
                <div class='linguistic-header'>
                    <h1 class='gradient-text' style='font-size: 2.5em; margin-bottom: 10px;'>
                        🔬 Siswati ⇄ English Linguistic Analysis Tool
                    </h1>
                    <p style='font-size: 1.1em; color: #475569; max-width: 800px; margin: 0 auto;'>
                        Advanced translation system with comprehensive linguistic analysis for researchers, 
                        linguists, and language documentation projects. Includes morphological insights, 
                        statistical analysis, and corpus management features.
                    </p>
                </div>
            """)
    
    # Main translation interface: one row with two columns. The wider left
    # column (scale=2) holds the input and output widgets; the right column
    # (scale=1) holds the three analysis panels that translate_with_analysis
    # fills in after a translation.
    with gr.Row():
        with gr.Column(scale=2):
            # Input section
            with gr.Group():
                gr.HTML("<h3>📝 Translation Input</h3>")
                # The two labels here are the direction strings that
                # LinguisticTranslationApp.translate_text compares against.
                direction = gr.Radio(
                    choices=['English → Siswati', 'Siswati → English'], 
                    label="Translation Direction",
                    value='English → Siswati',
                    interactive=True
                )

                input_text = gr.Textbox(
                    lines=6,
                    placeholder="Enter your text here for linguistic analysis... (maximum 2000 characters)",
                    label="Source Text",
                    max_lines=12,
                    show_copy_button=True
                )

                # Rewritten by update_char_count whenever input_text changes.
                char_count = gr.HTML("Character count: 0/2000")

                with gr.Row():
                    translate_btn = gr.Button("🔄 Translate & Analyze", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary")

            # Output section
            with gr.Group():
                gr.HTML("<h3>✨ Translation Output</h3>")
                # Read-only: only event handlers write to this textbox.
                output_text = gr.Textbox(
                    label="Translation",
                    lines=6,
                    max_lines=12,
                    show_copy_button=True,
                    interactive=False
                )
                # Receives the status <div> produced by translate_with_analysis.
                status_display = gr.HTML()

        # Linguistic analysis panel
        with gr.Column(scale=1):
            with gr.Group():
                gr.HTML("<h3>📊 Linguistic Analysis</h3>")

                # Source/target metric cards; shows a placeholder hint until
                # the first translation completes.
                with gr.Accordion("📈 Text Metrics", open=True):
                    metrics_display = gr.HTML("""
                        <div style='text-align: center; color: #64748b; padding: 20px;'>
                            <em>Translate text to see linguistic analysis</em>
                        </div>
                    """)

                # Language-specific features
                with gr.Accordion("🔍 Language Features", open=False):
                    features_display = gr.HTML("")

                # Word/character ratios and processing time
                with gr.Accordion("⚖️ Translation Ratios", open=False):
                    ratios_display = gr.HTML("")
    
    # Batch processing section (collapsed by default). The left column collects
    # the input; the right column shows one result row per translated line.
    # process_batch reads the uploaded file when one is present and only falls
    # back to the pasted text otherwise.
    with gr.Accordion("📚 Batch Translation & Corpus Analysis", open=False):
        with gr.Row():
            with gr.Column():
                gr.HTML("<h4>Upload text file or enter multiple lines:</h4>")
                # type="filepath": the handler receives a path string that it
                # opens itself.
                batch_input = gr.File(
                    label="Upload .txt file",
                    file_types=[".txt"],
                    type="filepath"
                )
                batch_text = gr.Textbox(
                    lines=8,
                    placeholder="Or paste multiple lines here (one per line)...",
                    label="Batch Text Input",
                    show_copy_button=True
                )
                batch_direction = gr.Radio(
                    choices=['English → Siswati', 'Siswati → English'], 
                    label="Batch Translation Direction",
                    value='English → Siswati'
                )
                batch_btn = gr.Button("🔄 Process Batch", variant="primary")

            with gr.Column():
                # Column order matches the rows built by process_batch.
                batch_results = gr.Dataframe(
                    headers=["Index", "Source", "Translation", "Words (S→T)", "Chars (S→T)"],
                    label="Batch Results",
                    interactive=False
                )
    
    # Research tools section (collapsed by default): the translation history
    # table with refresh/export/clear buttons on the left, static reference
    # notes on the right.
    with gr.Accordion("🔬 Research & Export Tools", open=False):
        with gr.Row():
            with gr.Column():
                gr.HTML("<h4>Translation History & Export</h4>")
                # Column order matches the rows returned by get_history.
                history_display = gr.Dataframe(
                    headers=["Timestamp", "Direction", "Source", "Translation"],
                    label="Translation History",
                    interactive=False
                )

                with gr.Row():
                    refresh_history_btn = gr.Button("🔄 Refresh History")
                    export_csv_btn = gr.Button("📊 Export CSV", variant="secondary")
                    clear_history_btn = gr.Button("🗑️ Clear History", variant="stop")

                # Hidden until export_csv returns an update that makes it visible.
                csv_download = gr.File(label="Download CSV", visible=False)

            with gr.Column():
                gr.HTML("<h4>Linguistic Resources</h4>")
                gr.HTML("""
                    <div style='background: #f8fafc; padding: 20px; border-radius: 8px; border: 1px solid #e2e8f0;'>
                        <h5>📖 Siswati Language Notes:</h5>
                        <ul style='text-align: left; margin: 10px 0;'>
                            <li><strong>Script:</strong> Latin alphabet</li>
                            <li><strong>Family:</strong> Niger-Congo, Bantu</li>
                            <li><strong>Features:</strong> Agglutinative, click consonants</li>
                            <li><strong>Speakers:</strong> ~2.3 million (Eswatini, South Africa)</li>
                        </ul>
                        <h5>🔧 Research Features:</h5>
                        <ul style='text-align: left; margin: 10px 0;'>
                            <li>Morphological complexity analysis</li>
                            <li>Translation ratio tracking</li>
                            <li>Lexical diversity measurement</li>
                            <li>Batch processing for corpora</li>
                            <li>Export capabilities for further analysis</li>
                        </ul>
                    </div>
                """)
    
    # Examples for linguists: each entry is a (source text, direction) pair
    # bound to input_text and direction. No fn is given, so selecting an
    # example presumably only fills the inputs without translating — confirm
    # against the installed Gradio version.
    with gr.Accordion("💡 Linguistic Examples", open=False):
        examples = gr.Examples(
            examples=[
                ["The child is playing with traditional toys.", "English → Siswati"],
                ["Umntfwana udlala ngetinsisimane tesintu.", "Siswati → English"],
                ["Agglutination demonstrates morphological complexity in Bantu languages.", "English → Siswati"],
                ["Lolimi lune-morphology leyinkimbinkimbi.", "Siswati → English"],
                ["What are the phonological features of this language?", "English → Siswati"],
                ["Yini tinchubo te-phonology talolimi?", "Siswati → English"],
            ],
            inputs=[input_text, direction],
            label="Click examples to analyze linguistic features:"
        )
    
    # Footer: static HTML with links to the model repositories and a feedback
    # form, followed by credits and a citation reminder.
    # NOTE(review): the two repository links point at github.com, while
    # load_models() refers to the models by the ids "dsfsi/en-ss-m2m100-combo"
    # and "dsfsi/ss-en-m2m100-combo" — confirm these URLs resolve.
    with gr.Row():
        with gr.Column():
            gr.HTML("""
                <div style='text-align: center; margin-top: 40px; padding: 30px; border-top: 1px solid #E5E7EB; background: #f8fafc;'>
                    <div style='margin-bottom: 20px;'>
                        <a href='https://github.com/dsfsi/en-ss-m2m100-combo' target='_blank' style='margin: 0 15px; color: #0891b2; text-decoration: none;'>📁 En→Ss Model Repository</a>
                        <a href='https://github.com/dsfsi/ss-en-m2m100-combo' target='_blank' style='margin: 0 15px; color: #0891b2; text-decoration: none;'>📁 Ss→En Model Repository</a>
                        <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank' style='margin: 0 15px; color: #0891b2; text-decoration: none;'>💬 Research Feedback</a>
                    </div>
                    <div style='color: #475569; font-size: 0.95em;'>
                        <strong>Research Team:</strong> Vukosi Marivate, Richard Lastrucci<br>
                        <em>Supporting African language documentation and computational linguistics research</em><br>
                        <small style='color: #64748b; margin-top: 10px; display: block;'>
                            For academic use: Please cite the original models in your publications
                        </small>
                    </div>
                </div>
            """)

    # Event handlers
    def update_char_count(text):
        """Return the character-counter HTML for the current input text.

        The counter is red above 2000 characters, green above 1600 and grey
        otherwise. A missing or empty value counts as zero characters.
        """
        length = len(text or "")
        if length > 2000:
            shade = "#DC2626"
        elif length > 1600:
            shade = "#059669"
        else:
            shade = "#64748b"
        return f"<span style='color: {shade}; font-weight: 500;'>Character count: {length}/2000</span>"
    
    def clear_all():
        """Return reset values for the seven widgets wired to the Clear button.

        Order: input text, output text, character counter, status, metrics,
        language features, ratios.
        """
        blank = ""
        counter_reset = "Character count: 0/2000"
        return blank, blank, counter_reset, blank, blank, blank, blank
    
    def translate_with_analysis(text, direction):
        """Translate ``text`` and build the HTML for the analysis panels.

        Returns a 5-tuple in the order of the handler's outputs: translation
        text, status HTML, metrics HTML, language-features HTML and ratios
        HTML. The three analysis fragments are empty strings when the
        translation did not succeed.
        """
        # Local import keeps this handler self-contained without touching the
        # file-level import block.
        from html import escape

        translation, status, success, analysis = app.translate_text(text, direction)
        # A failure status embeds the raw exception text, so escape it before
        # interpolating it into markup.
        status_html = f"<div class='{'status-success' if success else 'status-error'}'>{escape(status)}</div>"
        
        if success and analysis:
            # Create metrics display
            source_metrics = analysis['source']
            target_metrics = analysis['target']
            
            metrics_html = f"""
            <div class='comparison-grid'>
                <div class='metric-card'>
                    <h5>📊 Source Text</h5>
                    <p><strong>Words:</strong> {source_metrics['word_count']}</p>
                    <p><strong>Characters:</strong> {source_metrics['character_count']}</p>
                    <p><strong>Sentences:</strong> {source_metrics['sentence_count']}</p>
                    <p><strong>Lexical Diversity:</strong> {source_metrics['lexical_diversity']:.3f}</p>
                </div>
                <div class='metric-card' style='border-left: 4px solid #059669;'>
                    <h5>📊 Translation</h5>  
                    <p><strong>Words:</strong> {target_metrics['word_count']}</p>
                    <p><strong>Characters:</strong> {target_metrics['character_count']}</p>
                    <p><strong>Sentences:</strong> {target_metrics['sentence_count']}</p>
                    <p><strong>Lexical Diversity:</strong> {target_metrics['lexical_diversity']:.3f}</p>
                </div>
            </div>
            """
            
            # Language features: the Siswati counters live on whichever side
            # is Siswati, so check the translation as well as the source.
            # Otherwise the panel stays empty for English → Siswati.
            features_html = ""
            siswati_metrics = None
            measured_on = ""
            if 'potential_agglutination' in source_metrics:
                siswati_metrics, measured_on = source_metrics, "source text"
            elif 'potential_agglutination' in target_metrics:
                siswati_metrics, measured_on = target_metrics, "translation"

            if siswati_metrics is not None:
                features_html = f"""
                <div class='analysis-metric'>
                    <h5>🔍 Siswati Features Detected ({measured_on}):</h5>
                    <p><strong>Potential agglutinated words:</strong> {siswati_metrics['potential_agglutination']}</p>
                    <p><strong>Click consonants (c,q,x):</strong> {siswati_metrics['click_consonants']}</p>
                    <p><strong>Tone markers:</strong> {siswati_metrics['tone_markers']}</p>
                </div>
                """
            
            # Translation ratios
            ratios_html = f"""
            <div class='analysis-metric'>
                <h5>⚖️ Translation Ratios:</h5>
                <p><strong>Word ratio:</strong> {analysis['word_ratio']:.3f}</p>
                <p><strong>Character ratio:</strong> {analysis['translation_ratio']:.3f}</p>
                <p><strong>Processing time:</strong> {analysis['processing_time']:.3f}s</p>
            </div>
            """
            
            return translation, status_html, metrics_html, features_html, ratios_html
        
        # Failure or empty analysis: clear all three analysis panels.
        return translation, status_html, "", "", ""
    
    def process_batch(file_path, batch_text, direction):
        """Translate every non-blank line of an uploaded file or pasted text.

        Args:
            file_path: Path of the uploaded text file, or a falsy value when
                nothing was uploaded. Takes precedence over ``batch_text``.
            batch_text: Pasted text, one item per line.
            direction: Direction label passed through to the translator.

        Returns:
            A list of five-column rows (index, source preview, translation
            preview, word counts, character counts). When reading the file
            fails or no text is supplied, a single row carrying the message
            in its first column.
        """
        def _preview(value, limit=50):
            # Shorten long cells so the results table stays readable.
            return value[:limit] + "..." if len(value) > limit else value

        texts = []

        if file_path:
            try:
                # utf-8-sig drops a leading byte-order mark, which would
                # otherwise be translated as part of the first line.
                with open(file_path, 'r', encoding='utf-8-sig') as f:
                    texts = [line.strip() for line in f if line.strip()]
            except (OSError, UnicodeDecodeError) as e:
                return [[f"Error reading file: {str(e)}", "", "", "", ""]]
        elif batch_text:
            texts = [line.strip() for line in batch_text.split('\n') if line.strip()]

        if not texts:
            return [["No text provided", "", "", "", ""]]

        results = app.batch_translate(texts, direction)

        # Format for display
        display_data = []
        for r in results:
            if r['success']:
                source_stats = r['analysis']['source']
                target_stats = r['analysis']['target']
                word_ratio = f"{source_stats['word_count']}→{target_stats['word_count']}"
                char_ratio = f"{source_stats['character_count']}→{target_stats['character_count']}"
            else:
                word_ratio = "Error"
                char_ratio = "Error"

            display_data.append([
                r['index'],
                _preview(r['source']),
                _preview(r['translation']),
                word_ratio,
                char_ratio,
            ])

        return display_data
    
    def get_history():
        """Return table rows for the 20 most recent history entries.

        Each row is [timestamp, direction, source preview, translation
        preview]; previews longer than 50 characters are cut and suffixed
        with "...". Returns an empty list when there is no history.
        """
        rows = []
        for item in app.translation_history[-20:]:  # Show last 20
            source = item['source_text']
            translated = item['translated_text']
            if len(source) > 50:
                source = source[:50] + "..."
            if len(translated) > 50:
                translated = translated[:50] + "..."
            # Keep the first 19 characters of the ISO timestamp, which drops
            # the fractional seconds.
            rows.append([item['timestamp'][:19], item['direction'], source, translated])
        return rows
    
    def export_csv():
        """Write the translation history to a CSV file and offer it for download.

        Returns:
            A component update for the download widget: visible and pointing
            at the written file when there is history, hidden otherwise.
        """
        # Local imports keep this handler self-contained without touching the
        # file-level import block.
        import os
        import tempfile

        csv_content = app.export_history_csv()
        if not csv_content:
            return gr.update(visible=False)

        filename = f"translation_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        # The File component takes a path, not file contents, so the CSV text
        # is written to disk first. A fresh temp directory lets the file keep
        # its readable name. NOTE(review): these directories are not cleaned
        # up here — confirm whether periodic cleanup is needed.
        export_path = os.path.join(tempfile.mkdtemp(prefix="translation_export_"), filename)
        # newline='' because the csv writer already emitted its own line endings.
        with open(export_path, 'w', encoding='utf-8', newline='') as f:
            f.write(csv_content)

        # gr.update(...) is used in place of the per-component
        # gr.File.update(...) helper, which recent Gradio releases no longer
        # provide.
        return gr.update(value=export_path, visible=True, label=f"📊 {filename}")
    
    def clear_history():
        """Discard all stored translations and return an empty table for the history view."""
        emptied_rows = []
        # Rebind rather than clear in place, so the returned list and the
        # stored history stay separate objects.
        app.translation_history = []
        return emptied_rows
    
    # Wire up events
    def _bind_translation(trigger):
        """Attach the translate handler to an event registrar such as ``button.click``."""
        trigger(
            fn=translate_with_analysis,
            inputs=[input_text, direction],
            outputs=[output_text, status_display, metrics_display, features_display, ratios_display],
        )

    # Keep the character counter in sync with the source textbox.
    input_text.change(fn=update_char_count, inputs=input_text, outputs=char_count)

    _bind_translation(translate_btn.click)

    # Reset every widget on the translation panel.
    clear_btn.click(
        fn=clear_all,
        outputs=[
            input_text,
            output_text,
            char_count,
            status_display,
            metrics_display,
            features_display,
            ratios_display,
        ],
    )

    batch_btn.click(
        fn=process_batch,
        inputs=[batch_input, batch_text, batch_direction],
        outputs=batch_results,
    )

    # History tools
    refresh_history_btn.click(fn=get_history, outputs=history_display)
    export_csv_btn.click(fn=export_csv, outputs=csv_download)
    clear_history_btn.click(fn=clear_history, outputs=history_display)

    # Also translate when the textbox's submit event fires.
    # NOTE(review): for a multi-line textbox Gradio presumably binds submit to
    # Shift+Enter rather than plain Enter — confirm for the installed version.
    _bind_translation(input_text.submit)

if __name__ == "__main__":
    # Serve on all network interfaces at port 7860, without a public share
    # link and with Gradio's debug mode switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    demo.launch(**launch_options)