Spaces:

Thadillo
/

participatory-planner

Sleeping

thadillo Claude committed 23 days ago

Commit

e60b22c

1 Parent(s): af68c84

Fix HuggingFace deployment errors: database locking, matplotlib permissions, and deprecation warnings

This commit addresses three critical issues encountered on HuggingFace Spaces:

1. **Fixed TRANSFORMERS_CACHE deprecation warning**
- Removed deprecated TRANSFORMERS_CACHE environment variable
- Using only HF_HOME, which the deprecation warning recommends (TRANSFORMERS_CACHE is slated for removal in transformers v5)

2. **Fixed matplotlib permission errors**
- Added MPLCONFIGDIR=/tmp/matplotlib in Dockerfile
- Set config directory in pdf_export.py before matplotlib import
- Prevents "Permission denied: /.config" errors on HuggingFace

3. **Fixed SQLite database locking errors (CRITICAL)**
- Optimized DELETE operations with synchronize_session=False
- Added retry logic with exponential backoff (up to 3 attempts per submission, waiting 1s and then 2s between attempts)
- Implemented batch commits (every 10 submissions) to reduce lock duration
- Increased SQLite timeouts from 30s to 60s
- Increased PRAGMA busy_timeout from 30000ms to 60000ms
- Better error handling: the session is rolled back after any failed submission, and the final commit is wrapped in its own try/except

These changes significantly improve concurrent request handling on HuggingFace Spaces
and eliminate the "database is locked" errors during sentence-level analysis.

Files modified:
- Dockerfile: Environment variables and matplotlib config
- app/__init__.py: Increased SQLite timeouts
- app/routes/admin.py: Optimized analyze_submissions with retry logic
- app/utils/pdf_export.py: Matplotlib config directory
- app/utils/pdf_export.py.backup: Backup copy of pdf_export.py (newly added file)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (5) hide show

Dockerfile +4 -2
app/__init__.py +2 -2
app/routes/admin.py +73 -36
app/utils/pdf_export.py +4 -0
app/utils/pdf_export.py.backup +336 -0

Dockerfile CHANGED Viewed

@@ -40,9 +40,12 @@ RUN mkdir -p /data/models/finetuned && chmod -R 777 /data/models
 # Create model cache in container (not in /data) to save persistent storage
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 # Pre-download models into container image to avoid using /data storage
 ENV HF_HOME=/app/.cache/huggingface
-ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
 # Download zero-shot models (for immediate analysis capability)
 # These are loaded on first analysis, pre-downloading saves time and /data space
@@ -65,7 +68,6 @@ ENV PORT=7860
 ENV DATABASE_PATH=/data/app.db
 # Keep model cache in container, only store database and fine-tuned models in /data
 ENV HF_HOME=/app/.cache/huggingface
-ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
 ENV HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface
 # Health check

 # Create model cache in container (not in /data) to save persistent storage
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
+# Create matplotlib config directory (prevent permission errors)
+RUN mkdir -p /tmp/matplotlib && chmod 777 /tmp/matplotlib
+ENV MPLCONFIGDIR=/tmp/matplotlib
 # Pre-download models into container image to avoid using /data storage
 ENV HF_HOME=/app/.cache/huggingface
 # Download zero-shot models (for immediate analysis capability)
 # These are loaded on first analysis, pre-downloading saves time and /data space
 ENV DATABASE_PATH=/data/app.db
 # Keep model cache in container, only store database and fine-tuned models in /data
 ENV HF_HOME=/app/.cache/huggingface
 ENV HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface
 # Health check

app/__init__.py CHANGED Viewed

@@ -32,7 +32,7 @@ def create_app():
     # SQLite-specific settings to reduce locking issues
     app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
         'connect_args': {
-            'timeout': 30,  # Increase timeout to 30 seconds
             'check_same_thread': False  # Allow multi-threaded access
         },
         'pool_pre_ping': True,  # Verify connections before using
@@ -51,7 +51,7 @@ def create_app():
             cursor = dbapi_conn.cursor()
             cursor.execute("PRAGMA journal_mode=WAL")  # Write-Ahead Logging
             cursor.execute("PRAGMA synchronous=NORMAL")  # Balance safety/performance
-            cursor.execute("PRAGMA busy_timeout=30000")  # 30 second timeout
             cursor.close()
     # Import models

     # SQLite-specific settings to reduce locking issues
     app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
         'connect_args': {
+            'timeout': 60,  # Increase timeout to 60 seconds for HuggingFace
             'check_same_thread': False  # Allow multi-threaded access
         },
         'pool_pre_ping': True,  # Verify connections before using
             cursor = dbapi_conn.cursor()
             cursor.execute("PRAGMA journal_mode=WAL")  # Write-Ahead Logging
             cursor.execute("PRAGMA synchronous=NORMAL")  # Balance safety/performance
+            cursor.execute("PRAGMA busy_timeout=60000")  # 60 second timeout for HuggingFace
             cursor.close()
     # Import models

app/routes/admin.py CHANGED Viewed

@@ -594,6 +594,9 @@ def delete_submission(submission_id):
 @bp.route('/api/analyze', methods=['POST'])
 @admin_required
 def analyze_submissions():
     data = request.json
     analyze_all = data.get('analyze_all', False)
     use_sentences = data.get('use_sentences', True)  # NEW: sentence-level flag (default: True)
@@ -616,45 +619,79 @@ def analyze_submissions():
     success_count = 0
     error_count = 0
-    for submission in to_analyze:
-        try:
-            if use_sentences:
-                # NEW: Sentence-level analysis
-                sentence_results = analyzer.analyze_with_sentences(submission.message)
-                # Clear old sentences for this submission
-                SubmissionSentence.query.filter_by(submission_id=submission.id).delete()
-                # Create new sentence records
-                for idx, result in enumerate(sentence_results):
-                    sentence = SubmissionSentence(
-                        submission_id=submission.id,
-                        sentence_index=idx,
-                        text=result['text'],
-                        category=result['category'],
-                        confidence=result.get('confidence')
-                    )
-                    db.session.add(sentence)
-                submission.sentence_analysis_done = True
-                # Set primary category for backward compatibility
-                submission.category = submission.get_primary_category()
-                logger.info(f"Analyzed submission {submission.id} into {len(sentence_results)} sentences")
-            else:
-                # OLD: Submission-level analysis (backward compatible)
-                category = analyzer.analyze(submission.message)
-                submission.category = category
-            success_count += 1
-        except Exception as e:
-            logger.error(f"Error analyzing submission {submission.id}: {e}")
-            error_count += 1
-            continue
-    db.session.commit()
     return jsonify({
         'success': True,

 @bp.route('/api/analyze', methods=['POST'])
 @admin_required
 def analyze_submissions():
+    import time
+    from sqlalchemy.exc import OperationalError
     data = request.json
     analyze_all = data.get('analyze_all', False)
     use_sentences = data.get('use_sentences', True)  # NEW: sentence-level flag (default: True)
     success_count = 0
     error_count = 0
+    batch_size = 10  # Commit every 10 submissions to reduce lock time
+    for idx, submission in enumerate(to_analyze):
+        max_retries = 3
+        retry_delay = 1  # seconds
+        for attempt in range(max_retries):
+            try:
+                if use_sentences:
+                    # NEW: Sentence-level analysis
+                    sentence_results = analyzer.analyze_with_sentences(submission.message)
+                    # Optimized DELETE: Use synchronize_session=False for better performance
+                    SubmissionSentence.query.filter_by(submission_id=submission.id).delete(synchronize_session=False)
+                    # Create new sentence records
+                    for sent_idx, result in enumerate(sentence_results):
+                        sentence = SubmissionSentence(
+                            submission_id=submission.id,
+                            sentence_index=sent_idx,
+                            text=result['text'],
+                            category=result['category'],
+                            confidence=result.get('confidence')
+                        )
+                        db.session.add(sentence)
+                    submission.sentence_analysis_done = True
+                    # Set primary category for backward compatibility
+                    submission.category = submission.get_primary_category()
+                    logger.info(f"Analyzed submission {submission.id} into {len(sentence_results)} sentences")
+                else:
+                    # OLD: Submission-level analysis (backward compatible)
+                    category = analyzer.analyze(submission.message)
+                    submission.category = category
+                success_count += 1
+                # Commit in batches to reduce lock duration
+                if (idx + 1) % batch_size == 0:
+                    db.session.commit()
+                    logger.info(f"Committed batch of {batch_size} submissions")
+                break  # Success, exit retry loop
+            except OperationalError as e:
+                # Database locked error - retry with exponential backoff
+                if 'database is locked' in str(e) and attempt < max_retries - 1:
+                    db.session.rollback()
+                    wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
+                    logger.warning(f"Database locked for submission {submission.id}, retrying in {wait_time}s (attempt {attempt + 1}/{max_retries})")
+                    time.sleep(wait_time)
+                    continue
+                else:
+                    # Max retries reached or different error
+                    db.session.rollback()
+                    logger.error(f"Error analyzing submission {submission.id}: {e}")
+                    error_count += 1
+                    break
+            except Exception as e:
+                db.session.rollback()
+                logger.error(f"Error analyzing submission {submission.id}: {e}")
+                error_count += 1
+                break
+    # Final commit for remaining items
+    try:
+        db.session.commit()
+        logger.info(f"Final commit completed")
+    except Exception as e:
+        db.session.rollback()
+        logger.error(f"Error in final commit: {e}")
     return jsonify({
         'success': True,

app/utils/pdf_export.py CHANGED Viewed

@@ -3,6 +3,7 @@ PDF export utility for dashboard data
 Generates PDF reports matching the Analytics Dashboard exactly
 """
 import io
 from datetime import datetime
 from reportlab.lib import colors
 from reportlab.lib.pagesizes import letter
@@ -10,6 +11,9 @@ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.lib.units import inch
 from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak, Image
 from reportlab.lib.enums import TA_CENTER
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt

 Generates PDF reports matching the Analytics Dashboard exactly
 """
 import io
+import os
 from datetime import datetime
 from reportlab.lib import colors
 from reportlab.lib.pagesizes import letter
 from reportlab.lib.units import inch
 from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak, Image
 from reportlab.lib.enums import TA_CENTER
+# Set matplotlib config directory before import (prevent permission errors on HuggingFace)
+os.environ.setdefault('MPLCONFIGDIR', '/tmp/matplotlib')
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt

app/utils/pdf_export.py.backup ADDED Viewed

	@@ -0,0 +1,336 @@

+"""
+PDF export utility for dashboard data
+Generates professional PDF reports with charts and maps using matplotlib
+"""
+import io
+from datetime import datetime
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import letter, A4
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib.units import inch
+from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak, Image
+from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
+import matplotlib.pyplot as plt
+import numpy as np
+try:
+    import contextily as cx
+    HAS_CONTEXTILY = True
+except ImportError:
+    HAS_CONTEXTILY = False
+class DashboardPDFExporter:
+    """Export dashboard data to PDF with charts and maps"""
+    def __init__(self, pagesize=letter):
+        """Store the page size and prepare the stylesheet used by the report.
+        Args:
+            pagesize: Page size later passed to ``SimpleDocTemplate``;
+                defaults to ``letter``.
+        """
+        self.pagesize = pagesize
+        # Start from reportlab's sample stylesheet; custom styles are added on top of it.
+        self.styles = getSampleStyleSheet()
+        # Must run after self.styles is set: it reads and extends that stylesheet.
+        self._setup_custom_styles()
+    def _setup_custom_styles(self):
+        """Setup custom paragraph styles"""
+        self.styles.add(ParagraphStyle(
+            name='CustomTitle',
+            parent=self.styles['Heading1'],
+            fontSize=24,
+            textColor=colors.HexColor('#2c3e50'),
+            spaceAfter=30,
+            alignment=TA_CENTER
+        ))
+        self.styles.add(ParagraphStyle(
+            name='SectionHeader',
+            parent=self.styles['Heading2'],
+            fontSize=16,
+            textColor=colors.HexColor('#34495e'),
+            spaceAfter=12,
+            spaceBefore=12
+        ))
+    def generate_pdf(self, buffer, data):
+        """
+        Generate PDF report
+        Args:
+            buffer: BytesIO buffer to write PDF to
+            data: Dictionary containing dashboard data
+        """
+        doc = SimpleDocTemplate(buffer, pagesize=self.pagesize,
+                               rightMargin=72, leftMargin=72,
+                               topMargin=72, bottomMargin=18)
+        story = []
+        # Title
+        title = Paragraph("Participatory Planning Dashboard Report", self.styles['CustomTitle'])
+        story.append(title)
+        story.append(Spacer(1, 12))
+        # Metadata
+        view_mode_label = "Sentence-Level" if data['view_mode'] == 'sentences' else "Submission-Level"
+        metadata = Paragraph(
+            f"<font size=10>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}<br/>"
+            f"Analysis Mode: {view_mode_label}</font>",
+            self.styles['Normal']
+        )
+        story.append(metadata)
+        story.append(Spacer(1, 24))
+        # Summary Statistics
+        story.append(Paragraph("Summary Statistics", self.styles['SectionHeader']))
+        story.extend(self._create_summary_stats(data))
+        story.append(Spacer(1, 24))
+        # Category Distribution Chart
+        story.append(Paragraph("Category Distribution", self.styles['SectionHeader']))
+        category_chart = self._create_category_chart(data['category_stats'])
+        if category_chart:
+            story.append(category_chart)
+        story.append(Spacer(1, 24))
+        # Contributor Type Distribution
+        story.append(Paragraph("Contributor Type Distribution", self.styles['SectionHeader']))
+        contributor_chart = self._create_contributor_chart(data['contributor_stats'])
+        if contributor_chart:
+            story.append(contributor_chart)
+        story.append(PageBreak())
+        # Breakdown Table
+        story.append(Paragraph("Category Breakdown by Contributor Type", self.styles['SectionHeader']))
+        breakdown_table = self._create_breakdown_table(data['breakdown'], data['contributor_types'])
+        story.append(breakdown_table)
+        story.append(Spacer(1, 24))
+        # Map
+        if data['geotagged_submissions']:
+            story.append(PageBreak())
+            story.append(Paragraph("Geographic Distribution", self.styles['SectionHeader']))
+            map_image = self._create_map(data['geotagged_submissions'], data['categories'])
+            if map_image:
+                story.append(map_image)
+        # Build PDF
+        doc.build(story)
+        return buffer
+    def _create_summary_stats(self, data):
+        """Create summary statistics section"""
+        elements = []
+        total_items = sum(count for _, count in data['category_stats'])
+        total_submissions = len(data['submissions'])
+        total_geotagged = len(data['geotagged_submissions'])
+        # Create metrics table
+        metrics_data = [
+            ['Total Submissions', str(total_submissions)],
+            ['Total Items Analyzed', str(total_items)],
+            ['Geotagged Items', str(total_geotagged)],
+            ['Categories', str(len([c for c, count in data['category_stats'] if count > 0]))]
+        ]
+        metrics_table = Table(metrics_data, colWidths=[3*inch, 2*inch])
+        metrics_table.setStyle(TableStyle([
+            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
+            ('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
+            ('FONTSIZE', (0, 0), (-1, -1), 12),
+            ('TEXTCOLOR', (0, 0), (0, -1), colors.HexColor('#2c3e50')),
+            ('TEXTCOLOR', (1, 0), (1, -1), colors.HexColor('#3498db')),
+            ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
+            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
+        ]))
+        elements.append(metrics_table)
+        return elements
+    def _create_category_chart(self, category_stats):
+        """Create category distribution pie chart using matplotlib"""
+        if not category_stats:
+            return None
+        try:
+            # Prepare data
+            labels = [cat for cat, _ in category_stats]
+            values = [count for _, count in category_stats]
+            # Create matplotlib figure
+            fig, ax = plt.subplots(figsize=(6, 5))
+            colors_list = ['#3498db', '#2ecc71', '#f39c12', '#e74c3c', '#9b59b6', '#1abc9c']
+            wedges, texts, autotexts = ax.pie(values, labels=labels, autopct='%1.1f%%',
+                                               colors=colors_list[:len(labels)],
+                                               startangle=90)
+            # Make percentage text more readable
+            for autotext in autotexts:
+                autotext.set_color('white')
+                autotext.set_fontsize(10)
+                autotext.set_weight('bold')
+            ax.set_title('Category Distribution', fontsize=14, fontweight='bold')
+            # Convert to image
+            img_buffer = io.BytesIO()
+            plt.tight_layout()
+            plt.savefig(img_buffer, format='png', dpi=150, bbox_inches='tight')
+            plt.close(fig)
+            img_buffer.seek(0)
+            img = Image(img_buffer, width=5*inch, height=4*inch)
+            return img
+        except Exception as e:
+            print(f"Error creating category chart: {e}")
+            return None
+    def _create_contributor_chart(self, contributor_stats):
+        """Create contributor type bar chart using matplotlib"""
+        if not contributor_stats:
+            return None
+        try:
+            # Prepare data
+            types = [ctype for ctype, _ in contributor_stats]
+            counts = [count for _, count in contributor_stats]
+            # Create matplotlib figure
+            fig, ax = plt.subplots(figsize=(6, 4))
+            bars = ax.bar(types, counts, color='#3498db', edgecolor='#2980b9', linewidth=1.5)
+            # Add value labels on bars
+            for bar in bars:
+                height = bar.get_height()
+                ax.text(bar.get_x() + bar.get_width()/2., height,
+                       f'{int(height)}',
+                       ha='center', va='bottom', fontsize=10, fontweight='bold')
+            ax.set_xlabel('Contributor Type', fontsize=11, fontweight='bold')
+            ax.set_ylabel('Count', fontsize=11, fontweight='bold')
+            ax.set_title('Submissions by Contributor Type', fontsize=14, fontweight='bold')
+            ax.grid(axis='y', alpha=0.3)
+            plt.xticks(rotation=45, ha='right')
+            # Convert to image
+            img_buffer = io.BytesIO()
+            plt.tight_layout()
+            plt.savefig(img_buffer, format='png', dpi=150, bbox_inches='tight')
+            plt.close(fig)
+            img_buffer.seek(0)
+            img = Image(img_buffer, width=5*inch, height=3.5*inch)
+            return img
+        except Exception as e:
+            print(f"Error creating contributor chart: {e}")
+            return None
+    def _create_breakdown_table(self, breakdown, contributor_types):
+        """Create category breakdown table"""
+        # Prepare table data
+        headers = ['Category'] + [ct['label'] for ct in contributor_types]
+        data = [headers]
+        for category, counts in breakdown.items():
+            row = [category]
+            for ct in contributor_types:
+                row.append(str(counts.get(ct['value'], 0)))
+            data.append(row)
+        # Calculate column widths
+        num_cols = len(headers)
+        col_width = 6.5 * inch / num_cols
+        table = Table(data, colWidths=[col_width] * num_cols)
+        table.setStyle(TableStyle([
+            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#3498db')),
+            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
+            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+            ('FONTSIZE', (0, 0), (-1, -1), 10),
+            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
+            ('GRID', (0, 0), (-1, -1), 1, colors.grey),
+            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#ecf0f1')])
+        ]))
+        return table
+    def _create_map(self, geotagged_submissions, categories):
+        """Create geographic distribution map with real OpenStreetMap tiles"""
+        if not geotagged_submissions:
+            return None
+        try:
+            # Prepare data
+            lats = [s.latitude for s in geotagged_submissions]
+            lons = [s.longitude for s in geotagged_submissions]
+            cats = [s.category for s in geotagged_submissions]
+            # Create matplotlib figure
+            fig, ax = plt.subplots(figsize=(10, 8))
+            # Color map for categories
+            category_colors = {
+                'Vision': '#3498db',
+                'Problem': '#e74c3c',
+                'Objectives': '#2ecc71',
+                'Directives': '#f39c12',
+                'Values': '#9b59b6',
+                'Actions': '#1abc9c'
+            }
+            # Plot points by category
+            for category in set(cats):
+                cat_lats = [lat for lat, cat in zip(lats, cats) if cat == category]
+                cat_lons = [lon for lon, cat in zip(lons, cats) if cat == category]
+                color = category_colors.get(category, '#95a5a6')
+                ax.scatter(cat_lons, cat_lats, c=color, label=category,
+                          s=150, alpha=0.8, edgecolors='white', linewidths=2, zorder=5)
+            # Add OpenStreetMap basemap if contextily is available
+            if HAS_CONTEXTILY:
+                try:
+                    # Add map tiles
+                    cx.add_basemap(ax, crs='EPSG:4326', source=cx.providers.OpenStreetMap.Mapnik,
+                                  attribution=False, alpha=0.8)
+                except Exception as e:
+                    print(f"Could not add basemap: {e}")
+                    # Fallback to grid
+                    ax.grid(True, alpha=0.3)
+            else:
+                # Fallback: simple grid
+                ax.grid(True, alpha=0.3)
+            ax.set_xlabel('Longitude', fontsize=12, fontweight='bold')
+            ax.set_ylabel('Latitude', fontsize=12, fontweight='bold')
+            ax.set_title('Geographic Distribution of Submissions',
+                        fontsize=16, fontweight='bold', pad=20)
+            # Legend outside plot area
+            ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1),
+                     fontsize=10, frameon=True, fancybox=True, shadow=True)
+            # Add attribution text if using OpenStreetMap
+            if HAS_CONTEXTILY:
+                fig.text(0.99, 0.01, '© OpenStreetMap contributors',
+                        ha='right', va='bottom', fontsize=7, style='italic', alpha=0.7)
+            # Convert to image
+            img_buffer = io.BytesIO()
+            plt.tight_layout()
+            plt.savefig(img_buffer, format='png', dpi=200, bbox_inches='tight')
+            plt.close(fig)
+            img_buffer.seek(0)
+            img = Image(img_buffer, width=7*inch, height=5.5*inch)
+            return img
+        except Exception as e:
+            print(f"Error creating map: {e}")
+            import traceback
+            traceback.print_exc()
+            return None