Spaces:

BuildingBench
/

BuildingBench-Dataset

Sleeping

App Files Files Community

BuildingBench committed on Jul 7, 2025

Commit

d7f7deb

verified ·

1 Parent(s): d80611a

Upload streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +1395 -0

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,1395 @@

def _quality_card(icon: str, value_text: str, color: str, label: str) -> str:
    """Return the HTML for one centered quality-metric card.

    Args:
        icon: Emoji shown on the first line of the card.
        value_text: Already formatted headline value (e.g. ``"87.5%"``).
        color: CSS color applied to the headline value.
        label: Caption rendered under the value.
    """
    return f"""
        <div style="background-color: #2d2d2d; border-radius: 10px; padding: 15px; text-align: center;
                    border: 1px solid #404040; color: white;">
            <div style="font-size: 1.5em; margin-bottom: 5px;">{icon}</div>
            <div style="font-size: 1.8em; font-weight: bold; color: {color};">{value_text}</div>
            <div style="font-size: 0.9em; opacity: 0.8;">{label}</div>
        </div>
        """


def create_dataset_statistics(
    buildings_df: "pd.DataFrame",
    weather_df: "pd.DataFrame",
    combinations_df: "pd.DataFrame",
    data_dir: str = "data",
):
    """Render the "Dataset Statistics & Information" section.

    The section contains a static description, per-dataset breakdowns
    (building types, variation types, climate zones, weather locations),
    four quality-metric cards and two static recommendation panels.

    The annotations are quoted on purpose: this function is defined above
    the module's ``import pandas as pd`` line, and unquoted annotations are
    evaluated when the ``def`` statement runs, which raised ``NameError``.

    Args:
        buildings_df: Building table; reads the ``building_type``,
            ``variation_type`` and ``climate_zone`` columns and, when
            present, ``filepath``.
        weather_df: Weather table; reads ``country`` and
            ``climate_zone_code`` and, when present, ``data_source``.
        combinations_df: Building/weather combinations; only checked for
            emptiness.
        data_dir: Directory that ``filepath`` values are resolved against
            for the file-availability metric.
    """
    st.subheader("📊 Dataset Statistics & Information")

    # Dataset description
    st.markdown("""
    <div style="background-color: #1e1e1e; border-radius: 10px; padding: 20px; margin: 20px 0;
                border-left: 4px solid #2196f3; color: white;">
        <h4 style="color: #2196f3; margin-bottom: 15px;">🏗️ About This Dataset</h4>
        <p style="font-size: 1.1em; line-height: 1.6; margin-bottom: 10px;">
            This comprehensive building energy dataset contains energy models for various building types across different climate zones.
            The dataset is designed for energy simulation research, building performance analysis, and climate impact studies.
        </p>
        <p style="font-size: 1.1em; line-height: 1.6; margin-bottom: 10px;">
            Each building model includes detailed geometric properties, construction materials, HVAC systems, and occupancy schedules.
            Multiple variations are generated from base models to study the impact of different parameters on energy performance.
        </p>
        <p style="font-size: 1.1em; line-height: 1.6;">
            Weather data is sourced from global meteorological stations and covers multiple climate zones as defined by ASHRAE standards,
            enabling comprehensive climate-specific energy analysis.
        </p>
    </div>
    """, unsafe_allow_html=True)

    # Detailed statistics
    buildings_col, weather_col = st.columns(2)

    with buildings_col:
        st.subheader("🏢 Building Dataset Details")
        if buildings_df.empty:
            st.warning("No building data available")
        else:
            total_buildings = len(buildings_df)
            # Building types and variation types share one layout:
            # heading, one bullet per category, then a divider.
            breakdowns = (
                ("**Building Types Distribution:**", 'building_type'),
                ("**Variation Types:**", 'variation_type'),
            )
            for heading, column in breakdowns:
                st.markdown(heading)
                for label, count in buildings_df[column].value_counts().items():
                    share = count / total_buildings * 100
                    # str() keeps non-string category labels from crashing .title().
                    st.write(f"• **{str(label).title()}**: {count} models ({share:.1f}%)")
                st.markdown("---")

            # Climate zone coverage
            zone_counts = buildings_df['climate_zone'].value_counts().sort_index()
            st.markdown("**Climate Zone Coverage:**")
            for zone, count in zone_counts.items():
                st.write(f"• **Zone {zone}**: {count} buildings")

    with weather_col:
        st.subheader("🌍 Weather Dataset Details")
        if weather_df.empty:
            st.warning("No weather data available")
        else:
            # Geographic coverage
            st.markdown("**Geographic Coverage:**")
            st.write(f"• **Total Locations**: {len(weather_df)}")
            st.write(f"• **Countries Covered**: {weather_df['country'].nunique()}")
            st.write(f"• **Climate Zones**: {weather_df['climate_zone_code'].nunique()}")

            # Climate zone distribution in weather data (first 10 zones only)
            weather_zone_counts = weather_df['climate_zone_code'].value_counts().sort_index()
            st.markdown("**Weather Locations by Climate Zone:**")
            for zone, count in weather_zone_counts.head(10).items():
                st.write(f"• **Zone {zone}**: {count} locations")
            st.markdown("---")

            # Top countries by location count
            st.markdown("**Top Countries by Weather Locations:**")
            for country, count in weather_df['country'].value_counts().head(8).items():
                st.write(f"• **{country}**: {count} locations")

            # Data sources if available
            if 'data_source' in weather_df.columns:
                st.markdown("---")
                st.markdown("**Data Sources:**")
                for source, count in weather_df['data_source'].value_counts().items():
                    st.write(f"• **{source}**: {count} files")

    # Dataset quality metrics
    st.subheader("🎯 Dataset Quality Metrics")
    completeness_col, files_col, diversity_col, readiness_col = st.columns(4)

    with completeness_col:
        # Share of non-null cells in the buildings table.
        completeness = 0
        if not buildings_df.empty:
            total_cells = len(buildings_df) * len(buildings_df.columns)
            missing_cells = buildings_df.isnull().sum().sum()
            if total_cells > 0:
                completeness = (total_cells - missing_cells) / total_cells * 100
        st.markdown(
            _quality_card("📈", f"{completeness:.1f}%", "#4caf50", "Data Completeness"),
            unsafe_allow_html=True,
        )

    with files_col:
        # Share of buildings whose model file exists under data_dir.
        file_coverage = 0
        if not buildings_df.empty and 'filepath' in buildings_df.columns:
            root = Path(data_dir)
            # Rows without a filepath count as unavailable instead of
            # raising TypeError when joined onto the root path.
            known_paths = buildings_df['filepath'].dropna()
            existing_files = sum((root / str(rel)).exists() for rel in known_paths)
            file_coverage = existing_files / len(buildings_df) * 100
        st.markdown(
            _quality_card("📁", f"{file_coverage:.1f}%", "#2196f3", "File Availability"),
            unsafe_allow_html=True,
        )

    with diversity_col:
        # Mean of (distinct building types / rows) and (distinct climate
        # zones / rows), as a percentage. This is a unique-value ratio,
        # not an information-theoretic entropy.
        diversity_score = 0
        if not buildings_df.empty:
            row_count = len(buildings_df)
            type_ratio = len(buildings_df['building_type'].unique()) / row_count * 100
            zone_ratio = len(buildings_df['climate_zone'].unique()) / row_count * 100
            diversity_score = (type_ratio + zone_ratio) / 2
        st.markdown(
            _quality_card("🎨", f"{diversity_score:.1f}%", "#ff9800", "Dataset Diversity"),
            unsafe_allow_html=True,
        )

    with readiness_col:
        # 100 when combinations exist, 75 when both source tables exist,
        # 50 when only one of them exists, otherwise 0.
        simulation_readiness = 0
        if not combinations_df.empty:
            simulation_readiness = 100
        elif not buildings_df.empty and not weather_df.empty:
            simulation_readiness = 75
        elif not buildings_df.empty or not weather_df.empty:
            simulation_readiness = 50
        st.markdown(
            _quality_card("⚡", f"{simulation_readiness}%", "#9c27b0", "Simulation Ready"),
            unsafe_allow_html=True,
        )

    # Usage recommendations
    st.subheader("💡 Usage Recommendations")
    research_col, start_col = st.columns(2)

    with research_col:
        st.markdown("""
        <div style="background-color: #1a237e; border-radius: 10px; padding: 20px; margin: 10px 0;
                    border-left: 4px solid #3f51b5; color: white;">
            <h5 style="color: #64b5f6; margin-bottom: 15px;">🔬 Research Applications</h5>
            <ul style="line-height: 1.8;">
                <li>Building energy performance analysis</li>
                <li>Climate change impact studies</li>
                <li>HVAC system optimization</li>
                <li>Retrofit strategy evaluation</li>
                <li>Code compliance verification</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)

    with start_col:
        st.markdown("""
        <div style="background-color: #1b5e20; border-radius: 10px; padding: 20px; margin: 10px 0;
                    border-left: 4px solid #4caf50; color: white;">
            <h5 style="color: #81c784; margin-bottom: 15px;">⚙️ Getting Started</h5>
            <ul style="line-height: 1.8;">
                <li>Use <strong>Building Explorer</strong> to browse models</li>
                <li>Check <strong>Weather Data</strong> for climate coverage</li>
                <li>Generate combinations for simulations</li>
                <li>Export filtered datasets for analysis</li>
                <li>Run quality checks before processing</li>
            </ul>
        </div>
        """, unsafe_allow_html=True)


# dashboard/streamlit_app.py
+"""
+Building Generator Dashboard - Main Streamlit Application
+Interactive web interface for exploring building energy models and weather data
+"""
+import streamlit as st
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import numpy as np
+from pathlib import Path
+import sys
+import json
+from typing import Dict, List, Optional
+import logging
# Add the project root (the parent of the directory containing this file)
# to the Python path so the `building_gen` package can be imported below.
# Streamlit re-executes this script on every interaction, so the insert is
# guarded to avoid stacking duplicate entries on sys.path.
PROJECT_ROOT = Path(__file__).parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
from building_gen.core.pipeline import BuildingPipeline

# Configure page
st.set_page_config(
    page_title="Building Generator Dashboard",
    page_icon="🏗️",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS for dark theme styling
st.markdown("""
<style>
    .main > div {
        padding-top: 2rem;
    }
    /* Dark theme metric cards */
    .stMetric {
        background-color: #1e1e1e;
        border: 1px solid #333;
        border-radius: 10px;
        padding: 15px;
        margin: 5px 0;
        color: white;
    }
    /* Dark theme filter container */
    .filter-container {
        background-color: #2d2d2d;
        border: 1px solid #404040;
        border-radius: 10px;
        padding: 15px;
        margin: 10px 0;
        color: white;
    }
    /* Dark theme building cards */
    .building-card {
        border: 2px solid #404040;
        border-radius: 10px;
        padding: 15px;
        margin: 10px 0;
        background-color: #1e1e1e;
        color: white;
    }
    /* Dark theme comparison highlight */
    .comparison-highlight {
        background-color: #1a237e;
        border-left: 4px solid #3f51b5;
        border-radius: 5px;
        padding: 15px;
        margin: 5px 0;
        color: white;
    }
    /* Plotly chart dark theme */
    .js-plotly-plot {
        background-color: transparent !important;
    }
    /* Data editor dark theme */
    .stDataFrame {
        background-color: #1e1e1e;
    }
    /* Sidebar dark theme adjustments.
       .css-1d391kg is a generated class name; the data-testid selector
       is listed alongside it as a stable fallback. */
    .css-1d391kg,
    [data-testid="stSidebar"] {
        background-color: #1e1e1e;
    }
    /* Success/Info/Warning message styling */
    .stSuccess {
        background-color: #1b5e20;
        border: 1px solid #4caf50;
    }
    .stInfo {
        background-color: #0d47a1;
        border: 1px solid #2196f3;
    }
    .stWarning {
        background-color: #e65100;
        border: 1px solid #ff9800;
    }
    .stError {
        background-color: #b71c1c;
        border: 1px solid #f44336;
    }
</style>
""", unsafe_allow_html=True)
@st.cache_data
def load_pipeline_data(data_dir: str = "data"):
    """Build the pipeline and read the three dashboard tables from *data_dir*.

    Returns a 4-tuple ``(pipeline, buildings_df, weather_df,
    combinations_df)``. A table whose CSV file does not exist is returned as
    an empty DataFrame. If anything raises, the error is shown with
    ``st.error`` and ``(None, empty, empty, empty)`` is returned.
    """
    # Relative CSV locations, in the order the frames are returned:
    # buildings, weather, building/weather combinations.
    table_locations = (
        "tables/buildings.csv",
        "weather/tables/all_weather.csv",
        "tables/building_weather_combinations.csv",
    )
    try:
        pipeline = BuildingPipeline(data_dir)
        root = Path(data_dir)
        frames = []
        for relative in table_locations:
            csv_file = root / relative
            if csv_file.exists():
                frames.append(pd.read_csv(csv_file))
            else:
                frames.append(pd.DataFrame())
        return (pipeline, *frames)
    except Exception as e:
        st.error(f"Failed to load data: {e}")
        return None, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
def create_building_filters(buildings_df: pd.DataFrame) -> Dict:
    """Render the building filter widgets and return the chosen values.

    The returned dict has the keys ``building_types``, ``climate_zones``,
    ``variation_types`` (lists from multiselects) and ``area_range``,
    ``wwr_range``, ``zones_range`` (``(low, high)`` tuples). A range falls
    back to a fixed default when the table is empty or lacks the column.
    """
    has_rows = not buildings_df.empty

    def distinct_sorted(column: str) -> list:
        # Nothing to choose from when the table is empty.
        return sorted(buildings_df[column].unique()) if has_rows else []

    def filterable(column: str) -> bool:
        return has_rows and column in buildings_df.columns

    st.markdown('<div class="filter-container">', unsafe_allow_html=True)
    st.subheader("🔍 Filter Buildings")
    left, middle, right = st.columns(3)

    with left:
        building_types = st.multiselect(
            "Building Type",
            options=distinct_sorted('building_type'),
            default=[]
        )
        climate_zones = st.multiselect(
            "Climate Zone",
            options=distinct_sorted('climate_zone'),
            default=[]
        )

    with middle:
        variation_types = st.multiselect(
            "Variation Type",
            options=distinct_sorted('variation_type'),
            default=[]
        )
        # Floor area range
        area_range = (0, 10000)
        if filterable('floor_area'):
            area_low = float(buildings_df['floor_area'].min())
            area_high = float(buildings_df['floor_area'].max())
            area_range = st.slider(
                "Floor Area Range (m²)",
                min_value=area_low,
                max_value=area_high,
                value=(area_low, area_high),
                format="%.0f"
            )

    with right:
        # Window-to-wall ratio if available
        wwr_range = (0.0, 1.0)
        if filterable('window_wall_ratio'):
            wwr_low = float(buildings_df['window_wall_ratio'].min())
            wwr_high = float(buildings_df['window_wall_ratio'].max())
            wwr_range = st.slider(
                "Window-to-Wall Ratio",
                min_value=wwr_low,
                max_value=wwr_high,
                value=(wwr_low, wwr_high),
                format="%.2f"
            )
        # Number of zones range
        zones_range = (1, 100)
        if filterable('num_zones'):
            zones_low = int(buildings_df['num_zones'].min())
            zones_high = int(buildings_df['num_zones'].max())
            zones_range = st.slider(
                "Number of Zones",
                min_value=zones_low,
                max_value=zones_high,
                value=(zones_low, zones_high)
            )

    st.markdown('</div>', unsafe_allow_html=True)

    return dict(
        building_types=building_types,
        climate_zones=climate_zones,
        variation_types=variation_types,
        area_range=area_range,
        wwr_range=wwr_range,
        zones_range=zones_range,
    )
def apply_building_filters(buildings_df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
    """Return the rows of *buildings_df* that satisfy every entry in *filters*.

    An empty category selection means "no restriction". Range filters are
    inclusive on both ends and are applied only when the column exists;
    rows whose value in a range-filtered column is missing are dropped.
    The input frame is never modified.
    """
    # (column, key in `filters`) pairs, applied in this order.
    category_filters = (
        ('building_type', 'building_types'),
        ('climate_zone', 'climate_zones'),
        ('variation_type', 'variation_types'),
    )
    range_filters = (
        ('floor_area', 'area_range'),
        ('window_wall_ratio', 'wwr_range'),
        ('num_zones', 'zones_range'),
    )

    result = buildings_df.copy()

    for column, key in category_filters:
        selected = filters[key]
        if selected:
            result = result[result[column].isin(selected)]

    for column, key in range_filters:
        if column not in result.columns:
            continue
        bounds = filters[key]
        # Series.between is inclusive on both ends by default.
        result = result[result[column].between(bounds[0], bounds[1])]

    return result
def create_overview_metrics(buildings_df: pd.DataFrame, weather_df: pd.DataFrame, combinations_df: pd.DataFrame):
    """Show four equally sized dark cards summarising the loaded tables.

    The cards report, left to right: building count (with the number of
    non-'base' variations), weather location count (with distinct
    countries), combination count (with a ready / not-created status) and
    the number of distinct building climate zones.
    """

    def render_card(icon, title, headline, caption):
        # All four cards share this markup; only the four values differ.
        st.markdown(f"""
        <div style="background-color: #2d2d2d; border-radius: 15px; padding: 25px; margin: 10px 0;
                    border: 1px solid #404040; color: white; height: 200px; display: flex;
                    flex-direction: column; justify-content: space-between;">
            <div style="display: flex; align-items: center; margin-bottom: 15px;">
                <span style="font-size: 2em; margin-right: 15px;">{icon}</span>
                <span style="font-size: 1.3em; font-weight: bold;">{title}</span>
            </div>
            <div style="font-size: 3.5em; font-weight: bold; text-align: center; margin: 15px 0;">{headline}</div>
            <div style="font-size: 1em; text-align: center; opacity: 0.8;">
                {caption}
            </div>
        </div>
        """, unsafe_allow_html=True)

    buildings_col, weather_col, combos_col, zones_col = st.columns(4)

    with buildings_col:
        if buildings_df.empty:
            variation_total = 0
        else:
            variation_total = len(buildings_df[buildings_df['variation_type'] != 'base'])
        render_card("🏢", "Buildings", len(buildings_df), f"{variation_total} variations")

    with weather_col:
        if weather_df.empty:
            country_total = 0
        else:
            country_total = weather_df['country'].nunique()
        render_card("🌍", "Weather Locations", len(weather_df), f"{country_total} countries")

    with combos_col:
        combinations_status = "Ready" if len(combinations_df) != 0 else "Not created"
        render_card("🔄", "Combinations", len(combinations_df), combinations_status)

    with zones_col:
        if buildings_df.empty:
            climate_zones = 0
        else:
            climate_zones = buildings_df['climate_zone'].nunique()
        render_card("🌡️", "Climate Zones", climate_zones, "ASHRAE zones")
def create_dark_theme_plotly_layout():
    """Return the layout keyword arguments shared by all dark-theme charts.

    The result is a fresh dict on every call: transparent plot and paper
    backgrounds, white font, and identical (but independent) x/y axis
    styling with grey grid, line and tick colors.
    """
    transparent = 'rgba(0,0,0,0)'
    grey = '#404040'

    def axis_style():
        # Built per axis so the two axes never share one dict object.
        return {
            'gridcolor': grey,
            'linecolor': grey,
            'tickcolor': grey,
            'color': 'white',
        }

    layout = {
        'plot_bgcolor': transparent,
        'paper_bgcolor': transparent,
        'font': {'color': 'white'},
    }
    layout['xaxis'] = axis_style()
    layout['yaxis'] = axis_style()
    return layout
def create_building_characteristics_chart(buildings_df: pd.DataFrame):
    """Render the four-tab building characteristics view with dark theme.

    Tabs: type/variation distributions, buildings per climate zone (with
    zone descriptions), a per-type summary table, and correlations plus
    scatter plots of the numeric properties.

    Args:
        buildings_df: Building table. ``building_type``, ``variation_type``
            and ``climate_zone`` are required; ``floor_area``,
            ``num_zones``, ``window_wall_ratio`` and ``name`` are used only
            when present.
    """
    if buildings_df.empty:
        st.warning("No building data available")
        return

    # 'name' is only hover text; leave it out when the column is missing
    # instead of letting px.scatter raise.
    hover_columns = ['name'] if 'name' in buildings_df.columns else None

    tab1, tab2, tab3, tab4 = st.tabs(["📊 Distribution", "🗺️ Climate Zones", "🏗️ Types", "📏 Properties"])

    with tab1:
        col1, col2 = st.columns(2)
        with col1:
            # Building type distribution
            type_counts = buildings_df['building_type'].value_counts()
            fig_types = px.pie(
                values=type_counts.values,
                names=type_counts.index,
                title="Building Types Distribution",
                color_discrete_sequence=px.colors.qualitative.Set3
            )
            fig_types.update_layout(**create_dark_theme_plotly_layout(), height=400)
            st.plotly_chart(fig_types, use_container_width=True)
        with col2:
            # Variation type distribution
            var_counts = buildings_df['variation_type'].value_counts()
            fig_vars = px.bar(
                x=var_counts.index,
                y=var_counts.values,
                title="Variation Types",
                color=var_counts.index,
                color_discrete_sequence=px.colors.qualitative.Pastel
            )
            fig_vars.update_layout(**create_dark_theme_plotly_layout(), height=400, showlegend=False)
            st.plotly_chart(fig_vars, use_container_width=True)

    with tab2:
        # Climate zone analysis
        climate_counts = buildings_df['climate_zone'].value_counts()
        fig_climate = px.bar(
            x=climate_counts.index,
            y=climate_counts.values,
            title="Buildings by Climate Zone",
            color=climate_counts.values,
            color_continuous_scale='viridis'
        )
        fig_climate.update_layout(**create_dark_theme_plotly_layout(), height=400)
        st.plotly_chart(fig_climate, use_container_width=True)

        # Climate zone descriptions
        climate_descriptions = {
            '1A': 'Very Hot - Humid', '1B': 'Very Hot - Dry',
            '2A': 'Hot - Humid', '2B': 'Hot - Dry',
            '3A': 'Warm - Humid', '3B': 'Warm - Dry', '3C': 'Warm - Marine',
            '4A': 'Mixed - Humid', '4B': 'Mixed - Dry', '4C': 'Mixed - Marine',
            '5A': 'Cool - Humid', '5B': 'Cool - Dry', '5C': 'Cool - Marine',
            '6A': 'Cold - Humid', '6B': 'Cold - Dry',
            '7': 'Very Cold', '8': 'Subarctic'
        }
        st.subheader("Climate Zone Descriptions")
        # Drop missing zones and sort by text so NaN or mixed int/str zone
        # values cannot make sorted() raise; look up by text for the same
        # reason (a numeric 7 still matches the '7' entry).
        present_zones = sorted(buildings_df['climate_zone'].dropna().unique(), key=str)
        for zone in present_zones:
            description = climate_descriptions.get(str(zone))
            if description is not None:
                st.info(f"**{zone}**: {description}")

    with tab3:
        # Building type details
        st.subheader("Building Type Analysis")
        # Aggregate only the columns that really exist. The previous
        # default entry for 'floor_area' raised KeyError when that column
        # was absent.
        wanted_stats = {
            'floor_area': ['count', 'mean', 'std'],
            'num_zones': ['mean', 'std'],
            'window_wall_ratio': ['mean', 'std'],
        }
        agg_spec = {
            column: stats
            for column, stats in wanted_stats.items()
            if column in buildings_df.columns
        }
        by_type = buildings_df.groupby('building_type')
        if agg_spec:
            type_summary = by_type.agg(agg_spec).round(2)
        else:
            # No numeric property available: fall back to a plain row count.
            type_summary = by_type.size().to_frame('count')
        st.dataframe(type_summary, use_container_width=True)

    with tab4:
        # Property correlations
        numeric_cols = [
            column
            for column in ('floor_area', 'num_zones', 'window_wall_ratio')
            if column in buildings_df.columns
        ]
        if len(numeric_cols) < 2:
            st.info("At least two numeric building properties are needed to show correlations.")
        else:
            corr_matrix = buildings_df[numeric_cols].corr()
            fig_corr = px.imshow(
                corr_matrix,
                color_continuous_scale='RdBu',
                aspect='auto',
                title='Building Property Correlations'
            )
            fig_corr.update_layout(**create_dark_theme_plotly_layout())
            st.plotly_chart(fig_corr, use_container_width=True)

            # Scatter plots
            col1, col2 = st.columns(2)
            with col1:
                if 'floor_area' in numeric_cols and 'num_zones' in numeric_cols:
                    fig_scatter1 = px.scatter(
                        buildings_df,
                        x='floor_area',
                        y='num_zones',
                        color='building_type',
                        title='Floor Area vs Number of Zones',
                        hover_data=hover_columns
                    )
                    fig_scatter1.update_layout(**create_dark_theme_plotly_layout())
                    st.plotly_chart(fig_scatter1, use_container_width=True)
            with col2:
                if 'window_wall_ratio' in numeric_cols and 'floor_area' in numeric_cols:
                    fig_scatter2 = px.scatter(
                        buildings_df,
                        x='window_wall_ratio',
                        y='floor_area',
                        color='building_type',
                        title='Window-Wall Ratio vs Floor Area',
                        hover_data=hover_columns
                    )
                    fig_scatter2.update_layout(**create_dark_theme_plotly_layout())
                    st.plotly_chart(fig_scatter2, use_container_width=True)
def display_buildings_table(buildings_df: pd.DataFrame):
    """Display the read-only buildings table with a CSV export button.

    Args:
        buildings_df: The (already filtered) building table. When it is
            empty only a warning is shown.
    """
    st.subheader("📋 Buildings Database")
    if buildings_df.empty:
        st.warning("No buildings found matching the current filters.")
        return

    # Prepare column config based on available columns
    column_config = {
        "id": st.column_config.NumberColumn("ID", width="small"),
        "name": st.column_config.TextColumn("Building Name", width="large"),
        "building_type": st.column_config.TextColumn("Type", width="medium"),
        "climate_zone": st.column_config.TextColumn("Climate", width="small"),
        "variation_type": st.column_config.TextColumn("Variation", width="medium"),
        "filepath": st.column_config.TextColumn("File Path", width="large")
    }
    # Add optional columns if they exist
    if 'floor_area' in buildings_df.columns:
        column_config["floor_area"] = st.column_config.NumberColumn("Floor Area (m²)", format="%.0f", width="medium")
    if 'num_zones' in buildings_df.columns:
        column_config["num_zones"] = st.column_config.NumberColumn("Zones", width="small")
    if 'window_wall_ratio' in buildings_df.columns:
        column_config["window_wall_ratio"] = st.column_config.NumberColumn("WWR", format="%.2f", width="small")
    if 'created_date' in buildings_df.columns:
        column_config["created_date"] = st.column_config.DatetimeColumn("Created", width="medium")

    # Display the table; the editor's return value is not needed because
    # every column is disabled (read-only).
    st.data_editor(
        buildings_df,
        use_container_width=True,
        hide_index=True,
        column_config=column_config,
        disabled=list(buildings_df.columns)
    )

    # Export functionality. The download button is rendered directly: when
    # it was nested inside `if st.button(...)`, it only appeared after a
    # first click and vanished again on the next rerun.
    export_col, _, _ = st.columns([1, 1, 2])
    with export_col:
        st.download_button(
            label="📥 Export to CSV",
            data=buildings_df.to_csv(index=False),
            file_name=f"buildings_filtered_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv",
            mime="text/csv"
        )
def load_building_epjson(filepath: str, data_dir: str = "data") -> Optional[Dict]:
    """Load a building epJSON file and return its parsed content.

    Args:
        filepath: Path of the file relative to *data_dir*.
        data_dir: Directory that *filepath* is resolved against.

    Returns:
        The parsed JSON document, or ``None`` when the file is missing or
        cannot be read or parsed (an ``st.error`` message is shown in both
        cases).
    """
    try:
        full_path = Path(data_dir) / filepath
        if not full_path.exists():
            st.error(f"Building file not found: {full_path}")
            return None
        # Decode explicitly as UTF-8 (the JSON interchange encoding)
        # instead of whatever the platform's locale default happens to be.
        with open(full_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        # Deliberately broad: this is a UI boundary, so any failure is
        # reported to the user rather than aborting the page.
        st.error(f"Error loading building file: {e}")
        return None
def analyze_building_epjson(epjson_data: Dict) -> Dict:
    """Analyze epJSON building data and extract key metrics.

    Args:
        epjson_data: Parsed epJSON document, keyed by object type.

    Returns:
        A dict with the object counts ``zones``, ``surfaces``, ``windows``,
        ``hvac_systems``, ``schedules``, ``materials`` and
        ``constructions``; the flags ``has_meters`` and ``has_setpoints``;
        and ``timestep`` (``None`` when no Timestep object is defined,
        ``'Unknown'`` when one exists without
        ``number_of_timesteps_per_hour``).
    """

    def count(object_type: str) -> int:
        # Treat an absent or null section as containing no objects.
        return len(epjson_data.get(object_type) or {})

    # Check for HVAC systems
    hvac_objects = ('AirLoopHVAC', 'PlantLoop', 'ZoneHVAC:IdealLoadsAirSystem')

    analysis = {
        'zones': count('Zone'),
        'surfaces': count('BuildingSurface:Detailed'),
        'windows': count('FenestrationSurface:Detailed'),
        'hvac_systems': sum(count(obj) for obj in hvac_objects),
        'schedules': count('Schedule:Compact'),
        'materials': count('Material'),
        'constructions': count('Construction'),
        # Check for meters and outputs
        'has_meters': 'Output:Meter' in epjson_data,
        'has_setpoints': any('Setpoint' in key for key in epjson_data),
        'timestep': None,
    }

    # Get timestep from the first Timestep object, if there is one. An
    # empty "Timestep" section used to raise IndexError here.
    timestep_objects = epjson_data.get('Timestep') or {}
    first_timestep = next(iter(timestep_objects.values()), None)
    if isinstance(first_timestep, dict):
        analysis['timestep'] = first_timestep.get('number_of_timesteps_per_hour', 'Unknown')

    return analysis
+def create_mock_energy_profile(building_name: str):
+    """Render placeholder energy charts for a building (dark theme).
+
+    Draws a 24-hour load curve and a stacked monthly heating/cooling bar
+    chart side by side. All values are synthetic demonstration data.
+
+    Args:
+        building_name: Name of the selected building. It only seeds the
+            random noise on the hourly curve, so the same building shows the
+            same curve on every rerun of the script.
+    """
+    import zlib  # local import: only needed to derive a stable seed
+    st.subheader("⚡ Energy Profile (Demo)")
+    st.info("📝 Note: This is demonstration data. Connect to actual EnergyPlus simulation results for real data.")
+    # Mock hourly load profile
+    hours = list(range(24))
+    base_load = 100
+    # crc32 is stable across processes (unlike hash() on str), so the seed,
+    # and with it the chart, is reproducible per building.
+    rng = np.random.default_rng(zlib.crc32(str(building_name).encode('utf-8')))
+    peak_factor = np.sin(np.array(hours) * np.pi / 12)
+    mock_load = base_load + 50 * peak_factor + rng.normal(0, 10, 24)
+    mock_load = np.maximum(mock_load, 20)  # Minimum load
+    # Mock monthly energy
+    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+    heating_load = [150, 120, 80, 40, 10, 0, 0, 0, 20, 60, 100, 140]
+    cooling_load = [0, 0, 10, 30, 60, 100, 120, 110, 70, 30, 5, 0]
+    hourly_col, monthly_col = st.columns(2)
+    with hourly_col:
+        # Hourly profile
+        fig_hourly = px.line(
+            x=hours,
+            y=mock_load,
+            title="Typical Daily Load Profile",
+            labels={'x': 'Hour of Day', 'y': 'Power (kW)'}
+        )
+        fig_hourly.update_traces(line_color='#00d4ff')
+        fig_hourly.update_layout(**create_dark_theme_plotly_layout())
+        st.plotly_chart(fig_hourly, use_container_width=True)
+    with monthly_col:
+        # Monthly profile: heating and cooling stacked per month
+        fig_monthly = go.Figure()
+        fig_monthly.add_trace(go.Bar(x=months, y=heating_load, name='Heating', marker_color='#ff6b6b'))
+        fig_monthly.add_trace(go.Bar(x=months, y=cooling_load, name='Cooling', marker_color='#4ecdc4'))
+        fig_monthly.update_layout(
+            **create_dark_theme_plotly_layout(),
+            title="Monthly Energy Consumption",
+            xaxis_title="Month",
+            yaxis_title="Energy (kWh/m²)",
+            barmode='stack'
+        )
+        st.plotly_chart(fig_monthly, use_container_width=True)
+def _render_building_card(building: pd.Series, heading: str, css_class: str) -> None:
+    """Write the summary fields of one building row under ``heading``.
+
+    ``css_class`` is the class of the opening ``<div>`` marker emitted before
+    the fields. The optional columns (floor area, zone count, window-to-wall
+    ratio) are written only when present in the row and not NaN.
+    """
+    st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
+    st.subheader(heading)
+    st.write(f"**Type**: {building['building_type']}")
+    st.write(f"**Climate Zone**: {building['climate_zone']}")
+    st.write(f"**Variation**: {building['variation_type']}")
+    # (column, label, format applied to the value)
+    optional_fields = (
+        ('floor_area', 'Floor Area', '{:.0f} m²'),
+        ('num_zones', 'Zones', '{}'),
+        ('window_wall_ratio', 'WWR', '{:.2%}'),
+    )
+    for field, label, value_format in optional_fields:
+        if field in building and pd.notna(building[field]):
+            st.write(f"**{label}**: {value_format.format(building[field])}")
+    st.markdown('</div>', unsafe_allow_html=True)
+def _render_epjson_analysis(analysis: Dict) -> None:
+    """Show the result of ``analyze_building_epjson`` as two rows of metrics."""
+    st.subheader("🔧 Building Analysis")
+    # One tuple of (label, analysis key) pairs per column
+    count_layout = (
+        (("Zones", 'zones'), ("Surfaces", 'surfaces')),
+        (("Windows", 'windows'), ("HVAC Systems", 'hvac_systems')),
+        (("Schedules", 'schedules'), ("Materials", 'materials')),
+    )
+    for column, entries in zip(st.columns(3), count_layout):
+        with column:
+            for label, key in entries:
+                st.metric(label, analysis[key])
+    # Status indicators
+    st.subheader("⚡ Processing Status")
+    meters_col, setpoints_col, timestep_col = st.columns(3)
+    with meters_col:
+        st.metric("Has Meters", "✅ Yes" if analysis['has_meters'] else "❌ No")
+    with setpoints_col:
+        st.metric("Has Setpoints", "✅ Yes" if analysis['has_setpoints'] else "❌ No")
+    with timestep_col:
+        st.metric("Timesteps/Hour", analysis['timestep'] or "Not set")
+def _render_building_explorer_page(buildings_df: pd.DataFrame) -> None:
+    """Render the Building Explorer page: filters, charts, table and details."""
+    st.header("Building Explorer")
+    if buildings_df.empty:
+        st.warning("No building data available. Run `python scripts/main.py --create-table` first.")
+        return
+    # Filters
+    filters = create_building_filters(buildings_df)
+    filtered_buildings = apply_building_filters(buildings_df, filters)
+    st.subheader(f"📊 Found {len(filtered_buildings)} buildings")
+    if filtered_buildings.empty:
+        return
+    # Visualizations and table
+    create_building_characteristics_chart(filtered_buildings)
+    display_buildings_table(filtered_buildings)
+    # Details of a single building
+    st.subheader("🔍 Building Details")
+    selected_building = st.selectbox(
+        "Select a building to analyze:",
+        options=filtered_buildings['name'].tolist(),
+        index=0
+    )
+    if not selected_building:
+        return
+    # If several rows share the name, the first one is shown
+    building_info = filtered_buildings[filtered_buildings['name'] == selected_building].iloc[0]
+    summary_col, analysis_col = st.columns([1, 2])
+    with summary_col:
+        _render_building_card(building_info, f"📋 {building_info['name']}", "building-card")
+    with analysis_col:
+        # Load and analyze building file
+        epjson_data = load_building_epjson(building_info['filepath'])
+        if epjson_data:
+            _render_epjson_analysis(analyze_building_epjson(epjson_data))
+    # Mock energy profile
+    create_mock_energy_profile(selected_building)
+def _coordinate_range_slider(label: str, values: pd.Series):
+    """Show a range slider spanning min..max of ``values``; return the chosen (low, high)."""
+    lowest, highest = float(values.min()), float(values.max())
+    return st.slider(label, min_value=lowest, max_value=highest, value=(lowest, highest))
+def _render_weather_page(weather_df: pd.DataFrame) -> None:
+    """Render the Weather Data page: filters, map, distributions and table."""
+    st.header("Weather Data Explorer")
+    if weather_df.empty:
+        st.warning("No weather data available. Run `python scripts/main.py --create-weather-table` first.")
+        return
+    # Weather filters
+    st.subheader("🌡️ Filter Weather Locations")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        # dropna(): sorting a mix of strings and NaN raises TypeError
+        countries = st.multiselect(
+            "Countries",
+            options=sorted(weather_df['country'].dropna().unique()),
+            default=[]
+        )
+        climate_zones_weather = st.multiselect(
+            "Climate Zones",
+            options=sorted(weather_df['climate_zone_code'].dropna().unique()),
+            default=[]
+        )
+    with col2:
+        if 'data_source' in weather_df.columns:
+            data_sources = st.multiselect(
+                "Data Sources",
+                options=weather_df['data_source'].unique(),
+                default=[]
+            )
+        else:
+            data_sources = []
+        lat_range = _coordinate_range_slider("Latitude Range", weather_df['latitude'])
+    with col3:
+        lon_range = _coordinate_range_slider("Longitude Range", weather_df['longitude'])
+    # Apply weather filters; an empty multiselect means "no restriction"
+    filtered_weather = weather_df.copy()
+    if countries:
+        filtered_weather = filtered_weather[filtered_weather['country'].isin(countries)]
+    if climate_zones_weather:
+        filtered_weather = filtered_weather[filtered_weather['climate_zone_code'].isin(climate_zones_weather)]
+    if data_sources:
+        filtered_weather = filtered_weather[filtered_weather['data_source'].isin(data_sources)]
+    # between() is inclusive on both ends, matching the slider bounds
+    filtered_weather = filtered_weather[
+        filtered_weather['latitude'].between(lat_range[0], lat_range[1]) &
+        filtered_weather['longitude'].between(lon_range[0], lon_range[1])
+    ]
+    st.subheader(f"🌍 Found {len(filtered_weather)} weather locations")
+    # Weather visualizations
+    map_tab, distribution_tab, table_tab = st.tabs(["🗺️ Map", "📊 Distribution", "📋 Table"])
+    with map_tab:
+        # World map of weather locations
+        fig_map = px.scatter_mapbox(
+            filtered_weather,
+            lat='latitude',
+            lon='longitude',
+            color='climate_zone_code',
+            hover_data=['place', 'country'],
+            mapbox_style='carto-darkmatter',  # Dark theme map
+            zoom=1,
+            title='Weather Locations Worldwide'
+        )
+        fig_map.update_layout(**create_dark_theme_plotly_layout(), height=600)
+        st.plotly_chart(fig_map, use_container_width=True)
+    with distribution_tab:
+        countries_col, climate_col = st.columns(2)
+        with countries_col:
+            # Country distribution
+            country_counts = filtered_weather['country'].value_counts().head(15)
+            fig_countries = px.bar(
+                x=country_counts.values,
+                y=country_counts.index,
+                orientation='h',
+                title='Top 15 Countries by Weather Locations',
+                color=country_counts.values,
+                color_continuous_scale='viridis'
+            )
+            fig_countries.update_layout(**create_dark_theme_plotly_layout(), height=500)
+            st.plotly_chart(fig_countries, use_container_width=True)
+        with climate_col:
+            # Climate zone distribution
+            climate_counts = filtered_weather['climate_zone_code'].value_counts()
+            fig_climate = px.pie(
+                values=climate_counts.values,
+                names=climate_counts.index,
+                title='Climate Zone Distribution',
+                color_discrete_sequence=px.colors.qualitative.Set3
+            )
+            fig_climate.update_layout(**create_dark_theme_plotly_layout(), height=500)
+            st.plotly_chart(fig_climate, use_container_width=True)
+    with table_tab:
+        # Weather locations table
+        st.dataframe(
+            filtered_weather,
+            use_container_width=True,
+            hide_index=True,
+            column_config={
+                "id": st.column_config.NumberColumn("ID", width="small"),
+                "place": st.column_config.TextColumn("Location", width="large"),
+                "country": st.column_config.TextColumn("Country", width="small"),
+                "climate_zone_code": st.column_config.TextColumn("Climate", width="small"),
+                "latitude": st.column_config.NumberColumn("Latitude", format="%.2f", width="medium"),
+                "longitude": st.column_config.NumberColumn("Longitude", format="%.2f", width="medium"),
+                "elevation": st.column_config.NumberColumn("Elevation (m)", width="medium") if 'elevation' in filtered_weather.columns else None,
+                "data_source": st.column_config.TextColumn("Source", width="small") if 'data_source' in filtered_weather.columns else None
+            }
+        )
+def _render_comparison_page(buildings_df: pd.DataFrame) -> None:
+    """Render the Compare Buildings page for two user-selected buildings."""
+    st.header("Building Comparison Tool")
+    if buildings_df.empty:
+        st.warning("No building data available for comparison.")
+        return
+    st.subheader("Select Buildings to Compare")
+    # Building selection for comparison
+    building_names = buildings_df['name'].tolist()
+    col1, col2 = st.columns(2)
+    with col1:
+        building1 = st.selectbox("Building 1:", options=building_names, key="building1")
+    with col2:
+        building2 = st.selectbox("Building 2:", options=building_names, key="building2")
+    if not (building1 and building2 and building1 != building2):
+        st.info("Please select two different buildings to compare.")
+        return
+    # Get building data
+    building1_data = buildings_df[buildings_df['name'] == building1].iloc[0]
+    building2_data = buildings_df[buildings_df['name'] == building2].iloc[0]
+    # Comparison display
+    st.subheader("🔍 Building Comparison")
+    col1, col2 = st.columns(2)
+    with col1:
+        _render_building_card(building1_data, f"🏢 {building1}", "comparison-highlight")
+    with col2:
+        _render_building_card(building2_data, f"🏢 {building2}", "comparison-highlight")
+    # Load and compare epJSON files
+    st.subheader("🔧 Technical Comparison")
+    epjson1 = load_building_epjson(building1_data['filepath'])
+    epjson2 = load_building_epjson(building2_data['filepath'])
+    if not (epjson1 and epjson2):
+        return
+    analysis1 = analyze_building_epjson(epjson1)
+    analysis2 = analyze_building_epjson(epjson2)
+    # Technical comparison table: one row per (label, analysis key)
+    components = (
+        ('Zones', 'zones'), ('Surfaces', 'surfaces'), ('Windows', 'windows'),
+        ('HVAC Systems', 'hvac_systems'), ('Schedules', 'schedules'), ('Materials', 'materials'),
+    )
+    tech_comparison = pd.DataFrame({
+        'Component': [label for label, _ in components],
+        building1: [analysis1[key] for _, key in components],
+        building2: [analysis2[key] for _, key in components]
+    })
+    # Add difference column (building 2 minus building 1)
+    tech_comparison['Difference'] = tech_comparison[building2] - tech_comparison[building1]
+    st.dataframe(tech_comparison, use_container_width=True)
+    # Processing status comparison
+    st.subheader("⚡ Processing Status Comparison")
+    def status_cells(analysis):
+        return [
+            "✅" if analysis['has_meters'] else "❌",
+            "✅" if analysis['has_setpoints'] else "❌",
+            str(analysis['timestep'] or 'Not set')
+        ]
+    status_comparison = pd.DataFrame({
+        'Status': ['Has Meters', 'Has Setpoints', 'Timesteps/Hour'],
+        building1: status_cells(analysis1),
+        building2: status_cells(analysis2)
+    })
+    st.dataframe(status_comparison, use_container_width=True)
+def _render_statistical_summary(buildings_df: pd.DataFrame) -> None:
+    """Show describe() of the numeric columns and bar charts of the categories."""
+    st.subheader("Statistical Summary")
+    # Numeric column statistics, limited to the columns that exist
+    numeric_cols = [
+        col for col in ('floor_area', 'num_zones', 'window_wall_ratio')
+        if col in buildings_df.columns
+    ]
+    if numeric_cols:
+        st.write("**Numeric Properties Statistics:**")
+        st.dataframe(buildings_df[numeric_cols].describe(), use_container_width=True)
+    # Categorical distributions
+    st.write("**Categorical Distributions:**")
+    types_col, climate_col = st.columns(2)
+    with types_col:
+        if 'building_type' in buildings_df.columns:
+            st.write("Building Types:")
+            st.bar_chart(buildings_df['building_type'].value_counts())
+    with climate_col:
+        if 'climate_zone' in buildings_df.columns:
+            st.write("Climate Zones:")
+            st.bar_chart(buildings_df['climate_zone'].value_counts())
+def _render_data_quality_check(buildings_df: pd.DataFrame) -> None:
+    """Report missing values, duplicate rows and building files absent from disk."""
+    st.subheader("Data Quality Assessment")
+    # Missing data check
+    missing_data = buildings_df.isnull().sum()
+    if missing_data.sum() > 0:
+        st.write("**Missing Data:**")
+        missing_df = missing_data[missing_data > 0].to_frame('Missing Count')
+        missing_df['Percentage'] = (missing_df['Missing Count'] / len(buildings_df) * 100).round(2)
+        st.dataframe(missing_df)
+    else:
+        st.success("✅ No missing data found!")
+    # Duplicate check
+    duplicates = buildings_df.duplicated().sum()
+    if duplicates > 0:
+        st.warning(f"⚠️ Found {duplicates} duplicate rows")
+    else:
+        st.success("✅ No duplicate rows found!")
+    # File existence check
+    if 'filepath' not in buildings_df.columns:
+        return
+    st.write("**File Existence Check:**")
+    # A row without a filepath (NaN/None) is counted as a missing file rather
+    # than crashing on `Path / float`.
+    missing_files = [
+        name
+        for name, filepath in zip(buildings_df['name'], buildings_df['filepath'])
+        if pd.isna(filepath) or not (Path("data") / filepath).exists()
+    ]
+    if not missing_files:
+        st.success("✅ All building files exist!")
+        return
+    st.error(f"❌ {len(missing_files)} building files not found")
+    with st.expander("Show missing files"):
+        for file in missing_files[:10]:  # Show first 10
+            st.write(f"- {file}")
+        if len(missing_files) > 10:
+            st.write(f"... and {len(missing_files) - 10} more")
+def _distribution_text(buildings_df: pd.DataFrame, column: str) -> str:
+    """Return the value counts of ``column`` as text, or 'No data' if unavailable."""
+    if buildings_df.empty or column not in buildings_df.columns:
+        return 'No data'
+    return buildings_df[column].value_counts().to_string()
+def _render_detailed_report(buildings_df: pd.DataFrame, weather_df: pd.DataFrame, combinations_df: pd.DataFrame) -> None:
+    """Offer report options and, on request, show and export a text report."""
+    st.subheader("Generate Detailed Report")
+    # Report options
+    include_weather = st.checkbox("Include weather data analysis", value=True)
+    include_combinations = st.checkbox("Include combination analysis", value=True)
+    if not st.button("Generate Report"):
+        return
+    with st.spinner("Generating report..."):
+        # Generate comprehensive report
+        report_data = {
+            'timestamp': pd.Timestamp.now(),
+            'buildings_total': len(buildings_df),
+            'weather_total': len(weather_df) if not weather_df.empty else 0,
+            'combinations_total': len(combinations_df) if not combinations_df.empty else 0
+        }
+        st.success("📊 Report generated successfully!")
+        # Display key metrics; the checkboxes only control these two metrics
+        buildings_col, weather_col, combinations_col = st.columns(3)
+        with buildings_col:
+            st.metric("Buildings Analyzed", report_data['buildings_total'])
+        with weather_col:
+            if include_weather:
+                st.metric("Weather Locations", report_data['weather_total'])
+        with combinations_col:
+            if include_combinations:
+                st.metric("Combinations", report_data['combinations_total'])
+        # Download report. The literal's layout, including its closing line,
+        # is kept unchanged so the exported text keeps its exact form.
+        report_text = f"""
+Building Generator Analysis Report
+Generated: {report_data['timestamp']}
+Summary Statistics:
+- Total Buildings: {report_data['buildings_total']}
+- Weather Locations: {report_data['weather_total']}
+- Simulation Combinations: {report_data['combinations_total']}
+Building Type Distribution:
+{_distribution_text(buildings_df, 'building_type')}
+Climate Zone Distribution:
+{_distribution_text(buildings_df, 'climate_zone')}
+                    """
+        st.download_button(
+            label="📥 Download Report",
+            data=report_text,
+            file_name=f"building_analysis_report_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.txt",
+            mime="text/plain"
+        )
+def _render_custom_analysis() -> None:
+    """Show the placeholder lists of planned custom-analysis features."""
+    st.subheader("Custom Analysis")
+    st.info("🚧 Custom analysis features coming soon! This will include:")
+    features_col, tools_col = st.columns(2)
+    # The text inside both literals keeps its original indentation so the
+    # strings passed to st.markdown are unchanged.
+    with features_col:
+        st.markdown("""
+                **Planned Features:**
+                - Building performance correlation analysis
+                - Climate impact assessment
+                - Variation effectiveness studies
+                - Energy consumption modeling
+                - Optimization recommendations
+                """)
+    with tools_col:
+        st.markdown("""
+                **Interactive Tools:**
+                - Custom filter combinations
+                - Advanced statistical analysis
+                - Machine learning insights
+                - Predictive modeling
+                - Export to research formats
+                """)
+def _render_analysis_page(buildings_df: pd.DataFrame, weather_df: pd.DataFrame, combinations_df: pd.DataFrame) -> None:
+    """Render the Analysis & Reports page and dispatch to the chosen analysis."""
+    st.header("Analysis & Reports")
+    if buildings_df.empty:
+        st.warning("No building data available for analysis.")
+        return
+    # Analysis options
+    analysis_type = st.selectbox(
+        "Choose analysis type:",
+        ["📈 Statistical Summary", "🔍 Data Quality Check", "📋 Detailed Report", "🎯 Custom Analysis"]
+    )
+    if analysis_type == "📈 Statistical Summary":
+        _render_statistical_summary(buildings_df)
+    elif analysis_type == "🔍 Data Quality Check":
+        _render_data_quality_check(buildings_df)
+    elif analysis_type == "📋 Detailed Report":
+        _render_detailed_report(buildings_df, weather_df, combinations_df)
+    elif analysis_type == "🎯 Custom Analysis":
+        _render_custom_analysis()
+def main():
+    """Main Streamlit application.
+
+    Loads the building, weather and combination tables, draws the sidebar
+    page selector and renders the selected page. Stops after an error message
+    when the data pipeline could not be loaded.
+    """
+    st.title("🏗️ Building Generator Dashboard")
+    st.markdown("Interactive exploration of building energy models and weather data")
+    # Load data
+    with st.spinner("Loading building and weather data..."):
+        pipeline, buildings_df, weather_df, combinations_df = load_pipeline_data()
+    if pipeline is None:
+        st.error("Failed to initialize application. Please check your data directory.")
+        st.info("Make sure you have run: `python scripts/main.py --create-table` and `python scripts/main.py --create-weather-table`")
+        return
+    # Sidebar for navigation
+    st.sidebar.title("🗂️ Navigation")
+    # Initialize session state for page navigation
+    if 'current_page' not in st.session_state:
+        st.session_state.current_page = "🏠 Overview"
+    # Use session state to control the selectbox
+    page_options = ["🏠 Overview", "🏢 Building Explorer", "🌍 Weather Data", "⚖️ Compare Buildings", "📊 Analysis & Reports"]
+    current_index = page_options.index(st.session_state.current_page) if st.session_state.current_page in page_options else 0
+    page = st.sidebar.selectbox(
+        "Choose a page:",
+        page_options,
+        index=current_index,
+        key="page_selector"
+    )
+    # Update session state when selectbox changes
+    if page != st.session_state.current_page:
+        st.session_state.current_page = page
+    # Use the current page from session state
+    current_page = st.session_state.current_page
+    if current_page == "🏠 Overview":
+        st.header("System Overview")
+        create_overview_metrics(buildings_df, weather_df, combinations_df)
+    elif current_page == "🏢 Building Explorer":
+        _render_building_explorer_page(buildings_df)
+    elif current_page == "🌍 Weather Data":
+        _render_weather_page(weather_df)
+    elif current_page == "⚖️ Compare Buildings":
+        _render_comparison_page(buildings_df)
+    elif current_page == "📊 Analysis & Reports":
+        _render_analysis_page(buildings_df, weather_df, combinations_df)
+# Script entry point: start the dashboard only when this file is executed
+# directly, not when it is imported as a module.
+if __name__ == "__main__":
+    main()