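# NOTE(review): the next four lines look like web file-viewer residue rather than
# program text; they are kept verbatim as comments so the module stays parseable.
# BuildingBench-Dataset / streamlit_app.py
# BuildingBench's picture
# Upload streamlit_app.py
# d7f7deb verified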
def _write_model_share(frame, column):
    """Write one bullet per distinct value of *column* with its row count and share of all rows."""
    total_rows = len(frame)
    for label, count in frame[column].value_counts().items():
        share = (count / total_rows) * 100
        # str() keeps non-string labels (e.g. numeric codes) from raising on .title().
        st.write(f"β€’ **{str(label).title()}**: {count} models ({share:.1f}%)")


def _file_coverage_pct(frame, data_dir="data"):
    """Return the percentage of rows whose 'filepath' entry exists under *data_dir*."""
    if frame.empty or 'filepath' not in frame.columns:
        return 0
    root = Path(data_dir)
    # Missing cells (NaN) are not valid path components; count them as absent
    # files instead of letting `root / nan` raise TypeError.
    found = sum(
        1
        for relative in frame['filepath']
        if isinstance(relative, (str, Path)) and (root / relative).exists()
    )
    return (found / len(frame)) * 100


def _render_quality_card(icon, value_text, value_color, caption):
    """Render one centred dark metric card: icon, coloured value, caption."""
    st.markdown(f"""
<div style="background-color: #2d2d2d; border-radius: 10px; padding: 15px; text-align: center;
border: 1px solid #404040; color: white;">
<div style="font-size: 1.5em; margin-bottom: 5px;">{icon}</div>
<div style="font-size: 1.8em; font-weight: bold; color: {value_color};">{value_text}</div>
<div style="font-size: 0.9em; opacity: 0.8;">{caption}</div>
</div>
""", unsafe_allow_html=True)


def create_dataset_statistics(buildings_df: "pd.DataFrame", weather_df: "pd.DataFrame", combinations_df: "pd.DataFrame"):
    """Create comprehensive dataset statistics section.

    Renders, in order: a static description, per-table breakdowns for the
    building and weather tables, four quality-metric cards, and two static
    recommendation panels.

    The annotations are quoted on purpose: this definition is executed before
    the module's ``import pandas as pd`` statement, so an unquoted
    ``pd.DataFrame`` would raise NameError while the module is being loaded.

    Args:
        buildings_df: Building table. When non-empty, the columns
            'building_type', 'variation_type' and 'climate_zone' are read;
            'filepath' is read only if present.
        weather_df: Weather table. When non-empty, the columns 'country' and
            'climate_zone_code' are read; 'data_source' is read only if present.
        combinations_df: Building/weather combination table; only its
            emptiness is inspected.
    """
    st.subheader("πŸ“Š Dataset Statistics & Information")

    # Dataset description
    st.markdown("""
<div style="background-color: #1e1e1e; border-radius: 10px; padding: 20px; margin: 20px 0;
border-left: 4px solid #2196f3; color: white;">
<h4 style="color: #2196f3; margin-bottom: 15px;">πŸ—οΈ About This Dataset</h4>
<p style="font-size: 1.1em; line-height: 1.6; margin-bottom: 10px;">
This comprehensive building energy dataset contains energy models for various building types across different climate zones.
The dataset is designed for energy simulation research, building performance analysis, and climate impact studies.
</p>
<p style="font-size: 1.1em; line-height: 1.6; margin-bottom: 10px;">
Each building model includes detailed geometric properties, construction materials, HVAC systems, and occupancy schedules.
Multiple variations are generated from base models to study the impact of different parameters on energy performance.
</p>
<p style="font-size: 1.1em; line-height: 1.6;">
Weather data is sourced from global meteorological stations and covers multiple climate zones as defined by ASHRAE standards,
enabling comprehensive climate-specific energy analysis.
</p>
</div>
""", unsafe_allow_html=True)

    # Detailed statistics
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🏒 Building Dataset Details")
        if not buildings_df.empty:
            # Building type breakdown
            st.markdown("**Building Types Distribution:**")
            _write_model_share(buildings_df, 'building_type')
            st.markdown("---")
            # Variation breakdown
            st.markdown("**Variation Types:**")
            _write_model_share(buildings_df, 'variation_type')
            st.markdown("---")
            # Climate zone coverage
            climate_zones = buildings_df['climate_zone'].value_counts().sort_index()
            st.markdown("**Climate Zone Coverage:**")
            for zone, count in climate_zones.items():
                st.write(f"β€’ **Zone {zone}**: {count} buildings")
        else:
            st.warning("No building data available")

    with col2:
        st.subheader("🌍 Weather Dataset Details")
        if not weather_df.empty:
            # Geographic coverage
            st.markdown("**Geographic Coverage:**")
            st.write(f"β€’ **Total Locations**: {len(weather_df)}")
            st.write(f"β€’ **Countries Covered**: {weather_df['country'].nunique()}")
            st.write(f"β€’ **Climate Zones**: {weather_df['climate_zone_code'].nunique()}")
            # Climate zone distribution in weather data (first ten zones in index order)
            weather_climate_zones = weather_df['climate_zone_code'].value_counts().sort_index()
            st.markdown("**Weather Locations by Climate Zone:**")
            for zone, count in weather_climate_zones.head(10).items():
                st.write(f"β€’ **Zone {zone}**: {count} locations")
            st.markdown("---")
            # Top countries by location count
            top_countries = weather_df['country'].value_counts().head(8)
            st.markdown("**Top Countries by Weather Locations:**")
            for country, count in top_countries.items():
                st.write(f"β€’ **{country}**: {count} locations")
            # Data sources if available
            if 'data_source' in weather_df.columns:
                st.markdown("---")
                data_sources = weather_df['data_source'].value_counts()
                st.markdown("**Data Sources:**")
                for source, count in data_sources.items():
                    st.write(f"β€’ **{source}**: {count} files")
        else:
            st.warning("No weather data available")

    # Dataset quality metrics
    st.subheader("🎯 Dataset Quality Metrics")
    quality_col1, quality_col2, quality_col3, quality_col4 = st.columns(4)

    with quality_col1:
        # Share of non-null cells across the whole building table.
        completeness = 0
        if not buildings_df.empty:
            total_cells = len(buildings_df) * len(buildings_df.columns)
            missing_cells = buildings_df.isnull().sum().sum()
            completeness = ((total_cells - missing_cells) / total_cells) * 100 if total_cells > 0 else 0
        _render_quality_card("πŸ“ˆ", f"{completeness:.1f}%", "#4caf50", "Data Completeness")

    with quality_col2:
        file_coverage = _file_coverage_pct(buildings_df)
        _render_quality_card("πŸ“", f"{file_coverage:.1f}%", "#2196f3", "File Availability")

    with quality_col3:
        diversity_score = 0
        if not buildings_df.empty:
            # Distinct values per row, as a percentage, averaged over the two
            # columns. This is a plain ratio, not an entropy measure.
            row_count = len(buildings_df)
            type_ratio = len(buildings_df['building_type'].unique()) / row_count * 100
            climate_ratio = len(buildings_df['climate_zone'].unique()) / row_count * 100
            diversity_score = (type_ratio + climate_ratio) / 2
        _render_quality_card("🎨", f"{diversity_score:.1f}%", "#ff9800", "Dataset Diversity")

    with quality_col4:
        # Coarse readiness ladder: combinations present > both tables > one table > nothing.
        simulation_readiness = 0
        if not combinations_df.empty:
            simulation_readiness = 100
        elif not buildings_df.empty and not weather_df.empty:
            simulation_readiness = 75
        elif not buildings_df.empty or not weather_df.empty:
            simulation_readiness = 50
        _render_quality_card("⚑", f"{simulation_readiness}%", "#9c27b0", "Simulation Ready")

    # Usage recommendations
    st.subheader("πŸ’‘ Usage Recommendations")
    recommendation_col1, recommendation_col2 = st.columns(2)
    with recommendation_col1:
        st.markdown("""
<div style="background-color: #1a237e; border-radius: 10px; padding: 20px; margin: 10px 0;
border-left: 4px solid #3f51b5; color: white;">
<h5 style="color: #64b5f6; margin-bottom: 15px;">πŸ”¬ Research Applications</h5>
<ul style="line-height: 1.8;">
<li>Building energy performance analysis</li>
<li>Climate change impact studies</li>
<li>HVAC system optimization</li>
<li>Retrofit strategy evaluation</li>
<li>Code compliance verification</li>
</ul>
</div>
""", unsafe_allow_html=True)
    with recommendation_col2:
        st.markdown("""
<div style="background-color: #1b5e20; border-radius: 10px; padding: 20px; margin: 10px 0;
border-left: 4px solid #4caf50; color: white;">
<h5 style="color: #81c784; margin-bottom: 15px;">βš™οΈ Getting Started</h5>
<ul style="line-height: 1.8;">
<li>Use <strong>Building Explorer</strong> to browse models</li>
<li>Check <strong>Weather Data</strong> for climate coverage</li>
<li>Generate combinations for simulations</li>
<li>Export filtered datasets for analysis</li>
<li>Run quality checks before processing</li>
</ul>
</div>
""", unsafe_allow_html=True)


# dashboard/streamlit_app.py
"""
Building Generator Dashboard - Main Streamlit Application
Interactive web interface for exploring building energy models and weather data
"""
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from pathlib import Path
import sys
import json
from typing import Dict, List, Optional
import logging
# Add the project root to Python path
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from building_gen.core.pipeline import BuildingPipeline
# Configure page
# Sets the browser-tab title and icon, selects the wide layout, and starts with
# the sidebar expanded.
# NOTE(review): Streamlit is understood to require set_page_config to run before
# other st.* commands — keep it ahead of the st.markdown call below; confirm
# against the installed Streamlit version.
st.set_page_config(
page_title="Building Generator Dashboard",
page_icon="πŸ—οΈ",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for dark theme styling
# The stylesheet is injected as a raw <style> element; unsafe_allow_html=True is
# passed so the markup is emitted as HTML instead of being shown as text.
# The .filter-container, .building-card and .comparison-highlight rules only
# take effect on HTML that the app itself emits with those class names.
# NOTE(review): `.css-1d391kg` looks like an auto-generated class name; per the
# CSS comment it presumably targets the sidebar — verify it still matches the
# markup produced by the installed Streamlit version.
st.markdown("""
<style>
.main > div {
padding-top: 2rem;
}
/* Dark theme metric cards */
.stMetric {
background-color: #1e1e1e;
border: 1px solid #333;
border-radius: 10px;
padding: 15px;
margin: 5px 0;
color: white;
}
/* Dark theme filter container */
.filter-container {
background-color: #2d2d2d;
border: 1px solid #404040;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
color: white;
}
/* Dark theme building cards */
.building-card {
border: 2px solid #404040;
border-radius: 10px;
padding: 15px;
margin: 10px 0;
background-color: #1e1e1e;
color: white;
}
/* Dark theme comparison highlight */
.comparison-highlight {
background-color: #1a237e;
border-left: 4px solid #3f51b5;
border-radius: 5px;
padding: 15px;
margin: 5px 0;
color: white;
}
/* Plotly chart dark theme */
.js-plotly-plot {
background-color: transparent !important;
}
/* Data editor dark theme */
.stDataFrame {
background-color: #1e1e1e;
}
/* Sidebar dark theme adjustments */
.css-1d391kg {
background-color: #1e1e1e;
}
/* Success/Info/Warning message styling */
.stSuccess {
background-color: #1b5e20;
border: 1px solid #4caf50;
}
.stInfo {
background-color: #0d47a1;
border: 1px solid #2196f3;
}
.stWarning {
background-color: #e65100;
border: 1px solid #ff9800;
}
.stError {
background-color: #b71c1c;
border: 1px solid #f44336;
}
</style>
""", unsafe_allow_html=True)
@st.cache_data
def load_pipeline_data(data_dir: str = "data"):
    """Build the pipeline object and read the dashboard's CSV tables.

    Each table is optional: a CSV that does not exist on disk is replaced by
    an empty DataFrame.

    Args:
        data_dir: Directory that holds the ``tables/`` and ``weather/tables/``
            sub-directories.

    Returns:
        ``(pipeline, buildings_df, weather_df, combinations_df)``. If anything
        raises while loading, the error is shown with ``st.error`` and
        ``(None, <empty>, <empty>, <empty>)`` is returned instead.
    """
    def read_optional_csv(relative_path):
        # A missing CSV is not an error; it simply yields an empty table.
        csv_path = Path(data_dir) / relative_path
        if csv_path.exists():
            return pd.read_csv(csv_path)
        return pd.DataFrame()

    try:
        pipeline = BuildingPipeline(data_dir)
        buildings_df = read_optional_csv("tables/buildings.csv")
        weather_df = read_optional_csv("weather/tables/all_weather.csv")
        combinations_df = read_optional_csv("tables/building_weather_combinations.csv")
    except Exception as e:
        st.error(f"Failed to load data: {e}")
        return None, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

    return pipeline, buildings_df, weather_df, combinations_df
def _sorted_filter_options(frame, column):
    """Return the sorted distinct non-null values of *column*, or [] for an empty frame."""
    if frame.empty:
        return []
    # NaN cannot be ordered against strings, so drop it before sorting.
    return sorted(frame[column].dropna().unique())


def _range_slider(label, frame, column, cast, fallback, **slider_kwargs):
    """Render a two-ended slider over *column*'s observed range and return the chosen (low, high)."""
    if frame.empty or column not in frame.columns:
        return fallback
    observed = frame[column].dropna()
    if observed.empty:
        # An all-NaN column has no usable bounds (and int(nan) would raise).
        return fallback
    low, high = cast(observed.min()), cast(observed.max())
    if low >= high:
        # A zero-width range leaves nothing to choose.
        # NOTE(review): st.slider is understood to require min_value < max_value —
        # confirm against the installed Streamlit version.
        return (low, high)
    return st.slider(label, min_value=low, max_value=high, value=(low, high), **slider_kwargs)


def create_building_filters(buildings_df: pd.DataFrame) -> Dict:
    """Create filter widgets for buildings.

    Renders three multiselects (building type, climate zone, variation type)
    and up to three range sliders (floor area, window-to-wall ratio, number of
    zones). A slider is only drawn when its column exists, has at least one
    non-null value and spans a non-zero range; otherwise a fixed range is
    returned for that key.

    Args:
        buildings_df: Building table. 'building_type', 'climate_zone' and
            'variation_type' are read when the table is non-empty;
            'floor_area', 'window_wall_ratio' and 'num_zones' are optional.

    Returns:
        Dict with keys 'building_types', 'climate_zones', 'variation_types'
        (lists of selected values; empty means "no restriction") and
        'area_range', 'wwr_range', 'zones_range' ((low, high) tuples).
    """
    # NOTE(review): the opening and closing <div> are sent as two separate
    # markdown elements, so they may not actually wrap the widgets in between.
    st.markdown('<div class="filter-container">', unsafe_allow_html=True)
    st.subheader("πŸ” Filter Buildings")

    col1, col2, col3 = st.columns(3)
    with col1:
        building_types = st.multiselect(
            "Building Type",
            options=_sorted_filter_options(buildings_df, 'building_type'),
            default=[]
        )
        climate_zones = st.multiselect(
            "Climate Zone",
            options=_sorted_filter_options(buildings_df, 'climate_zone'),
            default=[]
        )
    with col2:
        variation_types = st.multiselect(
            "Variation Type",
            options=_sorted_filter_options(buildings_df, 'variation_type'),
            default=[]
        )
        # Floor area range
        area_range = _range_slider(
            "Floor Area Range (mΒ²)", buildings_df, 'floor_area', float, (0, 10000),
            format="%.0f"
        )
    with col3:
        # Window-to-wall ratio if available
        wwr_range = _range_slider(
            "Window-to-Wall Ratio", buildings_df, 'window_wall_ratio', float, (0.0, 1.0),
            format="%.2f"
        )
        # Number of zones range
        zones_range = _range_slider(
            "Number of Zones", buildings_df, 'num_zones', int, (1, 100)
        )

    st.markdown('</div>', unsafe_allow_html=True)
    return {
        'building_types': building_types,
        'climate_zones': climate_zones,
        'variation_types': variation_types,
        'area_range': area_range,
        'wwr_range': wwr_range,
        'zones_range': zones_range
    }
def apply_building_filters(buildings_df: pd.DataFrame, filters: Dict) -> pd.DataFrame:
    """Return the rows of *buildings_df* that satisfy every active filter.

    Categorical filters ('building_types', 'climate_zones', 'variation_types')
    restrict rows only when their selection is non-empty. Range filters
    ('area_range', 'wwr_range', 'zones_range') are inclusive on both ends and
    are applied only when the matching column is present. The input frame is
    not modified.
    """
    categorical_filters = (
        ('building_type', 'building_types'),
        ('climate_zone', 'climate_zones'),
        ('variation_type', 'variation_types'),
    )
    range_filters = (
        ('floor_area', 'area_range'),
        ('window_wall_ratio', 'wwr_range'),
        ('num_zones', 'zones_range'),
    )

    result = buildings_df.copy()

    for column, filter_key in categorical_filters:
        selection = filters[filter_key]
        if not selection:
            continue  # nothing selected means "keep everything"
        result = result[result[column].isin(selection)]

    for column, filter_key in range_filters:
        if column not in result.columns:
            continue
        bounds = filters[filter_key]
        values = result[column]
        within_bounds = (values >= bounds[0]) & (values <= bounds[1])
        result = result[within_bounds]

    return result
def create_overview_metrics(buildings_df: pd.DataFrame, weather_df: pd.DataFrame, combinations_df: pd.DataFrame):
    """Render the four equally sized dark overview cards.

    Cards, left to right: building count (with the number of non-'base'
    variations), weather-location count (with distinct countries),
    combination count (with a ready / not-created status) and the number of
    distinct building climate zones.
    """
    def render_card(icon, title, value, caption):
        # Shared card markup; only the icon, title, headline value and caption vary.
        st.markdown(f"""
<div style="background-color: #2d2d2d; border-radius: 15px; padding: 25px; margin: 10px 0;
border: 1px solid #404040; color: white; height: 200px; display: flex;
flex-direction: column; justify-content: space-between;">
<div style="display: flex; align-items: center; margin-bottom: 15px;">
<span style="font-size: 2em; margin-right: 15px;">{icon}</span>
<span style="font-size: 1.3em; font-weight: bold;">{title}</span>
</div>
<div style="font-size: 3.5em; font-weight: bold; text-align: center; margin: 15px 0;">{value}</div>
<div style="font-size: 1em; text-align: center; opacity: 0.8;">
{caption}
</div>
</div>
""", unsafe_allow_html=True)

    buildings_slot, weather_slot, combos_slot, climate_slot = st.columns(4)

    with buildings_slot:
        if buildings_df.empty:
            variation_total = 0
        else:
            variation_total = len(buildings_df[buildings_df['variation_type'] != 'base'])
        render_card("🏒", "Buildings", len(buildings_df), f"{variation_total} variations")

    with weather_slot:
        country_total = 0 if weather_df.empty else weather_df['country'].nunique()
        render_card("🌍", "Weather Locations", len(weather_df), f"{country_total} countries")

    with combos_slot:
        if len(combinations_df) == 0:
            combinations_status = "Not created"
        else:
            combinations_status = "Ready"
        render_card("πŸ”„", "Combinations", len(combinations_df), combinations_status)

    with climate_slot:
        zone_total = 0 if buildings_df.empty else buildings_df['climate_zone'].nunique()
        render_card("🌑️", "Climate Zones", zone_total, "ASHRAE zones")
def create_dark_theme_plotly_layout():
    """Return the shared dark-theme layout settings for Plotly figures.

    The result is a fresh dict on every call (transparent plot and paper
    backgrounds, white font, grey axis lines) intended to be splatted into
    ``fig.update_layout(**...)``.
    """
    transparent = 'rgba(0,0,0,0)'
    grid_grey = '#404040'

    def axis_style():
        # Built per axis so the x and y settings never share one dict object.
        return {
            'gridcolor': grid_grey,
            'linecolor': grid_grey,
            'tickcolor': grid_grey,
            'color': 'white',
        }

    layout = {}
    layout['plot_bgcolor'] = transparent
    layout['paper_bgcolor'] = transparent
    layout['font'] = {'color': 'white'}
    layout['xaxis'] = axis_style()
    layout['yaxis'] = axis_style()
    return layout
def create_building_characteristics_chart(buildings_df: pd.DataFrame):
    """Create building characteristics visualization with dark theme.

    Renders four tabs: type/variation distributions, buildings per climate
    zone (with zone descriptions), a per-type summary table, and property
    correlations with scatter plots.

    Args:
        buildings_df: Building table. 'building_type', 'variation_type' and
            'climate_zone' are read unconditionally; 'floor_area',
            'num_zones', 'window_wall_ratio' and 'name' are used only when
            present.
    """
    if buildings_df.empty:
        st.warning("No building data available")
        return

    tab1, tab2, tab3, tab4 = st.tabs(["πŸ“Š Distribution", "πŸ—ΊοΈ Climate Zones", "πŸ—οΈ Types", "πŸ“ Properties"])

    with tab1:
        col1, col2 = st.columns(2)
        with col1:
            # Building type distribution
            type_counts = buildings_df['building_type'].value_counts()
            fig_types = px.pie(
                values=type_counts.values,
                names=type_counts.index,
                title="Building Types Distribution",
                color_discrete_sequence=px.colors.qualitative.Set3
            )
            fig_types.update_layout(**create_dark_theme_plotly_layout(), height=400)
            st.plotly_chart(fig_types, use_container_width=True)
        with col2:
            # Variation type distribution
            var_counts = buildings_df['variation_type'].value_counts()
            fig_vars = px.bar(
                x=var_counts.index,
                y=var_counts.values,
                title="Variation Types",
                color=var_counts.index,
                color_discrete_sequence=px.colors.qualitative.Pastel
            )
            fig_vars.update_layout(**create_dark_theme_plotly_layout(), height=400, showlegend=False)
            st.plotly_chart(fig_vars, use_container_width=True)

    with tab2:
        # Climate zone analysis
        climate_counts = buildings_df['climate_zone'].value_counts()
        fig_climate = px.bar(
            x=climate_counts.index,
            y=climate_counts.values,
            title="Buildings by Climate Zone",
            color=climate_counts.values,
            color_continuous_scale='viridis'
        )
        fig_climate.update_layout(**create_dark_theme_plotly_layout(), height=400)
        st.plotly_chart(fig_climate, use_container_width=True)

        # Climate zone descriptions
        climate_descriptions = {
            '1A': 'Very Hot - Humid', '1B': 'Very Hot - Dry',
            '2A': 'Hot - Humid', '2B': 'Hot - Dry',
            '3A': 'Warm - Humid', '3B': 'Warm - Dry', '3C': 'Warm - Marine',
            '4A': 'Mixed - Humid', '4B': 'Mixed - Dry', '4C': 'Mixed - Marine',
            '5A': 'Cool - Humid', '5B': 'Cool - Dry', '5C': 'Cool - Marine',
            '6A': 'Cold - Humid', '6B': 'Cold - Dry',
            '7': 'Very Cold', '8': 'Subarctic'
        }
        st.subheader("Climate Zone Descriptions")
        # dropna(): NaN cannot be sorted alongside strings.
        for zone in sorted(buildings_df['climate_zone'].dropna().unique()):
            # str(): zones stored as integers (7, 8) still match the string keys.
            description = climate_descriptions.get(str(zone))
            if description is not None:
                st.info(f"**{zone}**: {description}")

    with tab3:
        # Building type details
        st.subheader("Building Type Analysis")
        # Aggregate only the columns that are actually present; naming an
        # absent column in the spec makes the groupby aggregation raise.
        agg_dict = {}
        if 'floor_area' in buildings_df.columns:
            agg_dict['floor_area'] = ['count', 'mean', 'std']
        if 'num_zones' in buildings_df.columns:
            agg_dict['num_zones'] = ['mean', 'std']
        if 'window_wall_ratio' in buildings_df.columns:
            agg_dict['window_wall_ratio'] = ['mean', 'std']

        by_type = buildings_df.groupby('building_type')
        if agg_dict:
            type_summary = by_type.agg(agg_dict).round(2)
        else:
            # No numeric property columns at all: fall back to a plain row count.
            type_summary = by_type.size().to_frame('count')
        st.dataframe(type_summary, use_container_width=True)

    with tab4:
        # Property correlations
        numeric_cols = [
            col for col in ('floor_area', 'num_zones', 'window_wall_ratio')
            if col in buildings_df.columns
        ]
        if len(numeric_cols) >= 2:
            corr_matrix = buildings_df[numeric_cols].corr()
            fig_corr = px.imshow(
                corr_matrix,
                color_continuous_scale='RdBu',
                aspect='auto',
                title='Building Property Correlations'
            )
            fig_corr.update_layout(**create_dark_theme_plotly_layout())
            st.plotly_chart(fig_corr, use_container_width=True)

            # Scatter plots
            # Only offer the name on hover when the table actually carries it.
            hover_columns = ['name'] if 'name' in buildings_df.columns else None
            col1, col2 = st.columns(2)
            with col1:
                if 'floor_area' in numeric_cols and 'num_zones' in numeric_cols:
                    fig_scatter1 = px.scatter(
                        buildings_df,
                        x='floor_area',
                        y='num_zones',
                        color='building_type',
                        title='Floor Area vs Number of Zones',
                        hover_data=hover_columns
                    )
                    fig_scatter1.update_layout(**create_dark_theme_plotly_layout())
                    st.plotly_chart(fig_scatter1, use_container_width=True)
            with col2:
                if 'window_wall_ratio' in numeric_cols and 'floor_area' in numeric_cols:
                    fig_scatter2 = px.scatter(
                        buildings_df,
                        x='window_wall_ratio',
                        y='floor_area',
                        color='building_type',
                        title='Window-Wall Ratio vs Floor Area',
                        hover_data=hover_columns
                    )
                    fig_scatter2.update_layout(**create_dark_theme_plotly_layout())
                    st.plotly_chart(fig_scatter2, use_container_width=True)
def display_buildings_table(buildings_df: pd.DataFrame):
    """Display interactive buildings table.

    Shows the (already filtered) table in a data editor with every column
    disabled for editing, followed by a CSV export button.

    Args:
        buildings_df: Rows to display. An empty frame produces a warning and
            nothing else.
    """
    st.subheader("πŸ“‹ Buildings Database")
    if buildings_df.empty:
        st.warning("No buildings found matching the current filters.")
        return

    # Column presentation for the columns configured unconditionally.
    column_config = {
        "id": st.column_config.NumberColumn("ID", width="small"),
        "name": st.column_config.TextColumn("Building Name", width="large"),
        "building_type": st.column_config.TextColumn("Type", width="medium"),
        "climate_zone": st.column_config.TextColumn("Climate", width="small"),
        "variation_type": st.column_config.TextColumn("Variation", width="medium"),
        "filepath": st.column_config.TextColumn("File Path", width="large")
    }
    # Optional columns are configured only when the table carries them.
    optional_config = {
        "floor_area": st.column_config.NumberColumn("Floor Area (mΒ²)", format="%.0f", width="medium"),
        "num_zones": st.column_config.NumberColumn("Zones", width="small"),
        "window_wall_ratio": st.column_config.NumberColumn("WWR", format="%.2f", width="small"),
        "created_date": st.column_config.DatetimeColumn("Created", width="medium"),
    }
    column_config.update(
        {name: config for name, config in optional_config.items() if name in buildings_df.columns}
    )

    # Display the table (the editor's return value is not needed: nothing is editable)
    st.data_editor(
        buildings_df,
        use_container_width=True,
        hide_index=True,
        column_config=column_config,
        disabled=list(buildings_df.columns)  # Make all columns read-only
    )

    # Export functionality
    # The download button is rendered directly instead of inside
    # `if st.button(...)`: a nested widget only exists during the single rerun
    # that follows the outer click, forcing two clicks and vanishing on the
    # next interaction. The CSV is therefore built on every rerun.
    export_col, _, _ = st.columns([1, 1, 2])
    with export_col:
        st.download_button(
            label="πŸ“₯ Export to CSV",
            data=buildings_df.to_csv(index=False),
            file_name=f"buildings_filtered_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv",
            mime="text/csv"
        )
def load_building_epjson(filepath: str, data_dir: str = "data") -> Optional[Dict]:
    """Load building epJSON file.

    Args:
        filepath: Path of the epJSON file relative to *data_dir*.
        data_dir: Base directory the relative path is resolved against.

    Returns:
        The parsed JSON document, or ``None`` when the file is missing,
        unreadable or not valid JSON. Failures are reported with
        ``st.error`` and never raised to the caller.
    """
    try:
        full_path = Path(data_dir) / filepath
        # Read as UTF-8 explicitly so parsing does not depend on the
        # platform's default locale encoding.
        with open(full_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # Opening directly (rather than checking exists() first) avoids a
        # window in which the file could disappear between check and open.
        st.error(f"Building file not found: {full_path}")
    except (OSError, ValueError, TypeError) as e:
        # OSError: unreadable path; ValueError: invalid JSON or undecodable
        # bytes; TypeError: a non-path value such as NaN passed as filepath.
        st.error(f"Error loading building file: {e}")
    return None
def analyze_building_epjson(epjson_data: Dict) -> Dict:
    """Analyze epJSON building data and extract key metrics.

    Args:
        epjson_data: Parsed epJSON document: a mapping from object-type name
            (e.g. 'Zone') to a mapping of named objects.

    Returns:
        Dict with integer counts under 'zones', 'surfaces', 'windows',
        'hvac_systems', 'schedules', 'materials' and 'constructions'; booleans
        under 'has_meters' and 'has_setpoints'; and 'timestep', which is
        ``None`` when no Timestep object is present, ``'Unknown'`` when the
        first Timestep object lacks 'number_of_timesteps_per_hour', and that
        field's value otherwise.
    """
    def count_objects(object_type):
        # Absent or null (None) collections count as zero objects.
        return len(epjson_data.get(object_type) or {})

    # HVAC systems are counted across these three object types.
    hvac_objects = ['AirLoopHVAC', 'PlantLoop', 'ZoneHVAC:IdealLoadsAirSystem']

    analysis = {
        'zones': count_objects('Zone'),
        'surfaces': count_objects('BuildingSurface:Detailed'),
        'windows': count_objects('FenestrationSurface:Detailed'),
        'hvac_systems': sum(count_objects(obj) for obj in hvac_objects),
        'schedules': count_objects('Schedule:Compact'),
        'materials': count_objects('Material'),
        'constructions': count_objects('Construction'),
        # Presence checks only look at the top-level object-type names.
        'has_meters': 'Output:Meter' in epjson_data,
        'has_setpoints': any('Setpoint' in key for key in epjson_data),
        'timestep': None
    }

    # Get timestep from the first Timestep object. An empty or null Timestep
    # collection leaves the default None instead of raising IndexError.
    timestep_objects = epjson_data.get('Timestep')
    if timestep_objects:
        first_timestep = next(iter(timestep_objects.values()))
        analysis['timestep'] = first_timestep.get('number_of_timesteps_per_hour', 'Unknown')

    return analysis
def create_mock_energy_profile(building_name: str):
    """Render a demonstration energy profile: a daily load curve and monthly bars.

    All numbers are synthetic. The hourly curve is a sine shape plus random
    noise drawn on every call, floored at 20; the monthly heating and cooling
    values are fixed lists. ``building_name`` is accepted but not used by the
    body.
    """
    st.subheader("⚑ Energy Profile (Demo)")
    st.info("πŸ“ Note: This is demonstration data. Connect to actual EnergyPlus simulation results for real data.")

    # Synthetic hourly load: base of 100 plus a 50-amplitude sine over the day
    # plus Gaussian noise, never dropping below the minimum load of 20.
    hour_axis = list(range(24))
    daily_shape = np.sin(np.array(hour_axis) * np.pi / 12)
    hourly_load = np.maximum(100 + 50 * daily_shape + np.random.normal(0, 10, 24), 20)

    # Fixed synthetic monthly values.
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    monthly_heating = [150, 120, 80, 40, 10, 0, 0, 0, 20, 60, 100, 140]
    monthly_cooling = [0, 0, 10, 30, 60, 100, 120, 110, 70, 30, 5, 0]

    daily_slot, monthly_slot = st.columns(2)

    with daily_slot:
        # Hourly profile
        daily_fig = px.line(
            x=hour_axis,
            y=hourly_load,
            title="Typical Daily Load Profile",
            labels={'x': 'Hour of Day', 'y': 'Power (kW)'}
        )
        daily_fig.update_traces(line_color='#00d4ff')
        daily_fig.update_layout(**create_dark_theme_plotly_layout())
        st.plotly_chart(daily_fig, use_container_width=True)

    with monthly_slot:
        # Monthly profile: heating and cooling stacked per month
        monthly_fig = go.Figure(data=[
            go.Bar(x=month_labels, y=monthly_heating, name='Heating', marker_color='#ff6b6b'),
            go.Bar(x=month_labels, y=monthly_cooling, name='Cooling', marker_color='#4ecdc4'),
        ])
        monthly_fig.update_layout(
            **create_dark_theme_plotly_layout(),
            title="Monthly Energy Consumption",
            xaxis_title="Month",
            yaxis_title="Energy (kWh/mΒ²)",
            barmode='stack'
        )
        st.plotly_chart(monthly_fig, use_container_width=True)
def _render_building_card(building_row, title, css_class):
    """Write one building's headline attributes inside a styled ``<div>``.

    Shared by the explorer detail card and both comparison cards so the three
    stay formatted identically.

    Args:
        building_row: A row (pandas Series) of the buildings table.
        title: Text shown as the card's subheader.
        css_class: CSS class applied to the wrapping ``<div>``.
    """
    # (column, label, format template); a column is shown only when it exists
    # in the row and holds a non-null value.
    optional_fields = (
        ('floor_area', 'Floor Area', "{:.0f} mΒ²"),
        ('num_zones', 'Zones', "{}"),
        ('window_wall_ratio', 'WWR', "{:.2%}"),
    )
    st.markdown(f'<div class="{css_class}">', unsafe_allow_html=True)
    st.subheader(title)
    st.write(f"**Type**: {building_row['building_type']}")
    st.write(f"**Climate Zone**: {building_row['climate_zone']}")
    st.write(f"**Variation**: {building_row['variation_type']}")
    for field, label, template in optional_fields:
        if field in building_row and pd.notna(building_row[field]):
            st.write(f"**{label}**: {template.format(building_row[field])}")
    st.markdown('</div>', unsafe_allow_html=True)


def _render_building_analysis(analysis):
    """Show the epJSON object counts and processing flags as metric tiles."""
    st.subheader("πŸ”§ Building Analysis")
    metric_col1, metric_col2, metric_col3 = st.columns(3)
    with metric_col1:
        st.metric("Zones", analysis['zones'])
        st.metric("Surfaces", analysis['surfaces'])
    with metric_col2:
        st.metric("Windows", analysis['windows'])
        st.metric("HVAC Systems", analysis['hvac_systems'])
    with metric_col3:
        st.metric("Schedules", analysis['schedules'])
        st.metric("Materials", analysis['materials'])

    st.subheader("⚑ Processing Status")
    status_col1, status_col2, status_col3 = st.columns(3)
    with status_col1:
        meter_status = "βœ… Yes" if analysis['has_meters'] else "❌ No"
        st.metric("Has Meters", meter_status)
    with status_col2:
        setpoint_status = "βœ… Yes" if analysis['has_setpoints'] else "❌ No"
        st.metric("Has Setpoints", setpoint_status)
    with status_col3:
        timestep_value = analysis['timestep'] or "Not set"
        st.metric("Timesteps/Hour", timestep_value)


def _render_overview_page(buildings_df, weather_df, combinations_df):
    """Render the overview page: header plus the system-wide metrics."""
    st.header("System Overview")
    create_overview_metrics(buildings_df, weather_df, combinations_df)


def _render_building_explorer_page(buildings_df):
    """Render the explorer page: filters, chart, table and per-building detail."""
    st.header("Building Explorer")
    if buildings_df.empty:
        st.warning("No building data available. Run `python scripts/main.py --create-table` first.")
        return

    filters = create_building_filters(buildings_df)
    filtered_buildings = apply_building_filters(buildings_df, filters)
    st.subheader(f"πŸ“Š Found {len(filtered_buildings)} buildings")

    if not filtered_buildings.empty:
        create_building_characteristics_chart(filtered_buildings)
    # NOTE(review): the table is rendered even when the filter result is empty
    # — confirm this matches the intended layout.
    display_buildings_table(filtered_buildings)
    if filtered_buildings.empty:
        return

    st.subheader("πŸ” Building Details")
    selected_building = st.selectbox(
        "Select a building to analyze:",
        options=filtered_buildings['name'].tolist(),
        index=0
    )
    if not selected_building:
        return

    building_info = filtered_buildings[filtered_buildings['name'] == selected_building].iloc[0]
    card_col, analysis_col = st.columns([1, 2])
    with card_col:
        _render_building_card(building_info, f"πŸ“‹ {building_info['name']}", "building-card")
    with analysis_col:
        epjson_data = load_building_epjson(building_info['filepath'])
        if epjson_data:
            _render_building_analysis(analyze_building_epjson(epjson_data))

    # NOTE(review): placed below both columns because the profile creates its
    # own two-column layout — confirm against the intended page design.
    create_mock_energy_profile(selected_building)


def _render_weather_page(weather_df):
    """Render the weather page: location filters, map, distributions, table."""
    st.header("Weather Data Explorer")
    if weather_df.empty:
        st.warning("No weather data available. Run `python scripts/main.py --create-weather-table` first.")
        return

    st.subheader("🌑️ Filter Weather Locations")
    lat_min, lat_max = float(weather_df['latitude'].min()), float(weather_df['latitude'].max())
    lon_min, lon_max = float(weather_df['longitude'].min()), float(weather_df['longitude'].max())

    col1, col2, col3 = st.columns(3)
    with col1:
        # dropna(): unique() keeps NaN, and sorted() cannot order NaN (float)
        # against str values, so a single missing cell would raise TypeError.
        countries = st.multiselect(
            "Countries",
            options=sorted(weather_df['country'].dropna().unique()),
            default=[]
        )
        climate_zones_weather = st.multiselect(
            "Climate Zones",
            options=sorted(weather_df['climate_zone_code'].dropna().unique()),
            default=[]
        )
    with col2:
        if 'data_source' in weather_df.columns:
            data_sources = st.multiselect(
                "Data Sources",
                options=weather_df['data_source'].unique(),
                default=[]
            )
        else:
            data_sources = []
        lat_range = st.slider(
            "Latitude Range",
            min_value=lat_min,
            max_value=lat_max,
            value=(lat_min, lat_max)
        )
    with col3:
        lon_range = st.slider(
            "Longitude Range",
            min_value=lon_min,
            max_value=lon_max,
            value=(lon_min, lon_max)
        )

    # An empty multiselect means "no restriction" for that column.
    filtered_weather = weather_df.copy()
    if countries:
        filtered_weather = filtered_weather[filtered_weather['country'].isin(countries)]
    if climate_zones_weather:
        filtered_weather = filtered_weather[filtered_weather['climate_zone_code'].isin(climate_zones_weather)]
    if data_sources:
        filtered_weather = filtered_weather[filtered_weather['data_source'].isin(data_sources)]
    filtered_weather = filtered_weather[
        (filtered_weather['latitude'] >= lat_range[0]) &
        (filtered_weather['latitude'] <= lat_range[1]) &
        (filtered_weather['longitude'] >= lon_range[0]) &
        (filtered_weather['longitude'] <= lon_range[1])
    ]
    st.subheader(f"🌍 Found {len(filtered_weather)} weather locations")

    tab1, tab2, tab3 = st.tabs(["πŸ—ΊοΈ Map", "πŸ“Š Distribution", "πŸ“‹ Table"])
    with tab1:
        # World map of weather locations
        fig_map = px.scatter_mapbox(
            filtered_weather,
            lat='latitude',
            lon='longitude',
            color='climate_zone_code',
            hover_data=['place', 'country'],
            mapbox_style='carto-darkmatter',  # Dark theme map
            zoom=1,
            title='Weather Locations Worldwide'
        )
        fig_map.update_layout(**create_dark_theme_plotly_layout(), height=600)
        st.plotly_chart(fig_map, use_container_width=True)
    with tab2:
        col1, col2 = st.columns(2)
        with col1:
            # Country distribution
            country_counts = filtered_weather['country'].value_counts().head(15)
            fig_countries = px.bar(
                x=country_counts.values,
                y=country_counts.index,
                orientation='h',
                title='Top 15 Countries by Weather Locations',
                color=country_counts.values,
                color_continuous_scale='viridis'
            )
            fig_countries.update_layout(**create_dark_theme_plotly_layout(), height=500)
            st.plotly_chart(fig_countries, use_container_width=True)
        with col2:
            # Climate zone distribution
            climate_counts = filtered_weather['climate_zone_code'].value_counts()
            fig_climate = px.pie(
                values=climate_counts.values,
                names=climate_counts.index,
                title='Climate Zone Distribution',
                color_discrete_sequence=px.colors.qualitative.Set3
            )
            fig_climate.update_layout(**create_dark_theme_plotly_layout(), height=500)
            st.plotly_chart(fig_climate, use_container_width=True)
    with tab3:
        # Weather locations table
        st.dataframe(
            filtered_weather,
            use_container_width=True,
            hide_index=True,
            column_config={
                "id": st.column_config.NumberColumn("ID", width="small"),
                "place": st.column_config.TextColumn("Location", width="large"),
                "country": st.column_config.TextColumn("Country", width="small"),
                "climate_zone_code": st.column_config.TextColumn("Climate", width="small"),
                "latitude": st.column_config.NumberColumn("Latitude", format="%.2f", width="medium"),
                "longitude": st.column_config.NumberColumn("Longitude", format="%.2f", width="medium"),
                "elevation": st.column_config.NumberColumn("Elevation (m)", width="medium") if 'elevation' in filtered_weather.columns else None,
                "data_source": st.column_config.TextColumn("Source", width="small") if 'data_source' in filtered_weather.columns else None
            }
        )


def _render_comparison_page(buildings_df):
    """Render the comparison page: two building pickers and side-by-side tables."""
    st.header("Building Comparison Tool")
    if buildings_df.empty:
        st.warning("No building data available for comparison.")
        return

    st.subheader("Select Buildings to Compare")
    building_names = buildings_df['name'].tolist()
    col1, col2 = st.columns(2)
    with col1:
        building1 = st.selectbox("Building 1:", options=building_names, key="building1")
    with col2:
        building2 = st.selectbox("Building 2:", options=building_names, key="building2")

    if not (building1 and building2 and building1 != building2):
        st.info("Please select two different buildings to compare.")
        return

    building1_data = buildings_df[buildings_df['name'] == building1].iloc[0]
    building2_data = buildings_df[buildings_df['name'] == building2].iloc[0]

    st.subheader("πŸ” Building Comparison")
    col1, col2 = st.columns(2)
    with col1:
        _render_building_card(building1_data, f"🏒 {building1}", "comparison-highlight")
    with col2:
        _render_building_card(building2_data, f"🏒 {building2}", "comparison-highlight")

    # Load and compare epJSON files
    st.subheader("πŸ”§ Technical Comparison")
    epjson1 = load_building_epjson(building1_data['filepath'])
    epjson2 = load_building_epjson(building2_data['filepath'])
    if not (epjson1 and epjson2):
        return
    analysis1 = analyze_building_epjson(epjson1)
    analysis2 = analyze_building_epjson(epjson2)

    # (row label, key in the analysis dict) for the numeric comparison table.
    components = [
        ('Zones', 'zones'), ('Surfaces', 'surfaces'), ('Windows', 'windows'),
        ('HVAC Systems', 'hvac_systems'), ('Schedules', 'schedules'), ('Materials', 'materials'),
    ]
    tech_comparison = pd.DataFrame({
        'Component': [label for label, _ in components],
        building1: [analysis1[key] for _, key in components],
        building2: [analysis2[key] for _, key in components],
    })
    tech_comparison['Difference'] = tech_comparison[building2] - tech_comparison[building1]
    st.dataframe(tech_comparison, use_container_width=True)

    def status_column(analysis):
        # Same three rows, in the same order, as the 'Status' labels below.
        return [
            "βœ…" if analysis['has_meters'] else "❌",
            "βœ…" if analysis['has_setpoints'] else "❌",
            str(analysis['timestep'] or 'Not set'),
        ]

    st.subheader("⚑ Processing Status Comparison")
    status_comparison = pd.DataFrame({
        'Status': ['Has Meters', 'Has Setpoints', 'Timesteps/Hour'],
        building1: status_column(analysis1),
        building2: status_column(analysis2),
    })
    st.dataframe(status_comparison, use_container_width=True)


def _render_statistical_summary(buildings_df):
    """Show describe() for the numeric columns and bar charts for categoricals."""
    st.subheader("Statistical Summary")
    numeric_cols = [
        col for col in ('floor_area', 'num_zones', 'window_wall_ratio')
        if col in buildings_df.columns
    ]
    if numeric_cols:
        st.write("**Numeric Properties Statistics:**")
        st.dataframe(buildings_df[numeric_cols].describe(), use_container_width=True)

    st.write("**Categorical Distributions:**")
    col1, col2 = st.columns(2)
    with col1:
        if 'building_type' in buildings_df.columns:
            st.write("Building Types:")
            st.bar_chart(buildings_df['building_type'].value_counts())
    with col2:
        if 'climate_zone' in buildings_df.columns:
            st.write("Climate Zones:")
            st.bar_chart(buildings_df['climate_zone'].value_counts())


def _render_data_quality_check(buildings_df):
    """Report missing values, duplicate rows and building files absent on disk."""
    st.subheader("Data Quality Assessment")

    # Missing data check
    missing_data = buildings_df.isnull().sum()
    if missing_data.sum() > 0:
        st.write("**Missing Data:**")
        missing_df = missing_data[missing_data > 0].to_frame('Missing Count')
        missing_df['Percentage'] = (missing_df['Missing Count'] / len(buildings_df) * 100).round(2)
        st.dataframe(missing_df)
    else:
        st.success("βœ… No missing data found!")

    # Duplicate check
    duplicates = buildings_df.duplicated().sum()
    if duplicates > 0:
        st.warning(f"⚠️ Found {duplicates} duplicate rows")
    else:
        st.success("βœ… No duplicate rows found!")

    # File existence check
    if 'filepath' not in buildings_df.columns:
        return
    st.write("**File Existence Check:**")
    data_root = Path("data")
    # A null filepath is reported as a missing file; joining NaN onto a Path
    # would otherwise raise TypeError and abort the whole check.
    missing_files = [
        name
        for name, rel_path in zip(buildings_df['name'], buildings_df['filepath'])
        if pd.isna(rel_path) or not (data_root / str(rel_path)).exists()
    ]
    if not missing_files:
        st.success("βœ… All building files exist!")
        return
    st.error(f"❌ {len(missing_files)} building files not found")
    preview_limit = 10
    with st.expander("Show missing files"):
        for file in missing_files[:preview_limit]:
            st.write(f"- {file}")
        if len(missing_files) > preview_limit:
            st.write(f"... and {len(missing_files) - preview_limit} more")


def _build_report_text(report_data, buildings_df):
    """Return the plain-text report body offered for download."""
    if buildings_df.empty:
        type_distribution = climate_distribution = 'No data'
    else:
        type_distribution = buildings_df['building_type'].value_counts().to_string()
        climate_distribution = buildings_df['climate_zone'].value_counts().to_string()
    # The empty first and last items keep the leading and trailing newline.
    return "\n".join([
        "",
        "Building Generator Analysis Report",
        f"Generated: {report_data['timestamp']}",
        "Summary Statistics:",
        f"- Total Buildings: {report_data['buildings_total']}",
        f"- Weather Locations: {report_data['weather_total']}",
        f"- Simulation Combinations: {report_data['combinations_total']}",
        "Building Type Distribution:",
        type_distribution,
        "Climate Zone Distribution:",
        climate_distribution,
        "",
    ])


def _render_detailed_report(buildings_df, weather_df, combinations_df):
    """Offer a button that builds summary metrics and a downloadable text report."""
    st.subheader("Generate Detailed Report")
    include_weather = st.checkbox("Include weather data analysis", value=True)
    include_combinations = st.checkbox("Include combination analysis", value=True)
    if not st.button("Generate Report"):
        return

    with st.spinner("Generating report..."):
        report_data = {
            'timestamp': pd.Timestamp.now(),
            'buildings_total': len(buildings_df),
            'weather_total': len(weather_df) if not weather_df.empty else 0,
            'combinations_total': len(combinations_df) if not combinations_df.empty else 0
        }
    st.success("πŸ“Š Report generated successfully!")

    # The checkboxes only toggle the on-screen tiles; the text report always
    # lists all three totals.
    metric_col1, metric_col2, metric_col3 = st.columns(3)
    with metric_col1:
        st.metric("Buildings Analyzed", report_data['buildings_total'])
    with metric_col2:
        if include_weather:
            st.metric("Weather Locations", report_data['weather_total'])
    with metric_col3:
        if include_combinations:
            st.metric("Combinations", report_data['combinations_total'])

    # Reuse the report's own timestamp so the file name and the "Generated:"
    # line inside the file always agree.
    file_stamp = report_data['timestamp'].strftime('%Y%m%d_%H%M%S')
    st.download_button(
        label="πŸ“₯ Download Report",
        data=_build_report_text(report_data, buildings_df),
        file_name=f"building_analysis_report_{file_stamp}.txt",
        mime="text/plain"
    )


def _render_custom_analysis():
    """Show the placeholder listing of planned custom-analysis features."""
    st.subheader("Custom Analysis")
    st.info("🚧 Custom analysis features coming soon! This will include:")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown(
            "\n"
            "**Planned Features:**\n"
            "- Building performance correlation analysis\n"
            "- Climate impact assessment\n"
            "- Variation effectiveness studies\n"
            "- Energy consumption modeling\n"
            "- Optimization recommendations\n"
        )
    with col2:
        st.markdown(
            "\n"
            "**Interactive Tools:**\n"
            "- Custom filter combinations\n"
            "- Advanced statistical analysis\n"
            "- Machine learning insights\n"
            "- Predictive modeling\n"
            "- Export to research formats\n"
        )


def _render_analysis_page(buildings_df, weather_df, combinations_df):
    """Render the analysis page and dispatch to the chosen analysis type."""
    st.header("Analysis & Reports")
    if buildings_df.empty:
        st.warning("No building data available for analysis.")
        return

    # Insertion order defines the order of the options in the selectbox.
    analysis_renderers = {
        "πŸ“ˆ Statistical Summary": lambda: _render_statistical_summary(buildings_df),
        "πŸ” Data Quality Check": lambda: _render_data_quality_check(buildings_df),
        "πŸ“‹ Detailed Report": lambda: _render_detailed_report(buildings_df, weather_df, combinations_df),
        "🎯 Custom Analysis": _render_custom_analysis,
    }
    analysis_type = st.selectbox("Choose analysis type:", list(analysis_renderers))
    analysis_renderers[analysis_type]()


def main():
    """Main Streamlit application.

    Loads the pipeline data, draws the sidebar page selector and renders the
    selected page. Stops early with an error message when the pipeline could
    not be initialised.
    """
    st.title("πŸ—οΈ Building Generator Dashboard")
    st.markdown("Interactive exploration of building energy models and weather data")

    # Load data
    with st.spinner("Loading building and weather data..."):
        pipeline, buildings_df, weather_df, combinations_df = load_pipeline_data()
    if pipeline is None:
        st.error("Failed to initialize application. Please check your data directory.")
        st.info("Make sure you have run: `python scripts/main.py --create-table` and `python scripts/main.py --create-weather-table`")
        return

    # Page label -> renderer. Each label is written once here so the selector
    # options and the dispatch below cannot drift apart; the first entry is
    # the default page.
    page_renderers = {
        "🏠 Overview": lambda: _render_overview_page(buildings_df, weather_df, combinations_df),
        "🏒 Building Explorer": lambda: _render_building_explorer_page(buildings_df),
        "🌍 Weather Data": lambda: _render_weather_page(weather_df),
        "βš–οΈ Compare Buildings": lambda: _render_comparison_page(buildings_df),
        "πŸ“Š Analysis & Reports": lambda: _render_analysis_page(buildings_df, weather_df, combinations_df),
    }
    page_options = list(page_renderers)

    # Sidebar for navigation
    st.sidebar.title("πŸ—‚οΈ Navigation")
    # Initialize session state for page navigation
    if 'current_page' not in st.session_state:
        st.session_state.current_page = page_options[0]
    # Use session state to control the selectbox
    current_index = page_options.index(st.session_state.current_page) if st.session_state.current_page in page_options else 0
    page = st.sidebar.selectbox(
        "Choose a page:",
        page_options,
        index=current_index,
        key="page_selector"
    )
    # Update session state when selectbox changes
    if page != st.session_state.current_page:
        st.session_state.current_page = page

    render_page = page_renderers.get(st.session_state.current_page)
    if render_page is not None:
        render_page()
# Script entry point: start the dashboard only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()