kawaiipeace committed on
Commit
4a72ef3
·
1 Parent(s): 74baf5f

Update Reliability Computation with AI

Browse files
Files changed (5) hide show
  1. .DS_Store +0 -0
  2. README.md +1 -1
  3. app.py +1 -1
  4. scripts/compute_reliability.py +39 -1
  5. scripts/summary.py +111 -14
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
README.md CHANGED
@@ -12,7 +12,7 @@ short_description: AI OMS Platform
12
  ---
13
 
14
  # OMS Analyze — Prototype
15
- > Created by PEACE, Powered by AI, Version 0.0.1
16
 
17
  Prototype Application Platform สำหรับวิเคราะห์ข้อมูลการดับไฟฟ้า (OMS - Outage Management System) โดยใช้ AI และ Machine Learning เพื่อสรุป สืบหาความผิดปกติ พยากรณ์ และจำแนกสาเหตุ
18
 
 
12
  ---
13
 
14
  # OMS Analyze — Prototype
15
+ > Created by PEACE, Powered by AI, Version 0.0.2
16
 
17
  Prototype Application Platform สำหรับวิเคราะห์ข้อมูลการดับไฟฟ้า (OMS - Outage Management System) โดยใช้ AI และ Machine Learning เพื่อสรุป สืบหาความผิดปกติ พยากรณ์ และจำแนกสาเหตุ
18
 
app.py CHANGED
@@ -33,7 +33,7 @@ def parse_row_selection(df, rows_text: str):
33
 
34
  with gr.Blocks() as demo:
35
  gr.Markdown("# OMS Analyze — Prototype")
36
- gr.Markdown("> Created by PEACE, Powered by AI, Version 0.0.1")
37
  with gr.Tabs():
38
  # Upload & Preview tab
39
  with gr.TabItem('Upload & Preview'):
 
33
 
34
  with gr.Blocks() as demo:
35
  gr.Markdown("# OMS Analyze — Prototype")
36
+ gr.Markdown("> Created by PEACE, Powered by AI, Version 0.0.2")
37
  with gr.Tabs():
38
  # Upload & Preview tab
39
  with gr.TabItem('Upload & Preview'):
scripts/compute_reliability.py CHANGED
@@ -26,6 +26,12 @@ from pathlib import Path
26
 
27
  DATE_COLS = ['OutageDateTime', 'FirstRestoDateTime', 'LastRestoDateTime', 'CreateEventDateTime', 'CloseEventDateTime']
28
 
 
 
 
 
 
 
29
 
30
  def parse_dates(df: pd.DataFrame) -> pd.DataFrame:
31
  for c in DATE_COLS:
@@ -113,16 +119,48 @@ def compute_reliability(
113
  out_dir.mkdir(parents=True, exist_ok=True)
114
 
115
  df = pd.read_csv(input_csv, dtype=str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  # parse dates
117
  df = parse_dates(df)
118
 
119
  # coalesce end time
120
  df['OutageStart'] = df.get('OutageDateTime')
121
  df['OutageEnd'] = df.apply(coalesce_end_time, axis=1)
 
122
  # compute duration in minutes
123
  df['DurationMin'] = (pd.to_datetime(df['OutageEnd']) - pd.to_datetime(df['OutageStart'])).dt.total_seconds() / 60.0
124
 
125
- # customers affected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  df['CustomersAffected'] = df.apply(estimate_customers, axis=1)
127
 
128
  # flag planned
 
26
 
27
  DATE_COLS = ['OutageDateTime', 'FirstRestoDateTime', 'LastRestoDateTime', 'CreateEventDateTime', 'CloseEventDateTime']
28
 
29
+ # Default expected columns (from repository / user description). If an input CSV has no header,
30
+ # we'll assign these names when reading with header=None.
31
+ DEFAULT_COLUMNS = [
32
+ 'Number','EventNumber','OutageDateTime','CountOfRestoStep','FirstRestoDateTime','LastRestoDateTime','FirstStepDuration','LastStepDuration','EventType','SubstationID','Feeder','OpDeviceID','OpDeviceGIStag','OpDeviceXYcoord','OpDeviceType','OpDeviceStatus','OpDevicePhase','OpVolt','OpVolt1','OpVolt2','OpDeviceSysType','Owner','FaultSysType','KnowUnknowCause','Weather','CauseType','SubCauseType','FaultDeviceType','FaultDevice','FaultDeviceCondition','Parts','DamagedParts','Manufacturer','RelayType','Relation','RelayPhase','RelayStatus','Detail','FaultDetail','SiteDetail','OpDeviceAreaID','OpDeviceSiteID','EventSiteID','AffectedAreaID','AffectedSiteID','Industrial','AffectedCustomer1','AffectedCustomer2','AffectedCustomer3','AffectedCustomer4','AffectedCustomer5','AffectedCustomer','AllStepCusXTime1','AllStepCusXTime2','AllStepCusXTime3','AllStepCusXTime4','AllStepCusXTime5','AllStepCusXTime','Capacity(kVA)','Load(MW)','CorrectionDetail','WorkOrderID','CreateEventDateTime','CloseEventDateTime'
33
+ ]
34
+
35
 
36
  def parse_dates(df: pd.DataFrame) -> pd.DataFrame:
37
  for c in DATE_COLS:
 
119
  out_dir.mkdir(parents=True, exist_ok=True)
120
 
121
  df = pd.read_csv(input_csv, dtype=str)
122
+
123
+ # If key expected columns are missing, maybe the CSV has no header. Try re-reading without header
124
+ key_cols = {'OutageDateTime', 'AffectedCustomer1', 'AllStepCusXTime1'}
125
+ if not key_cols.intersection(set(df.columns)):
126
+ try:
127
+ df_no_header = pd.read_csv(input_csv, header=None, dtype=str)
128
+ if df_no_header.shape[1] == len(DEFAULT_COLUMNS):
129
+ df_no_header.columns = DEFAULT_COLUMNS
130
+ df = df_no_header
131
+ print(f"Re-read CSV without header and assigned {len(DEFAULT_COLUMNS)} default columns")
132
+ else:
133
+ print(f"CSV appears to lack expected key columns and also did not match default column count ({df_no_header.shape[1]} != {len(DEFAULT_COLUMNS)})")
134
+ except Exception as e:
135
+ print(f"Error re-reading CSV without header: {e}")
136
+
137
  # parse dates
138
  df = parse_dates(df)
139
 
140
  # coalesce end time
141
  df['OutageStart'] = df.get('OutageDateTime')
142
  df['OutageEnd'] = df.apply(coalesce_end_time, axis=1)
143
+
144
  # compute duration in minutes
145
  df['DurationMin'] = (pd.to_datetime(df['OutageEnd']) - pd.to_datetime(df['OutageStart'])).dt.total_seconds() / 60.0
146
 
147
+ # Ensure aggregated customer columns exist: sum AffectedCustomer1..5 -> AffectedCustomer if missing
148
+ af_cols = [f'AffectedCustomer{i}' for i in range(1, 6)]
149
+ present_af = [c for c in af_cols if c in df.columns]
150
+ if 'AffectedCustomer' not in df.columns and present_af:
151
+ df[present_af] = df[present_af].apply(pd.to_numeric, errors='coerce')
152
+ df['AffectedCustomer'] = df[present_af].sum(axis=1)
153
+ print(f"Aggregated {present_af} into AffectedCustomer")
154
+
155
+ # Similarly aggregate AllStepCusXTime1..5 -> AllStepCusXTime
156
+ at_cols = [f'AllStepCusXTime{i}' for i in range(1, 6)]
157
+ present_at = [c for c in at_cols if c in df.columns]
158
+ if 'AllStepCusXTime' not in df.columns and present_at:
159
+ df[present_at] = df[present_at].apply(pd.to_numeric, errors='coerce')
160
+ df['AllStepCusXTime'] = df[present_at].sum(axis=1)
161
+ print(f"Aggregated {present_at} into AllStepCusXTime")
162
+
163
+ # estimate customers affected per event
164
  df['CustomersAffected'] = df.apply(estimate_customers, axis=1)
165
 
166
  # flag planned
scripts/summary.py CHANGED
@@ -47,6 +47,23 @@ def summarize_overall(df: pd.DataFrame, use_hf: bool = False, model: str = 'meta
47
  if col in df_copy.columns:
48
  df_copy[col] = pd.to_datetime(df_copy[col], dayfirst=True, errors='coerce')
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # Calculate basic metrics
51
  if 'OutageDateTime' in df_copy.columns:
52
  date_range = f"{df_copy['OutageDateTime'].min()} ถึง {df_copy['OutageDateTime'].max()}" if pd.notna(df_copy['OutageDateTime'].min()) else "ไม่ระบุ"
@@ -79,45 +96,122 @@ def summarize_overall(df: pd.DataFrame, use_hf: bool = False, model: str = 'meta
79
  from scripts.compute_reliability import compute_reliability
80
  import tempfile
81
  import os
 
82
 
83
- # Save df to temp CSV for compute_reliability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
85
  df_copy.to_csv(f.name, index=False)
86
  temp_path = f.name
87
 
88
  try:
89
- reliability_results = compute_reliability(temp_path, total_customers=total_customers, exclude_planned=True)
90
- overall_metrics = reliability_results.get('overall', pd.DataFrame())
91
- if not overall_metrics.empty:
92
- row = overall_metrics.iloc[0]
 
 
 
 
 
 
 
 
 
 
 
93
 
 
94
  # Create reliability DataFrame with proper metric names
95
  reliability_data = [
96
  {
97
  'Metric': 'SAIFI',
98
  'Full Name': 'System Average Interruption Frequency Index',
99
- 'Value': f"{row.get('SAIFI', 'N/A'):.4f}",
100
  'Unit': 'ครั้ง/ลูกค้า',
101
  'Description': 'ความถี่เฉลี่ยของการขัดข้องต่อลูกค้า'
102
  },
103
  {
104
  'Metric': 'SAIDI',
105
  'Full Name': 'System Average Interruption Duration Index',
106
- 'Value': f"{row.get('SAIDI', 'N/A'):.2f}",
107
  'Unit': 'นาที/ลูกค้า',
108
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อลูกค้า'
109
  },
110
  {
111
  'Metric': 'CAIDI',
112
  'Full Name': 'Customer Average Interruption Duration Index',
113
- 'Value': f"{row.get('CAIDI', 'N/A'):.2f}",
114
  'Unit': 'นาที/ครั้ง',
115
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อครั้ง'
116
  },
117
  {
118
  'Metric': 'MAIFI',
119
  'Full Name': 'Momentary Average Interruption Frequency Index',
120
- 'Value': f"{row.get('MAIFI', 'N/A'):.4f}",
121
  'Unit': 'ครั้ง/ลูกค้า',
122
  'Description': 'ความถี่เฉลี่ยของการขัดข้องชั่วคราวต่อลูกค้า'
123
  }
@@ -126,14 +220,17 @@ def summarize_overall(df: pd.DataFrame, use_hf: bool = False, model: str = 'meta
126
 
127
  reliability_summary = f"""
128
  ดัชนีความน่าเชื่อถือ:
129
- - SAIFI (System Average Interruption Frequency Index): {row.get('SAIFI', 'N/A'):.4f} ครั้ง/ลูกค้า
130
- - SAIDI (System Average Interruption Duration Index): {row.get('SAIDI', 'N/A'):.2f} นาที/ลูกค้า
131
- - CAIDI (Customer Average Interruption Duration Index): {row.get('CAIDI', 'N/A'):.2f} นาที/ครั้ง
132
- - MAIFI (Momentary Average Interruption Frequency Index): {row.get('MAIFI', 'N/A'):.4f} ครั้ง/ลูกค้า
133
  """
134
  summary_text += reliability_summary
135
  finally:
136
- os.unlink(temp_path)
 
 
 
137
  except Exception as e:
138
  reliability_summary = f"ไม่สามารถคำนวณดัชนีความน่าเชื่อถือได้: {str(e)}"
139
 
 
47
  if col in df_copy.columns:
48
  df_copy[col] = pd.to_datetime(df_copy[col], dayfirst=True, errors='coerce')
49
 
50
+ # Aggregate AffectedCustomer and AllStepCusXTime from numbered columns if needed
51
+ # AffectedCustomer = sum(AffectedCustomer1..AffectedCustomer5) if AffectedCustomer missing
52
+ affected_parts = [f'AffectedCustomer{i}' for i in range(1, 6)]
53
+ present_affected = [c for c in affected_parts if c in df_copy.columns]
54
+ if 'AffectedCustomer' not in df_copy.columns and present_affected:
55
+ df_copy['AffectedCustomer'] = df_copy[present_affected].apply(lambda s: pd.to_numeric(s, errors='coerce')).sum(axis=1)
56
+ elif 'AffectedCustomer' in df_copy.columns:
57
+ df_copy['AffectedCustomer'] = pd.to_numeric(df_copy['AffectedCustomer'], errors='coerce')
58
+
59
+ # AllStepCusXTime = sum(AllStepCusXTime1..AllStepCusXTime5) if missing
60
+ allstep_parts = [f'AllStepCusXTime{i}' for i in range(1, 6)]
61
+ present_allstep = [c for c in allstep_parts if c in df_copy.columns]
62
+ if 'AllStepCusXTime' not in df_copy.columns and present_allstep:
63
+ df_copy['AllStepCusXTime'] = df_copy[present_allstep].apply(lambda s: pd.to_numeric(s, errors='coerce')).sum(axis=1)
64
+ elif 'AllStepCusXTime' in df_copy.columns:
65
+ df_copy['AllStepCusXTime'] = pd.to_numeric(df_copy['AllStepCusXTime'], errors='coerce')
66
+
67
  # Calculate basic metrics
68
  if 'OutageDateTime' in df_copy.columns:
69
  date_range = f"{df_copy['OutageDateTime'].min()} ถึง {df_copy['OutageDateTime'].max()}" if pd.notna(df_copy['OutageDateTime'].min()) else "ไม่ระบุ"
 
96
  from scripts.compute_reliability import compute_reliability
97
  import tempfile
98
  import os
99
+ import json
100
 
101
+ # If AI requested, attempt to get reliability metrics from the model as JSON
102
+ ai_metrics = None
103
+ if use_hf and get_hf_token():
104
+ try:
105
+ # Detailed instruction tailored to the available columns and aggregation rules
106
+ ai_instruction = (
107
+ "คำนวณดัชนีความน่าเชื่อถือจากข้อมูลเหตุการณ์ไฟฟ้านี้ (SAIFI, SAIDI, CAIDI, MAIFI) โดยใช้คอลัมน์ที่มีดังนี้:\n"
108
+ "- OutageDateTime (เวลาเริ่มเหตุการณ์)\n"
109
+ "- LastRestoDateTime (เวลา restore สุดท้าย) — หากไม่มีให้ใช้ FirstRestoDateTime หรือ CloseEventDateTime ตามลำดับ\n"
110
+ "- AffectedCustomer1..AffectedCustomer5 (หากมี)\n"
111
+ "- AllStepCusXTime1..AllStepCusXTime5 (หากมี)\n"
112
+ "- AffectedCustomer (ถือเป็นผลรวมของ AffectedCustomer1..5 ถ้ามี)\n"
113
+ "- AllStepCusXTime (ถือเป็นผลรวมของ AllStepCusXTime1..5 ถ้ามี)\n"
114
+ "- (optional) IsPlanned/Planned\n\n"
115
+ "ให้คำนวณโดยใช้กฎต่อไปนี้:\n"
116
+ "1) แปลงเวลาเป็น datetime และคำนวณ duration_minutes = (LastRestoDateTime - OutageDateTime).total_seconds() / 60\n"
117
+ " - ถ้า LastRestoDateTime ไม่มี ให้ใช้ FirstRestoDateTime หรือ CloseEventDateTime\n"
118
+ " - ถ้า duration เป็นลบหรือไม่สามารถคำนวณ ให้ข้ามเหตุการณ์และระบุใน notes\n"
119
+ "2) หาก AffectedCustomer ไม่มี ให้คำนวณเป็น sum(AffectedCustomer1..5); ถ้า AllStepCusXTime ไม่มี ให้คำนวณเป็น sum(AllStepCusXTime1..5)\n"
120
+ "3) ยกเว้นเหตุการณ์ที่เป็น planned เมื่อ exclude_planned == True\n"
121
+ "4) กำหนด sustained ถ้า duration_minutes >= 3.0 (sustained_threshold_minutes) และ momentary ถ้า duration_minutes <= 1.0 (momentary_threshold_minutes)\n"
122
+ "5) คำนวณ aggregates:\n"
123
+ " - total_customer_interruptions = Σ AffectedCustomer สำหรับเหตุการณ์ sustained\n"
124
+ " - total_customer_minutes = Σ (AffectedCustomer * duration_minutes) สำหรับเหตุการณ์ sustained\n"
125
+ " - total_momentary_interruptions = Σ AffectedCustomer สำหรับเหตุการณ์ momentary\n"
126
+ " - total_customers_served = parameter total_customers (ถ้าไม่มีหรือ <=0 ให้คืน null ทั้งหมด)\n"
127
+ "6) สูตรดัชนี:\n"
128
+ " - SAIFI = total_customer_interruptions / total_customers_served\n"
129
+ " - SAIDI = total_customer_minutes / total_customers_served\n"
130
+ " - CAIDI = SAIDI / SAIFI (ถ้า SAIFI == 0 ให้ CAIDI = null)\n"
131
+ " - MAIFI = total_momentary_interruptions / total_customers_served\n\n"
132
+ "ปัดค่า: SAIFI, MAIFI -> 4 ทศนิยม; SAIDI, CAIDI -> 2 ทศนิยม. หลีกเลี่ยงค่า negative (ถ้า negative ให้เป็น null).\n"
133
+ "ผลลัพธ์: ตอบเป็น JSON ล้วนโดยตรงตาม schema:\n"
134
+ "{\"SAIFI\": <number|null>, \"SAIDI\": <number|null>, \"CAIDI\": <number|null>, \"MAIFI\": <number|null>, \"notes\": <string|null>}\n"
135
+ "ตัวอย่าง: {\"SAIFI\": 0.1234, \"SAIDI\": 12.34, \"CAIDI\": 100.00, \"MAIFI\": 0.0123, \"notes\": null}\n"
136
+ "ถ้าไม่สามารถคำนวณบางค่าจงคืนเป็น null และอธิบายสาเหตุใน notes (เช่น 'skipped 3 events due to missing AffectedCustomer').\n"
137
+ "โปรดตอบเป็น JSON เท่านั้น ไม่มีข้อความอื่น"
138
+ )
139
+ prompt = f"{ai_instruction}\n\nข้อมูล (CSV):\n{df_copy.head(50).to_csv(index=False)}"
140
+ ai_resp = openai_summary(prompt, verbosity='analyze', model=model)
141
+ if ai_resp:
142
+ # Attempt to extract JSON substring
143
+ text = ai_resp.strip()
144
+ # Find first { and last }
145
+ start = text.find('{')
146
+ end = text.rfind('}')
147
+ if start != -1 and end != -1 and end > start:
148
+ json_text = text[start:end+1]
149
+ try:
150
+ parsed = json.loads(json_text)
151
+ except Exception:
152
+ parsed = None
153
+ # Normalize numeric values or None
154
+ if parsed:
155
+ ai_metrics = {
156
+ 'SAIFI': float(parsed.get('SAIFI')) if parsed.get('SAIFI') is not None else None,
157
+ 'SAIDI': float(parsed.get('SAIDI')) if parsed.get('SAIDI') is not None else None,
158
+ 'CAIDI': float(parsed.get('CAIDI')) if parsed.get('CAIDI') is not None else None,
159
+ 'MAIFI': float(parsed.get('MAIFI')) if parsed.get('MAIFI') is not None else None,
160
+ }
161
+ # If parsing failed, leave ai_metrics as None and fallback to compute_reliability
162
+ except Exception:
163
+ ai_metrics = None
164
+
165
+ # Save df to temp CSV for compute_reliability (fallback / canonical calculation)
166
  with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
167
  df_copy.to_csv(f.name, index=False)
168
  temp_path = f.name
169
 
170
  try:
171
+ # If AI provided metrics successfully, use them; otherwise call compute_reliability
172
+ overall_row = None
173
+ if ai_metrics:
174
+ overall_row = ai_metrics
175
+ else:
176
+ reliability_results = compute_reliability(temp_path, total_customers=total_customers, exclude_planned=True)
177
+ overall_metrics = reliability_results.get('overall', pd.DataFrame())
178
+ if not overall_metrics.empty:
179
+ row = overall_metrics.iloc[0]
180
+ overall_row = {
181
+ 'SAIFI': row.get('SAIFI', None),
182
+ 'SAIDI': row.get('SAIDI', None),
183
+ 'CAIDI': row.get('CAIDI', None),
184
+ 'MAIFI': row.get('MAIFI', None),
185
+ }
186
 
187
+ if overall_row:
188
  # Create reliability DataFrame with proper metric names
189
  reliability_data = [
190
  {
191
  'Metric': 'SAIFI',
192
  'Full Name': 'System Average Interruption Frequency Index',
193
+ 'Value': f"{overall_row.get('SAIFI', 'N/A'):.4f}" if overall_row.get('SAIFI') is not None else 'N/A',
194
  'Unit': 'ครั้ง/ลูกค้า',
195
  'Description': 'ความถี่เฉลี่ยของการขัดข้องต่อลูกค้า'
196
  },
197
  {
198
  'Metric': 'SAIDI',
199
  'Full Name': 'System Average Interruption Duration Index',
200
+ 'Value': f"{overall_row.get('SAIDI', 'N/A'):.2f}" if overall_row.get('SAIDI') is not None else 'N/A',
201
  'Unit': 'นาที/ลูกค้า',
202
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อลูกค้า'
203
  },
204
  {
205
  'Metric': 'CAIDI',
206
  'Full Name': 'Customer Average Interruption Duration Index',
207
+ 'Value': f"{overall_row.get('CAIDI', 'N/A'):.2f}" if overall_row.get('CAIDI') is not None else 'N/A',
208
  'Unit': 'นาที/ครั้ง',
209
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อครั้ง'
210
  },
211
  {
212
  'Metric': 'MAIFI',
213
  'Full Name': 'Momentary Average Interruption Frequency Index',
214
+ 'Value': f"{overall_row.get('MAIFI', 'N/A'):.4f}" if overall_row.get('MAIFI') is not None else 'N/A',
215
  'Unit': 'ครั้ง/ลูกค้า',
216
  'Description': 'ความถี่เฉลี่ยของการขัดข้องชั่วคราวต่อลูกค้า'
217
  }
 
220
 
221
  reliability_summary = f"""
222
  ดัชนีความน่าเชื่อถือ:
223
+ - SAIFI (System Average Interruption Frequency Index): {overall_row.get('SAIFI', 'N/A') if overall_row.get('SAIFI') is not None else 'N/A'} ครั้ง/ลูกค้า
224
+ - SAIDI (System Average Interruption Duration Index): {overall_row.get('SAIDI', 'N/A') if overall_row.get('SAIDI') is not None else 'N/A'} นาที/ลูกค้า
225
+ - CAIDI (Customer Average Interruption Duration Index): {overall_row.get('CAIDI', 'N/A') if overall_row.get('CAIDI') is not None else 'N/A'} นาที/ครั้ง
226
+ - MAIFI (Momentary Average Interruption Frequency Index): {overall_row.get('MAIFI', 'N/A') if overall_row.get('MAIFI') is not None else 'N/A'} ครั้ง/ลูกค้า
227
  """
228
  summary_text += reliability_summary
229
  finally:
230
+ try:
231
+ os.unlink(temp_path)
232
+ except Exception:
233
+ pass
234
  except Exception as e:
235
  reliability_summary = f"ไม่สามารถคำนวณดัชนีความน่าเชื่อถือได้: {str(e)}"
236