kawaiipeace committed on
Commit
4a72ef3
·
1 Parent(s): 74baf5f

Update Reliability Computation with AI

Browse files
Files changed (5) hide show
  1. .DS_Store +0 -0
  2. README.md +1 -1
  3. app.py +1 -1
  4. scripts/compute_reliability.py +39 -1
  5. scripts/summary.py +111 -14
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
README.md CHANGED
@@ -12,7 +12,7 @@ short_description: AI OMS Platform
12
  ---
13
 
14
  # OMS Analyze — Prototype
15
- > Created by PEACE, Powered by AI, Version 0.0.1
16
 
17
  Prototype Application Platform สำหรับวิเคราะห์ข้อมูลการดับไฟฟ้า (OMS - Outage Management System) โดยใช้ AI และ Machine Learning เพื่อสรุป สืบหาความผิดปกติ พยากรณ์ และจำแนกสาเหตุ
18
 
 
12
  ---
13
 
14
  # OMS Analyze — Prototype
15
+ > Created by PEACE, Powered by AI, Version 0.0.2
16
 
17
  Prototype Application Platform สำหรับวิเคราะห์ข้อมูลการดับไฟฟ้า (OMS - Outage Management System) โดยใช้ AI และ Machine Learning เพื่อสรุป สืบหาความผิดปกติ พยากรณ์ และจำแนกสาเหตุ
18
 
app.py CHANGED
@@ -33,7 +33,7 @@ def parse_row_selection(df, rows_text: str):
33
 
34
  with gr.Blocks() as demo:
35
  gr.Markdown("# OMS Analyze — Prototype")
36
- gr.Markdown("> Created by PEACE, Powered by AI, Version 0.0.1")
37
  with gr.Tabs():
38
  # Upload & Preview tab
39
  with gr.TabItem('Upload & Preview'):
 
33
 
34
  with gr.Blocks() as demo:
35
  gr.Markdown("# OMS Analyze — Prototype")
36
+ gr.Markdown("> Created by PEACE, Powered by AI, Version 0.0.2")
37
  with gr.Tabs():
38
  # Upload & Preview tab
39
  with gr.TabItem('Upload & Preview'):
scripts/compute_reliability.py CHANGED
@@ -26,6 +26,12 @@ from pathlib import Path
26
 
27
  DATE_COLS = ['OutageDateTime', 'FirstRestoDateTime', 'LastRestoDateTime', 'CreateEventDateTime', 'CloseEventDateTime']
28
 
 
 
 
 
 
 
29
 
30
  def parse_dates(df: pd.DataFrame) -> pd.DataFrame:
31
  for c in DATE_COLS:
@@ -113,16 +119,48 @@ def compute_reliability(
113
  out_dir.mkdir(parents=True, exist_ok=True)
114
 
115
  df = pd.read_csv(input_csv, dtype=str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  # parse dates
117
  df = parse_dates(df)
118
 
119
  # coalesce end time
120
  df['OutageStart'] = df.get('OutageDateTime')
121
  df['OutageEnd'] = df.apply(coalesce_end_time, axis=1)
 
122
  # compute duration in minutes
123
  df['DurationMin'] = (pd.to_datetime(df['OutageEnd']) - pd.to_datetime(df['OutageStart'])).dt.total_seconds() / 60.0
124
 
125
- # customers affected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  df['CustomersAffected'] = df.apply(estimate_customers, axis=1)
127
 
128
  # flag planned
 
26
 
27
  DATE_COLS = ['OutageDateTime', 'FirstRestoDateTime', 'LastRestoDateTime', 'CreateEventDateTime', 'CloseEventDateTime']
28
 
29
+ # Default expected columns (from repository / user description). If an input CSV has no header,
30
+ # we'll assign these names when reading with header=None.
31
+ DEFAULT_COLUMNS = [
32
+ 'Number','EventNumber','OutageDateTime','CountOfRestoStep','FirstRestoDateTime','LastRestoDateTime','FirstStepDuration','LastStepDuration','EventType','SubstationID','Feeder','OpDeviceID','OpDeviceGIStag','OpDeviceXYcoord','OpDeviceType','OpDeviceStatus','OpDevicePhase','OpVolt','OpVolt1','OpVolt2','OpDeviceSysType','Owner','FaultSysType','KnowUnknowCause','Weather','CauseType','SubCauseType','FaultDeviceType','FaultDevice','FaultDeviceCondition','Parts','DamagedParts','Manufacturer','RelayType','Relation','RelayPhase','RelayStatus','Detail','FaultDetail','SiteDetail','OpDeviceAreaID','OpDeviceSiteID','EventSiteID','AffectedAreaID','AffectedSiteID','Industrial','AffectedCustomer1','AffectedCustomer2','AffectedCustomer3','AffectedCustomer4','AffectedCustomer5','AffectedCustomer','AllStepCusXTime1','AllStepCusXTime2','AllStepCusXTime3','AllStepCusXTime4','AllStepCusXTime5','AllStepCusXTime','Capacity(kVA)','Load(MW)','CorrectionDetail','WorkOrderID','CreateEventDateTime','CloseEventDateTime'
33
+ ]
34
+
35
 
36
  def parse_dates(df: pd.DataFrame) -> pd.DataFrame:
37
  for c in DATE_COLS:
 
119
  out_dir.mkdir(parents=True, exist_ok=True)
120
 
121
  df = pd.read_csv(input_csv, dtype=str)
122
+
123
+ # If key expected columns are missing, maybe the CSV has no header. Try re-reading without header
124
+ key_cols = {'OutageDateTime', 'AffectedCustomer1', 'AllStepCusXTime1'}
125
+ if not key_cols.intersection(set(df.columns)):
126
+ try:
127
+ df_no_header = pd.read_csv(input_csv, header=None, dtype=str)
128
+ if df_no_header.shape[1] == len(DEFAULT_COLUMNS):
129
+ df_no_header.columns = DEFAULT_COLUMNS
130
+ df = df_no_header
131
+ print(f"Re-read CSV without header and assigned {len(DEFAULT_COLUMNS)} default columns")
132
+ else:
133
+ print(f"CSV appears to lack expected key columns and also did not match default column count ({df_no_header.shape[1]} != {len(DEFAULT_COLUMNS)})")
134
+ except Exception as e:
135
+ print(f"Error re-reading CSV without header: {e}")
136
+
137
  # parse dates
138
  df = parse_dates(df)
139
 
140
  # coalesce end time
141
  df['OutageStart'] = df.get('OutageDateTime')
142
  df['OutageEnd'] = df.apply(coalesce_end_time, axis=1)
143
+
144
  # compute duration in minutes
145
  df['DurationMin'] = (pd.to_datetime(df['OutageEnd']) - pd.to_datetime(df['OutageStart'])).dt.total_seconds() / 60.0
146
 
147
+ # Ensure aggregated customer columns exist: sum AffectedCustomer1..5 -> AffectedCustomer if missing
148
+ af_cols = [f'AffectedCustomer{i}' for i in range(1, 6)]
149
+ present_af = [c for c in af_cols if c in df.columns]
150
+ if 'AffectedCustomer' not in df.columns and present_af:
151
+ df[present_af] = df[present_af].apply(pd.to_numeric, errors='coerce')
152
+ df['AffectedCustomer'] = df[present_af].sum(axis=1)
153
+ print(f"Aggregated {present_af} into AffectedCustomer")
154
+
155
+ # Similarly aggregate AllStepCusXTime1..5 -> AllStepCusXTime
156
+ at_cols = [f'AllStepCusXTime{i}' for i in range(1, 6)]
157
+ present_at = [c for c in at_cols if c in df.columns]
158
+ if 'AllStepCusXTime' not in df.columns and present_at:
159
+ df[present_at] = df[present_at].apply(pd.to_numeric, errors='coerce')
160
+ df['AllStepCusXTime'] = df[present_at].sum(axis=1)
161
+ print(f"Aggregated {present_at} into AllStepCusXTime")
162
+
163
+ # estimate customers affected per event
164
  df['CustomersAffected'] = df.apply(estimate_customers, axis=1)
165
 
166
  # flag planned
scripts/summary.py CHANGED
@@ -47,6 +47,23 @@ def summarize_overall(df: pd.DataFrame, use_hf: bool = False, model: str = 'meta
47
  if col in df_copy.columns:
48
  df_copy[col] = pd.to_datetime(df_copy[col], dayfirst=True, errors='coerce')
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # Calculate basic metrics
51
  if 'OutageDateTime' in df_copy.columns:
52
  date_range = f"{df_copy['OutageDateTime'].min()} ถึง {df_copy['OutageDateTime'].max()}" if pd.notna(df_copy['OutageDateTime'].min()) else "ไม่ระบุ"
@@ -79,45 +96,122 @@ def summarize_overall(df: pd.DataFrame, use_hf: bool = False, model: str = 'meta
79
  from scripts.compute_reliability import compute_reliability
80
  import tempfile
81
  import os
 
82
 
83
- # Save df to temp CSV for compute_reliability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
85
  df_copy.to_csv(f.name, index=False)
86
  temp_path = f.name
87
 
88
  try:
89
- reliability_results = compute_reliability(temp_path, total_customers=total_customers, exclude_planned=True)
90
- overall_metrics = reliability_results.get('overall', pd.DataFrame())
91
- if not overall_metrics.empty:
92
- row = overall_metrics.iloc[0]
 
 
 
 
 
 
 
 
 
 
 
93
 
 
94
  # Create reliability DataFrame with proper metric names
95
  reliability_data = [
96
  {
97
  'Metric': 'SAIFI',
98
  'Full Name': 'System Average Interruption Frequency Index',
99
- 'Value': f"{row.get('SAIFI', 'N/A'):.4f}",
100
  'Unit': 'ครั้ง/ลูกค้า',
101
  'Description': 'ความถี่เฉลี่ยของการขัดข้องต่อลูกค้า'
102
  },
103
  {
104
  'Metric': 'SAIDI',
105
  'Full Name': 'System Average Interruption Duration Index',
106
- 'Value': f"{row.get('SAIDI', 'N/A'):.2f}",
107
  'Unit': 'นาที/ลูกค้า',
108
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อลูกค้า'
109
  },
110
  {
111
  'Metric': 'CAIDI',
112
  'Full Name': 'Customer Average Interruption Duration Index',
113
- 'Value': f"{row.get('CAIDI', 'N/A'):.2f}",
114
  'Unit': 'นาที/ครั้ง',
115
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อครั้ง'
116
  },
117
  {
118
  'Metric': 'MAIFI',
119
  'Full Name': 'Momentary Average Interruption Frequency Index',
120
- 'Value': f"{row.get('MAIFI', 'N/A'):.4f}",
121
  'Unit': 'ครั้ง/ลูกค้า',
122
  'Description': 'ความถี่เฉลี่ยของการขัดข้องชั่วคราวต่อลูกค้า'
123
  }
@@ -126,14 +220,17 @@ def summarize_overall(df: pd.DataFrame, use_hf: bool = False, model: str = 'meta
126
 
127
  reliability_summary = f"""
128
  ดัชนีความน่าเชื่อถือ:
129
- - SAIFI (System Average Interruption Frequency Index): {row.get('SAIFI', 'N/A'):.4f} ครั้ง/ลูกค้า
130
- - SAIDI (System Average Interruption Duration Index): {row.get('SAIDI', 'N/A'):.2f} นาที/ลูกค้า
131
- - CAIDI (Customer Average Interruption Duration Index): {row.get('CAIDI', 'N/A'):.2f} นาที/ครั้ง
132
- - MAIFI (Momentary Average Interruption Frequency Index): {row.get('MAIFI', 'N/A'):.4f} ครั้ง/ลูกค้า
133
  """
134
  summary_text += reliability_summary
135
  finally:
136
- os.unlink(temp_path)
 
 
 
137
  except Exception as e:
138
  reliability_summary = f"ไม่สามารถคำนวณดัชนีความน่าเชื่อถือได้: {str(e)}"
139
 
 
47
  if col in df_copy.columns:
48
  df_copy[col] = pd.to_datetime(df_copy[col], dayfirst=True, errors='coerce')
49
 
50
+ # Aggregate AffectedCustomer and AllStepCusXTime from numbered columns if needed
51
+ # AffectedCustomer = sum(AffectedCustomer1..AffectedCustomer5) if AffectedCustomer missing
52
+ affected_parts = [f'AffectedCustomer{i}' for i in range(1, 6)]
53
+ present_affected = [c for c in affected_parts if c in df_copy.columns]
54
+ if 'AffectedCustomer' not in df_copy.columns and present_affected:
55
+ df_copy['AffectedCustomer'] = df_copy[present_affected].apply(lambda s: pd.to_numeric(s, errors='coerce')).sum(axis=1)
56
+ elif 'AffectedCustomer' in df_copy.columns:
57
+ df_copy['AffectedCustomer'] = pd.to_numeric(df_copy['AffectedCustomer'], errors='coerce')
58
+
59
+ # AllStepCusXTime = sum(AllStepCusXTime1..AllStepCusXTime5) if missing
60
+ allstep_parts = [f'AllStepCusXTime{i}' for i in range(1, 6)]
61
+ present_allstep = [c for c in allstep_parts if c in df_copy.columns]
62
+ if 'AllStepCusXTime' not in df_copy.columns and present_allstep:
63
+ df_copy['AllStepCusXTime'] = df_copy[present_allstep].apply(lambda s: pd.to_numeric(s, errors='coerce')).sum(axis=1)
64
+ elif 'AllStepCusXTime' in df_copy.columns:
65
+ df_copy['AllStepCusXTime'] = pd.to_numeric(df_copy['AllStepCusXTime'], errors='coerce')
66
+
67
  # Calculate basic metrics
68
  if 'OutageDateTime' in df_copy.columns:
69
  date_range = f"{df_copy['OutageDateTime'].min()} ถึง {df_copy['OutageDateTime'].max()}" if pd.notna(df_copy['OutageDateTime'].min()) else "ไม่ระบุ"
 
96
  from scripts.compute_reliability import compute_reliability
97
  import tempfile
98
  import os
99
+ import json
100
 
101
+ # If AI requested, attempt to get reliability metrics from the model as JSON
102
+ ai_metrics = None
103
+ if use_hf and get_hf_token():
104
+ try:
105
+ # Detailed instruction tailored to the available columns and aggregation rules
106
+ ai_instruction = (
107
+ "คำนวณดัชนีความน่าเชื่อถือจากข้อมูลเหตุการณ์ไฟฟ้านี้ (SAIFI, SAIDI, CAIDI, MAIFI) โดยใช้คอลัมน์ที่มีดังนี้:\n"
108
+ "- OutageDateTime (เวลาเริ่มเหตุการณ์)\n"
109
+ "- LastRestoDateTime (เวลา restore สุดท้าย) — หากไม่มีให้ใช้ FirstRestoDateTime หรือ CloseEventDateTime ตามลำดับ\n"
110
+ "- AffectedCustomer1..AffectedCustomer5 (หากมี)\n"
111
+ "- AllStepCusXTime1..AllStepCusXTime5 (หากมี)\n"
112
+ "- AffectedCustomer (ถือเป็นผลรวมของ AffectedCustomer1..5 ถ้ามี)\n"
113
+ "- AllStepCusXTime (ถือเป็นผลรวมของ AllStepCusXTime1..5 ถ้ามี)\n"
114
+ "- (optional) IsPlanned/Planned\n\n"
115
+ "ให้คำนวณโดยใช้กฎต่อไปนี้:\n"
116
+ "1) แปลงเวลาเป็น datetime และคำนวณ duration_minutes = (LastRestoDateTime - OutageDateTime).total_seconds() / 60\n"
117
+ " - ถ้า LastRestoDateTime ไม่มี ให้ใช้ FirstRestoDateTime หรือ CloseEventDateTime\n"
118
+ " - ถ้า duration เป็นลบหรือไม่สามารถคำนวณ ให้ข้ามเหตุการณ์และระบุใน notes\n"
119
+ "2) หาก AffectedCustomer ไม่มี ให้คำนวณเป็น sum(AffectedCustomer1..5); ถ้า AllStepCusXTime ไม่มี ให้คำนวณเป็น sum(AllStepCusXTime1..5)\n"
120
+ "3) ยกเว้นเหตุการณ์ที่เป็น planned เมื่อ exclude_planned == True\n"
121
+ "4) กำหนด sustained ถ้า duration_minutes >= 3.0 (sustained_threshold_minutes) และ momentary ถ้า duration_minutes <= 1.0 (momentary_threshold_minutes)\n"
122
+ "5) คำนวณ aggregates:\n"
123
+ " - total_customer_interruptions = Σ AffectedCustomer สำหรับเหตุการณ์ sustained\n"
124
+ " - total_customer_minutes = Σ (AffectedCustomer * duration_minutes) สำหรับเหตุการณ์ sustained\n"
125
+ " - total_momentary_interruptions = Σ AffectedCustomer สำหรับเหตุการณ์ momentary\n"
126
+ " - total_customers_served = parameter total_customers (ถ้าไม่มีหรือ <=0 ให้คืน null ทั้งหมด)\n"
127
+ "6) สูตรดัชนี:\n"
128
+ " - SAIFI = total_customer_interruptions / total_customers_served\n"
129
+ " - SAIDI = total_customer_minutes / total_customers_served\n"
130
+ " - CAIDI = SAIDI / SAIFI (ถ้า SAIFI == 0 ให้ CAIDI = null)\n"
131
+ " - MAIFI = total_momentary_interruptions / total_customers_served\n\n"
132
+ "ปัดค่า: SAIFI, MAIFI -> 4 ทศนิยม; SAIDI, CAIDI -> 2 ทศนิยม. หลีกเลี่ยงค่า negative (ถ้า negative ให้เป็น null).\n"
133
+ "ผลลัพธ์: ตอบเป็น JSON ล้วนโดยตรงตาม schema:\n"
134
+ "{\"SAIFI\": <number|null>, \"SAIDI\": <number|null>, \"CAIDI\": <number|null>, \"MAIFI\": <number|null>, \"notes\": <string|null>}\n"
135
+ "ตัวอย่าง: {\"SAIFI\": 0.1234, \"SAIDI\": 12.34, \"CAIDI\": 100.00, \"MAIFI\": 0.0123, \"notes\": null}\n"
136
+ "ถ้าไม่สามารถคำนวณบางค่าจงคืนเป็น null และอธิบายสาเหตุใน notes (เช่น 'skipped 3 events due to missing AffectedCustomer').\n"
137
+ "โปรดตอบเป็น JSON เท่านั้น ไม่มีข้อความอื่น"
138
+ )
139
+ prompt = f"{ai_instruction}\n\nข้อมูล (CSV):\n{df_copy.head(50).to_csv(index=False)}"
140
+ ai_resp = openai_summary(prompt, verbosity='analyze', model=model)
141
+ if ai_resp:
142
+ # Attempt to extract JSON substring
143
+ text = ai_resp.strip()
144
+ # Find first { and last }
145
+ start = text.find('{')
146
+ end = text.rfind('}')
147
+ if start != -1 and end != -1 and end > start:
148
+ json_text = text[start:end+1]
149
+ try:
150
+ parsed = json.loads(json_text)
151
+ except Exception:
152
+ parsed = None
153
+ # Normalize numeric values or None
154
+ if parsed:
155
+ ai_metrics = {
156
+ 'SAIFI': float(parsed.get('SAIFI')) if parsed.get('SAIFI') is not None else None,
157
+ 'SAIDI': float(parsed.get('SAIDI')) if parsed.get('SAIDI') is not None else None,
158
+ 'CAIDI': float(parsed.get('CAIDI')) if parsed.get('CAIDI') is not None else None,
159
+ 'MAIFI': float(parsed.get('MAIFI')) if parsed.get('MAIFI') is not None else None,
160
+ }
161
+ # If parsing failed, leave ai_metrics as None and fallback to compute_reliability
162
+ except Exception:
163
+ ai_metrics = None
164
+
165
+ # Save df to temp CSV for compute_reliability (fallback / canonical calculation)
166
  with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
167
  df_copy.to_csv(f.name, index=False)
168
  temp_path = f.name
169
 
170
  try:
171
+ # If AI provided metrics successfully, use them; otherwise call compute_reliability
172
+ overall_row = None
173
+ if ai_metrics:
174
+ overall_row = ai_metrics
175
+ else:
176
+ reliability_results = compute_reliability(temp_path, total_customers=total_customers, exclude_planned=True)
177
+ overall_metrics = reliability_results.get('overall', pd.DataFrame())
178
+ if not overall_metrics.empty:
179
+ row = overall_metrics.iloc[0]
180
+ overall_row = {
181
+ 'SAIFI': row.get('SAIFI', None),
182
+ 'SAIDI': row.get('SAIDI', None),
183
+ 'CAIDI': row.get('CAIDI', None),
184
+ 'MAIFI': row.get('MAIFI', None),
185
+ }
186
 
187
+ if overall_row:
188
  # Create reliability DataFrame with proper metric names
189
  reliability_data = [
190
  {
191
  'Metric': 'SAIFI',
192
  'Full Name': 'System Average Interruption Frequency Index',
193
+ 'Value': f"{overall_row.get('SAIFI', 'N/A'):.4f}" if overall_row.get('SAIFI') is not None else 'N/A',
194
  'Unit': 'ครั้ง/ลูกค้า',
195
  'Description': 'ความถี่เฉลี่ยของการขัดข้องต่อลูกค้า'
196
  },
197
  {
198
  'Metric': 'SAIDI',
199
  'Full Name': 'System Average Interruption Duration Index',
200
+ 'Value': f"{overall_row.get('SAIDI', 'N/A'):.2f}" if overall_row.get('SAIDI') is not None else 'N/A',
201
  'Unit': 'นาที/ลูกค้า',
202
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อลูกค้า'
203
  },
204
  {
205
  'Metric': 'CAIDI',
206
  'Full Name': 'Customer Average Interruption Duration Index',
207
+ 'Value': f"{overall_row.get('CAIDI', 'N/A'):.2f}" if overall_row.get('CAIDI') is not None else 'N/A',
208
  'Unit': 'นาที/ครั้ง',
209
  'Description': 'ระยะเวลาขัดข้องเฉลี่ยต่อครั้ง'
210
  },
211
  {
212
  'Metric': 'MAIFI',
213
  'Full Name': 'Momentary Average Interruption Frequency Index',
214
+ 'Value': f"{overall_row.get('MAIFI', 'N/A'):.4f}" if overall_row.get('MAIFI') is not None else 'N/A',
215
  'Unit': 'ครั้ง/ลูกค้า',
216
  'Description': 'ความถี่เฉลี่ยของการขัดข้องชั่วคราวต่อลูกค้า'
217
  }
 
220
 
221
  reliability_summary = f"""
222
  ดัชนีความน่าเชื่อถือ:
223
+ - SAIFI (System Average Interruption Frequency Index): {overall_row.get('SAIFI', 'N/A') if overall_row.get('SAIFI') is not None else 'N/A'} ครั้ง/ลูกค้า
224
+ - SAIDI (System Average Interruption Duration Index): {overall_row.get('SAIDI', 'N/A') if overall_row.get('SAIDI') is not None else 'N/A'} นาที/ลูกค้า
225
+ - CAIDI (Customer Average Interruption Duration Index): {overall_row.get('CAIDI', 'N/A') if overall_row.get('CAIDI') is not None else 'N/A'} นาที/ครั้ง
226
+ - MAIFI (Momentary Average Interruption Frequency Index): {overall_row.get('MAIFI', 'N/A') if overall_row.get('MAIFI') is not None else 'N/A'} ครั้ง/ลูกค้า
227
  """
228
  summary_text += reliability_summary
229
  finally:
230
+ try:
231
+ os.unlink(temp_path)
232
+ except Exception:
233
+ pass
234
  except Exception as e:
235
  reliability_summary = f"ไม่สามารถคำนวณดัชนีความน่าเชื่อถือได้: {str(e)}"
236