Callmebowoo-22 committed on
Commit
5857fcf
·
verified ·
1 Parent(s): 0290183

Update utils/preprocessing.py

Browse files
Files changed (1) hide show
  1. utils/preprocessing.py +2 -11
utils/preprocessing.py CHANGED
@@ -2,22 +2,13 @@ import pandas as pd
2
  from sklearn.ensemble import IsolationForest
3
 
4
  def clean_data(file):
5
- """
6
- Bersihkan data UMKM dari anomaly (outlier).
7
- Contoh input: File CSV dengan kolom: tanggal, demand, supply
8
- """
9
- # Baca data
10
  df = pd.read_csv(file)
11
 
12
  # Konversi tanggal
13
  df['tanggal'] = pd.to_datetime(df['tanggal'])
14
 
15
  # Deteksi anomaly
16
- clf = IsolationForest(contamination=0.05, random_state=42)
17
  df['anomaly'] = clf.fit_predict(df[['demand', 'supply']])
18
 
19
- # Filter data bersih
20
- clean_df = df[df['anomaly'] == 1].copy()
21
- clean_df.drop('anomaly', axis=1, inplace=True)
22
-
23
- return clean_df
 
2
  from sklearn.ensemble import IsolationForest
3
 
4
  def clean_data(file):
 
 
 
 
 
5
  df = pd.read_csv(file)
6
 
7
  # Konversi tanggal
8
  df['tanggal'] = pd.to_datetime(df['tanggal'])
9
 
10
  # Deteksi anomaly
11
+ clf = IsolationForest(contamination=0.05)
12
  df['anomaly'] = clf.fit_predict(df[['demand', 'supply']])
13
 
14
+ return df[df['anomaly'] == 1].drop('anomaly', axis=1)