Update utils/preprocessing.py
Browse files- utils/preprocessing.py +2 -11
utils/preprocessing.py
CHANGED
|
@@ -2,22 +2,13 @@ import pandas as pd
|
|
| 2 |
from sklearn.ensemble import IsolationForest
|
| 3 |
|
| 4 |
def clean_data(file):
    """Remove anomalous (outlier) rows from MSME demand/supply data.

    Example input: a CSV file with the columns ``tanggal`` (date),
    ``demand`` and ``supply``.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A new DataFrame holding only the rows the IsolationForest labelled
        as inliers, without the temporary ``anomaly`` column. A CSV with a
        header but no data rows is returned as an empty DataFrame.
    """
    # Read the data
    df = pd.read_csv(file)

    # Convert the date column
    df['tanggal'] = pd.to_datetime(df['tanggal'])

    # Nothing to screen: the estimator cannot be fitted on zero rows.
    if df.empty:
        return df

    # Detect anomalies; roughly 5% of the rows are treated as outliers.
    clf = IsolationForest(contamination=0.05)
    # fit_predict labels inliers as 1 and outliers as -1.
    df['anomaly'] = clf.fit_predict(df[['demand', 'supply']])

    clean_df = df[df['anomaly'] == 1].copy()
    clean_df.drop('anomaly', axis=1, inplace=True)

    return clean_df
|
|
|
|
| 2 |
from sklearn.ensemble import IsolationForest
|
| 3 |
|
| 4 |
def clean_data(file, *, contamination=0.05, random_state=None):
    """Load a demand/supply CSV and drop the rows flagged as outliers.

    The CSV must contain the columns ``tanggal`` (date), ``demand`` and
    ``supply``. Outliers are detected with an IsolationForest fitted on the
    ``demand`` and ``supply`` columns.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        contamination: Expected share of outliers, forwarded to
            ``IsolationForest``. Defaults to 0.05.
        random_state: Seed forwarded to ``IsolationForest``. The default
            ``None`` leaves the estimator unseeded, so results may differ
            between runs; pass an int for reproducible output.

    Returns:
        A new DataFrame with only the inlier rows and the original columns
        (``tanggal`` converted to datetime). A CSV with a header but no data
        rows is returned as an empty DataFrame.
    """
    df = pd.read_csv(file)

    # Convert the date column
    df['tanggal'] = pd.to_datetime(df['tanggal'])

    # Nothing to screen: the estimator cannot be fitted on zero rows.
    if df.empty:
        return df

    # Detect anomalies; fit_predict labels inliers as 1 and outliers as -1.
    clf = IsolationForest(contamination=contamination, random_state=random_state)
    df['anomaly'] = clf.fit_predict(df[['demand', 'supply']])

    return df[df['anomaly'] == 1].drop('anomaly', axis=1)
|
|
|
|
|
|
|
|
|
|
|
|