Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import pandas as pd, numpy as np | |
| from typing import Dict | |
| from validation import _prepare_generic | |
# Canonical credit-risk field name -> list of alternative column names.
# prepare_credit() hands this table to _prepare_generic; the keys are the same
# names detect_credit() looks up in its ``colmap`` argument.
# NOTE(review): presumably the lists are header aliases matched against the
# uploaded file's columns -- confirm against validation._prepare_generic.
CR_EXPECTED = {
    "customer_id": ["cust_id", "user_id", "client_id"],
    "credit_score": ["creditscore", "score"],
    "utilization": ["util", "credit_utilization", "utilization_ratio"],
    "dti": ["debt_to_income", "debt_to_income_ratio"],
    "recent_defaults": ["defaults", "recentdefaults"],
    "income": ["annual_income", "salary"],
}
def prepare_credit(df: pd.DataFrame):
    """Run the shared preparation step with the credit-risk column table.

    Delegates to ``_prepare_generic(df, CR_EXPECTED)`` and returns that
    helper's result unchanged.

    NOTE(review): the result is presumably what ``detect_credit`` expects as
    ``(clean_df, colmap)`` -- confirm against ``validation._prepare_generic``.
    """
    prepared = _prepare_generic(df, CR_EXPECTED)
    return prepared
def detect_credit(clean_df: pd.DataFrame, colmap: Dict[str, str]) -> tuple[pd.DataFrame, str]:
    """Flag rows of ``clean_df`` that trip one or more credit-risk rules.

    Five independent rules are evaluated per row, each against the column
    that ``colmap`` maps the canonical name to:

    * ``credit_score``    < 600
    * ``utilization``     > 0.8
    * ``dti``             > 0.4
    * ``recent_defaults`` > 0
    * ``income``          < 30000

    The number of rules hit becomes ``risk_score``; ``risk_level`` is
    ``"High"`` for 3 or more hits, ``"Medium"`` for 2, ``"Low"`` for 1 and
    ``"None"`` for 0. ``risk_reason`` lists the labels of the rules hit,
    joined with ``", "``.

    Args:
        clean_df: Input rows. Not modified; the result is a new frame.
        colmap: Maps canonical field names (the rule names above) to the
            actual column names in ``clean_df``. A canonical name that is
            missing from ``colmap``, or whose mapped column is not present in
            ``clean_df``, is treated as "no data" and never hits.

    Returns:
        ``(flagged, stats)`` where ``flagged`` holds the rows with at least
        one hit plus the three ``risk_*`` columns, and ``stats`` is a
        one-line summary. If none of the five rule columns is available, an
        empty ``DataFrame`` and an explanatory message are returned.
    """
    # (canonical name, reason label, comparison ufunc, threshold)
    rules = (
        ("credit_score", "credit_score<600", np.less, 600),
        ("utilization", "utilization>0.8", np.greater, 0.8),
        ("dti", "DTI>0.4", np.greater, 0.4),
        ("recent_defaults", "recent_defaults>0", np.greater, 0),
        ("income", "income<30000", np.less, 30000),
    )

    # A rule is usable only if colmap names a column that really exists;
    # a stale mapping must not raise KeyError further down.
    usable = {
        key
        for key, _, _, _ in rules
        if key in colmap and colmap[key] in clean_df.columns
    }
    if not usable:
        return pd.DataFrame(), "Required columns missing for Credit Risk."

    n_rows = len(clean_df)
    masks = []
    for key, _, compare, threshold in rules:
        if key not in usable:
            masks.append(np.zeros(n_rows, dtype=bool))
            continue
        # Coerce so numeric strings are compared as numbers and anything
        # unparsable becomes NaN (NaN never satisfies a comparison, which
        # matches "skip missing values").
        values = (
            pd.to_numeric(clean_df[colmap[key]], errors="coerce")
            .astype("float64")
            .to_numpy()
        )
        # Older NumPy versions warn when comparing NaN; silence that only.
        with np.errstate(invalid="ignore"):
            masks.append(compare(values, threshold))

    # One row per input row, one column per rule (same order as ``rules``).
    hit_matrix = np.column_stack(masks)
    labels = [label for _, label, _, _ in rules]

    hit_counts = hit_matrix.sum(axis=1).tolist()
    level_by_hits = ("None", "Low", "Medium", "High")
    levels = [level_by_hits[min(count, 3)] for count in hit_counts]
    reasons = [
        ", ".join(label for label, hit in zip(labels, row) if hit)
        for row in hit_matrix
    ]

    # assign() returns a new frame, so clean_df itself is left untouched.
    res = clean_df.assign(
        risk_score=hit_counts,
        risk_level=levels,
        risk_reason=reasons,
    )
    flagged = res[hit_matrix.any(axis=1)]

    stats = (
        f"Credit Risk flagged: {len(flagged)} of {n_rows}. "
        f"Distribution: High={levels.count('High')}, "
        f"Medium={levels.count('Medium')}, "
        f"Low={levels.count('Low')}."
    )
    return flagged, stats