Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 0fcbaad

Browse files
committed Oct 11, 2024 ·
Added Risk Prediction System with PDF generation
1 parent c7cd947 commit 0fcbaad

File tree

6 files changed

+1129
-0
lines changed

6 files changed

+1129
-0
lines changed
 
35.5 KB
Binary file not shown.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
"""Quick exploratory look at the credit data in 'german_credit_data.csv'.

Prints a preview, the column summary and per-column missing-value counts,
then shows a count plot of the 'Risk' column.
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

data = pd.read_csv('german_credit_data.csv')

print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line to the output.
data.info()
print(data.isnull().sum())

# How many rows fall into each 'Risk' value.
sns.countplot(x='Risk', data=data)
plt.title('Distribution of Risk')
plt.show()
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
"""Label-encode the text columns and standardise the numeric columns.

Reads 'german_credit_data.csv', replaces every object-dtype column with
integer codes (keeping the fitted encoders in ``label_encoders``), scales
the columns that were numeric in the raw file, and prints a preview.
"""
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas as pd

data = pd.read_csv('german_credit_data.csv')

# Pick the numeric columns BEFORE encoding. After encoding, the category
# codes are integers too and would be standardised as if they were
# measurements, which turns the codes into meaningless floats.
numerical_features = data.select_dtypes(include=['int64', 'float64']).columns

label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

scaler = StandardScaler()
# StandardScaler rejects an empty selection, so skip scaling when the file
# has no numeric columns at all.
if len(numerical_features) > 0:
    data[numerical_features] = scaler.fit_transform(data[numerical_features])
print(data.head())

‎risk_predection_system/german_credit_data.csv

Lines changed: 1001 additions & 0 deletions
Large diffs are not rendered by default.

‎risk_predection_system/main.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
"""Train a random-forest classifier for the 'Risk' column and build a PDF report.

Steps: read 'german_credit_data.csv', label-encode text columns, split into
train/test sets, standardise the numeric features, fit the model, print the
evaluation metrics and write 'ML_Model_Report2.pdf' with four pages (risk
distribution, feature importance, classification report, confusion matrix).
"""
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from matplotlib.backends.backend_pdf import PdfPages

data = pd.read_csv('german_credit_data.csv')

# Columns that are numeric in the raw file, excluding the target. This must be
# decided BEFORE label encoding: afterwards the category codes (and an encoded
# 'Risk') are integers as well and would be standardised, turning the class
# labels into floats that astype(int) then truncates to arbitrary values.
numerical_features = [
    column
    for column in data.select_dtypes(include=['int64', 'float64']).columns
    if column != 'Risk'
]

label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

print(data.head())
print("Unique values in 'Risk' column:", data['Risk'].unique())

X = data.drop('Risk', axis=1)
# Kept from the original: guarantees integer class labels for the classifier.
y = data['Risk'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so nothing about the test set leaks into training.
scaler = StandardScaler()
if numerical_features:
    # Copies avoid writing into frames derived from X.
    X_train = X_train.copy()
    X_test = X_test.copy()
    X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
    X_test[numerical_features] = scaler.transform(X_test[numerical_features])

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Compute each metric once; the same values go to stdout and into the PDF.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", matrix)

importances = model.feature_importances_
feature_importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

with PdfPages('ML_Model_Report2.pdf') as pdf:
    # Page 1: class balance of the (encoded) target.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(x='Risk', data=data, ax=ax)
    ax.set_title('Distribution of Risk')
    pdf.savefig(fig)
    plt.close(fig)

    # Page 2: feature importances, most important first.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='Importance', y='Feature', data=feature_importance_df, ax=ax)
    ax.set_title('Feature Importance')
    pdf.savefig(fig)
    plt.close(fig)

    # Page 3: classification report as monospaced text. Positions are given in
    # axes fractions and anchored at the top; the previous header position
    # (y=1.25 in data coordinates) lay above the figure and was cut off.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.axis('off')
    ax.text(
        0.01,
        0.95,
        'Classification Report:\n\n' + report,
        transform=ax.transAxes,
        va='top',
        family='monospace',
        fontsize=10,
    )
    pdf.savefig(fig)
    plt.close(fig)

    # Page 4: confusion matrix heat map with integer cell annotations.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(matrix, annot=True, fmt="d", ax=ax)
    ax.set_title('Confusion Matrix')
    pdf.savefig(fig)
    plt.close(fig)

print("PDF report 'ML_Model_Report2.pdf' has been generated successfully.")
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
"""Fit a random-forest classifier on the label-encoded credit data.

Reads 'german_credit_data.csv', encodes every object-dtype column with its
own LabelEncoder, holds out 30% of the rows for testing and prints the
accuracy, classification report and confusion matrix.
"""
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

data = pd.read_csv('german_credit_data.csv')

# One fitted encoder per text column, kept by column name.
label_encoders = {}
text_columns = data.select_dtypes(include=['object']).columns
for column in text_columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# Everything except 'Risk' is a feature; 'Risk' is the target.
y = data['Risk']
X = data.drop(columns='Risk')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Evaluate on the held-out rows.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", matrix)

0 commit comments

Comments
 (0)
Please sign in to comment.