plot_evalplot_script with examples
An example showing the evalplot function used with a scikit-learn classifier.
9 # Authors: The scikit-plots developers
10 # SPDX-License-Identifier: BSD-3-Clause
Import scikit-plots
14 import scikitplot.snsx as sp
18 import matplotlib.pyplot as plt
19 import numpy as np; np.random.seed(0) # reproducibility
20 import pandas as pd
21
22 from sklearn.datasets import make_classification
23 from sklearn.datasets import (
24 load_breast_cancer as data_2_classes,
25 load_iris as data_3_classes,
26 load_digits as data_10_classes,
27 )
28 from sklearn.linear_model import LogisticRegression
29 from sklearn.model_selection import train_test_split
30
def logistic_scale(scores):
    """Squash raw decision scores into the unit interval with the logistic function.

    Parameters
    ----------
    scores : array-like
        Raw scores, e.g. the output of a ``decision_function``.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-scores))`` evaluated element-wise, with the same
        shape as ``scores``.

    Notes
    -----
    The naive formula evaluates ``exp(-scores)``, which overflows (and emits
    a ``RuntimeWarning``) for large negative scores. This version only ever
    exponentiates a non-positive number, so it cannot overflow.
    """
    scores = np.asarray(scores, dtype=float)
    # decay = exp(-|x|) always lies in (0, 1], whatever the magnitude of x.
    decay = np.exp(-np.abs(scores))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value, rearranged.
    return np.where(scores >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
37
def minmax_scale(scores):
    """Rescale values linearly so the smallest maps to 0 and the largest to 1.

    When the minimum and maximum are (numerically) equal there is no spread
    to normalise by, so an all-zero array of the same shape is returned.
    """
    values = np.asarray(scores, dtype=float)
    lo = values.min()
    hi = values.max()
    if not np.isclose(lo, hi):
        return (values - lo) / (hi - lo)
    # Degenerate range: the division would be 0/0, so return zeros instead.
    return np.zeros_like(values)
Load the data. Alternatives: X, y = data_3_classes(return_X_y=True, as_frame=False) or X, y = data_2_classes(return_X_y=True, as_frame=False)
# Synthetic binary classification problem; weights=[0.85, 0.15] requests
# an imbalanced class split.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=15,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    weights=[0.85, 0.15],
    class_sep=1.5,
    flip_y=0.01,
    random_state=0,
)

# Hold out 20% of the samples for validation, stratified on the label.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)
np.unique(y)
array([0, 1])
Create an instance of the LogisticRegression
# Logistic regression with balanced class weights.
# Other settings to experiment with:
#   max_iter=int(1e5), C=10, penalty='l1', solver='liblinear'
model = LogisticRegression(class_weight='balanced', random_state=0)
model.fit(X_train, y_train)

# Per-class probabilities for the validation samples.
y_val_prob = model.predict_proba(X_val)

# Gather ground truth, scores and thresholded predictions in one frame.
df = pd.DataFrame({
    "y_true": y_val == 1,              # True where the label is 1
    "y_score": y_val_prob[:, 1],       # column 1 of predict_proba
    "y_pred": y_val_prob[:, 1] > 0.5,  # hard prediction at a 0.5 cut-off
})
df
# kind="all", computed from the boolean predictions.
# To plot from scores instead, pass y="y_score" together with
# allow_probs=True and a threshold (e.g. 0.5).
p = sp.evalplot(df, x="y_true", y="y_pred", kind="all")

# Classification report only, with a larger font size for the text.
p = sp.evalplot(
    df, x="y_true", y="y_pred", kind="classification_report", text_kws={'fontsize': 16}
)

# Confusion matrix only.
p = sp.evalplot(df, x="y_true", y="y_pred", kind="confusion_matrix")

# Same evaluation, driven by the probability scores rather than hard labels.
fig, ax = plt.subplots(figsize=(8, 6))
p = sp.evalplot(
    df,
    x="y_true",
    y="y_score",  # swap in y="y_pred" to plot from hard predictions
    kind="all",
    allow_probs=True,  # set because y holds scores
    threshold=0.5,
)

136 import numpy as np
137 import matplotlib.pyplot as plt
138 from sklearn.datasets import make_classification
139 from sklearn.datasets import (
140 load_breast_cancer as data_2_classes,
141 load_iris as data_3_classes,
142 load_digits as data_10_classes,
143 )
144 from sklearn.ensemble import RandomForestClassifier
145 from sklearn.model_selection import train_test_split
146 from sklearn.metrics import classification_report, confusion_matrix
Load the data. Alternatives: X, y = data_3_classes(return_X_y=True, as_frame=False) or X, y = data_2_classes(return_X_y=True, as_frame=False)
# Synthetic binary classification problem; weights=[0.97, 0.03] requests
# a heavily imbalanced class split.
X, y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=15,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    weights=[0.97, 0.03],
    class_sep=1.5,
    flip_y=0.01,
    random_state=0,
)

# Hold out 20% of the samples for validation, stratified on the label.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)
np.unique(y)
array([0, 1])
Initialize the Random Forest Classifier
# Random forest with balanced class weights: 100 trees, depth capped at 6.
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=6,
    class_weight='balanced',
    random_state=0,
)

# Fit on the training split.
rf_model.fit(X_train, y_train)
Make predictions on the validation set
# Hard labels, plus column 1 of predict_proba, for the validation split.
y_val_pred = rf_model.predict(X_val)
y_val_prob = rf_model.predict_proba(X_val)[:, 1]

# Evaluate the hard predictions.
fig, ax = plt.subplots(figsize=(8, 8))
p = sp.evalplot(x=y_val, y=y_val_pred, kind="all")

# Evaluate the probability scores; allow_probs is set because y holds scores.
fig, ax = plt.subplots(figsize=(8, 8))
p = sp.evalplot(x=y_val, y=y_val_prob, kind="all", allow_probs=True, threshold=0.5)

Generate a classification report
# Text summary of the per-class metrics on the validation split.
print(classification_report(y_true=y_val, y_pred=y_val_pred))

# Confusion matrix for the same predictions.
conf_matrix = confusion_matrix(y_true=y_val, y_pred=y_val_pred)
print(conf_matrix)
precision recall f1-score support
0 0.97 1.00 0.99 966
1 0.73 0.24 0.36 34
accuracy 0.97 1000
macro avg 0.85 0.62 0.67 1000
weighted avg 0.97 0.97 0.96 1000
[[963 3]
[ 26 8]]
import seaborn as sns
213 # plt.figure(figsize=(12, 7))
214 # sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
215 # xticklabels=np.arange(15), yticklabels=np.arange(15))
216 # plt.ylabel('Actual')
217 # plt.xlabel('Predicted')
218 # plt.title('Confusion Matrix')
219 # plt.show()
Total running time of the script: (0 minutes 1.437 seconds)
Related examples