plot_evalplot_script with examples

An example showing the evalplot function used with scikit-learn classifiers.

# Authors: The scikit-plots developers
# SPDX-License-Identifier: BSD-3-Clause

Import scikit-plots

import scikitplot.snsx as sp

import matplotlib.pyplot as plt
import numpy as np; np.random.seed(0)  # reproducibility
import pandas as pd

from sklearn.datasets import make_classification
from sklearn.datasets import (
    load_breast_cancer as data_2_classes,
    load_iris as data_3_classes,
    load_digits as data_10_classes,
)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def logistic_scale(scores):
    """Scale decision_function outputs to (0, 1) using the logistic (sigmoid) function."""
    scores = np.asarray(scores, dtype=float)
    # Clip to avoid overflow for large |x| before exp
    # scores = np.clip(scores, -500, 500)
    return 1.0 / (1.0 + np.exp(-scores))

def minmax_scale(scores):
    """Linearly scale an array to [0, 1]."""
    scores = np.asarray(scores, dtype=float)
    min_, max_ = np.min(scores), np.max(scores)
    if np.isclose(min_, max_):
        # Avoid divide-by-zero when all values are identical
        return np.zeros_like(scores)
    return (scores - min_) / (max_ - min_)
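Neither helper is used in the rest of this example (LogisticRegression already returns probabilities via predict_proba), but they are handy for estimators that only expose decision_function. A minimal sketch, where LinearSVC and the toy data are illustration only and not part of the original script:

from sklearn.svm import LinearSVC

Xd, yd = make_classification(n_samples=200, random_state=0)
svm = LinearSVC(random_state=0).fit(Xd, yd)              # LinearSVC has no predict_proba
scores_01 = logistic_scale(svm.decision_function(Xd))    # squash margin scores into (0, 1)
# scores_01 = minmax_scale(svm.decision_function(Xd))    # alternative: linear rescale to [0, 1]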

Load the data. Either built-in loader could be used in place of the synthetic dataset generated below, e.g. X, y = data_2_classes(return_X_y=True, as_frame=False) or X, y = data_3_classes(return_X_y=True, as_frame=False).

# Generate a sample dataset
X, y = make_classification(n_samples=5000, n_features=20, n_informative=15,
                           n_redundant=2, n_classes=2, n_repeated=0,
                           class_sep=1.5, flip_y=0.01, weights=[0.85, 0.15],
                           random_state=0)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=0
)
np.unique(y)
array([0, 1])
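Because weights=[0.85, 0.15] makes the positive class the minority, it is worth checking the class counts before training; a quick check, not part of the original script:

print(np.bincount(y))                            # roughly an 85% / 15% split (flip_y adds a little noise)
print(np.bincount(y_train), np.bincount(y_val))  # stratify=y preserves the ratio in both splits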

Create an instance of LogisticRegression

model = (
    LogisticRegression(
        # max_iter=int(1e5),
        # C=10,
        # penalty='l1',
        # solver='liblinear',
        class_weight='balanced',
        random_state=0,
    )
    .fit(X_train, y_train)
)
# Perform predictions
y_val_prob = model.predict_proba(X_val)
# Create a DataFrame with predictions
df = pd.DataFrame({
    "y_true": y_val == 1,              # ground-truth label as a boolean (positive class = 1)
    "y_score": y_val_prob[:, 1],       # predicted probability of the positive class
    # for multiclass targets, derive y_pred with np.argmax (see the sketch below the table)
    "y_pred": y_val_prob[:, 1] > 0.5,  # hard prediction at a 0.5 threshold
    # "y_true": np.random.normal(0.5, 0.1, 100).round(),
    # "y_score": np.random.normal(0.5, 0.15, 100),
    # "hue": np.random.normal(0.5, 0.4, 100).round(),
})
df
     y_true   y_score  y_pred
0     False  0.033725   False
1      True  0.860583    True
2     False  0.423101   False
3     False  0.137295   False
4     False  0.788645    True
..      ...       ...     ...
995   False  0.228034   False
996   False  0.017187   False
997    True  0.987892    True
998   False  0.931136    True
999   False  0.128248   False

1000 rows × 3 columns
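For multiclass targets, the commented np.argmax hint applies: a minimal sketch, not part of the original script, of how the same three columns could be built for one class of the iris data loaded via data_3_classes:

Xm, ym = data_3_classes(return_X_y=True, as_frame=False)
proba = LogisticRegression(max_iter=1000, random_state=0).fit(Xm, ym).predict_proba(Xm)
target = 1                                         # evaluate class 1 in a one-vs-rest fashion
df_multi = pd.DataFrame({
    "y_true": ym == target,                        # does the true label match the target class?
    "y_score": proba[:, target],                   # probability assigned to the target class
    "y_pred": np.argmax(proba, axis=1) == target,  # is the target class the top prediction?
})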



p = sp.evalplot(
    df,
    x="y_true",
    y="y_pred",
    # y="y_score",
    # allow_probs=True,  # if y_score provided
    # threshold=0.5,
    kind="all",
)
(Figure: Classification Report and Confusion Matrix)
p = sp.evalplot(
    df,
    x="y_true",
    y="y_pred",
    kind="classification_report",
    text_kws={'fontsize': 16},
)
(Figure: Classification Report)
p = sp.evalplot(
    df,
    x="y_true",
    y="y_pred",
    kind="confusion_matrix",
)
(Figure: Confusion Matrix)

fig, ax = plt.subplots(figsize=(8, 6))

p = sp.evalplot(
    df,
    x="y_true",
    # y="y_pred",
    y="y_score",
    allow_probs=True,  # if y_score provided
    threshold=0.5,
    kind="all",
)
(Figure: Classification Report and Confusion Matrix)
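With allow_probs=True, the y_score column is presumably binarized at the given threshold before the report and matrix are computed; the equivalent manual step (an assumption about evalplot's behavior, not taken from its source) would be:

y_pred_from_score = df["y_score"] > 0.5           # hard labels presumably derived at threshold=0.5
assert (y_pred_from_score == df["y_pred"]).all()  # matches the y_pred column built earlier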
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.datasets import (
    load_breast_cancer as data_2_classes,
    load_iris as data_3_classes,
    load_digits as data_10_classes,
)
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

Load the data. As before, either built-in loader (data_2_classes or data_3_classes) could be used in place of the synthetic dataset generated below.

# Generate a sample dataset
X, y = make_classification(n_samples=5000, n_features=20, n_informative=15,
                           n_redundant=2, n_classes=2, n_repeated=0,
                           class_sep=1.5, flip_y=0.01, weights=[0.97, 0.03],
                           random_state=0)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=0,
)
np.unique(y)
array([0, 1])

Initialize the Random Forest Classifier

rf_model = RandomForestClassifier(
    class_weight='balanced',
    n_estimators=100,
    max_depth=6,
    random_state=0,
)

# Train the model
rf_model.fit(X_train, y_train)
RandomForestClassifier(class_weight='balanced', max_depth=6, random_state=0)
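class_weight='balanced' matters here because only about 3% of the samples are positive: scikit-learn weights each class by n_samples / (n_classes * np.bincount(y)). A quick look at the effective weights, not part of the original script:

counts = np.bincount(y_train)
weights = len(y_train) / (len(counts) * counts)   # the 'balanced' weighting formula
print(dict(zip(np.unique(y_train), weights)))     # the minority class gets a much larger weight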


Make predictions on the test set

y_val_pred = rf_model.predict(X_val)
y_val_prob = rf_model.predict_proba(X_val)[:, 1]
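predict uses a fixed 0.5 cutoff, so the probability scores also allow trading precision for recall on the rare class; a short sketch, with thresholds chosen only for illustration:

from sklearn.metrics import precision_score, recall_score

for thr in (0.5, 0.3, 0.1):
    y_hat = (y_val_prob >= thr).astype(int)   # re-threshold the positive-class probabilities
    print(f"thr={thr}: precision={precision_score(y_val, y_hat):.2f}, "
          f"recall={recall_score(y_val, y_hat):.2f}")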

fig, ax = plt.subplots(figsize=(8, 8))

p = sp.evalplot(
    x=y_val,
    y=y_val_pred,
    kind="all",
)
(Figure: Classification Report and Confusion Matrix)

fig, ax = plt.subplots(figsize=(8, 8))

p = sp.evalplot(
    x=y_val,
    # y=y_val_pred,
    y=y_val_prob,
    allow_probs=True,  # if y_score provided
    threshold=0.5,
    kind="all",
)
(Figure: Classification Report and Confusion Matrix)

Generate a classification report

print(classification_report(y_val, y_val_pred))

# Generate a confusion matrix
conf_matrix = confusion_matrix(y_val, y_val_pred)
print(conf_matrix)
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       966
           1       0.73      0.24      0.36        34

    accuracy                           0.97      1000
   macro avg       0.85      0.62      0.67      1000
weighted avg       0.97      0.97      0.96      1000

[[963   3]
 [ 26   8]]
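The class-1 numbers in the report can be read straight off the matrix: with [[963 3], [26 8]], precision is 8 / (8 + 3) ≈ 0.73 and recall is 8 / (8 + 26) ≈ 0.24. The same arithmetic in code, not part of the original script:

tn, fp, fn, tp = confusion_matrix(y_val, y_val_pred).ravel()  # binary case: tn, fp, fn, tp
print(tp / (tp + fp))   # precision for class 1: 8 / 11 ≈ 0.73
print(tp / (tp + fn))   # recall for class 1:    8 / 34 ≈ 0.24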

import seaborn as sns

# plt.figure(figsize=(12, 7))
# sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
#             xticklabels=np.arange(2), yticklabels=np.arange(2))
# plt.ylabel('Actual')
# plt.xlabel('Predicted')
# plt.title('Confusion Matrix')
# plt.show()

Tags: model-type: classification, model-workflow: model evaluation, plot-type: line, plot-type: eval, level: beginner, purpose: showcase

Total running time of the script: (0 minutes 1.437 seconds)

Related examples

plot_decileplot_script with examples

plot_aucplot_script with examples

plot_cumulative_gain with examples

plot_lift with examples

Gallery generated by Sphinx-Gallery