plot_evalplot_script with examples

An example showing the evalplot function used with scikit-learn classifiers.

 9 # Authors: The scikit-plots developers
10 # SPDX-License-Identifier: BSD-3-Clause

Import scikit-plots

14 import scikitplot.snsx as sp
18 import matplotlib.pyplot as plt
19 import numpy as np; np.random.seed(0)  # reproducibility
20 import pandas as pd
21
22 from sklearn.datasets import (
23     load_breast_cancer as data_2_classes,
24     # load_iris as data_3_classes,
25 )
26 from sklearn.linear_model import LogisticRegression
27 from sklearn.model_selection import train_test_split

Load the data. To use the three-class data instead, uncomment the data_3_classes import above and call: X, y = data_3_classes(return_X_y=True, as_frame=False)

33 X, y = data_2_classes(return_X_y=True, as_frame=False)  # load_breast_cancer, imported under this alias
34 X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.5, random_state=0)  # half for training, half for validation; fixed seed
35 np.unique(y)  # the output shown below lists the distinct labels
array([0, 1])

Create and fit a LogisticRegression model

39 model = (
40     LogisticRegression(max_iter=int(1e5), random_state=0)
41     .fit(X_train, y_train)
42 )
43 # Predict class probabilities on the validation split
44 y_val_prob = model.predict_proba(X_val)
45 # Collect the ground truth and the scores in a DataFrame for evalplot
46 df = pd.DataFrame({
47     "y_true": y_val==1,  # boolean: True where the validation label equals 1
48     "y_score": y_val_prob[:, 1],  # second predict_proba column (class 1, given labels [0, 1])
49     # "y_true": np.random.normal(0.5, 0.1, 100).round(),
50     # "y_score": np.random.normal(0.5, 0.15, 100),
51     # "hue": np.random.normal(0.5, 0.4, 100).round(),
52 })
57 p = sp.evalplot(
58     df,
59     x="y_true",  # df column with the boolean ground truth
60     y="y_score",  # df column with the class-1 scores
61     kind="all",  # the resulting figure is captioned "Classification Report, Confusion Matrix"
62 )
Classification Report, Confusion Matrix
66 p = sp.evalplot(
67     df,
68     x="y_true",
69     y="y_score",
70     kind="classification_report",  # the resulting figure is captioned "Classification Report"
71     text_kws={'fontsize': 16},  # NOTE(review): presumably styles the report text; confirm in the evalplot docs
72 )
Classification Report
75 p = sp.evalplot(
76     df,
77     x="y_true",
78     y="y_score",
79     kind="confusion_matrix",  # the resulting figure is captioned "Confusion Matrix"
80 )
Confusion Matrix
83 fig, ax = plt.subplots(figsize=(8, 6))  # NOTE(review): fig/ax are not passed to evalplot; presumably it draws on the current figure, confirm
84 p = sp.evalplot(
85     df,
86     x="y_true",
87     y="y_score",
88     kind="all",
89     # legend=True,
90 )
Classification Report, Confusion Matrix
93 import numpy as np
94 import matplotlib.pyplot as plt
95 from sklearn.datasets import make_classification
96 from sklearn.ensemble import RandomForestClassifier
97 from sklearn.model_selection import train_test_split
98 from sklearn.metrics import classification_report, confusion_matrix

Create a synthetic dataset with 15 classes

103 X, y = make_classification(n_samples=5000, n_features=20,
104                            n_classes=15, n_informative=15,
105                            n_redundant=5, random_state=0)  # 15 informative + 5 redundant = all 20 features
106
107 # Wrap in a DataFrame only to preview the data; the model is fit on X and y directly
108 data = pd.DataFrame(X)
109 data['target'] = y  # labels appended as an extra column (21 columns in the printed preview)
110
111 print(data.head())
          0         1         2  ...        18         19  target
0 -3.249907 -7.609039  3.904290  ...  0.613949  -3.092426       7
1 -1.653727  3.553595 -0.281940  ... -2.332832  -4.067393       3
2 -1.057158  7.627822  0.181669  ...  0.713138   6.743130      11
3  0.734427 -2.012334 -1.798327  ... -4.232499   1.357491      11
4  6.516446 -8.325342 -1.350152  ...  1.410434  13.547467       1

[5 rows x 21 columns]

Split the dataset into training and testing sets

115 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)  # 20% held out (1000 of 5000 rows); rebinds X_train/y_train from the first example

Initialize the Random Forest Classifier

119 rf_model = RandomForestClassifier(n_estimators=100, random_state=0)  # 100 trees, fixed seed
120
121 # Train the model on the training split
122 rf_model.fit(X_train, y_train)
RandomForestClassifier(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Make predictions on the test set

126 y_pred = rf_model.predict(X_test)  # predicted class labels (predict, not predict_proba)
129 fig, ax = plt.subplots(figsize=(8, 8))  # NOTE(review): fig/ax are not passed to evalplot; presumably it draws on the current figure, confirm
130 p = sp.evalplot(
131     x=y_test,  # arrays passed directly here, instead of a DataFrame plus column names
132     y=y_pred,
133     kind="all",
134     # legend=True,
135 )
Classification Report, Confusion Matrix
138 import scikitplot as sp  # NOTE: rebinds `sp`, which previously referred to scikitplot.snsx
139
140 # Stack the same figure twice (orient='x') into one combined figure.
140 # NOTE(review): no output path is passed here; confirm whether sp.stack writes a file.
141 figs = sp.stack(  # experimental
142     p.figure,
143     p.figure,
144     orient='x',
145     **{'figsize': (12, 8)}  # same as passing figsize=(12, 8)
146 )
plot evalplot script

Generate a classification report

150 print(classification_report(y_test, y_pred))  # per-class precision / recall / f1-score / support table
151
152 # Generate a confusion matrix from the same labels and predictions
153 conf_matrix = confusion_matrix(y_test, y_pred)  # rows are true classes: each row sums to that class's support
154 print(conf_matrix)
              precision    recall  f1-score   support

           0       0.48      0.48      0.48        64
           1       0.44      0.48      0.46        61
           2       0.56      0.52      0.54        65
           3       0.55      0.67      0.60        61
           4       0.53      0.62      0.57        76
           5       0.64      0.45      0.53        80
           6       0.64      0.62      0.63        72
           7       0.57      0.66      0.61        56
           8       0.62      0.62      0.62        65
           9       0.54      0.57      0.56        68
          10       0.67      0.60      0.63        70
          11       0.54      0.55      0.54        69
          12       0.63      0.53      0.58        77
          13       0.56      0.45      0.50        67
          14       0.41      0.53      0.46        49

    accuracy                           0.56      1000
   macro avg       0.56      0.56      0.55      1000
weighted avg       0.56      0.56      0.56      1000

[[31  2  3  3  2  3  3  2  2  4  1  3  0  1  4]
 [ 5 29  3  3  1  2  2  2  2  2  1  3  2  3  1]
 [ 0  2 34  3  3  1  0  3  2  1  3  1  5  2  5]
 [ 3  1  1 41  1  1  0  1  2  1  0  2  1  2  4]
 [ 5  4  1  3 47  1  1  2  2  2  1  2  3  2  0]
 [ 3  5  1  3  7 36  0  6  2  5  1  9  0  1  1]
 [ 0  1  2  3  7  1 45  2  0  1  0  4  2  1  3]
 [ 1  0  1  2  3  3  3 37  1  1  0  0  1  3  0]
 [ 6  3  0  3  1  1  2  1 40  4  0  2  1  0  1]
 [ 1  1  1  4  3  2  3  0  1 39  4  1  0  1  7]
 [ 1  1  7  1  4  0  3  0  1  2 42  1  3  2  2]
 [ 1  4  3  0  2  2  4  0  2  5  1 38  3  1  3]
 [ 1  5  1  1  3  2  1  2  2  2  6  5 41  3  2]
 [ 5  4  3  0  3  1  2  5  4  3  2  0  0 30  5]
 [ 2  4  0  5  2  0  1  2  1  0  1  0  3  2 26]]

import seaborn as sns

159 # plt.figure(figsize=(12, 7))
160 # sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
161 #             xticklabels=np.arange(15), yticklabels=np.arange(15))
162 # plt.ylabel('Actual')
163 # plt.xlabel('Predicted')
164 # plt.title('Confusion Matrix')
165 # plt.show()

Tags: model-type: classification model-workflow: model evaluation plot-type: line plot-type: eval level: beginner purpose: showcase

Total running time of the script: (0 minutes 3.563 seconds)

Related examples

plot_decileplot_script with examples

plot_decileplot_script with examples

plot_aucplot_script with examples

plot_aucplot_script with examples

plot_cumulative_gain with examples

plot_cumulative_gain with examples

plot_lift with examples

plot_lift with examples

Gallery generated by Sphinx-Gallery