plot_evalplot_script with examples
An example showing the evalplot function used with a scikit-learn classifier.
# Authors: The scikit-plots developers
# SPDX-License-Identifier: BSD-3-Clause
Import scikit-plot
import scikitplot.snsx as sp

import matplotlib.pyplot as plt
import numpy as np; np.random.seed(0)  # reproducibility
import pandas as pd

from sklearn.datasets import (
    load_breast_cancer as data_2_classes,
    # load_iris as data_3_classes,
)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
Load the data

# X, y = data_3_classes(return_X_y=True, as_frame=False)
X, y = data_2_classes(return_X_y=True, as_frame=False)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.5, random_state=0)
np.unique(y)
array([0, 1])
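As a quick sanity check (an optional addition, not part of the original script), the per-class sample counts can be inspected with NumPy:

# Count how many samples fall into each of the two classes
np.bincount(y)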
Create an instance of LogisticRegression and fit it to the training data
model = (
    LogisticRegression(max_iter=int(1e5), random_state=0)
    .fit(X_train, y_train)
)
# Perform predictions
y_val_prob = model.predict_proba(X_val)
# Create a DataFrame with predictions
df = pd.DataFrame({
    "y_true": y_val == 1,          # indicator for the target (positive) class
    "y_score": y_val_prob[:, 1],   # predicted probability of the target class
    # "y_true": np.random.normal(0.5, 0.1, 100).round(),
    # "y_score": np.random.normal(0.5, 0.15, 100),
    # "hue": np.random.normal(0.5, 0.4, 100).round(),
})
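For reference, hard class labels could be recovered from the predicted probabilities by thresholding at 0.5; this is only a minimal sketch, since evalplot below works directly from the scores in df:

# Illustrative only: convert probabilities to hard 0/1 predictions
y_val_pred = (y_val_prob[:, 1] >= 0.5).astype(int)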
p = sp.evalplot(
    df,
    x="y_true",
    y="y_score",
    kind="all",
)
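The returned object exposes the underlying Matplotlib figure (it is accessed as p.figure with sp.stack further below), so the plot could also be written to disk; the filename here is arbitrary:

# Assumes p.figure is a standard Matplotlib Figure, as its use with sp.stack below suggests
p.figure.savefig("evalplot_all.png", dpi=150, bbox_inches="tight")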

p = sp.evalplot(
    df,
    x="y_true",
    y="y_score",
    kind="classification_report",
    text_kws={'fontsize': 16},
)
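For comparison, scikit-learn's plain-text report gives the same per-class metrics; the sketch below assumes a 0.5 decision threshold on the scores:

from sklearn.metrics import classification_report

# Threshold the scores to hard labels, then print the textual report
print(classification_report(df["y_true"], df["y_score"] >= 0.5))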

p = sp.evalplot(
    df,
    x="y_true",
    y="y_score",
    kind="confusion_matrix",
)
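The raw counts can likewise be cross-checked with scikit-learn's confusion_matrix, again assuming the same 0.5 threshold:

from sklearn.metrics import confusion_matrix

# Rows are actual classes, columns are predicted classes
print(confusion_matrix(df["y_true"], df["y_score"] >= 0.5))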

fig, ax = plt.subplots(figsize=(8, 6))
p = sp.evalplot(
    df,
    x="y_true",
    y="y_score",
    kind="all",
    # legend=True,
)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
Create a synthetic dataset with 15 classes
X, y = make_classification(n_samples=5000, n_features=20,
                           n_classes=15, n_informative=15,
                           n_redundant=5, random_state=0)

# Convert to DataFrame for easier visualization (optional)
data = pd.DataFrame(X)
data['target'] = y

print(data.head())
          0         1         2  ...        18         19  target
0 -3.249907 -7.609039  3.904290  ...  0.613949  -3.092426       7
1 -1.653727  3.553595 -0.281940  ... -2.332832  -4.067393       3
2 -1.057158  7.627822  0.181669  ...  0.713138   6.743130      11
3  0.734427 -2.012334 -1.798327  ... -4.232499   1.357491      11
4  6.516446 -8.325342 -1.350152  ...  1.410434  13.547467       1

[5 rows x 21 columns]
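With make_classification and its default class weights the 15 classes should be roughly balanced; a quick count (an optional addition, not part of the original script) makes that explicit:

# Number of samples per class, ordered by class label
print(data['target'].value_counts().sort_index())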
Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
Initialize the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=0)

# Train the model
rf_model.fit(X_train, y_train)
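A quick overall accuracy check on the held-out split (an optional addition, using the standard estimator score method) gives a first impression of model quality before plotting:

# Mean accuracy on the test split
print(rf_model.score(X_test, y_test))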
Make predictions on the test set
y_pred = rf_model.predict(X_test)
fig, ax = plt.subplots(figsize=(8, 8))
p = sp.evalplot(
    x=y_test,
    y=y_pred,
    kind="all",
    # legend=True,
)

import scikitplot as sp

# Stack two figures side by side into one combined figure (experimental)
figs = sp.stack(  # experimental
    p.figure,
    p.figure,
    orient='x',
    **{'figsize': (12, 8)}
)

Generate a classification report
print(classification_report(y_test, y_pred))

# Generate a confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)
              precision    recall  f1-score   support

           0       0.48      0.48      0.48        64
           1       0.44      0.48      0.46        61
           2       0.56      0.52      0.54        65
           3       0.55      0.67      0.60        61
           4       0.53      0.62      0.57        76
           5       0.64      0.45      0.53        80
           6       0.64      0.62      0.63        72
           7       0.57      0.66      0.61        56
           8       0.62      0.62      0.62        65
           9       0.54      0.57      0.56        68
          10       0.67      0.60      0.63        70
          11       0.54      0.55      0.54        69
          12       0.63      0.53      0.58        77
          13       0.56      0.45      0.50        67
          14       0.41      0.53      0.46        49

    accuracy                           0.56      1000
   macro avg       0.56      0.56      0.55      1000
weighted avg       0.56      0.56      0.56      1000
[[31 2 3 3 2 3 3 2 2 4 1 3 0 1 4]
[ 5 29 3 3 1 2 2 2 2 2 1 3 2 3 1]
[ 0 2 34 3 3 1 0 3 2 1 3 1 5 2 5]
[ 3 1 1 41 1 1 0 1 2 1 0 2 1 2 4]
[ 5 4 1 3 47 1 1 2 2 2 1 2 3 2 0]
[ 3 5 1 3 7 36 0 6 2 5 1 9 0 1 1]
[ 0 1 2 3 7 1 45 2 0 1 0 4 2 1 3]
[ 1 0 1 2 3 3 3 37 1 1 0 0 1 3 0]
[ 6 3 0 3 1 1 2 1 40 4 0 2 1 0 1]
[ 1 1 1 4 3 2 3 0 1 39 4 1 0 1 7]
[ 1 1 7 1 4 0 3 0 1 2 42 1 3 2 2]
[ 1 4 3 0 2 2 4 0 2 5 1 38 3 1 3]
[ 1 5 1 1 3 2 1 2 2 2 6 5 41 3 2]
[ 5 4 3 0 3 1 2 5 4 3 2 0 0 30 5]
[ 2 4 0 5 2 0 1 2 1 0 1 0 3 2 26]]
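The diagonal of the confusion matrix holds the correctly classified counts per class, so per-class recall can be derived directly from it (a small NumPy sketch, not part of the original script):

# Correct predictions per class divided by the number of true samples per class
per_class_recall = conf_matrix.diagonal() / conf_matrix.sum(axis=1)
print(per_class_recall.round(2))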
import seaborn as sns

# plt.figure(figsize=(12, 7))
# sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
#             xticklabels=np.arange(15), yticklabels=np.arange(15))
# plt.ylabel('Actual')
# plt.xlabel('Predicted')
# plt.title('Confusion Matrix')
# plt.show()
Total running time of the script: (0 minutes 3.563 seconds)