plot_evalplot_script with examples#

An example showing the evalplot function used with a scikit-learn classifier.

 # Authors: The scikit-plots developers
 # SPDX-License-Identifier: BSD-3-Clause

Import scikit-plots

 import scikitplot.snsx as sp

 import matplotlib.pyplot as plt
 import numpy as np; np.random.seed(0)  # reproducibility
 import pandas as pd

 from sklearn.datasets import (
     load_breast_cancer as data_2_classes,
     # load_iris as data_3_classes,
 )
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split

Load the data. (To try a 3-class dataset instead, enable the `load_iris` import above and use `X, y = data_3_classes(return_X_y=True, as_frame=False)`.)

 # Load the two-class breast-cancer data as plain arrays (as_frame=False)
 X, y = data_2_classes(return_X_y=True, as_frame=False)
 # Hold out half of the samples for validation; the fixed seed keeps the split reproducible
 X_train, X_val, y_train, y_val = train_test_split(
     X,
     y,
     test_size=0.5,
     random_state=0,
 )
 # Show the distinct labels present in y
 np.unique(y)

array([0, 1])

Create an instance of the LogisticRegression

 # Fit a logistic-regression classifier on the training split
 model = (
     LogisticRegression(max_iter=int(1e5), random_state=0)
     .fit(X_train, y_train)
 )
 # Predict class probabilities for the validation split
 y_val_prob = model.predict_proba(X_val)
 # Create a DataFrame holding the true labels and the predicted scores
 df = pd.DataFrame({
     "y_true": y_val==1,  # boolean: True where the label is class 1 (labels here are 0/1)
     "y_score": y_val_prob[:, 1],  # second predict_proba column, i.e. the class-1 probability for 0/1 labels
     # "y_true": np.random.normal(0.5, 0.1, 100).round(),
     # "y_score": np.random.normal(0.5, 0.15, 100),
     # "hue": np.random.normal(0.5, 0.4, 100).round(),
 })

 # x is the ground-truth column of df, y the predicted-score column.
 # kind="all": presumably draws every available evaluation view at once — confirm against the evalplot docs
 p = sp.evalplot(
     df,
     x="y_true",
     y="y_score",
     kind="all",
 )

 # Classification report only; text_kws is passed with a larger font size
 p = sp.evalplot(
     df,
     x="y_true",
     y="y_score",
     kind="classification_report",
     text_kws={'fontsize': 16},
 )

 # Confusion matrix only
 p = sp.evalplot(
     df,
     x="y_true",
     y="y_score",
     kind="confusion_matrix",
 )

 # Create an 8x6 figure before plotting.
 # NOTE(review): fig/ax are not passed to evalplot below; presumably it draws on
 # the current figure — confirm, otherwise this figure stays empty.
 fig, ax = plt.subplots(figsize=(8, 6))
 p = sp.evalplot(
     df,
     x="y_true",
     y="y_score",
     kind="all",
     # legend=True,
 )

 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import classification_report, confusion_matrix

Create a synthetic dataset with 15 classes

 # Build a synthetic 15-class problem: 20 features = 15 informative + 5 redundant
 X, y = make_classification(
     n_samples=5000,
     n_features=20,
     n_informative=15,
     n_redundant=5,
     n_classes=15,
     random_state=0,
 )

 # Wrap the features in a DataFrame and attach the labels for a quick look (optional)
 data = pd.DataFrame(X).assign(target=y)

 print(data.head())

          0         1         2  ...        18         19  target
-3.249907 -7.609039  3.904290  ...  0.613949  -3.092426       7
-1.653727  3.553595 -0.281940  ... -2.332832  -4.067393       3
-1.057158  7.627822  0.181669  ...  0.713138   6.743130      11
0.734427 -2.012334 -1.798327  ... -4.232499   1.357491      11
6.516446 -8.325342 -1.350152  ...  1.410434  13.547467       1

[5 rows x 21 columns]

Split the dataset into training and testing sets

 # Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
 X_train, X_test, y_train, y_test = train_test_split(
     X,
     y,
     test_size=0.2,
     random_state=0,
 )

Initialize the Random Forest Classifier

 # 100 trees; random_state fixes the seed so the example is reproducible
 rf_model = RandomForestClassifier(n_estimators=100, random_state=0)

 # Train the model (kept as a bare expression so the fitted estimator's repr is displayed)
 rf_model.fit(X_train, y_train)

RandomForestClassifier(random_state=0)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Make predictions on the test set

 # Hard class predictions (not probabilities) for the held-out test set
 y_pred = rf_model.predict(X_test)

 # NOTE(review): fig/ax are not passed to evalplot below; presumably it draws on
 # the current figure — confirm, otherwise this figure stays empty.
 fig, ax = plt.subplots(figsize=(8, 8))
 # Here the true and predicted labels are passed directly as arrays, without a DataFrame
 p = sp.evalplot(
     x=y_test,
     y=y_pred,
     kind="all",
     # legend=True,
 )

 import scikitplot as sp

 # Stack two copies of the evalplot figure (orient='x') into one combined figure.
 # NOTE(review): whether this also writes an image file depends on sp.stack's
 # defaults — confirm. The API is marked experimental.
 figs = sp.stack(
     p.figure,
     p.figure,
     orient='x',
     figsize=(12, 8),
 )

Generate a classification report

 # Text report with per-class precision, recall, F1 and support on the test split
 print(classification_report(y_test, y_pred))

 # Generate a confusion matrix from the true and predicted test labels
 conf_matrix = confusion_matrix(y_test, y_pred)
 print(conf_matrix)

              precision    recall  f1-score   support

           0       0.48      0.48      0.48        64
           1       0.44      0.48      0.46        61
           2       0.56      0.52      0.54        65
           3       0.55      0.67      0.60        61
           4       0.53      0.62      0.57        76
           5       0.64      0.45      0.53        80
           6       0.64      0.62      0.63        72
           7       0.57      0.66      0.61        56
           8       0.62      0.62      0.62        65
           9       0.54      0.57      0.56        68
          10       0.67      0.60      0.63        70
          11       0.54      0.55      0.54        69
          12       0.63      0.53      0.58        77
          13       0.56      0.45      0.50        67
          14       0.41      0.53      0.46        49

    accuracy                           0.56      1000
   macro avg       0.56      0.56      0.55      1000
weighted avg       0.56      0.56      0.56      1000

[[31  2  3  3  2  3  3  2  2  4  1  3  0  1  4]
 [ 5 29  3  3  1  2  2  2  2  2  1  3  2  3  1]
 [ 0  2 34  3  3  1  0  3  2  1  3  1  5  2  5]
 [ 3  1  1 41  1  1  0  1  2  1  0  2  1  2  4]
 [ 5  4  1  3 47  1  1  2  2  2  1  2  3  2  0]
 [ 3  5  1  3  7 36  0  6  2  5  1  9  0  1  1]
 [ 0  1  2  3  7  1 45  2  0  1  0  4  2  1  3]
 [ 1  0  1  2  3  3  3 37  1  1  0  0  1  3  0]
 [ 6  3  0  3  1  1  2  1 40  4  0  2  1  0  1]
 [ 1  1  1  4  3  2  3  0  1 39  4  1  0  1  7]
 [ 1  1  7  1  4  0  3  0  1  2 42  1  3  2  2]
 [ 1  4  3  0  2  2  4  0  2  5  1 38  3  1  3]
 [ 1  5  1  1  3  2  1  2  2  2  6  5 41  3  2]
 [ 5  4  3  0  3  1  2  5  4  3  2  0  0 30  5]
 [ 2  4  0  5  2  0  1  2  1  0  1  0  3  2 26]]

Optionally, visualize the confusion matrix as a seaborn heatmap (requires `import seaborn as sns`):

 # plt.figure(figsize=(12, 7))
 # sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
 #             xticklabels=np.arange(15), yticklabels=np.arange(15))
 # plt.ylabel('Actual')
 # plt.xlabel('Predicted')
 # plt.title('Confusion Matrix')
 # plt.show()

Tags: model-type: classification model-workflow: model evaluation plot-type: line plot-type: eval level: beginner purpose: showcase

Total running time of the script: (0 minutes 3.563 seconds)

Related examples

plot_decileplot_script with examples

plot_aucplot_script with examples

plot_cumulative_gain with examples

plot_lift with examples

Gallery generated by Sphinx-Gallery

	n_estimators	100
	criterion	'gini'
	max_depth	None
	min_samples_split	2
	min_samples_leaf	1
	min_weight_fraction_leaf	0.0
	max_features	'sqrt'
	max_leaf_nodes	None
	min_impurity_decrease	0.0
	bootstrap	True
	oob_score	False
	n_jobs	None
	random_state	0
	verbose	0
	warm_start	False
	class_weight	None
	ccp_alpha	0.0
	max_samples	None
	monotonic_cst	None

plot_evalplot_script with examples#

This Page