.. only:: html
.. note::
:class: sphx-glr-download-link-note
Click :ref:`here <sphx_glr_download_auto_examples_plot_model_selection1.py>` to download the full example code
.. rst-class:: sphx-glr-example-title
.. _sphx_glr_auto_examples_plot_model_selection1.py:
==========================
Hyperparameter Selection 1
==========================
This example demonstrates how to perform model selection in a feature representation pipeline using a grid search.
.. image:: /auto_examples/images/sphx_glr_plot_model_selection1_001.png
:class: sphx-glr-single-img
.. rst-class:: sphx-glr-script-out
Out:
.. code-block:: none
/home/david/Code/seglearn/examples/plot_model_selection1.py:72: UserWarning: Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.
plt.show()
|
.. code-block:: default
# Author: David Burns
# License: BSD
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, GroupKFold
from sklearn.preprocessing import StandardScaler
import seglearn as sgl
def plot_grid_search(cv_results, grid_param_1, grid_param_2, name_param_1, name_param_2):
    """Plot the mean cross-validation score of a two-parameter grid search.

    Adapted from a plotting snippet by David Alvarez on Stack Overflow.

    Parameters
    ----------
    cv_results : mapping
        Grid search results; only the ``'mean_test_score'`` entry is read.
    grid_param_1 : sequence
        Parameter values placed along the x-axis.
    grid_param_2 : sequence
        Parameter values drawn as separate curves, one curve per value.
    name_param_1 : str
        Label for the x-axis.
    name_param_2 : str
        Prefix used for each legend entry.

    Notes
    -----
    The flat score array is reshaped to
    ``(len(grid_param_2), len(grid_param_1))``, so it must contain exactly
    ``len(grid_param_1) * len(grid_param_2)`` entries, ordered with
    ``grid_param_1`` varying fastest.

    The figure is created but not displayed; the caller is expected to call
    ``plt.show()`` (or save the figure) afterwards.
    """
    # One row per grid_param_2 value, one column per grid_param_1 value
    scores_mean = np.array(cv_results['mean_test_score'])
    scores_mean = scores_mean.reshape(len(grid_param_2), len(grid_param_1))

    # Plot grid search scores
    _, ax = plt.subplots(1, 1)

    # Param 1 is the x-axis, Param 2 is represented as a different curve (color line)
    for idx, val in enumerate(grid_param_2):
        ax.plot(grid_param_1, scores_mean[idx, :], '-o', label=name_param_2 + ': ' + str(val))

    ax.set_title("Grid Search Scores", fontsize=20, fontweight='bold')
    ax.set_xlabel(name_param_1, fontsize=16)
    ax.set_ylabel('CV Average Score', fontsize=16)
    ax.legend(loc="best", fontsize=15)
    ax.grid(True)
# load the data and pull out the samples, targets and subject ids
data = sgl.load_watch()
X, y, g = data['X'], data['y'], data['subject']

# folds are grouped by subject id
cv = GroupKFold(n_splits=3).split(X, y, groups=g)

# feature representation pipeline: segment -> features -> scaling -> classifier
steps = [
    ('seg', sgl.Segment()),
    ('features', sgl.FeatureRep()),
    ('scaler', StandardScaler()),
    ('rf', RandomForestClassifier()),
]
pipe = sgl.Pype(steps)

# parameter grid in the sklearn "<step>__<parameter>" format
# a parameter fixed to a single value must still be wrapped in a list
widths = [50, 100, 200]
overlaps = [0., 0.5]
par_grid = {
    'seg__width': widths,
    'seg__overlap': overlaps,
    'rf__n_estimators': [20],
}

clf = GridSearchCV(pipe, par_grid, cv=cv)
clf.fit(X, y)

# width on the x-axis, one curve per overlap value
plot_grid_search(clf.cv_results_, widths, overlaps, 'width', 'overlap')
plt.show()
.. rst-class:: sphx-glr-timing
**Total running time of the script:** ( 0 minutes 8.050 seconds)
.. _sphx_glr_download_auto_examples_plot_model_selection1.py:
.. only :: html
.. container:: sphx-glr-footer
:class: sphx-glr-footer-example
.. container:: sphx-glr-download sphx-glr-download-python
:download:`Download Python source code: plot_model_selection1.py <plot_model_selection1.py>`
.. container:: sphx-glr-download sphx-glr-download-jupyter
:download:`Download Jupyter notebook: plot_model_selection1.ipynb <plot_model_selection1.ipynb>`
.. only:: html
.. rst-class:: sphx-glr-signature
`Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_