.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_auto_examples_plot_model_selection1.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_plot_model_selection1.py:


==========================
Hyperparameter Selection 1
==========================

This example demonstrates how to do model selection in a feature representation
pipeline using a grid search.



.. image:: /auto_examples/images/sphx_glr_plot_model_selection1_001.png
    :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    /home/david/Code/seglearn/examples/plot_model_selection1.py:72: UserWarning: Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.
      plt.show()




|


.. code-block:: default


    # Author: David Burns
    # License: BSD

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GridSearchCV, GroupKFold
    from sklearn.preprocessing import StandardScaler

    import seglearn as sgl


    def plot_grid_search(cv_results, grid_param_1, grid_param_2, name_param_1, name_param_2):
        """Plot the mean cross-validation score over a two-parameter grid.

        One curve is drawn per value of ``grid_param_2``, with ``grid_param_1``
        on the x-axis. The figure is created here but not shown; the caller is
        expected to call ``plt.show()``.

        Parameters
        ----------
        cv_results : dict
            Grid search results; only the ``'mean_test_score'`` entry is read.
        grid_param_1 : list
            Values of the parameter plotted along the x-axis.
        grid_param_2 : list
            Values of the parameter that selects the curve.
        name_param_1 : str
            Label for the x-axis.
        name_param_2 : str
            Prefix used in each curve's legend entry.
        """
        # plotting grid results from David Alvarez on Stack Overflow

        # Arrange the flat list of mean test scores as a
        # (len(grid_param_2), len(grid_param_1)) table.
        # NOTE: this assumes the scores are ordered with grid_param_1 varying
        # fastest and that the grid holds exactly
        # len(grid_param_1) * len(grid_param_2) candidates (any other searched
        # parameter has a single value) - otherwise the reshape fails or the
        # curves are mislabelled.
        n_curves = len(grid_param_2)
        n_points = len(grid_param_1)
        scores_mean = np.array(cv_results['mean_test_score']).reshape(n_curves, n_points)

        # Plot Grid search scores
        _, ax = plt.subplots(1, 1)

        # Param1 is the X-axis, Param 2 is represented as a different curve (color line)
        for idx, val in enumerate(grid_param_2):
            ax.plot(grid_param_1, scores_mean[idx, :], '-o', label=name_param_2 + ': ' + str(val))

        ax.set_title("Grid Search Scores", fontsize=20, fontweight='bold')
        ax.set_xlabel(name_param_1, fontsize=16)
        ax.set_ylabel('CV Average Score', fontsize=16)
        ax.legend(loc="best", fontsize=15)
        ax.grid(True)


    # load the data
    data = sgl.load_watch()
    X = data['X']
    y = data['y']
    g = data['subject']

    # use subject id to group folds
    splitter = GroupKFold(n_splits=3)
    cv = splitter.split(X, y, groups=g)

    # create a feature representation pipeline
    pipe = sgl.Pype([('seg', sgl.Segment()),
                     ('features', sgl.FeatureRep()),
                     ('scaler', StandardScaler()),
                     ('rf', RandomForestClassifier())])

    # create a parameter dictionary using the sklearn API
    # note that a parameter fixed to a single value must still be given as a list
    par_grid = {'seg__width': [50, 100, 200],
                'seg__overlap': [0., 0.5],
                'rf__n_estimators': [20]}

    clf = GridSearchCV(pipe, par_grid, cv=cv)
    clf.fit(X, y)

    plot_grid_search(clf.cv_results_, par_grid['seg__width'], par_grid['seg__overlap'],
                     'width', 'overlap')
    plt.show()


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes  8.050 seconds)


.. _sphx_glr_download_auto_examples_plot_model_selection1.py:


.. only :: html

 .. container:: sphx-glr-footer
    :class: sphx-glr-footer-example



  .. container:: sphx-glr-download sphx-glr-download-python

     :download:`Download Python source code: plot_model_selection1.py <plot_model_selection1.py>`



  .. container:: sphx-glr-download sphx-glr-download-jupyter

     :download:`Download Jupyter notebook: plot_model_selection1.ipynb <plot_model_selection1.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_