.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_auto_examples_plot_scoring.py>` to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_plot_scoring.py:


==============================
Scoring Time Series Estimators
==============================

This example demonstrates some of the caveats / issues that arise when trying
to calculate performance scores for time series estimators. This pipeline has
been designed to evaluate performance using segments (not series) as
instances of the data.


.. code-block:: default


    # Author: David Burns
    # License: BSD

    import itertools

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import f1_score, confusion_matrix, make_scorer
    from sklearn.model_selection import train_test_split, cross_validate
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    from seglearn.datasets import load_watch
    from seglearn.pipe import Pype
    from seglearn.transform import FeatureRep, Segment


CONFUSION PLOT
#############################################


.. code-block:: default


    def plot_confusion_matrix(cm, classes, normalize=True, cmap=plt.cm.Blues):
        """ Plots a confusion matrix, optionally row-normalized. """
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()


SETUP
#############################################
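Before setting up the pipeline, it helps to see why scoring happens at the
segment level: the ``Segment`` step expands each series into overlapping
fixed-width windows that all inherit their parent series' label, so the number
of scoring instances is the segment count, not the series count. A minimal
pure-NumPy sketch of this idea (the ``width`` and ``step`` values here are
illustrative, not seglearn's defaults):

.. code-block:: python

    import numpy as np

    def segment_series(X, y, width=4, step=2):
        """Expand each series into fixed-width, overlapping segments.

        Every segment inherits the label of its parent series. This is a
        toy sketch of the segmentation concept, not seglearn's Segment.
        """
        X_seg, y_seg = [], []
        for series, label in zip(X, y):
            for start in range(0, len(series) - width + 1, step):
                X_seg.append(series[start:start + width])
                y_seg.append(label)
        return np.asarray(X_seg), np.asarray(y_seg)

    # two series of unequal length, one label each
    X = [np.arange(10), np.arange(7)]
    y = [0, 1]
    X_seg, y_seg = segment_series(X, y)
    print(len(X), "series ->", len(X_seg), "segments")

Two series become six segments here, which is why segment-level ``y_true``
values must be generated before any sklearn metric can be applied.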
.. code-block:: default


    # load the data
    data = load_watch()
    X = data['X']
    y = data['y']

    # create a feature representation pipeline
    steps = [('seg', Segment()),
             ('features', FeatureRep()),
             ('scaler', StandardScaler()),
             ('rf', RandomForestClassifier(n_estimators=20))]
    pipe = Pype(steps)

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                        random_state=42)


OPTION 1: Use the Pype score method
#############################################


.. code-block:: default


    pipe.fit(X_train, y_train)
    score = pipe.score(X_test, y_test)
    print("Accuracy score: ", score)


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    Accuracy score:  0.7556131260794473


OPTION 2: generate true and predicted target values for the segments
#####################################################################


.. code-block:: default


    y_true, y_pred = pipe.transform_predict(X_test, y_test)

    # use any of the sklearn scorers
    f1_macro = f1_score(y_true, y_pred, average='macro')
    print("F1 score: ", f1_macro)

    cm = confusion_matrix(y_true, y_pred)
    plot_confusion_matrix(cm, data['y_labels'])


.. image:: /auto_examples/images/sphx_glr_plot_scoring_001.png
    :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    F1 score:  0.7683103625934831


OPTION 3: scoring during model selection
#########################################
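As a mental model for what follows: cross-validation just calls ``fit`` and
``score`` once per fold and collects the results. A bare-bones sketch of that
loop, using a hypothetical majority-class estimator (the names below are
illustrative, not sklearn internals):

.. code-block:: python

    import numpy as np

    class MajorityClassifier:
        """Hypothetical stand-in estimator: predicts the most common training label."""
        def fit(self, X, y):
            values, counts = np.unique(y, return_counts=True)
            self.majority_ = values[np.argmax(counts)]
            return self

        def score(self, X, y):
            return float(np.mean(np.asarray(y) == self.majority_))

    def manual_cross_validate(est, X, y, cv=4):
        """Score est on each held-out fold, mirroring the core of cross_validate."""
        X, y = np.asarray(X), np.asarray(y)
        folds = np.array_split(np.arange(len(X)), cv)
        scores = []
        for test_idx in folds:
            train_idx = np.setdiff1d(np.arange(len(X)), test_idx)
            est.fit(X[train_idx], y[train_idx])
            scores.append(est.score(X[test_idx], y[test_idx]))
        return scores

    X = np.arange(12).reshape(-1, 1)
    y = np.array([0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0])
    scores = manual_cross_validate(MajorityClassifier(), X, y, cv=4)
    print(scores)

Because the loop only ever sees ``est.score``, any scoring beyond the
estimator's single built-in score has to be wired in separately, which is the
limitation explored next.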
.. code-block:: default


    # model selection using the built-in score method for the final estimator
    cv_scores = cross_validate(pipe, X, y, cv=4, return_train_score=True)
    print("CV Scores: ", pd.DataFrame(cv_scores))

    # model selection with scoring functions / dictionaries
    #
    # unfortunately, this is not possible within the current framework, due to how
    # scoring is implemented within the model_selection functions / classes of
    # sklearn. running the code below will cause an error, because the
    # model_selection functions / classes do not have access to y_true for the
    # segments
    #
    # >>> scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    # >>> cv_scores = cross_validate(pipe, X, y, cv=4, return_train_score=True, scoring=scoring)
    #
    # workarounds for this issue are outlined below


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    CV Scores:     fit_time  score_time  test_score  train_score
    0  0.446275    0.076087    0.784972     0.999708
    1  0.502541    0.079094    0.842857     1.000000
    2  0.466788    0.069131    0.774956     1.000000
    3  0.431198    0.071541    0.770619     1.000000


SCORING WORKAROUND 1: USE ANOTHER SCORER FUNCTION
##################################################


.. code-block:: default


    # ``Pype`` can be initialized with a scorer callable made with
    # sklearn.metrics.make_scorer
    # this can be used to cross_validate or grid search with any one score

    scorer = make_scorer(f1_score, average='macro')
    pipe = Pype(steps, scorer=scorer)

    cv_scores = cross_validate(pipe, X, y, cv=4, return_train_score=True)
    print("CV F1 Scores: ", pd.DataFrame(cv_scores))


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    CV F1 Scores:     fit_time  score_time  test_score  train_score
    0  0.431416    0.073237    0.824988          1.0
    1  0.467504    0.068295    0.841483          1.0
    2  0.446201    0.069201    0.791326          1.0
    3  0.435928    0.070309    0.797336          1.0


SCORING WORKAROUND 2: WORK OUTSIDE THE PIPELINE
#################################################
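The multi-metric scoring this workaround enables boils down to deriving
several numbers from the same set of predictions. As background, here is a
hedged sketch of macro-averaged precision / recall / F1 computed directly from
a confusion matrix in pure NumPy, on tiny made-up predictions (sklearn's own
metric functions are what the example actually uses):

.. code-block:: python

    import numpy as np

    def macro_scores(y_true, y_pred, n_classes):
        """Compute macro precision / recall / F1 from a confusion matrix.

        cm[i, j] counts instances of true class i predicted as class j.
        Macro averaging gives every class equal weight in the mean.
        """
        cm = np.zeros((n_classes, n_classes), dtype=float)
        for t, p in zip(y_true, y_pred):
            cm[t, p] += 1
        tp = np.diag(cm)
        precision = tp / np.maximum(cm.sum(axis=0), 1)  # column sums: predicted totals
        recall = tp / np.maximum(cm.sum(axis=1), 1)     # row sums: true totals
        f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
        return precision.mean(), recall.mean(), f1.mean()

    y_true = [0, 0, 1, 1, 2, 2]
    y_pred = [0, 1, 1, 1, 2, 0]
    p, r, f = macro_scores(y_true, y_pred, n_classes=3)

Once segment-level ``y_true`` and ``y_pred`` exist outside the pipeline, any
number of such metrics can be computed in one pass, which is exactly what the
``scoring`` list below asks ``cross_validate`` to do.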
.. code-block:: default


    # if you want to have multiple scores computed, the only way is as follows:
    #
    # first transform the time series data into segments, then use an ordinary
    # sklearn Pipeline on the segmented data
    #
    # the disadvantage of this approach is that the parameters of the
    # segmentation cannot be optimized

    segmenter = Segment()
    X_seg, y_seg, _ = segmenter.fit_transform(X, y)

    clf = Pipeline([('features', FeatureRep()),
                    ('scaler', StandardScaler()),
                    ('rf', RandomForestClassifier())])

    scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    cv_scores = cross_validate(clf, X_seg, y_seg, cv=4, return_train_score=False,
                               scoring=scoring)
    print("CV Scores (workaround): ", pd.DataFrame(cv_scores))


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none

    CV Scores (workaround):     fit_time  score_time  test_accuracy  test_precision_macro  test_recall_macro  test_f1_macro
    0  1.312680    0.081219       0.813675              0.829951           0.827524       0.823218
    1  1.308434    0.079772       0.816082              0.840500           0.834377       0.831517
    2  1.313067    0.081143       0.810950              0.838272           0.820251       0.822046
    3  1.310060    0.090736       0.799829              0.822963           0.818255       0.816566


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes  12.474 seconds)


.. _sphx_glr_download_auto_examples_plot_scoring.py:


.. only :: html

 .. container:: sphx-glr-footer
    :class: sphx-glr-footer-example


  .. container:: sphx-glr-download sphx-glr-download-python

     :download:`Download Python source code: plot_scoring.py <plot_scoring.py>`


  .. container:: sphx-glr-download sphx-glr-download-jupyter

     :download:`Download Jupyter notebook: plot_scoring.ipynb <plot_scoring.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_