Source code for seglearn.feature_functions

"""
This module has functions or callable objects that can be used to compute features from segmented
time series data

Sets of these functions or callables can be passed in a dictionary object to initialize the
``FeatureRep`` transformer.

All functions follow the same template and process a single segmented time series instance:

    >>> def compute_feature(X):
    >>>     F = np.mean(X, axis = 1)
    >>>     return F
    X : array-like shape [n_segments, segment_width, n_variables]
    F : array-like [n_segments, n_features]
    The number of features returned (n_features) must be >= 1

    .. note:: ``np.atleast_3d`` is used if accessing the third dimension, as some datasets will
       have only a single time series variable. See ``hist`` as an example.

See hist for an example of a callable object

Examples
--------
>>> from seglearn.feature_functions import all_features
>>> from seglearn.transform import FeatureRep
>>> FeatureTransform = FeatureRep(features=all_features())

"""

# Author: David Burns
# License: BSD

import numpy as np
from scipy import stats


def base_features():
    """ Returns a dictionary mapping feature names to a basic set of feature functions from this
    module, suitable for segmented time series data """
    # ordered (name, function) pairs; the dictionary keeps this insertion order
    named_functions = (
        ('mean', mean),
        ('median', median),
        ('abs_energy', abs_energy),
        ('std', std),
        ('var', var),
        ('min', minimum),
        ('max', maximum),
        ('skew', skew),
        ('kurt', kurt),
        ('mse', mse),
        ('mnx', mean_crossings),
    )
    return dict(named_functions)


def all_features():
    """ Returns a dictionary mapping feature names to every feature in the module

    Callable-object features (``hist``, ``zero_crossing``, ``slope_sign_changes``,
    ``willison_amplitude``) are instantiated with their default arguments.

    .. note:: Some of the features (hist4, corr) are relatively expensive to compute
    """
    # ordered (name, callable) pairs; the dictionary keeps this insertion order
    named_functions = [
        ('mean', mean),
        ('median', median),
        ('gmean', gmean),
        ('hmean', hmean),
        ('vec_sum', vec_sum),
        ('abs_sum', abs_sum),
        ('abs_energy', abs_energy),
        ('std', std),
        ('var', var),
        ('mad', median_absolute_deviation),
        ('variation', variation),
        ('min', minimum),
        ('max', maximum),
        ('skew', skew),
        ('kurt', kurt),
        ('mean_diff', mean_diff),
        ('mean_abs_diff', means_abs_diff),
        ('mse', mse),
        ('mnx', mean_crossings),
        ('hist4', hist()),
        ('corr', corr2),
        ('mean_abs_value', mean_abs),
        ('zero_crossings', zero_crossing()),
        ('slope_sign_changes', slope_sign_changes()),
        ('waveform_length', waveform_length),
        ('emg_var', emg_var),
        ('root_mean_square', root_mean_square),
        ('willison_amplitude', willison_amplitude()),
    ]
    return dict(named_functions)


def hudgins_features(threshold=0):
    """ Returns a dictionary of Hudgins' time domain features used for EMG time series
    classification

    ``threshold`` is passed to the ``zero_crossing`` and ``slope_sign_changes`` callables.
    """
    features = {}
    features['mean_abs_value'] = mean_abs
    features['zero_crossings'] = zero_crossing(threshold)
    features['slope_sign_changes'] = slope_sign_changes(threshold)
    features['waveform_length'] = waveform_length
    return features


def emg_features(threshold=0):
    """ Returns a dictionary of popular features used for EMG time series classification

    ``threshold`` is passed to the ``zero_crossing``, ``slope_sign_changes`` and
    ``willison_amplitude`` callables.
    """
    # the first four entries are exactly the Hudgins set, in the same order
    features = hudgins_features(threshold)
    features['integrated_emg'] = abs_sum
    features['emg_var'] = emg_var
    features['simple square integral'] = abs_energy
    features['root_mean_square'] = root_mean_square
    features['willison_amplitude'] = willison_amplitude(threshold)
    return features


def mean(X):
    """ arithmetic mean taken along axis 1 (the segment axis) for each variable """
    segment_mean = np.mean(X, axis=1)
    return segment_mean


def median(X):
    """ median taken along axis 1 (the segment axis) for each variable """
    segment_median = np.median(X, axis=1)
    return segment_median


def gmean(X):
    """ geometric mean along axis 1 (the segment axis) for each variable, via
    ``scipy.stats.gmean`` """
    geometric_mean = stats.gmean(X, axis=1)
    return geometric_mean


def hmean(X):
    """ harmonic mean along axis 1 (the segment axis) for each variable, via
    ``scipy.stats.hmean`` """
    harmonic_mean = stats.hmean(X, axis=1)
    return harmonic_mean


def vec_sum(X):
    """ sum of the samples along axis 1 (the segment axis) for each variable """
    total = np.sum(X, axis=1)
    return total


def abs_sum(X):
    """ sum of the absolute sample values along axis 1 (the segment axis) for each variable """
    magnitudes = np.abs(X)
    return np.sum(magnitudes, axis=1)


def abs_energy(X):
    """ sum of the squared sample values along axis 1 (the segment axis) for each variable """
    squared = X * X
    return np.sum(squared, axis=1)


def std(X):
    """ standard deviation along axis 1 (the segment axis) for each variable, computed with
    ``np.std`` and its default arguments """
    spread = np.std(X, axis=1)
    return spread


def var(X):
    """ variance along axis 1 (the segment axis) for each variable, computed with ``np.var``
    and its default arguments """
    variance = np.var(X, axis=1)
    return variance


def median_absolute_deviation(X):
    """ median absolute deviation along axis 1 (the segment axis) for each variable

    Uses ``scipy.stats.median_abs_deviation`` when the installed SciPy provides it, and falls
    back to ``scipy.stats.median_absolute_deviation`` otherwise.
    """
    # NOTE(review): the two SciPy functions may apply different default scale factors, so
    # results could depend on the installed SciPy version - confirm
    mad_function = getattr(stats, 'median_abs_deviation', None)
    if mad_function is None:
        mad_function = stats.median_absolute_deviation
    return mad_function(X, axis=1)


def variation(X):
    """ coefficient of variation along axis 1 (the segment axis) for each variable, via
    ``scipy.stats.variation`` """
    coefficient = stats.variation(X, axis=1)
    return coefficient


def minimum(X):
    """ smallest sample value along axis 1 (the segment axis) for each variable """
    lowest = np.min(X, axis=1)
    return lowest


def maximum(X):
    """ largest sample value along axis 1 (the segment axis) for each variable """
    highest = np.max(X, axis=1)
    return highest


def skew(X):
    """ skewness along axis 1 (the segment axis) for each variable, via ``scipy.stats.skew``
    with its default arguments """
    skewness = stats.skew(X, axis=1)
    return skewness


def kurt(X):
    """ kurtosis along axis 1 (the segment axis) for each variable, via
    ``scipy.stats.kurtosis`` with its default arguments """
    kurtosis = stats.kurtosis(X, axis=1)
    return kurtosis


def mean_diff(X):
    """ mean of the first differences between consecutive samples along axis 1 (the segment
    axis) for each variable """
    steps = np.diff(X, axis=1)
    return np.mean(steps, axis=1)


def means_abs_diff(X):
    """ mean of the absolute first differences between consecutive samples along axis 1 (the
    segment axis) for each variable """
    steps = np.diff(X, axis=1)
    step_sizes = np.abs(steps)
    return np.mean(step_sizes, axis=1)


def mse(X):
    """ mean spectral energy for each variable: the mean of the squared FFT magnitudes, with
    the FFT taken along axis 1 (the segment axis) """
    spectrum = np.fft.fft(X, axis=1)
    energy = np.square(np.abs(spectrum))
    return np.mean(energy, axis=1)


def mean_crossings(X):
    """ Computes number of mean crossings for each variable in a segmented time series

    A crossing is counted for every pair of consecutive samples that lie on opposite sides of
    the mean of their own segment and variable. A sample exactly equal to the mean is treated as
    not above it.

    X : array-like shape [n_segments, segment_width] or [n_segments, segment_width, n_variables]
    Returns a float array of shape [n_segments, n_variables]
    """
    X = np.atleast_3d(X)
    # compare against the per-segment, per-variable mean (not against zero)
    above = X > np.mean(X, axis=1, keepdims=True)
    # consecutive samples on opposite sides of the mean mark one crossing
    crossed = above[:, :-1] != above[:, 1:]
    return np.count_nonzero(crossed, axis=1).astype(float)


class hist(object):
    """ callable computing a density histogram for each variable of every segment

    ``bins`` is the number of histogram bins per variable and must be at least 2. Calling the
    object returns an array of shape [n_segments, n_variables * bins], where the ``bins`` values
    of each variable are stored contiguously.

    .. note:: this feature is expensive to compute with the current implementation
    """

    def __init__(self, bins=4):
        if bins < 2:
            raise ValueError("hist requires bins >= 2")
        self.bins = bins

    def __call__(self, X):
        X = np.atleast_3d(X)
        n_segments = X.shape[0]
        n_variables = X.shape[2]
        n_bins = self.bins
        out = np.zeros((n_segments, n_variables * n_bins))
        for seg in range(n_segments):
            for variable in range(n_variables):
                densities = np.histogram(X[seg, :, variable], bins=n_bins, density=True)[0]
                # each variable owns a contiguous run of n_bins columns
                start = variable * n_bins
                out[seg, start:start + n_bins] = densities
        return out

    def __repr__(self):
        return "%s(bins=%s)" % (type(self).__name__, self.bins)


def corr2(X):
    """ computes correlations between all variable pairs in a segmented time series

    For every segment the correlation coefficients between the variables are computed, and the
    strictly upper triangular entries (one per variable pair) are returned.

    X : array-like shape [n_segments, segment_width, n_variables]
    Returns an array of shape [n_segments, n_variables * (n_variables - 1) / 2]. For a single
    variable there are no pairs, and a 1-D array of zeros of length n_segments is returned.

    .. note:: this feature is expensive to compute with the current implementation, and cannot be
    used with univariate time series
    """
    X = np.atleast_3d(X)
    N = X.shape[0]
    D = X.shape[2]

    if D == 1:
        return np.zeros(N, dtype=float)

    pair_index = np.triu_indices(D, k=1)
    r = np.zeros((N, len(pair_index[0])))
    for i in range(N):
        # X[i] is [segment_width, n_variables]: the variables are the columns, so rowvar=False
        # is needed for a D x D correlation matrix
        rmat = np.corrcoef(X[i], rowvar=False)
        r[i] = rmat[pair_index]
    return r


def mean_abs(X):
    """ mean of the absolute sample values along axis 1 (the segment axis) for each variable """
    magnitudes = np.abs(X)
    return np.mean(magnitudes, axis=1)


class zero_crossing(object):
    """ number of zero crossings among two consecutive samples above a certain threshold for each
    variable in the segmented time series

    A pair of consecutive samples counts as a crossing when the product of the two samples is
    strictly negative (they have opposite signs) and their absolute difference is at least
    ``threshold``. With the default ``threshold=0`` every sign change is counted.

    Calling the object reduces axis 1 (the segment axis) and returns the counts with the dtype
    of the input array.
    """

    def __init__(self, threshold=0):
        self.threshold = threshold

    def __call__(self, X):
        X = np.asarray(X)
        # strictly negative product <=> the two neighbours lie on opposite sides of zero
        sign_changed = X[:, :-1] * X[:, 1:] < 0
        large_enough = np.abs(np.diff(X, axis=1)) >= self.threshold
        # both conditions are required; testing only the masked difference against the
        # threshold would count every pair whenever threshold <= 0
        return np.sum(sign_changed & large_enough, axis=1, dtype=X.dtype)

    def __repr__(self):
        return "%s(threshold=%s)" % (self.__class__.__name__, self.threshold)


class slope_sign_changes(object):
    """ number of slope sign changes among three consecutive samples for each variable in the
    segmented time series

    For every interior sample the product of its difference to the previous sample and its
    difference to the next sample is formed; the sample is counted when that product is at least
    ``threshold``. Counts are returned with the dtype of the input array.
    """

    def __init__(self, threshold=0):
        self.threshold = threshold

    def __call__(self, X):
        previous = X[:, :-2]
        middle = X[:, 1:-1]
        following = X[:, 2:]
        # positive when the middle sample is a local extremum of the three
        product = (middle - previous) * (middle - following)
        counted = product >= self.threshold
        return np.sum(counted, axis=1, dtype=X.dtype)

    def __repr__(self):
        return "%s(threshold=%s)" % (type(self).__name__, self.threshold)


def waveform_length(X):
    """ cumulative length of the waveform over a segment for each variable: the sum of the
    absolute differences between consecutive samples along axis 1 (the segment axis) """
    steps = np.diff(X, axis=1)
    step_sizes = np.abs(steps)
    return np.sum(step_sizes, axis=1)


def root_mean_square(X):
    """ root mean square for each variable: square root of the sum of squares along axis 1 (the
    segment axis) divided by the segment width """
    n_samples = X.shape[1]
    mean_square = np.sum(X * X, axis=1) / n_samples
    return np.sqrt(mean_square)


def emg_var(X):
    """ variance assuming a mean of zero for each variable: the sum of squares along axis 1 (the
    segment axis) divided by (segment width - 1) """
    degrees_of_freedom = X.shape[1] - 1
    energy = np.sum(X * X, axis=1)
    return energy / degrees_of_freedom


class willison_amplitude(object):
    """ the Willison amplitude for each variable in the segmented time series

    Counts, along axis 1 (the segment axis), the pairs of consecutive samples whose absolute
    difference is at least ``threshold``.
    """

    def __init__(self, threshold=0):
        self.threshold = threshold

    def __call__(self, X):
        step_sizes = np.abs(np.diff(X, axis=1))
        return np.sum(step_sizes >= self.threshold, axis=1)

    def __repr__(self):
        return "%s(threshold=%s)" % (self.__class__.__name__, self.threshold)