Source code for seglearn.feature_functions

This module has functions or callable objects that can be used to compute features from segmented
time series data

Sets of these functions or callables can be passed in a dictionary object to initialize the
``FeatureRep`` transformer.

All functions follow the same template and process a single segmented time series instance:

    >>> def compute_feature(X):
    ...     F = np.mean(X, axis=1)
    ...     return F
    X : array-like shape [n_segments, segment_width, n_variables]
    F : array-like [n_segments, n_features]
    The number of features returned (n_features) must be >= 1

    .. note:: ``np.atleast_3d`` is used if accessing the third dimension, as some datasets will
    have only a single time series variable. See ``hist`` as an example.

See hist for an example of a callable object

>>> from seglearn.feature_functions import all_features
>>> from seglearn.transform import FeatureRep
>>> FeatureTransform = FeatureRep(features=all_features())


# Author: David Burns
# License: BSD

import numpy as np
from scipy import stats

def base_features():
    """Return a dictionary of basic features for segmented time series data.

    Each key is a feature name and each value is the callable in this module
    that computes that feature.
    """
    return {
        'mean': mean,
        'median': median,
        'abs_energy': abs_energy,
        'std': std,
        'var': var,
        'min': minimum,
        'max': maximum,
        'skew': skew,
        'kurt': kurt,
        'mse': mse,
        'mnx': mean_crossings,
    }
def all_features():
    """Return a dictionary holding every feature defined in this module.

    Callable-object features (``hist``, ``zero_crossing``,
    ``slope_sign_changes``, ``willison_amplitude``) are instantiated with
    their default arguments.

    .. note:: Some of the features (hist4, corr) are relatively expensive to compute
    """
    return {
        'mean': mean,
        'median': median,
        'gmean': gmean,
        'hmean': hmean,
        'vec_sum': vec_sum,
        'abs_sum': abs_sum,
        'abs_energy': abs_energy,
        'std': std,
        'var': var,
        'mad': median_absolute_deviation,
        'variation': variation,
        'min': minimum,
        'max': maximum,
        'skew': skew,
        'kurt': kurt,
        'mean_diff': mean_diff,
        'mean_abs_diff': means_abs_diff,
        'mse': mse,
        'mnx': mean_crossings,
        'hist4': hist(),
        'corr': corr2,
        'mean_abs_value': mean_abs,
        'zero_crossings': zero_crossing(),
        'slope_sign_changes': slope_sign_changes(),
        'waveform_length': waveform_length,
        'emg_var': emg_var,
        'root_mean_square': root_mean_square,
        'willison_amplitude': willison_amplitude(),
    }
def hudgins_features(threshold=0):
    """Build the dictionary of Hudgins' time domain features for EMG classification.

    ``threshold`` is forwarded to the ``zero_crossing`` and
    ``slope_sign_changes`` callables; the other entries take no parameters.
    """
    features = {}
    features['mean_abs_value'] = mean_abs
    features['zero_crossings'] = zero_crossing(threshold)
    features['slope_sign_changes'] = slope_sign_changes(threshold)
    features['waveform_length'] = waveform_length
    return features
def emg_features(threshold=0):
    """Build a dictionary of popular features for EMG time series classification.

    ``threshold`` is forwarded to the three thresholded callables
    (``zero_crossing``, ``slope_sign_changes`` and ``willison_amplitude``).
    """
    # Instantiate the thresholded callables first so the mapping below reads cleanly.
    crossings = zero_crossing(threshold)
    slope_changes = slope_sign_changes(threshold)
    willison = willison_amplitude(threshold)

    features = {
        'mean_abs_value': mean_abs,
        'zero_crossings': crossings,
        'slope_sign_changes': slope_changes,
        'waveform_length': waveform_length,
        'integrated_emg': abs_sum,
        'emg_var': emg_var,
        'simple square integral': abs_energy,
        'root_mean_square': root_mean_square,
        'willison_amplitude': willison,
    }
    return features
def mean(X):
    """Arithmetic mean of each variable, taken along the segment (time) axis."""
    time_axis = 1
    return np.mean(X, axis=time_axis)
def median(X):
    """Median of each variable, taken along the segment (time) axis."""
    time_axis = 1
    return np.median(X, axis=time_axis)
def gmean(X):
    """Geometric mean of each variable, taken along the segment (time) axis."""
    time_axis = 1
    return stats.gmean(X, axis=time_axis)
def hmean(X):
    """Harmonic mean of each variable, taken along the segment (time) axis."""
    time_axis = 1
    return stats.hmean(X, axis=time_axis)
def vec_sum(X):
    """Sum of the samples of each variable along the segment (time) axis."""
    time_axis = 1
    return np.sum(X, axis=time_axis)
def abs_sum(X):
    """Sum of the absolute sample values of each variable along the segment axis."""
    magnitudes = np.abs(X)
    return np.sum(magnitudes, axis=1)
def abs_energy(X):
    """Sum of squared sample values of each variable along the segment axis."""
    squared = X * X
    return np.sum(squared, axis=1)
def std(X):
    """Standard deviation of each variable along the segment (time) axis."""
    time_axis = 1
    return np.std(X, axis=time_axis)
def var(X):
    """Variance of each variable along the segment (time) axis."""
    time_axis = 1
    return np.var(X, axis=time_axis)
def median_absolute_deviation(X):
    """Median absolute deviation of each variable in a segmented time series.

    The deviation is the raw (unscaled) median of ``|x - median(x)|`` taken
    along the segment axis.

    Parameters
    ----------
    X : array-like shape [n_segments, segment_width, n_variables]

    Returns
    -------
    array-like [n_segments, n_variables]
    """
    modern = getattr(stats, 'median_abs_deviation', None)
    if modern is not None:
        return modern(X, axis=1)
    # Older scipy only ships ``median_absolute_deviation``, whose default
    # ``scale`` is 1.4826 rather than 1.0. Pin the scale so the feature value
    # does not depend on the installed scipy version.
    return stats.median_absolute_deviation(X, axis=1, scale=1.0)
def variation(X):
    """Coefficient of variation of each variable along the segment (time) axis."""
    time_axis = 1
    return stats.variation(X, axis=time_axis)
def minimum(X):
    """Smallest sample value of each variable along the segment (time) axis."""
    time_axis = 1
    return np.min(X, axis=time_axis)
def maximum(X):
    """Largest sample value of each variable along the segment (time) axis."""
    time_axis = 1
    return np.max(X, axis=time_axis)
def skew(X):
    """Skewness of each variable along the segment (time) axis."""
    time_axis = 1
    return stats.skew(X, axis=time_axis)
def kurt(X):
    """Kurtosis of each variable along the segment (time) axis."""
    time_axis = 1
    return stats.kurtosis(X, axis=time_axis)
def mean_diff(X):
    """Mean of the first differences of each variable along the segment axis."""
    steps = np.diff(X, axis=1)
    return np.mean(steps, axis=1)
def means_abs_diff(X):
    """Mean of the absolute first differences of each variable along the segment axis."""
    steps = np.diff(X, axis=1)
    step_sizes = np.abs(steps)
    return np.mean(step_sizes, axis=1)
def mse(X):
    """Mean spectral energy of each variable in a segmented time series.

    The FFT is taken along the segment axis, and the squared magnitudes of the
    resulting coefficients are averaged along that same axis.
    """
    spectrum = np.fft.fft(X, axis=1)
    energy = np.square(np.abs(spectrum))
    return np.mean(energy, axis=1)
def mean_crossings(X):
    """Count the mean crossings of each variable in a segmented time series.

    A crossing is counted whenever two consecutive samples lie on opposite
    sides of the mean of their own segment (samples equal to the mean are
    treated as "not above").

    Parameters
    ----------
    X : array-like shape [n_segments, segment_width, n_variables]
        A 2-D input is promoted with ``np.atleast_3d`` and treated as a single
        variable.

    Returns
    -------
    mnx : float array [n_segments, n_variables]
    """
    X = np.atleast_3d(X)
    # Centre each segment on its own mean. Comparing the raw samples against 0
    # would count zero crossings rather than mean crossings.
    centered = X - np.mean(X, axis=1, keepdims=True)
    above = centered > 0
    # A crossing happens wherever the "above mean" state flips between neighbours.
    crossed = above[:, :-1] != above[:, 1:]
    return np.count_nonzero(crossed, axis=1).astype(float)
class hist(object):
    """Callable computing a density histogram of each variable in every segment.

    The output row of a segment holds ``bins`` consecutive values per variable.

    .. note:: this feature is expensive to compute with the current implementation
    """

    def __init__(self, bins=4):
        if bins < 2:
            raise ValueError("hist requires bins >= 2")
        self.bins = bins

    def __call__(self, X):
        X = np.atleast_3d(X)
        n_segments, n_variables = X.shape[0], X.shape[2]
        width = self.bins
        histogram = np.zeros((n_segments, n_variables * width))
        for seg in range(n_segments):
            for var_idx in range(n_variables):
                # every variable owns a contiguous block of ``bins`` columns
                start = var_idx * width
                stop = start + width
                density = np.histogram(X[seg, :, var_idx], bins=width, density=True)[0]
                histogram[seg, start:stop] = density
        return histogram

    def __repr__(self):
        return "%s(bins=%s)" % (self.__class__.__name__, self.bins)
def corr2(X):
    """Correlation between every pair of variables in a segmented time series.

    .. note:: this feature is expensive to compute with the current
        implementation, and cannot be used with univariate time series

    Parameters
    ----------
    X : array-like shape [n_segments, segment_width, n_variables]

    Returns
    -------
    r : array-like [n_segments, n_variables * (n_variables - 1) / 2]
        Upper-triangle (excluding the diagonal) correlation coefficients of
        each segment. For univariate input a 1-D array of ``n_segments`` zeros
        is returned instead.
    """
    X = np.atleast_3d(X)
    N = X.shape[0]
    D = X.shape[2]

    if D == 1:
        return np.zeros(N, dtype=float)

    upper = np.triu_indices(D, k=1)
    r = np.zeros((N, len(upper[0])))
    for i in range(N):
        # X[i] is (segment_width, n_variables): the variables are in columns,
        # so rowvar=False is needed to obtain the D x D correlation matrix.
        rmat = np.corrcoef(X[i], rowvar=False)
        r[i] = rmat[upper]
    return r
def mean_abs(X):
    """Mean of the absolute sample values of each variable along the segment axis."""
    magnitudes = np.abs(X)
    return np.mean(magnitudes, axis=1)
class zero_crossing(object):
    """Number of zero crossings for each variable in the segmented time series.

    Two consecutive samples count as a crossing when they have strictly
    opposite signs and their absolute difference is at least ``threshold``.

    Parameters
    ----------
    threshold : number, default 0
        Minimum absolute difference between the two samples for the crossing
        to be counted.
    """

    def __init__(self, threshold=0):
        self.threshold = threshold

    def __call__(self, X):
        """Return the crossing count per segment and variable, in ``X.dtype``."""
        # Opposite signs <=> strictly negative product of neighbouring samples.
        sign_change = X[:, :-1] * X[:, 1:] < 0
        big_enough = np.abs(np.diff(X, axis=1)) >= self.threshold
        # Both conditions must hold. Testing ``sign * abs_diff >= threshold``
        # alone is always true for threshold <= 0 and would count every pair.
        return np.sum(sign_change & big_enough, axis=1, dtype=X.dtype)

    def __repr__(self):
        return "%s(threshold=%s)" % (self.__class__.__name__, self.threshold)
class slope_sign_changes(object):
    """Number of slope sign changes for each variable in the segmented time series.

    For every three consecutive samples the product
    ``(x[k] - x[k-1]) * (x[k] - x[k+1])`` is formed. The middle sample counts
    as a slope sign change when that product is strictly positive (a local
    peak or trough) and at least ``threshold``.

    Parameters
    ----------
    threshold : number, default 0
        Minimum value of the product for the change to be counted.
    """

    def __init__(self, threshold=0):
        self.threshold = threshold

    def __call__(self, X):
        """Return the slope sign change count per segment and variable, in ``X.dtype``."""
        backward = X[:, 1:-1] - X[:, :-2]
        forward = X[:, 1:-1] - X[:, 2:]
        change = backward * forward
        # Require a strictly positive product: with threshold 0 a plain
        # ``change >= threshold`` would also count flat points (product == 0),
        # so a constant segment would report a change at every sample.
        turning_point = (change > 0) & (change >= self.threshold)
        return np.sum(turning_point, axis=1, dtype=X.dtype)

    def __repr__(self):
        return "%s(threshold=%s)" % (self.__class__.__name__, self.threshold)
def waveform_length(X):
    """Cumulative waveform length of each variable over a segment.

    This is the sum of the absolute differences between consecutive samples.
    """
    steps = np.diff(X, axis=1)
    step_sizes = np.abs(steps)
    return np.sum(step_sizes, axis=1)
def root_mean_square(X):
    """Root mean square of each variable in the segmented time series."""
    n_samples = X.shape[1]
    energy = np.sum(X * X, axis=1)
    mean_square = energy / n_samples
    return np.sqrt(mean_square)
def emg_var(X):
    """Variance of each variable under a zero-mean assumption.

    Computed as the sum of squares along the segment axis divided by
    (segment width - 1), i.e. ``abs_energy`` divided by (seg_size - 1).
    """
    n_samples = X.shape[1]
    energy = np.sum(X * X, axis=1)
    return energy / (n_samples - 1)
class willison_amplitude(object):
    """The Willison amplitude for each variable in the segmented time series.

    Counts the pairs of consecutive samples whose absolute difference is at
    least ``threshold``.

    Parameters
    ----------
    threshold : number, default 0
        Minimum absolute difference between consecutive samples to be counted.
    """

    def __init__(self, threshold=0):
        self.threshold = threshold

    def __call__(self, X):
        """Return the count per segment and variable as an integer array."""
        step_sizes = np.abs(np.diff(X, axis=1))
        return np.sum(step_sizes >= self.threshold, axis=1)

    def __repr__(self):
        return "%s(threshold=%s)" % (self.__class__.__name__, self.threshold)