Source code for biopsykit.signals.imu.feature_extraction.static_moments

"""Extract features from static moments of IMU data."""
from typing import Optional, Tuple, Union

import numpy as np
import pandas as pd
from biopsykit.utils.array_handling import sanitize_input_nd
from biopsykit.utils.time import tz
from scipy.stats import skew


def compute_features(
    data: pd.DataFrame,
    static_moments: pd.DataFrame,
    start: Optional[Union[str, pd.Timestamp]] = None,
    end: Optional[Union[str, pd.Timestamp]] = None,
    index: Optional[Union[int, str]] = None,
    timezone: Optional[str] = None,
) -> Optional[pd.DataFrame]:
    """Compute features based on frequency and duration of static moments in given input signal.

    This function computes the following features:

    * ``sm_number``: number of static moments in data
    * ``sm_max``: maximum duration of static moments, i.e., longest duration
    * ``sm_max_position``: location of the beginning of the longest static moment in the input data
      normalized to ``[0, 1]`` where 0 = ``start`` and 1 = ``end``
    * ``sm_median``: median duration of static moments
    * ``sm_mean``: mean duration of static moments
    * ``sm_std``: standard deviation of static moment durations (computed with ``ddof=1``)
    * ``sm_skewness``: skewness of static moment durations

    The features are both computed on all detected static moments and on static moments that are
    at least 60 seconds long (suffix ``_60``).

    If a group contains no static moments (e.g., no static moment reaches 60 seconds), its
    ``sm_number`` is 0 and all of its other statistics are ``NaN``. If no static moment was detected
    at all, ``sm_max_position`` is ``NaN`` as well.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data
    static_moments : :class:`~pandas.DataFrame`
        dataframe with beginning and end of static moments, given as positions into ``data.index``
    start : :class:`~pandas.Timestamp` or str, optional
        start timestamp in input data for feature extraction or ``None`` to set start index to the
        first index in ``data``. All samples *before* ``start`` will not be used for feature extraction.
    end : :class:`~pandas.Timestamp` or str, optional
        end timestamp in input data for feature extraction or ``None`` to set end index to the
        last index in ``data``. All samples *after* ``end`` will not be used for feature extraction.
    index : int or str, optional
        index label of the resulting dataframe or ``None`` to assign a default label (0)
    timezone : str, optional
        timezone of the recorded data or ``None`` to use the package default timezone
        (``biopsykit.utils.time.tz``). Only used to interpret ``start``/``end`` passed as strings.

    Returns
    -------
    :class:`~pandas.DataFrame` or ``None``
        single-row dataframe with extracted static moment features, or ``None`` if ``data`` is empty

    """
    if data.empty:
        return None

    start, end = _get_start_end(data, start, end, timezone)
    total_time = end - start

    static_moments = sanitize_input_nd(static_moments, ncols=2)

    durations = np.array(
        [static_moment_duration(data, sequence) for sequence in static_moments],
        dtype=float,
    )
    durations_60 = durations[durations >= 60]

    if durations.size > 0:
        # position of the longest static moment, relative to the analysed time span
        loc_max_moment = data.index[static_moments[np.argmax(durations)][0]]
        loc_max_moment_relative = (loc_max_moment - start) / total_time
    else:
        # np.argmax raises on an empty array; there is no longest moment to locate
        loc_max_moment_relative = np.nan

    feature_dict = {"sm_max_position": loc_max_moment_relative}
    for dur, suffix in zip([durations, durations_60], ["", "_60"]):
        feature_dict.update(_duration_statistics(dur, suffix))

    if index is None:
        index = 0
    return pd.DataFrame(feature_dict, index=[index])


def _duration_statistics(durations: np.ndarray, suffix: str) -> dict:
    """Summarize static moment durations into ``sm_*`` features carrying the given key suffix."""
    stats = {f"sm_number{suffix}": len(durations)}
    if len(durations) == 0:
        # np.max raises ValueError on zero-size input; report "undefined" instead of crashing
        for name in ("max", "median", "mean", "std", "skewness"):
            stats[f"sm_{name}{suffix}"] = np.nan
        return stats

    stats[f"sm_max{suffix}"] = np.max(durations)
    stats[f"sm_median{suffix}"] = np.median(durations)
    stats[f"sm_mean{suffix}"] = np.mean(durations)
    stats[f"sm_std{suffix}"] = np.std(durations, ddof=1)
    stats[f"sm_skewness{suffix}"] = skew(durations)
    return stats
def _get_start_end(
    data: pd.DataFrame,
    start: Union[str, pd.Timestamp],
    end: Union[str, pd.Timestamp],
    timezone: str,
) -> Tuple[Union[str, pd.Timestamp], Union[str, pd.Timestamp]]:
    """Resolve the start and end boundaries used for feature extraction.

    Missing boundaries (``None``) fall back to the first and last entry of ``data.index``,
    a missing timezone falls back to the module-level default ``tz``. Boundaries given as
    strings are converted to timestamps in that timezone.
    """
    zone = tz if timezone is None else timezone
    first = data.index[0] if start is None else start
    last = data.index[-1] if end is None else end
    return _to_timestamp(first, zone), _to_timestamp(last, zone)


def _to_timestamp(date: Union[str, pd.Timestamp], timezone: str) -> pd.Timestamp:
    """Convert a string into a timestamp in ``timezone``; any non-string value is returned unchanged."""
    if not isinstance(date, str):
        return date
    return pd.Timestamp(date, tz=timezone)
def static_moment_duration(data: pd.DataFrame, start_end: np.array) -> float:
    """Return how long a single static moment lasts.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data; the duration is derived from the index entries of this dataframe
    start_end : array
        positions of the first (``start_end[0]``) and last (``start_end[1]``) sample of the
        static moment within ``data.index``

    Returns
    -------
    float
        time between the two index entries in seconds

    """
    begin_pos, end_pos = start_end[0], start_end[1]
    timestamps = data.index
    elapsed = timestamps[end_pos] - timestamps[begin_pos]
    return elapsed.total_seconds()
def mean_orientation(data: pd.DataFrame, static_moments: pd.DataFrame) -> pd.DataFrame:
    """Compute mean orientation of acceleration signal within static moment windows.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data
    static_moments : :class:`~pandas.DataFrame`
        dataframe with start and end indices of static moments. The indices are used as positions
        into ``data``; the end index is exclusive.

    Returns
    -------
    :class:`~pandas.DataFrame`
        column-wise mean of ``data`` for each static moment window, one row per window in the order
        of ``static_moments``. The row index holds the length (number of samples) of the respective
        window, so it may contain duplicate labels.

    """
    static_moments = sanitize_input_nd(static_moments, 2)
    windows = [data.iloc[start_end[0] : start_end[1]] for start_end in static_moments]
    # Build the result row by row instead of via a dict keyed by window length: with a dict,
    # windows of equal length overwrite each other and rows get silently lost.
    return pd.DataFrame(
        [window.mean() for window in windows],
        index=[len(window) for window in windows],
    )