Source code for biopsykit.signals.imu.feature_extraction.static_moments

"""Extract features from static moments of IMU data."""
from typing import Optional, Tuple, Union

import numpy as np
import pandas as pd
from biopsykit.utils.array_handling import sanitize_input_nd
from biopsykit.utils.time import tz
from scipy.stats import skew


def compute_features(
    data: pd.DataFrame,
    static_moments: pd.DataFrame,
    start: Optional[Union[str, pd.Timestamp]] = None,
    end: Optional[Union[str, pd.Timestamp]] = None,
    index: Optional[Union[int, str]] = None,
    timezone: Optional[str] = None,
) -> Optional[pd.DataFrame]:
    """Compute features based on frequency and duration of static moments in given input signal.

    This function computes the following features:

    * ``sm_number``: number of static moments in data
    * ``sm_max``: maximum duration of static moments, i.e., longest duration
    * ``sm_max_position``: location of the beginning of the longest static moment in the input data normalized to
      ``[0, 1]`` where 0 = ``start`` and 1 = ``end``
    * ``sm_median``: median duration of static moments
    * ``sm_mean``: mean duration of static moments
    * ``sm_std``: sample standard deviation (``ddof=1``) of static moment durations
    * ``sm_skewness``: skewness of static moment durations

    All durations are in seconds. The features are both computed on all detected static moments and on static
    moments that last at least 60 seconds (suffix ``_60``).

    If one of these two subsets contains no static moment, its ``sm_number`` feature is 0 and its remaining
    features are ``NaN``. If there is no static moment at all, ``sm_max_position`` is ``NaN`` as well.


    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data. The index is used to look up the timestamps of the static moments.
    static_moments : :class:`~pandas.DataFrame`
        dataframe with beginning and end of static moments, given as positions in the index of ``data``
    start : :class:`~pandas.Timestamp` or str, optional
        timestamp that corresponds to position 0 of ``sm_max_position`` or ``None`` to use the first index in
        ``data``. Note that ``data`` is *not* cropped to ``start`` by this function.
    end : :class:`~pandas.Timestamp` or str, optional
        timestamp that corresponds to position 1 of ``sm_max_position`` or ``None`` to use the last index in
        ``data``. Note that ``data`` is *not* cropped to ``end`` by this function.
    index : int or str, optional
        index label of the resulting dataframe or ``None`` to assign a default label (0)
    timezone : str, optional
        timezone used to parse ``start`` and ``end`` if they are passed as strings or ``None`` to use the
        default timezone ("Europe/Berlin")

    Returns
    -------
    :class:`~pandas.DataFrame`
        dataframe with one row of extracted static moment features or ``None`` if ``data`` is empty

    """
    if data.empty:
        return None

    start, end = _get_start_end(data, start, end, timezone)
    total_time = end - start

    static_moments = sanitize_input_nd(static_moments, ncols=2)

    durations = np.array([static_moment_duration(data, sequence) for sequence in static_moments])
    durations_60 = durations[durations >= 60]

    if len(durations) > 0:
        # timestamp at which the longest static moment begins, relative to the [start, end] interval
        loc_max_moment = data.index[static_moments[np.argmax(durations)][0]]
        loc_max_moment_relative = (loc_max_moment - start) / total_time
    else:
        # np.argmax raises on an empty array; without static moments there is no position to report
        loc_max_moment_relative = np.nan

    feature_dict = {"sm_max_position": loc_max_moment_relative}
    for dur, suffix in ((durations, ""), (durations_60, "_60")):
        feature_dict.update(_duration_statistics(dur, suffix))

    if index is None:
        index = 0
    return pd.DataFrame(feature_dict, index=[index])


def _duration_statistics(durations: np.ndarray, suffix: str) -> dict:
    """Return the ``sm_*`` summary statistics for one set of static moment durations.

    Parameters
    ----------
    durations : :class:`~numpy.ndarray`
        1-d array with static moment durations
    suffix : str
        suffix appended to every feature name

    Returns
    -------
    dict
        feature names mapped to feature values; all statistics except the count are ``NaN`` if ``durations``
        is empty

    """
    features = {f"sm_number{suffix}": len(durations)}
    if len(durations) == 0:
        # np.max raises ValueError on zero-size input, so report "no value" explicitly instead of crashing
        for name in ("max", "median", "mean", "std", "skewness"):
            features[f"sm_{name}{suffix}"] = np.nan
        return features

    features[f"sm_max{suffix}"] = np.max(durations)
    features[f"sm_median{suffix}"] = np.median(durations)
    features[f"sm_mean{suffix}"] = np.mean(durations)
    features[f"sm_std{suffix}"] = np.std(durations, ddof=1)
    features[f"sm_skewness{suffix}"] = skew(durations)
    return features


def _get_start_end(
    data: pd.DataFrame,
    start: Union[str, pd.Timestamp],
    end: Union[str, pd.Timestamp],
    timezone: str,
) -> Tuple[Union[str, pd.Timestamp], Union[str, pd.Timestamp]]:
    """Resolve the start and end of the interval used for feature extraction.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data; its first and last index entries are the fallbacks for ``start`` and ``end``
    start : :class:`~pandas.Timestamp` or str
        start of the interval or ``None`` to use the first index entry of ``data``
    end : :class:`~pandas.Timestamp` or str
        end of the interval or ``None`` to use the last index entry of ``data``
    timezone : str
        timezone used to parse string inputs or ``None`` to use the default timezone ``tz``

    Returns
    -------
    tuple
        ``(start, end)`` where string inputs were converted to :class:`~pandas.Timestamp` and all other
        values are passed through unchanged

    """
    zone = tz if timezone is None else timezone
    # resolve both fallbacks before any string is parsed
    first = data.index[0] if start is None else start
    last = data.index[-1] if end is None else end
    return _to_timestamp(first, zone), _to_timestamp(last, zone)


def _to_timestamp(date: Union[str, pd.Timestamp], timezone: str) -> pd.Timestamp:
    """Convert a date string into a timestamp localized to ``timezone``.

    Values that are not strings are returned unchanged.
    """
    if not isinstance(date, str):
        return date
    return pd.Timestamp(date, tz=timezone)


def static_moment_duration(data: pd.DataFrame, start_end: np.array) -> float:
    """Compute how long a single static moment lasts.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data whose index provides the timestamps
    start_end : array
        positions of the first and the last sample of the static moment in the index of ``data``

    Returns
    -------
    float
        time between the two index entries in seconds

    """
    first_pos, last_pos = start_end[0], start_end[1]
    timestamps = data.index
    elapsed = timestamps[last_pos] - timestamps[first_pos]
    return elapsed.total_seconds()


def mean_orientation(data: pd.DataFrame, static_moments: pd.DataFrame) -> pd.DataFrame:
    """Compute mean orientation of acceleration signal within static moment windows.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        input data
    static_moments : :class:`~pandas.DataFrame`
        dataframe with start and end indices of static moments. Each window is selected by position as
        ``data.iloc[start:end]``, i.e., the end index itself is excluded.

    Returns
    -------
    :class:`~pandas.DataFrame`
        column-wise mean of ``data`` for each static moment window, one row per window in input order. The
        index holds the number of samples of the respective window, so it contains duplicates if several
        windows have the same length.

    """
    static_moments = sanitize_input_nd(static_moments, 2)
    windows = [data.iloc[start_end[0] : start_end[1]] for start_end in static_moments]
    # Collect the rows in a list rather than a dict keyed by window length: with a dict, windows of equal
    # length would overwrite each other and silently disappear from the result.
    window_means = [window.mean() for window in windows]
    window_lengths = [len(window) for window in windows]
    return pd.DataFrame(window_means, index=window_lengths)