# Source code for biopsykit.utils.dtypes

"""A couple of helper functions that ease the use of the typical biopsykit data formats."""

from typing import Any

import numpy as np
import pandas as pd

from biopsykit.utils._datatype_validation_helper import (
    _assert_has_column_multiindex,
    _assert_has_column_prefix,
    _assert_has_columns,
    _assert_has_columns_any_level,
    _assert_has_index_levels,
    _assert_has_multiindex,
    _assert_is_dtype,
    _assert_sample_columns_int,
)
from biopsykit.utils.exceptions import ValidationError

# Public names exported by this module: column-name constants, the data-format type aliases,
# and the ``is_*`` validation functions.
# NOTE(review): ``PEP_RESULT_DATAFRAME_COLUMNS`` is defined in this module but is not listed here,
# unlike the other ``*_DATAFRAME_COLUMNS`` constants -- confirm whether that is intentional.
__all__ = [
    "ECG_RESULT_DATAFRAME_COLUMNS",
    "HEART_RATE_DATAFRAME_COLUMNS",
    "R_PEAK_DATAFRAME_COLUMNS",
    "Acc1dDataFrame",
    "Acc3dDataFrame",
    "BPointDataFrame",
    "BiomarkerRawDataFrame",
    "CPointDataFrame",
    "CodebookDataFrame",
    "EcgRawDataFrame",
    "EcgResultDataFrame",
    "Gyr1dDataFrame",
    "Gyr3dDataFrame",
    "HeartRateDataFrame",
    "HeartRatePhaseDict",
    "HeartRateStudyDataDict",
    "HeartRateSubjectDataDict",
    "HeartbeatSegmentationDataFrame",
    "IcgRawDataFrame",
    "ImuDataFrame",
    "MeanSeDataFrame",
    "MergedStudyDataDict",
    "PepResultDataFrame",
    "PhaseDict",
    "QPeakDataFrame",
    "RPeakDataFrame",
    "SalivaFeatureDataFrame",
    "SalivaMeanSeDataFrame",
    "SalivaRawDataFrame",
    "SleepEndpointDataFrame",
    "SleepEndpointDict",
    "SleepWakeDataFrame",
    "StudyDataDict",
    "SubjectConditionDataFrame",
    "SubjectConditionDict",
    "SubjectDataDict",
    "is_acc1d_dataframe",
    "is_acc3d_dataframe",
    "is_b_point_dataframe",
    "is_biomarker_raw_dataframe",
    "is_c_point_dataframe",
    "is_codebook_dataframe",
    "is_ecg_raw_dataframe",
    "is_ecg_result_dataframe",
    "is_gyr1d_dataframe",
    "is_gyr3d_dataframe",
    "is_heart_rate_dataframe",
    "is_heartbeat_segmentation_dataframe",
    "is_hr_phase_dict",
    "is_hr_subject_data_dict",
    "is_icg_raw_dataframe",
    "is_imu_dataframe",
    "is_mean_se_dataframe",
    "is_merged_study_data_dict",
    "is_pep_result_dataframe",
    "is_phase_dict",
    "is_q_peak_dataframe",
    "is_r_peak_dataframe",
    "is_saliva_feature_dataframe",
    "is_saliva_mean_se_dataframe",
    "is_saliva_raw_dataframe",
    "is_sleep_endpoint_dataframe",
    "is_sleep_endpoint_dict",
    "is_sleep_wake_dataframe",
    "is_study_data_dict",
    "is_subject_condition_dataframe",
    "is_subject_condition_dict",
    "is_subject_data_dict",
]

# Column names documented for ``EcgResultDataFrame``. The alias docstring additionally lists
# ``Heart_Rate``, which is kept separately in ``HEART_RATE_DATAFRAME_COLUMNS``.
ECG_RESULT_DATAFRAME_COLUMNS = ["ECG_Raw", "ECG_Clean", "ECG_Quality", "ECG_R_Peaks", "R_Peak_Outlier"]
# Column name documented for ``HeartRateDataFrame``.
HEART_RATE_DATAFRAME_COLUMNS = ["Heart_Rate"]
# Column names documented for ``RPeakDataFrame``.
R_PEAK_DATAFRAME_COLUMNS = ["R_Peak_Quality", "R_Peak_Idx", "RR_Interval", "R_Peak_Outlier"]

# Column names that the ``PepResultDataFrame`` docstring lists as the minimum required set.
PEP_RESULT_DATAFRAME_COLUMNS = [
    "heartbeat_start_sample",
    "heartbeat_end_sample",
    "r_peak_sample",
    "q_peak_sample",
    "b_point_sample",
    "pep_sample",
    "pep_ms",
]

# these subclasses of pd.DataFrame are needed to be added to the type aliases because otherwise, autosphinx does not
# add the docstring to the documentation of the type aliases. Additionally, they can be used internally to highlight
# which alias types are expected at which position


class _SubjectConditionDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``SubjectConditionDataFrame`` type alias.
    pass


class _CodebookDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``CodebookDataFrame`` type alias.
    pass


class _MeanSeDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``MeanSeDataFrame`` type alias.
    pass


class _BiomarkerRawDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``BiomarkerRawDataFrame`` type alias.
    pass


class _SalivaRawDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame; adds no attributes or methods.
    pass


class _SalivaFeatureDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``SalivaFeatureDataFrame`` type alias.
    pass


class _SalivaMeanSeDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``SalivaMeanSeDataFrame`` type alias.
    pass


class _IcgRawDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``IcgRawDataFrame`` type alias.
    pass


class _EcgRawDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``EcgRawDataFrame`` type alias.
    pass


class _EcgResultDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``EcgResultDataFrame`` type alias.
    pass


class _HeartRateDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``HeartRateDataFrame`` type alias.
    pass


class _RPeakDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``RPeakDataFrame`` type alias.
    pass


class _Acc1dDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``Acc1dDataFrame`` type alias.
    pass


class _Acc3dDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``Acc3dDataFrame`` type alias.
    pass


class _Gyr1dDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``Gyr1dDataFrame`` type alias.
    pass


class _Gyr3dDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``Gyr3dDataFrame`` type alias.
    pass


class _ImuDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``ImuDataFrame`` type alias.
    pass


class _SleepWakeDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``SleepWakeDataFrame`` type alias.
    pass


class _SleepEndpointDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``SleepEndpointDataFrame`` type alias.
    pass


class _HeartbeatSegmentationDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the
    # ``HeartbeatSegmentationDataFrame`` type alias.
    pass


class _QPeakDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``QPeakDataFrame`` type alias.
    pass


class _BPointDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``BPointDataFrame`` type alias.
    pass


class _CPointDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``CPointDataFrame`` type alias.
    pass


class _PepResultDataFrame(pd.DataFrame):
    # Marker subclass of pd.DataFrame without added behavior; used by the ``PepResultDataFrame`` type alias.
    pass


SubjectConditionDataFrame = _SubjectConditionDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing subject IDs and condition assignment in a standardized format.

A ``SubjectConditionDataFrame`` has an index with subject IDs named ``subject`` and a column with the condition
assignment named ``condition``.
"""

# NOTE(review): the alias only names ``np.ndarray`` values, while the docstring below and
# ``is_subject_condition_dict`` also accept ``list`` and ``pd.Index`` -- confirm whether the alias should be widened.
SubjectConditionDict = dict[str, np.ndarray]
"""Dictionary containing subject IDs and condition assignment in a standardized format.

A ``SubjectConditionDict`` contains conditions as dictionary keys and a collection of subject IDs
(list, numpy array, pandas Index) as dictionary values.
"""

CodebookDataFrame = _CodebookDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` representing a codebook which encodes numerical and categorical values
in a standardized format.

A ``CodebookDataFrame`` has an index level named ``variable``. The column names are the numerical values (0, 1, ...),
the dataframe entries then represent the mapping of numerical value to categorical value for the variable.
"""

MeanSeDataFrame = _MeanSeDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing mean and standard error of time-series data in a standardized format.

The resulting dataframe must at least have the two columns ``mean`` and ``se``. It can have additional index levels,
such as ``phase``, ``subphase`` or ``condition``.
"""

BiomarkerRawDataFrame = _BiomarkerRawDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing raw biomarker data in a standardized format.

Data needs to be in long-format and **must** have a :class:`pandas.MultiIndex` with index level names:

* ``subject``: subject ID; can be number or string
* ``sample``: saliva sample ID; can be number or string

Additionally, the following index levels can be added to identify saliva values, such as:

* ``condition``: subject condition during the study (e.g., "Control" vs. "Condition")
* ``day``: day ID, if saliva samples were collected over multiple days
* ``night``: night ID, if saliva samples were collected over multiple nights
* ...

"""

# Pairs the alias with the dedicated ``_SalivaRawDataFrame`` marker class, in the same way every other
# DataFrame alias in this module is built from its own private ``pd.DataFrame`` subclass.
SalivaRawDataFrame = _SalivaRawDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing raw saliva data in a standardized format.

Data needs to be in long-format and **must** have a :class:`pandas.MultiIndex` with index level names:

* ``subject``: subject ID; can be number or string
* ``sample``: saliva sample ID; can be number or string

Additionally, the following index levels can be added to identify saliva values, such as:

* ``condition``: subject condition during the study (e.g., "Control" vs. "Condition")
* ``day``: day ID, if saliva samples were collected over multiple days
* ``night``: night ID, if saliva samples were collected over multiple nights
* ...

"""

SalivaFeatureDataFrame = _SalivaFeatureDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing feature computed from saliva data in a standardized format.

The resulting dataframe must at least have a ``subject`` index level and all column names need to begin with
the saliva marker type (e.g. "cortisol"), followed by the feature name, separated by underscore '_'.
Additionally, the name of the column index needs to be `saliva_feature`.
"""

SalivaMeanSeDataFrame = _SalivaMeanSeDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing mean and standard error of saliva samples in a standardized format.

The resulting dataframe must at least have a ``sample`` index level and the two columns ``mean`` and ``se``.
It can have additional index levels, such as ``condition`` or ``time``.
"""

SleepEndpointDict = dict[str, Any]
"""Dictionary containing sleep endpoints in a standardized format.

The dict entries represent the sleep endpoints and should follow a standardized naming convention,
regardless of the origin (IMU sensor, sleep mattress, psg, etc.).

Required are the entries:

* ``sleep_onset``: Sleep Onset, i.e., time of falling asleep, in absolute time
* ``wake_onset``: Wake Onset, i.e., time of awakening, in absolute time
* ``total_sleep_duration``: Total sleep duration, i.e., time between Sleep Onset and Wake Onset, in minutes

The following entries are common, but not required:

* ``total_duration``: Total recording time, in minutes
* ``net_sleep_duration``: Net duration spent sleeping, in minutes
* ``bed_interval_start``: Bed Interval Start, i.e., time when participant went to bed, in absolute time
* ``bed_interval_end``: Bed Interval End, i.e., time when participant left bed, in absolute time
* ``sleep_efficiency``: Sleep Efficiency, defined as the ratio between net sleep duration and sleep duration
  in percent
* ``sleep_onset_latency``: Sleep Onset Latency, i.e., time in bed needed to fall asleep, in minutes
* ``getup_latency``: Get Up Latency, i.e., time in bed after awakening until getting up, in minutes
* ``wake_after_sleep_onset``: Wake After Sleep Onset (WASO), i.e., total time awake after falling asleep, in minutes
* ``sleep_bouts``: List with start and end times of sleep bouts
* ``wake_bouts``: List with start and end times of wake bouts
* ``number_wake_bouts``: Total number of wake bouts

The following entries are, for instance, further possible:

* ``total_time_light_sleep``: Total time of light sleep, in minutes
* ``total_time_deep_sleep``: Total time of deep sleep, in minutes
* ``total_time_rem_sleep``: Total time of REM sleep, in minutes
* ``total_time_awake``: Total time of being awake, in minutes
* ``count_snoring_episodes``: Total number of snoring episodes
* ``total_time_snoring``: Total time of snoring, in minutes
* ``heart_rate_avg``: Average heart rate during recording, in bpm
* ``heart_rate_min``: Minimum heart rate during recording, in bpm
* ``heart_rate_max``: Maximum heart rate during recording, in bpm

"""

SleepEndpointDataFrame = _SleepEndpointDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing sleep endpoints in a standardized format.

The resulting dataframe must at least have a ``date`` index level,
and, optionally, further index levels like ``night``.

The columns defining the sleep endpoints should follow a standardized naming convention, regardless of the origin
(IMU sensor, sleep mattress, psg, etc.).

Required are the columns:

* ``sleep_onset``: Sleep Onset, i.e., time of falling asleep, in absolute time
* ``wake_onset``: Wake Onset, i.e., time of awakening, in absolute time
* ``total_sleep_duration``: Total sleep duration, i.e., time between Sleep Onset and Wake Onset, in minutes

The following columns are common, but not required:

* ``total_duration``: Total recording time, in minutes
* ``net_sleep_duration``: Net duration spent sleeping, in minutes
* ``bed_interval_start``: Bed Interval Start, i.e., time when participant went to bed, in absolute time
* ``bed_interval_end``: Bed Interval End, i.e., time when participant left bed, in absolute time
* ``sleep_efficiency``: Sleep Efficiency, defined as the ratio between net sleep duration and sleep duration
  in percent
* ``sleep_onset_latency``: Sleep Onset Latency, i.e., time in bed needed to fall asleep, in minutes
* ``getup_latency``: Get Up Latency, i.e., time in bed after awakening until getting up, in minutes
* ``wake_after_sleep_onset``: Wake After Sleep Onset (WASO), i.e., total time awake after falling asleep, in minutes
* ``number_wake_bouts``: Total number of wake bouts

The following columns are further possible:

* ``total_time_light_sleep``: Total time of light sleep, in minutes
* ``total_time_deep_sleep``: Total time of deep sleep, in minutes
* ``total_time_rem_sleep``: Total time of REM sleep, in minutes
* ``total_time_awake``: Total time of being awake, in minutes
* ``count_snoring_episodes``: Total number of snoring episodes
* ``total_time_snoring``: Total time of snoring, in minutes
* ``heart_rate_avg``: Average heart rate during recording, in bpm
* ``heart_rate_min``: Minimum heart rate during recording, in bpm
* ``heart_rate_max``: Maximum heart rate during recording, in bpm

"""

IcgRawDataFrame = _IcgRawDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing raw ICG data of `one` subject.

The dataframe is expected to have one of the following columns:

* ``icg``: Raw ICG signal
* ``icg_der``: Derivative of the ICG signal

"""


EcgRawDataFrame = _EcgRawDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing raw ECG data of `one` subject.

The dataframe is expected to have the following columns:

* ``ecg``: Raw ECG signal

"""

EcgResultDataFrame = _EcgResultDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing processed ECG data of `one` subject.

The dataframe is expected to have the following columns:

* ``ECG_Raw``: Raw ECG signal
* ``ECG_Clean``: Cleaned (filtered) ECG signal
* ``ECG_Quality``: ECG signal quality indicator in the range of [0, 1]
* ``ECG_R_Peaks``: 1.0 where R peak was detected in the ECG signal, 0.0 else
* ``R_Peak_Outlier``: 1.0 when a detected R peak was classified as outlier, 0.0 else
* ``Heart_Rate``: Computed Heart rate interpolated to signal length

"""

HeartRateDataFrame = _HeartRateDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing heart rate time series data of `one` subject.

The dataframe is expected to have the following columns:

* ``Heart_Rate``: Heart rate data. Can either be instantaneous heart rate or resampled heart rate

"""

RPeakDataFrame = _RPeakDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing R-peak locations of `one` subject extracted from ECG data.

The dataframe is expected to have the following columns:

* ``R_Peak_Quality``: Signal quality indicator (of the raw ECG signal) in the range of [0, 1]
* ``R_Peak_Idx``: Array index of detected R peak in the raw ECG signal
* ``RR_Interval``: Interval between the current and the successive R peak in seconds
* ``R_Peak_Outlier``: 1.0 when a detected R peak was classified as outlier, 0.0 else

"""

Acc1dDataFrame = _Acc1dDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing 1-d acceleration data.

The dataframe is expected to have one of the following column sets:

* ["acc"]: one level column index
* ["acc_norm"]: one level column index

"""

Acc3dDataFrame = _Acc3dDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing 3-d acceleration data.

The dataframe is expected to have one of the following column sets:

* ["acc_x", "acc_y", "acc_z"]: one level column index
* [("acc", "x"), ("acc", "y"), ("acc", "z")]: two-level column index, first level specifying the channel
  (acceleration), second level specifying the axes

"""

Gyr1dDataFrame = _Gyr1dDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing 1-d gyroscope data.

The dataframe is expected to have one of the following column sets:

* ["gyr"]: one level column index
* ["gyr_norm"]: one level column index

"""

Gyr3dDataFrame = _Gyr3dDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing 3-d gyroscope data.

The dataframe is expected to have one of the following column sets:

* ["gyr_x", "gyr_y", "gyr_z"]: one level column index
* [("gyr", "x"), ("gyr", "y"), ("gyr", "z")]: two-level column index, first level specifying the channel
  (gyroscope), second level specifying the axes

"""

ImuDataFrame = _ImuDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing 6-d inertial measurement (IMU) (acceleration and gyroscope) data.

Hence, an ``ImuDataFrame`` must be both an ``Acc3dDataFrame`` **and** a ``Gyr3dDataFrame``.

The dataframe is expected to have one of the following column sets:

* ["acc_x", "acc_y", "acc_z", "gyr_x", "gyr_y", "gyr_z"]: one level column index
* [("acc", "x"), ("acc", "y"), ("acc", "z"), ("gyr", "x"), ("gyr", "y"), ("gyr", "z")]:
  two-level column index, first level specifying the channel (acceleration and gyroscope),
  second level specifying the axes

"""

SleepWakeDataFrame = _SleepWakeDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing sleep/wake predictions.

The dataframe is expected to have at least the following column(s):

* ["sleep_wake"]: sleep/wake predictions where 1 indicates sleep and 0 indicates wake

"""

HeartbeatSegmentationDataFrame = _HeartbeatSegmentationDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing results of heartbeat segmentation.

The dataframe is expected to have *at least* the following columns:

* ``start_sample``: Start sample of segmented heartbeat
* ``end_sample``: End sample of segmented heartbeat
* ``r_peak_sample``: R-peak sample of segmented heartbeat

"""

QPeakDataFrame = _QPeakDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing Q-peak locations extracted from ECG data.

The dataframe is expected to have *at least* the following columns:

* ``q_peak_sample``: The sample index of the Q-peak in the ECG signal

Optionally, the dataframe can contain additional columns, such as:

* ``nan_reason``: Reason why the Q-peak was set to NaN (e.g., "r_peak_nan", "no_zero_crossing")

"""

BPointDataFrame = _BPointDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing B-Point locations extracted from ICG data.

The dataframe is expected to have *at least* the following columns:

* ``b_point_sample``: The sample index of the B-point in the ICG signal

Optionally, the dataframe can contain additional columns, such as:

* ``nan_reason``: Reason why the B-point was set to NaN (e.g., "c_point_nan", "no_zero_crossing")

"""

CPointDataFrame = _CPointDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing C-Point locations extracted from ICG data.

The dataframe is expected to have *at least* the following columns:

* ``c_point_sample``: The sample index of the C-point in the ICG signal

Optionally, the dataframe can contain additional columns, such as:

* ``nan_reason``: Reason why the C-point was set to NaN (e.g., "no_local_maxima")

"""

PepResultDataFrame = _PepResultDataFrame | pd.DataFrame
""":class:`~pandas.DataFrame` containing results of PEP extraction.

The dataframe is expected to have *at least* the following columns:

* ``heartbeat_start_sample``: Start sample of segmented heartbeat
* ``heartbeat_end_sample``: End sample of segmented heartbeat
* ``r_peak_sample``: R-peak sample of segmented heartbeat
* ``q_peak_sample``: Q-peak sample of segmented heartbeat
* ``b_point_sample``: B-point sample of segmented heartbeat
* ``pep_sample``: Pre-ejection period (PEP) in samples
* ``pep_ms``: Pre-ejection period (PEP) in milliseconds



Additionally, the dataframe can contain the following columns:

* ``rr_interval_sample``: RR interval between the previous and the current heartbeat in samples
* ``rr_interval_ms``: RR interval between the previous and the current heartbeat in milliseconds
* ``heart_rate_bpm``: Heart rate in beats per minute, derived from RR interval
* ``nan_reason``: Reason why the PEP was set to NaN (e.g., "r_peak_nan", "no_zero_crossing")

"""


PhaseDict = dict[str, pd.DataFrame]
"""Dictionary containing general time-series data of **one single subject** split into **different phases**.

A ``PhaseDict`` is a dictionary with the following format:

{ "phase_1" : dataframe, "phase_2" : dataframe, ... }

Each ``dataframe`` is a :class:`~pandas.DataFrame` with the following format:

* Index: :class:`pandas.DatetimeIndex` with timestamps, name of index level: ``time``

"""

HeartRatePhaseDict = dict[str, HeartRateDataFrame]
"""Dictionary containing time-series heart rate data of **one single subject** split into **different phases**.

A ``HeartRatePhaseDict`` is a dictionary with the following format:

{ "phase_1" : hr_dataframe, "phase_2" : hr_dataframe, ... }

Each ``hr_dataframe`` is a :class:`~pandas.DataFrame` with the following format:

* ``time`` Index: :class:`pandas.DatetimeIndex` with heart rate sample timestamps
* ``Heart_Rate`` Column: heart rate values

"""

SubjectDataDict = dict[str, PhaseDict]
"""Dictionary representing time-series data from **multiple subjects** collected during a psychological protocol.

A ``SubjectDataDict`` is a nested dictionary with time-series data from multiple subjects, each containing data
from different phases. It is expected to have the level order `subject`, `phase`:

| {
|     "subject1" : { "phase_1" : dataframe, "phase_2" : dataframe, ... },
|     "subject2" : { "phase_1" : dataframe, "phase_2" : dataframe, ... },
|     ...
| }

This dictionary can, for instance, be rearranged to a :obj:`biopsykit.utils.dtypes.StudyDataDict`,
where the level order is reversed: `phase`, `subject`.
"""

# NOTE(review): the second union member allows one additional nesting level (a dict of dicts of
# ``HeartRatePhaseDict``), which the docstring below does not describe -- confirm which callers rely on it.
HeartRateSubjectDataDict = dict[str, HeartRatePhaseDict] | dict[str, dict[str, HeartRatePhaseDict]]
"""Dictionary with time-series heart rate data from **multiple subjects** collected during a psychological protocol.

A ``HeartRateSubjectDataDict`` is a nested dictionary with time-series heart rate data from multiple subjects,
each containing data from different phases. It is expected to have the level order `subject`, `phase`:

| {
|     "subject1" : { "phase_1" : hr_dataframe, "phase_2" : hr_dataframe, ... },
|     "subject2" : { "phase_1" : hr_dataframe, "phase_2" : hr_dataframe, ... },
|     ...
| }

Each ``hr_dataframe`` is a :class:`~pandas.DataFrame` with the following format:

* ``time`` Index: :class:`pandas.DatetimeIndex` with heart rate sample timestamps
* ``Heart_Rate`` Column: heart rate values

This dictionary can, for instance, be rearranged to a :obj:`~biopsykit.utils.dtypes.HeartRateStudyDataDict`,
where the level order is reversed: `phase`, `subject`.

"""

StudyDataDict = dict[str, dict[str, pd.DataFrame]]
"""Dictionary with data from **multiple phases** collected during a psychological protocol.

A ``StudyDataDict`` is a nested dictionary with time-series data from multiple phases, each phase containing data
from different subjects. It is expected to have the level order `phase`, `subject`:

| {
|     "phase_1" : { "subject1" : dataframe, "subject2" : dataframe, ... },
|     "phase_2" : { "subject1" : dataframe, "subject2" : dataframe, ... },
|     ...
| }

This dict results from rearranging a :obj:`biopsykit.utils.dtypes.SubjectDataDict` by calling
:func:`~biopsykit.utils.data_processing.rearrange_subject_data_dict`.
"""


HeartRateStudyDataDict = dict[str, dict[str, HeartRateDataFrame]]
"""Dictionary with heart rate data from **multiple phases** collected during a psychological protocol.

A ``HeartRateStudyDataDict`` is a nested dictionary with time-series heart rate data from multiple phases,
each phase containing data from different subjects. It is expected to have the level order `phase`, `subject`:

| {
|     "phase_1" : { "subject1" : hr_dataframe, "subject2" : hr_dataframe, ... },
|     "phase_2" : { "subject1" : hr_dataframe, "subject2" : hr_dataframe, ... },
|     ...
| }

Each ``hr_dataframe`` is a :class:`~pandas.DataFrame` with the following format:

* ``time`` Index: :class:`pandas.DatetimeIndex` with heart rate sample timestamps
* ``Heart_Rate`` Column: heart rate values

This dict results from rearranging a :obj:`~biopsykit.utils.dtypes.HeartRateSubjectDataDict` by calling
:func:`~biopsykit.utils.data_processing.rearrange_subject_data_dict`.
"""

MergedStudyDataDict = dict[str, pd.DataFrame]
"""Dictionary with merged time-series data of **multiple subjects**, split into **different phases**.

A ``MergedStudyDataDict`` is a dictionary with the following format:

| {
|     "phase_1" : merged_dataframe,
|     "phase_2" : merged_dataframe,
|     ...
| }

This dict results from merging the inner dictionary into one dataframe by calling
:func:`~biopsykit.utils.data_processing.merge_study_data_dict`.

.. note::
    Merging the inner dictionaries requires that the dataframes of all subjects have same length within each phase.

Each ``merged_dataframe`` is a :class:`~pandas.DataFrame` with the following format:

* Index: time. Name of index level: ``time``
* Columns: time series data per subject, each subject has its own column.
  Name of the column index level: ``subject``
"""


def is_subject_condition_dataframe(
    data: SubjectConditionDataFrame, raise_exception: bool | None = True
) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.SubjectConditionDataFrame`.

    The individual checks are delegated to the ``_assert_*`` helpers imported from
    ``biopsykit.utils._datatype_validation_helper``. Following the format description of
    ``SubjectConditionDataFrame``, ``data`` is expected to be a :class:`~pandas.DataFrame` with a
    (non-multi) index named ``subject`` and a ``condition`` column.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``SubjectConditionDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the literal ``True`` enables raising;
        for any other value a failed check results in ``False`` being returned. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``SubjectConditionDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``SubjectConditionDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SubjectConditionDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        # the subject index must be a plain index, not a MultiIndex
        _assert_has_multiindex(data, expected=False)
        _assert_has_index_levels(data, index_levels=["subject"], match_atleast=False, match_order=True)
        _assert_has_columns(data, [["condition"]])
    except ValidationError as e:
        if raise_exception is True:
            # re-raise with a message that names the expected format; the original error stays chained
            raise ValidationError(
                "The passed object does not seem to be a SubjectConditionDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_subject_condition_dict(data: SubjectConditionDict, raise_exception: bool | None = True) -> bool | None:
    """Check whether a dict is a :obj:`~biopsykit.utils.dtypes.SubjectConditionDict`.

    ``data`` must be a ``dict`` and every value must be a numpy array, a list, or a :class:`~pandas.Index`.
    The dictionary keys are not inspected.

    Parameters
    ----------
    data : dict
        dict to check if it is a ``SubjectConditionDict``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the literal ``True`` enables raising;
        for any other value a failed check results in ``False`` being returned. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``SubjectConditionDict``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``SubjectConditionDict``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SubjectConditionDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        # each condition maps to a collection of subject IDs
        for val in data.values():
            _assert_is_dtype(val, (np.ndarray, list, pd.Index))
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a SubjectConditionDict. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_codebook_dataframe(data: CodebookDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.CodebookDataFrame`.

    ``data`` must be a :class:`~pandas.DataFrame` that has (at least) an index level named ``variable``
    and whose column labels have an integer dtype.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``CodebookDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the literal ``True`` enables raising;
        for any other value a failed check results in ``False`` being returned. Default: ``True``


    Returns
    -------
    ``True`` if ``data`` is a ``CodebookDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)


    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``CodebookDataFrame``


    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.CodebookDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_index_levels(data, index_levels="variable", match_atleast=True, match_order=False)
        # ``is_integer_dtype`` is used instead of ``np.issubdtype`` because the latter raises a ``TypeError``
        # for pandas extension dtypes (which would bypass the ``ValidationError`` handling below) and
        # treats ``timedelta64`` as an integer type.
        if not pd.api.types.is_integer_dtype(data.columns.dtype):
            raise ValidationError(
                "The dtypes of columns in a CodebookDataFrame are expected to be of type int, "
                f"but it is {data.columns.dtype}."
            )
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a CodebookDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_mean_se_dataframe(data: MeanSeDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.MeanSeDataFrame`.

    ``data`` must be a :class:`~pandas.DataFrame` providing the columns ``mean`` and ``se``. For a
    single-level column index the columns are checked directly; for a multi-level column index the
    check is delegated to ``_assert_has_columns_any_level``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``MeanSeDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the literal ``True`` enables raising;
        for any other value a failed check results in ``False`` being returned. Default: ``True``


    Returns
    -------
    ``True`` if ``data`` is a ``MeanSeDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)


    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``MeanSeDataFrame``


    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.MeanSeDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        # pick the column check that fits the number of column index levels
        if data.columns.nlevels == 1:
            _assert_has_columns(data, [["mean", "se"]])
        else:
            _assert_has_columns_any_level(data, [["mean", "se"]])
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a MeanSeDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_hr_phase_dict(data: HeartRatePhaseDict, raise_exception: bool | None = True) -> bool | None:
    """Check whether a dict is a :obj:`~biopsykit.utils.dtypes.HeartRatePhaseDict`.

    ``data`` must be a ``dict`` and every value is passed to ``is_heart_rate_dataframe``.
    The dictionary keys are not inspected.

    Parameters
    ----------
    data : dict
        dict to check
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the literal ``True`` enables raising;
        for any other value a failed check results in ``False`` being returned. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``HeartRatePhaseDict``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``HeartRatePhaseDict``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.HeartRatePhaseDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        for df in data.values():
            # The return value is not inspected: this relies on ``is_heart_rate_dataframe`` raising a
            # ``ValidationError`` when called with its default arguments -- TODO confirm its default
            # for ``raise_exception``.
            is_heart_rate_dataframe(df)
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a HeartRatePhaseDict. "
                f"The validation failed with the following error:\n\n{e!s}\n"
                "HeartRatePhaseDicts in an old format can be converted into the new format using "
                "`biopsykit.utils.legacy_helper.legacy_convert_hr_phase_dict()`"
            ) from e
        return False
    return True


def is_phase_dict(data: PhaseDict, raise_exception: bool | None = True) -> bool | None:
    """Check whether a dict is a :obj:`~biopsykit.utils.dtypes.PhaseDict`.

    ``data`` must be a ``dict`` whose values are all :class:`~pandas.DataFrame` objects with a
    (non-multi) index named ``time`` and a single-level column index. The dictionary keys are not inspected.

    Parameters
    ----------
    data : dict
        dict to check
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the literal ``True`` enables raising;
        for any other value a failed check results in ``False`` being returned. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``PhaseDict``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``PhaseDict``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.PhaseDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        # every phase entry is validated; the first failing dataframe aborts the check
        for df in data.values():
            _assert_is_dtype(df, pd.DataFrame)
            _assert_has_multiindex(df, expected=False)
            _assert_has_column_multiindex(df, expected=False)
            _assert_has_index_levels(df, ["time"])
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a PhaseDict. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_hr_subject_data_dict(data: HeartRateSubjectDataDict, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.HeartRateSubjectDataDict` format.

    Parameters
    ----------
    data : dict
        object to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.HeartRateSubjectDataDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        for hr_phase_dict in data.values():
            # relies on ``is_hr_phase_dict`` raising a ``ValidationError`` for an invalid entry,
            # which is then re-wrapped (or turned into ``False``) below
            is_hr_phase_dict(hr_phase_dict)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a HeartRateSubjectDataDict. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_study_data_dict(data: StudyDataDict, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.StudyDataDict` format.

    The object must be a dictionary whose values are dictionaries whose values are dataframes.

    Parameters
    ----------
    data : dict
        object to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.StudyDataDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        for inner_dict in data.values():
            _assert_is_dtype(inner_dict, dict)
            for inner_df in inner_dict.values():
                _assert_is_dtype(inner_df, pd.DataFrame)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a StudyDataDict. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_subject_data_dict(data: SubjectDataDict, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.SubjectDataDict` format.

    The object must be a dictionary whose values are dictionaries whose values are dataframes.

    Parameters
    ----------
    data : dict
        object to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SubjectDataDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        for inner_dict in data.values():
            _assert_is_dtype(inner_dict, dict)
            for inner_df in inner_dict.values():
                _assert_is_dtype(inner_df, pd.DataFrame)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a SubjectDataDict. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_merged_study_data_dict(data: MergedStudyDataDict, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.MergedStudyDataDict` format.

    Parameters
    ----------
    data : dict
        object to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.MergedStudyDataDict`
        dictionary format

    """
    try:
        _assert_is_dtype(data, dict)
        for merged_df in data.values():
            # every value must be a dataframe; the row MultiIndex assertion is called with
            # ``expected=False`` and a "time" index level is required
            _assert_is_dtype(merged_df, pd.DataFrame)
            _assert_has_multiindex(merged_df, expected=False)
            _assert_has_index_levels(merged_df, ["time"])
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a MergedStudyDataDict. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_biomarker_raw_dataframe(
    data: BiomarkerRawDataFrame, biomarker_type: str | list[str], raise_exception: bool | None = True
) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.BiomarkerRawDataFrame`.

    The dataframe must have a row MultiIndex with at least two levels that include ``subject`` and ``sample``
    (in any order) and must contain the biomarker column(s), optionally together with a ``time`` column.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``BiomarkerRawDataFrame``
    biomarker_type : str or list of str
        type of biomarker data (or list of biomarker types) in the dataframe, e.g., "cortisol" or "amylase".
        Passing ``None`` is treated as a validation failure.
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``BiomarkerRawDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``BiomarkerRawDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.BiomarkerRawDataFrame`
        dataframe format

    """
    try:
        if biomarker_type is None:
            # name the parameter this function actually exposes
            raise ValidationError("`biomarker_type` is None!")
        if isinstance(biomarker_type, str):
            # normalize a single biomarker name to a list so it can be unpacked below
            biomarker_type = [biomarker_type]
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_multiindex(data, nlevels=2, nlevels_atleast=True)
        _assert_has_index_levels(data, index_levels=["subject", "sample"], match_atleast=True, match_order=False)
        # accepted column sets: the biomarker columns alone, or the biomarker columns plus "time"
        _assert_has_columns(data, [biomarker_type, [*biomarker_type, "time"]])
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a BiomarkerRawDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_saliva_raw_dataframe(
    data: SalivaRawDataFrame, saliva_type: str | list[str], raise_exception: bool | None = True
) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.SalivaRawDataFrame` format.

    This is a thin wrapper that forwards all arguments to :func:`is_biomarker_raw_dataframe`.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    saliva_type : str or list of str
        type of saliva data (or list of saliva types) in the dataframe, e.g., "cortisol" or "amylase"
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; otherwise ``False`` is returned

    Returns
    -------
    bool
        the result of :func:`is_biomarker_raw_dataframe`: ``True`` if ``data`` is valid,
        ``False`` if it is not and no exception is raised

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``SalivaRawDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SalivaRawDataFrame`
        dataframe format

    """
    return is_biomarker_raw_dataframe(data, biomarker_type=saliva_type, raise_exception=raise_exception)


def is_saliva_feature_dataframe(
    data: SalivaFeatureDataFrame, saliva_type: str, raise_exception: bool | None = True
) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.SalivaFeatureDataFrame` format.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    saliva_type : str
        type of saliva data in the dataframe, e.g., "cortisol" or "amylase". It is used as the expected
        column prefix; ``None`` is treated as a validation failure.
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SalivaFeatureDataFrame`
        dataframe format

    """
    try:
        if saliva_type is None:
            raise ValidationError("`saliva_type` is None!")
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_index_levels(data, index_levels="subject", match_atleast=True, match_order=False)
        # NOTE: a column-level check for "saliva_feature" is currently disabled:
        # _assert_has_column_levels(data, column_levels="saliva_feature", match_atleast=True, match_order=False)
        _assert_has_column_prefix(data.columns, prefix=saliva_type)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a SalivaFeatureDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_saliva_mean_se_dataframe(data: SalivaMeanSeDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.SalivaMeanSeDataFrame`.

    The dataframe must have a ``sample`` index level (possibly among others) and the columns ``mean`` and ``se``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``SalivaMeanSeDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``SalivaMeanSeDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``SalivaMeanSeDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SalivaMeanSeDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_index_levels(data, index_levels="sample", match_atleast=True, match_order=False)
        _assert_has_columns(data, [["mean", "se"]])
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a SalivaMeanSeDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_sleep_endpoint_dataframe(data: SleepEndpointDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.SleepEndpointDataFrame` format.

    The dataframe must have a :class:`~pandas.DatetimeIndex` with a ``date`` index level and the columns
    ``sleep_onset``, ``wake_onset`` and ``total_sleep_duration``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SleepEndpointDataFrame`
        dataframe format

    """
    required_columns = ["sleep_onset", "wake_onset", "total_sleep_duration"]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_is_dtype(data.index, pd.DatetimeIndex)
        _assert_has_index_levels(data, index_levels="date", match_atleast=True, match_order=False)
        _assert_has_columns(data, column_sets=[required_columns])
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a SleepEndpointDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_sleep_endpoint_dict(data: SleepEndpointDict, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.SleepEndpointDict` format.

    The object must be a dictionary containing the keys ``date``, ``sleep_onset``, ``wake_onset`` and
    ``total_sleep_duration``. Additional keys are allowed; the values are not inspected.

    Parameters
    ----------
    data : dict
        object to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SleepEndpointDict`
        dictionary format

    """
    expected_keys = ["date", "sleep_onset", "wake_onset", "total_sleep_duration"]
    try:
        _assert_is_dtype(data, dict)
        if not all(key in data for key in expected_keys):
            raise ValidationError(f"Not all of {expected_keys} are in the dictionary!")
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a SleepEndpointDict. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_icg_raw_dataframe(data: IcgRawDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.IcgRawDataFrame` format.

    The dataframe is checked against the column sets ``["icg_der"]`` and ``["icg"]``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.IcgRawDataFrame`
        dataframe format

    """
    allowed_column_sets = [["icg_der"], ["icg"]]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, column_sets=allowed_column_sets)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a IcgRawDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_ecg_raw_dataframe(data: EcgRawDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.EcgRawDataFrame` format.

    The dataframe is checked against the column set ``["ecg"]``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.EcgRawDataFrame`
        dataframe format

    """
    allowed_column_sets = [["ecg"]]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, column_sets=allowed_column_sets)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a EcgRawDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_ecg_result_dataframe(data: EcgResultDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.EcgResultDataFrame`.

    The dataframe is checked against two column sets: ``ECG_RESULT_DATAFRAME_COLUMNS`` alone, or
    ``ECG_RESULT_DATAFRAME_COLUMNS`` combined with ``HEART_RATE_DATAFRAME_COLUMNS``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``EcgResultDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``EcgResultDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``EcgResultDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.EcgResultDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(
            data,
            column_sets=[
                ECG_RESULT_DATAFRAME_COLUMNS,
                ECG_RESULT_DATAFRAME_COLUMNS + HEART_RATE_DATAFRAME_COLUMNS,
            ],
        )
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a EcgResultDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_heart_rate_dataframe(data: HeartRateDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.HeartRateDataFrame` format.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.HeartRateDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, column_sets=[HEART_RATE_DATAFRAME_COLUMNS])
        # both MultiIndex assertions are called with ``expected=False``; a "time" index level is required
        _assert_has_multiindex(data, expected=False)
        _assert_has_column_multiindex(data, expected=False)
        _assert_has_index_levels(data, ["time"])
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a HeartRateDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_r_peak_dataframe(data: RPeakDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.RPeakDataFrame`.

    The dataframe is checked against three column sets: ``R_Peak_Idx`` and ``RR_Interval``; those two plus
    ``R_Peak_Quality``; or ``R_PEAK_DATAFRAME_COLUMNS``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``RPeakDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``RPeakDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``RPeakDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.RPeakDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(
            data,
            column_sets=[
                ["R_Peak_Idx", "RR_Interval"],
                ["R_Peak_Quality", "R_Peak_Idx", "RR_Interval"],
                R_PEAK_DATAFRAME_COLUMNS,
            ],
        )
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a RPeakDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_acc1d_dataframe(data: Acc1dDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.Acc1dDataFrame`.

    The dataframe is checked against the column sets ``["acc"]`` and ``["acc_norm"]``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``Acc1dDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``Acc1dDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``Acc1dDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.Acc1dDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(
            data,
            column_sets=[["acc"], ["acc_norm"]],
        )
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a Acc1dDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_acc3d_dataframe(data: Acc3dDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.Acc3dDataFrame` format.

    The dataframe is checked against two column sets: the flat names ``acc_x``, ``acc_y``, ``acc_z`` or the
    tuple names ``("acc", "x")``, ``("acc", "y")``, ``("acc", "z")``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.Acc3dDataFrame`
        dataframe format

    """
    flat_columns = ["acc_x", "acc_y", "acc_z"]
    tuple_columns = [("acc", "x"), ("acc", "y"), ("acc", "z")]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, column_sets=[flat_columns, tuple_columns])
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a Acc3dDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_gyr1d_dataframe(data: Gyr1dDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.Gyr1dDataFrame`.

    The dataframe is checked against the column sets ``["gyr"]`` and ``["gyr_norm"]``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``Gyr1dDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``Gyr1dDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``Gyr1dDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.Gyr1dDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(
            data,
            column_sets=[["gyr"], ["gyr_norm"]],
        )
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a Gyr1dDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_gyr3d_dataframe(data: Gyr3dDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.Gyr3dDataFrame` format.

    The dataframe is checked against two column sets: the flat names ``gyr_x``, ``gyr_y``, ``gyr_z`` or the
    tuple names ``("gyr", "x")``, ``("gyr", "y")``, ``("gyr", "z")``.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.Gyr3dDataFrame`
        dataframe format

    """
    flat_columns = ["gyr_x", "gyr_y", "gyr_z"]
    tuple_columns = [("gyr", "x"), ("gyr", "y"), ("gyr", "z")]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, column_sets=[flat_columns, tuple_columns])
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a Gyr3dDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_imu_dataframe(data: ImuDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.ImuDataFrame`.

    The dataframe must pass both :func:`is_acc3d_dataframe` and :func:`is_gyr3d_dataframe`.

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``ImuDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``ImuDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``ImuDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.ImuDataFrame`
        dataframe format

    """
    try:
        # the sub-checks always raise so that their error message can be embedded in the one built below
        is_acc3d_dataframe(data, raise_exception=True)
        is_gyr3d_dataframe(data, raise_exception=True)
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a ImuDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_sleep_wake_dataframe(data: SleepWakeDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.SleepWakeDataFrame`.

    The dataframe must contain a ``sleep_wake`` column whose values all lie in the closed interval [0, 1].

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``SleepWakeDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value

    Returns
    -------
    ``True`` if ``data`` is a ``SleepWakeDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``SleepWakeDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.SleepWakeDataFrame`
        dataframe format

    """
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, [["sleep_wake"]])
        # Rely on the default (closed) bounds of ``Series.between``: passing a boolean ``inclusive``
        # is deprecated since pandas 1.3 and rejected by pandas >= 2.0.
        if not data["sleep_wake"].between(0, 1).all():
            raise ValidationError(
                "Invalid values for sleep/wake prediction! Sleep/wake scores are expected to be in the interval [0, 1]."
            )
    except ValidationError as e:
        if raise_exception is True:
            raise ValidationError(
                "The passed object does not seem to be a SleepWakeDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_heartbeat_segmentation_dataframe(
    data: HeartbeatSegmentationDataFrame, raise_exception: bool | None = True
) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.HeartbeatSegmentationDataFrame` format.

    The dataframe must contain the columns ``start_sample``, ``end_sample`` and ``r_peak_sample``, pass the
    integer check on its sample columns, and have a ``heartbeat_id`` index level (possibly among others).

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.HeartbeatSegmentationDataFrame`
        dataframe format

    """
    required_columns = ["start_sample", "end_sample", "r_peak_sample"]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, [required_columns])
        # columns ending in "_sample" are expected to be of type int
        _assert_sample_columns_int(data)
        _assert_has_index_levels(data, "heartbeat_id", match_atleast=True, match_order=False)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a HeartbeatSegmentationDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_c_point_dataframe(data: CPointDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Validate that ``data`` follows the :obj:`~biopsykit.utils.dtypes.CPointDataFrame` format.

    The dataframe is checked against the column sets ``["c_point_sample"]`` and
    ``["c_point_sample", "nan_reason"]``, must pass the integer check on its sample columns, and must have a
    ``heartbeat_id`` index level (possibly among others).

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        dataframe to validate
    raise_exception : bool, optional
        if ``True`` (default), a failed check raises a ``ValidationError``; for any other value
        ``False`` is returned instead

    Returns
    -------
    bool
        ``True`` if all checks pass, ``False`` if a check fails and ``raise_exception`` is not ``True``

    Raises
    ------
    ValidationError
        if a check fails and ``raise_exception`` is ``True``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.CPointDataFrame`
        dataframe format

    """
    allowed_column_sets = [["c_point_sample"], ["c_point_sample", "nan_reason"]]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, allowed_column_sets)
        # columns ending in "_sample" are expected to be of type int
        _assert_sample_columns_int(data)
        _assert_has_index_levels(data, "heartbeat_id", match_atleast=True, match_order=False)
    except ValidationError as err:
        if raise_exception is not True:
            return False
        raise ValidationError(
            "The passed object does not seem to be a CPointDataFrame. "
            f"The validation failed with the following error:\n\n{err!s}"
        ) from err
    else:
        return True


def is_b_point_dataframe(data: BPointDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.BPointDataFrame`.

    The following checks are performed, in this order:

    * ``data`` is a :class:`~pandas.DataFrame`
    * the columns satisfy one of the accepted column sets: ``["b_point_sample"]`` or
      ``["b_point_sample", "nan_reason"]``
    * columns ending in ``_sample`` are of type int
    * the index has (at least) a level named ``heartbeat_id``; the level order is not checked

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``BPointDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the value ``True`` triggers an exception;
        any other value makes the function return ``False`` on a failed check. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``BPointDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``BPointDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.BPointDataFrame`
        dataframe format

    """
    # the "nan_reason" column is optional, hence two accepted column sets
    accepted_column_sets = [["b_point_sample"], ["b_point_sample", "nan_reason"]]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, accepted_column_sets)
        # assert that columns with "_sample" in the end are of type int
        _assert_sample_columns_int(data)
        _assert_has_index_levels(data, "heartbeat_id", match_atleast=True, match_order=False)
    except ValidationError as e:
        if raise_exception is True:
            # re-raise with a format-specific message, keeping the original error as the cause
            raise ValidationError(
                "The passed object does not seem to be a BPointDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_q_peak_dataframe(data: QPeakDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.QPeakDataFrame`.

    The following checks are performed, in this order:

    * ``data`` is a :class:`~pandas.DataFrame`
    * the columns satisfy one of the accepted column sets: ``["q_peak_sample"]`` or
      ``["q_peak_sample", "nan_reason"]``
    * columns ending in ``_sample`` are of type int
    * the index has (at least) a level named ``heartbeat_id``; the level order is not checked

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``QPeakDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the value ``True`` triggers an exception;
        any other value makes the function return ``False`` on a failed check. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``QPeakDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``QPeakDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.QPeakDataFrame`
        dataframe format

    """
    # the "nan_reason" column is optional, hence two accepted column sets
    accepted_column_sets = [["q_peak_sample"], ["q_peak_sample", "nan_reason"]]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(data, accepted_column_sets)
        # assert that columns with "_sample" in the end are of type int
        _assert_sample_columns_int(data)
        _assert_has_index_levels(data, "heartbeat_id", match_atleast=True, match_order=False)
    except ValidationError as e:
        if raise_exception is True:
            # re-raise with a format-specific message, keeping the original error as the cause
            raise ValidationError(
                "The passed object does not seem to be a QPeakDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True


def is_pep_result_dataframe(data: PepResultDataFrame, raise_exception: bool | None = True) -> bool | None:
    """Check whether dataframe is a :obj:`~biopsykit.utils.dtypes.PepResultDataFrame`.

    The following checks are performed, in this order:

    * ``data`` is a :class:`~pandas.DataFrame`
    * the columns satisfy one of the accepted column sets: ``PEP_RESULT_DATAFRAME_COLUMNS``, or
      ``PEP_RESULT_DATAFRAME_COLUMNS`` extended by ``"rr_interval_sample"``, ``"rr_interval_ms"``,
      ``"heart_rate_bpm"``, and ``"nan_reason"``
    * columns ending in ``_sample`` are of type int
    * the index has (at least) a level named ``heartbeat_id``; the level order is not checked

    Parameters
    ----------
    data : :class:`~pandas.DataFrame`
        data to check if it is a ``PepResultDataFrame``
    raise_exception : bool, optional
        whether to raise an exception or return a bool value. Only the value ``True`` triggers an exception;
        any other value makes the function return ``False`` on a failed check. Default: ``True``

    Returns
    -------
    ``True`` if ``data`` is a ``PepResultDataFrame``
    ``False`` otherwise (if ``raise_exception`` is ``False``)

    Raises
    ------
    ValidationError
        if ``raise_exception`` is ``True`` and ``data`` is not a ``PepResultDataFrame``

    See Also
    --------
    :obj:`~biopsykit.utils.dtypes.PepResultDataFrame`
        dataframe format

    """
    # columns that may be present in addition to the base PEP result columns
    optional_columns = ["rr_interval_sample", "rr_interval_ms", "heart_rate_bpm", "nan_reason"]
    try:
        _assert_is_dtype(data, pd.DataFrame)
        _assert_has_columns(
            data,
            column_sets=[
                PEP_RESULT_DATAFRAME_COLUMNS,
                [*PEP_RESULT_DATAFRAME_COLUMNS, *optional_columns],
            ],
        )
        # assert that columns with "_sample" in the end are of type int
        _assert_sample_columns_int(data)
        _assert_has_index_levels(data, "heartbeat_id", match_atleast=True, match_order=False)
    except ValidationError as e:
        if raise_exception is True:
            # re-raise with a format-specific message, keeping the original error as the cause
            raise ValidationError(
                "The passed object does not seem to be a PepResultDataFrame. "
                f"The validation failed with the following error:\n\n{e!s}"
            ) from e
        return False
    return True