# Source code for biopsykit.signals.icg.event_extraction._c_point_scipy_findpeaks

import warnings

import numpy as np
import pandas as pd
from scipy import signal
from tpcp import Parameter

from biopsykit.signals._base_extraction import HANDLE_MISSING_EVENTS, CanHandleMissingEventsMixin
from biopsykit.signals.icg.event_extraction._base_c_point_extraction import BaseCPointExtraction
from biopsykit.utils.array_handling import sanitize_input_dataframe_1d
from biopsykit.utils.dtypes import (
    HeartbeatSegmentationDataFrame,
    IcgRawDataFrame,
    is_c_point_dataframe,
    is_heartbeat_segmentation_dataframe,
    is_icg_raw_dataframe,
)
from biopsykit.utils.exceptions import EventExtractionError

# Names exported by a star-import of this module: only the extraction class is listed.
__all__ = ["CPointExtractionScipyFindPeaks"]


class CPointExtractionScipyFindPeaks(BaseCPointExtraction, CanHandleMissingEventsMixin):
    """C-point extraction algorithm using :func:`~scipy.signal.find_peaks`.

    For every segmented heartbeat, C-point candidates are detected as peaks of the ICG derivative signal with a
    prominence of at least 1 using the :func:`~scipy.signal.find_peaks` function.

    * If exactly one candidate is found, it is accepted as C-point unless it occurs before the R-peak.
    * If several candidates are found, the candidate whose R-C-distance is closest to the mean R-C-distance of the
      ``window_c_correction`` preceding heartbeats (with a detected C-point) is selected.
    * If several candidates are found, but no preceding R-C-distance is available (e.g., for the first heartbeat),
      the most prominent candidate is selected.

    Attributes
    ----------
    points_ : :class:`~pandas.DataFrame`
        Extracted C-points, set by :meth:`extract`. Indexed like the ``heartbeats`` input, with the columns
        ``c_point_sample`` (sample index relative to the complete signal) and ``nan_reason``.

    """

    # input parameters
    window_c_correction: Parameter[int]

    def __init__(
        self,
        window_c_correction: int = 3,
        handle_missing_events: HANDLE_MISSING_EVENTS = "warn",
    ):
        """Initialize new ``CPointExtractionScipyFindPeaks`` instance.

        Parameters
        ----------
        window_c_correction : int, optional
            Number of preceding heartbeats taken into account for C-point correction (using mean R-C-distance).
            Default: 3.
        handle_missing_events : one of {"warn", "raise", "ignore"}, optional
            How to handle failing event extraction. Must be one of:

            - ``"warn"``: issue a warning and set the event to NaN,
            - ``"raise"``: raise an ``EventExtractionError``, or
            - ``"ignore"``: continue silently.

            Default: ``"warn"``.

        """
        super().__init__(handle_missing_events=handle_missing_events)
        self.window_c_correction = window_c_correction

    def extract(
        self,
        *,
        icg: IcgRawDataFrame,
        heartbeats: HeartbeatSegmentationDataFrame,
        sampling_rate_hz: float | None,  # noqa: ARG002
    ):
        """Extract C-points from given cleaned ICG derivative signal using :func:`~scipy.signal.find_peaks`.

        Within each segmented heartbeat, all peaks with a prominence of at least 1 are considered C-point candidates.
        If more than one candidate is found, the one whose distance to the R-peak deviates least from the mean
        R-C-distance of the preceding heartbeats is selected. If no preceding R-C-distance is available, the most
        prominent candidate is selected instead.

        The resulting C-points are saved in the ``points_`` attribute of the class instance.

        Parameters
        ----------
        icg : :class:`~pandas.DataFrame`
            cleaned ICG derivative signal
        heartbeats : :class:`~pandas.DataFrame`
            Dataframe containing one row per segmented heartbeat, each row contains start, end, and R-peak.
            Result from :class:`~biopsykit.signals.ecg.segmentation.HeartbeatSegmentationNeurokit`.
        sampling_rate_hz : float or None
            Sampling rate of ICG derivative signal in Hz. Not used in this function.

        Returns
        -------
        self

        Raises
        ------
        :exc:`~biopsykit.utils.exceptions.EventExtractionError`
            If no valid C-point could be detected in at least one heartbeat and ``handle_missing_events`` is
            ``"raise"``.

        """
        self._check_valid_missing_handling()
        is_icg_raw_dataframe(icg)
        is_heartbeat_segmentation_dataframe(heartbeats)
        icg = sanitize_input_dataframe_1d(icg, column="icg_der")
        icg = icg.squeeze()

        # result df
        c_points = pd.DataFrame(index=heartbeats.index, columns=["c_point_sample", "nan_reason"])

        # R-C-distances (in samples) of the up to 'window_c_correction' most recent heartbeats with a detected C-point
        # R-C-distances are positive when C-point occurs after R-Peak (which is the physiologically correct order)
        prev_r_c_distances = []

        # used subsequently to store heartbeats for which no C-point could be detected
        heartbeats_no_c = []

        # search C-point for each heartbeat of the given signal
        for idx, data in heartbeats.iterrows():
            # slice signal for current heartbeat
            heartbeat_start = data["start_sample"]
            heartbeat_end = data["end_sample"]

            heartbeat_icg_der = icg.iloc[heartbeat_start:heartbeat_end].squeeze()

            # calculate R-peak position relative to start of current heartbeat
            heartbeat_r_peak = data["r_peak_sample"] - heartbeat_start

            # detect possible C-point candidates, prominence=1 gives reasonable Cs
            # (prominence=2 results in a considerable amount of heartbeats with no C)
            # (prominence=1 might detect more than one C in one heartbeat, but that will be corrected subsequently)
            heartbeat_c_candidates, peak_properties = signal.find_peaks(heartbeat_icg_der, prominence=1)

            if len(heartbeat_c_candidates) < 1:
                heartbeats_no_c.append(idx)
                c_points.loc[idx, "c_point_sample"] = np.nan
                continue

            # calculates distance of R-peak to all C-candidates in samples, positive when C occurs after R
            r_c_distances = heartbeat_c_candidates - heartbeat_r_peak

            if len(heartbeat_c_candidates) == 1:
                c_idx = 0

                # C-point before R-peak is invalid
                if r_c_distances[c_idx] < 0:
                    heartbeats_no_c.append(idx)
                    c_points.loc[idx, "c_point_sample"] = np.nan
                    continue
            elif prev_r_c_distances:
                # take averaged R-C-distance over the 'window_c_correction' (default: 3) preceding heartbeats
                # calculate the absolute difference of R-C-distances for all C-candidates to this mean
                # (to check which of the C-candidates are most probably the wrongly detected Cs)
                distance_diff = np.abs(r_c_distances - np.mean(prev_r_c_distances))

                # choose the C-candidate with the smallest absolute difference in R-C-distance
                # (the one, where R-C-distance changed the least compared to previous heartbeats)
                c_idx = np.argmin(distance_diff)
            else:
                # no R-C-distance history available (e.g., first heartbeat or 'window_c_correction' <= 0):
                # comparing against a NaN mean would let np.argmin silently return the first candidate,
                # so explicitly fall back to the most prominent peak instead
                c_idx = np.argmax(peak_properties["prominences"])

            selected_c = heartbeat_c_candidates[c_idx]
            r_c_distance = r_c_distances[c_idx]  # keep only R-C-distance for selected C

            # update R-C-distance history for next heartbeat, keeping at most 'window_c_correction' entries
            prev_r_c_distances.append(r_c_distance)
            if len(prev_r_c_distances) > self.window_c_correction:
                prev_r_c_distances.pop(0)

            # save C-point relative to complete signal
            c_points.loc[idx, "c_point_sample"] = selected_c + heartbeat_start

        if len(heartbeats_no_c) > 0:
            c_points.loc[heartbeats_no_c, "nan_reason"] = "no_c_detected"
            missing_str = f"No valid C-point detected in {len(heartbeats_no_c)} heartbeats ({heartbeats_no_c})"
            if self.handle_missing_events == "warn":
                # stacklevel=2 attributes the warning to the caller of ``extract``
                warnings.warn(missing_str, stacklevel=2)
            elif self.handle_missing_events == "raise":
                raise EventExtractionError(missing_str)

        c_points = c_points.astype({"c_point_sample": "Int64", "nan_reason": "object"})
        is_c_point_dataframe(c_points)

        self.points_ = c_points
        return self