Source code for biopsykit.signals.icg.event_extraction._c_point_scipy_findpeaks
import warnings
import numpy as np
import pandas as pd
from scipy import signal
from tpcp import Parameter
from biopsykit.signals._base_extraction import HANDLE_MISSING_EVENTS, CanHandleMissingEventsMixin
from biopsykit.signals.icg.event_extraction._base_c_point_extraction import BaseCPointExtraction
from biopsykit.utils.array_handling import sanitize_input_dataframe_1d
from biopsykit.utils.dtypes import (
HeartbeatSegmentationDataFrame,
IcgRawDataFrame,
is_c_point_dataframe,
is_heartbeat_segmentation_dataframe,
is_icg_raw_dataframe,
)
from biopsykit.utils.exceptions import EventExtractionError
__all__ = ["CPointExtractionScipyFindPeaks"]
[docs]class CPointExtractionScipyFindPeaks(BaseCPointExtraction, CanHandleMissingEventsMixin):
"""C-point extraction algorithm using :func:`~scipy.signal.find_peaks`.
This algorithm extracts C-points based on the maximum of the most prominent peak in the ICG derivative signal
using the :func:`~scipy.signal.find_peaks` function.
"""
# input parameters
window_c_correction: Parameter[int]
def __init__(
self,
window_c_correction: int = 3,
handle_missing_events: HANDLE_MISSING_EVENTS = "warn",
):
"""Initialize new ``CPointExtractionScipyFindPeaks`` instance.
Parameters
----------
window_c_correction : int, optional
Number of preceding heartbeats taken into account for C-point correction (using mean R-C-distance).
Default: 3.
handle_missing_events : one of {"warn", "raise", "ignore"}, optional
How to handle failing event extraction. Must be one of:
- ``"warn"``: issue a warning and set the event to NaN,
- ``"raise"``: raise an ``EventExtractionError``, or
- ``"ignore"``: continue silently.
Default: ``"warn"``.
"""
super().__init__(handle_missing_events=handle_missing_events)
self.window_c_correction = window_c_correction
# @make_action_safe
[docs] def extract(
self,
*,
icg: IcgRawDataFrame,
heartbeats: HeartbeatSegmentationDataFrame,
sampling_rate_hz: float | None, # noqa: ARG002
):
"""Extract C-points from given cleaned ICG derivative signal using :func:`~scipy.signal.find_peaks`.
The C-point is detected as the maximum of the most prominent peak in the ICG derivative signal within each
segmented heartbeat.
The resulting C-points are saved in the ``points_`` attribute of the class instance.
Parameters
----------
icg : :class:`~pandas.DataFrame`
cleaned ICG derivative signal
heartbeats : :class:`~pandas.DataFrame`
Dataframe containing one row per segmented heartbeat, each row contains start, end, and R-peak.
Result from :class:`~biopsykit.signals.ecg.segmentation.HeartbeatSegmentationNeurokit`.
sampling_rate_hz : int
Sampling rate of ICG derivative signal in Hz. Not used in this function.
Returns
-------
self
"""
self._check_valid_missing_handling()
is_icg_raw_dataframe(icg)
is_heartbeat_segmentation_dataframe(heartbeats)
icg = sanitize_input_dataframe_1d(icg, column="icg_der")
icg = icg.squeeze()
# result df
c_points = pd.DataFrame(index=heartbeats.index, columns=["c_point_sample", "nan_reason"])
# distance of R-peak to C-point, averaged over as many preceding heartbeats as window_c_correction specifies
# R-C-distances are positive when C-point occurs after R-Peak (which is the physiologically correct order)
mean_prev_r_c_distance = np.nan
# saves R-C-distances of previous heartbeats
prev_r_c_distances = []
# used subsequently to store heartbeats for which no C-point could be detected
heartbeats_no_c = []
# search C-point for each heartbeat of the given signal
for idx, data in heartbeats.iterrows():
# slice signal for current heartbeat
heartbeat_start = data["start_sample"]
heartbeat_end = data["end_sample"]
heartbeat_icg_der = icg.iloc[heartbeat_start:heartbeat_end].squeeze()
# calculate R-peak position relative to start of current heartbeat
heartbeat_r_peak = data["r_peak_sample"] - heartbeat_start
# detect possible C-point candidates, prominence=1 gives reasonable Cs
# (prominence=2 results in a considerable amount of heartbeats with no C)
# (prominence=1 might detect more than one C in one heartbeat, but that will be corrected subsequently)
heartbeat_c_candidates = signal.find_peaks(heartbeat_icg_der, prominence=1)[0]
if len(heartbeat_c_candidates) < 1:
heartbeats_no_c.append(idx)
c_points.loc[idx, "c_point_sample"] = np.nan
continue
# calculates distance of R-peak to all C-candidates in samples, positive when C occurs after R
r_c_distance = heartbeat_c_candidates - heartbeat_r_peak
if len(heartbeat_c_candidates) == 1:
selected_c = heartbeat_c_candidates[0] # convert to int (instead of array)
r_c_distance = r_c_distance[0]
# C-point before R-peak is invalid
if r_c_distance < 0:
heartbeats_no_c.append(idx)
c_points.loc[idx, "c_point_sample"] = np.nan
continue
else:
# take averaged R-C-distance over the 'window_c_correction' (default: 3) preceding heartbeats
# calculate the absolute difference of R-C-distances for all C-candidates to this mean
# (to check which of the C-candidates are most probably the wrongly detected Cs)
distance_diff = np.abs(r_c_distance - mean_prev_r_c_distance)
# choose the C-candidate with the smallest absolute difference in R-C-distance
# (the one, where R-C-distance changed the least compared to previous heartbeats)
c_idx = np.argmin(distance_diff)
selected_c = heartbeat_c_candidates[c_idx]
r_c_distance = r_c_distance[c_idx] # save only R-C-distance for selected C
# update R-C-distances and mean for next heartbeat
prev_r_c_distances.append(r_c_distance)
if len(prev_r_c_distances) > self.window_c_correction:
prev_r_c_distances.pop(0)
mean_prev_r_c_distance = np.mean(prev_r_c_distances)
# save C-point (and C-candidates) to result property
c_points.loc[idx, "c_point_sample"] = (
selected_c + heartbeat_start
) # get C-point relative to complete signal
if len(heartbeats_no_c) > 0:
c_points.loc[heartbeats_no_c, "nan_reason"] = "no_c_detected"
missing_str = f"No valid C-point detected in {len(heartbeats_no_c)} heartbeats ({heartbeats_no_c})"
if self.handle_missing_events == "warn":
warnings.warn(missing_str)
elif self.handle_missing_events == "raise":
raise EventExtractionError(missing_str)
c_points = c_points.astype({"c_point_sample": "Int64", "nan_reason": "object"})
is_c_point_dataframe(c_points)
self.points_ = c_points
return self