# Source code for skferm.datasets.rheolaser

from importlib.resources import files
from typing import Optional

import pandas as pd



[docs]
def clean_rheolaser(df: pd.DataFrame, cutoff: Optional[int] = None) -> pd.DataFrame:
    """
    Transform a raw Rheolaser export into a long-format DataFrame.

    The input is read as consecutive column pairs, one pair per sample. The
    header of the first column of each pair provides the sample id (with the
    " - Elasticity Index" suffix removed); the first column becomes ``time``
    and the second becomes ``elasticity_index``.

    Args:
        df: Raw wide-format export. Columns that are entirely NaN are dropped
            before the pairs are formed.
        cutoff: Optional upper bound (inclusive) applied to the rescaled
            ``time`` column. ``None`` disables filtering; ``0`` is a valid
            bound and keeps only rows with ``time <= 0``.

    Returns:
        pd.DataFrame: Columns ``sample_id``, ``time`` and ``elasticity_index``,
        sorted by ``sample_id`` then ``time``. ``time`` is the raw value
        divided by 60 and ``elasticity_index`` the raw value multiplied by
        1000 (NOTE(review): presumably seconds -> minutes and a unit
        rescaling; confirm against the instrument documentation). When a
        cutoff is applied the surviving rows keep their pre-filter index
        labels. If no sample columns remain, an empty DataFrame with the
        three output columns is returned.
    """
    output_columns = ["sample_id", "time", "elasticity_index"]

    # Fully empty columns carry no measurements and would break the pairing.
    df = df.dropna(axis=1, how="all")

    # Every even-positioned header names a sample.
    ids = [s.replace(" - Elasticity Index", "") for s in df.columns.tolist()[0::2]]

    # Slice out each (time, elasticity_index) pair and tag it with its sample.
    frames = []
    for i, sample_id in enumerate(ids):
        sub_df = df.iloc[:, i * 2 : i * 2 + 2].copy()
        sub_df.columns = ["time", "elasticity_index"]
        # Samples can have different lengths; drop the NaN padding rows.
        frames.append(sub_df.assign(sample_id=sample_id).dropna())

    # pd.concat raises on an empty list, so return an empty, well-formed frame.
    if not frames:
        return pd.DataFrame(columns=output_columns)

    result_df = (
        pd.concat(frames, axis=0)
        .reset_index(drop=True)
        .assign(time=lambda d: d["time"] / 60)
        .assign(elasticity_index=lambda d: d["elasticity_index"] * 1000)
        .loc[:, output_columns]
        .sort_values(["sample_id", "time"])
        .reset_index(drop=True)
    )

    # Compare against None explicitly so that cutoff=0 is honoured.
    if cutoff is not None:
        result_df = result_df.loc[lambda d: d["time"] <= cutoff]

    return result_df




[docs]
def load_rheolaser_data(clean: bool = True, cutoff: Optional[int] = None) -> pd.DataFrame:
    """
    Load the bundled Rheolaser dataset from the package resources.

    The resource ``rheolaser_export.csv.gz`` in ``skferm.data`` is read as a
    gzip-compressed CSV.

    Args:
        clean: When True (the default), the raw frame is passed through
            `clean_rheolaser` before being returned. When False, the frame is
            returned exactly as parsed from the CSV.
        cutoff: Forwarded to `clean_rheolaser`; ignored when ``clean`` is
            False.

    Returns:
        pd.DataFrame: The cleaned dataset, or the raw parsed export if
        ``clean`` is False.
    """
    resource = files("skferm.data").joinpath("rheolaser_export.csv.gz")

    # Open in binary mode and let pandas handle the gzip decompression.
    with resource.open("rb") as handle:
        raw = pd.read_csv(handle, compression="gzip")

    return clean_rheolaser(raw, cutoff=cutoff) if clean else raw