Source code for skferm.datasets.rheolaser
from importlib.resources import files
from typing import Optional
import pandas as pd
[docs]
def clean_rheolaser(df: pd.DataFrame, cutoff: Optional[int] = None) -> pd.DataFrame:
    """
    Transform a raw (wide) Rheolaser export into a tidy long-format frame.

    After dropping columns that are entirely empty, the remaining columns are
    read as consecutive pairs, one pair per sample: the first column of each
    pair becomes ``time`` and the second becomes ``elasticity_index``. The
    sample id is the header of the first column of the pair with the
    ``" - Elasticity Index"`` suffix removed.

    ``time`` is divided by 60 and ``elasticity_index`` is multiplied by 1000
    (presumably seconds -> minutes and a unit rescale; confirm against the
    instrument's export format).

    Args:
        df: Raw export with two columns per sample.
        cutoff: If given, keep only rows whose converted ``time`` (i.e. after
            the division by 60) is ``<= cutoff``. ``0`` is a valid cutoff.
            The row labels of the filtered frame are not renumbered.

    Returns:
        pd.DataFrame: Columns ``sample_id``, ``time``, ``elasticity_index``,
        sorted by ``sample_id`` then ``time``. Empty (with those columns) when
        the input has no non-empty columns.
    """
    output_columns = ["sample_id", "time", "elasticity_index"]

    df = df.dropna(axis=1, how="all")

    # Every even-positioned header carries the sample id.
    ids = [s.replace(" - Elasticity Index", "") for s in df.columns.tolist()[0::2]]

    # Nothing to reshape; pd.concat would raise on an empty list of frames.
    if not ids:
        return pd.DataFrame(columns=output_columns)

    # Transform to long format: one two-column slice per sample.
    separate_frames = []
    for i, sample_id in enumerate(ids):
        sub_df = df.iloc[:, i * 2 : i * 2 + 2].copy()
        sub_df.columns = ["time", "elasticity_index"]
        # Samples can have fewer measurements than the longest one; the
        # padding rows show up as NaN and are dropped here.
        separate_frames.append(sub_df.assign(sample_id=sample_id).dropna())

    result_df = (
        pd.concat(separate_frames, axis=0)
        .reset_index(drop=True)
        .assign(time=lambda d: d["time"] / 60)
        .assign(elasticity_index=lambda d: d["elasticity_index"] * 1000)
        .loc[:, output_columns]
        .sort_values(["sample_id", "time"])
        .reset_index(drop=True)
    )

    # Explicit None check so that cutoff=0 is honoured instead of ignored.
    if cutoff is not None:
        result_df = result_df.loc[lambda d: d["time"] <= cutoff]
    return result_df
[docs]
def load_rheolaser_data(clean: bool = True, cutoff: Optional[int] = None) -> pd.DataFrame:
    """
    Load the Rheolaser dataset bundled with the package.

    The data is read from the gzip-compressed CSV ``rheolaser_export.csv.gz``
    in the ``skferm.data`` package resources. The raw file is the export
    format produced by a Rheolaser machine.

    Args:
        clean: When true (the default), pass the raw frame through
            `clean_rheolaser` and return the long-format result. When false,
            return the raw frame exactly as parsed from the CSV.
        cutoff: Forwarded to `clean_rheolaser`. Ignored when ``clean`` is
            false.

    Returns:
        pd.DataFrame: DataFrame containing the Rheolaser dataset.
    """
    resource = files("skferm.data").joinpath("rheolaser_export.csv.gz")
    with resource.open("rb") as handle:
        raw = pd.read_csv(handle, compression="gzip")

    if not clean:
        return raw
    return clean_rheolaser(raw, cutoff=cutoff)