Coverage for rt.py : 29%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from pathlib import Path
2from typing import Union, List
4from tqdm.auto import tqdm
5import pandas as pd
6from elfragmentador import constants
7from elfragmentador.evaluate import polyfit
def calculate_file_iRT(file: Union[Path, str]) -> pd.DataFrame:
    """Add per-run iRT predictions to a peak-boundary table read from CSV.

    The CSV is loaded with pandas and must provide the columns
    ``"File Name"``, ``"Peptide Modified Sequence"``, ``"Min Start Time"``
    and ``"Max End Time"``. For every distinct ``"File Name"`` the rows whose
    sequence is a key of ``constants.IRT_PEPTIDES`` are passed to ``polyfit``
    (their ``"RT"`` against the reference ``"irt"`` values); the resulting
    fit is then applied to every row of that run.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A copy of the table with the added ``"RT"`` and ``"Calculated iRT"``
        columns, after dropping every row that has a missing value in any
        column. Rows of runs with fewer than 4 iRT peptide rows receive no
        prediction and are therefore dropped as well.
    """

    def _apply_fit(rt, fit):
        # Runs without a calibration yield None, which dropna() removes later.
        if fit is None:
            return None
        coefs = fit["polynomial"]
        return rt * coefs[0] + coefs[1]

    table = pd.read_csv(str(file))
    # NOTE(review): this is the sum of both peak boundaries, not their
    # midpoint -- confirm that the doubled scale is intended.
    table["RT"] = table["Min Start Time"] + table["Max End Time"]

    calibrations = {}
    for run_name, run_rows in table.groupby("File Name"):
        is_reference = [
            seq in constants.IRT_PEPTIDES
            for seq in run_rows["Peptide Modified Sequence"]
        ]
        reference_rows = run_rows[is_reference].copy()
        # At least 4 reference peptides are required before fitting a run.
        if len(reference_rows) >= 4:
            reference_rows["iRT"] = [
                constants.IRT_PEPTIDES[seq]["irt"]
                for seq in reference_rows["Peptide Modified Sequence"]
            ]
            calibrations[run_name] = polyfit(
                reference_rows["RT"], reference_rows["iRT"]
            )

    predictions = []
    for run_name, rt in zip(table["File Name"], table["RT"]):
        predictions.append(_apply_fit(rt, calibrations.get(run_name)))
    table["Calculated iRT"] = predictions

    complete_rows = table.dropna()
    # NOTE(review): reindex() is called without arguments, so it presumably
    # does not renumber the rows -- confirm whether reset_index() was meant.
    return complete_rows.copy().reindex()
def calculate_multifile_iRT(filelist: List[Union[str, Path]]) -> pd.DataFrame:
    """Summarise the calculated iRT of each peptide across several files.

    Every file is processed with ``calculate_file_iRT`` (wrapped in a tqdm
    progress bar), the per-file tables are concatenated, and the rows are
    grouped by every column whose name contains ``"Sequence"``.

    Args:
        filelist: Paths of the CSV files to process.

    Returns:
        One row per sequence group with the columns
        ``"Calculated iRT mean"``, ``"Calculated iRT std"`` and
        ``"Calculated iRT count"``, sorted by ascending
        ``"Calculated iRT std"``. Statistics that come out missing (for
        example the std of a single observation) are reported as 0.

    Raises:
        ValueError: Propagated from ``pd.concat`` when ``filelist`` is empty.
    """
    per_file = (calculate_file_iRT(path) for path in tqdm(filelist))
    combined = pd.concat(per_file)

    group_cols = [col for col in combined.columns if "Sequence" in col]
    summary = (
        combined.groupby(group_cols)
        .aggregate({"Calculated iRT": ["mean", "std", "count"]})
        .fillna(0)
    )
    # Flatten the two-level (column, statistic) header into single names.
    summary.columns = [" ".join(col) for col in summary.columns.values]
    # sort_values returns a new frame rather than sorting in place, so its
    # result has to be the value that is returned for the sort to matter.
    return summary.sort_values("Calculated iRT std")