Coverage for sleapyfaces/normalize.py: 48%

25 statements  

« prev     ^ index     » next       coverage.py v7.0.2, created at 2023-01-03 12:07 -0800

1import pandas as pd 

2from sklearn.decomposition import PCA 

3 

4 

def mean_center(data: pd.DataFrame, track_names: list[str]) -> pd.DataFrame:
    """Subtract each track column's mean from that column.

    The centered values are written back into ``data`` through ``.loc``, so
    the frame passed in is modified and that same object is returned.

    Args:
        data (pd.DataFrame): Frame containing the track columns.
        track_names (list[str]): Labels of the columns to center.

    Returns:
        pd.DataFrame: ``data`` with the selected columns mean centered.
    """
    selected = data.loc[:, track_names]
    column_means = selected.mean()
    # DataFrame - Series aligns each mean with its matching column label.
    data.loc[:, track_names] = selected - column_means
    return data

18 

19 

def z_score(data: pd.DataFrame, track_names: list[str]) -> pd.DataFrame:
    """Standardize the track columns to z-scores.

    The columns are first mean centered with ``mean_center``; each one is
    then divided by its own standard deviation as computed by ``std()``
    with pandas' default settings. No guard is applied for a standard
    deviation of zero.

    Args:
        data (pd.DataFrame): Frame containing the track columns.
        track_names (list[str]): Labels of the columns to standardize.

    Returns:
        pd.DataFrame: The frame returned by ``mean_center`` with each
            selected column scaled by its standard deviation.
    """
    centered = mean_center(data, track_names)
    for name in track_names:
        column = centered.loc[:, name]
        # The quotient is fully computed before it is written back.
        centered.loc[:, name] = column / column.std()
    return centered

33 

34 

def pca(data: pd.DataFrame, track_names: list[str]) -> dict[str, pd.DataFrame]:
    """Project the track columns onto 2 and 3 principal components.

    A separate ``PCA`` model is fitted for each dimensionality. Each result
    frame holds the non-track columns of ``data`` after ``reset_index()``
    (which moves the existing index into a column), followed column-wise by
    the component scores.

    Args:
        data (pd.DataFrame): Frame containing the track columns plus any
            other columns to carry over.
        track_names (list[str]): Labels of the columns fed to the PCA.

    Returns:
        dict[str, pd.DataFrame]: The reduced data with keys "pca2d" and
            "pca3d".
    """
    component_labels = (
        "principal component 1",
        "principal component 2",
        "principal component 3",
    )
    features = data.loc[:, track_names]
    carried_over = data.drop(columns=track_names)

    reduced = {}
    # 2D is fitted before 3D, and the dict keys are inserted in that order.
    for key, n_dims in (("pca2d", 2), ("pca3d", 3)):
        scores = PCA(n_components=n_dims).fit_transform(features)
        score_frame = pd.DataFrame(
            scores, columns=list(component_labels[:n_dims])
        )
        # reset_index() gives a fresh RangeIndex so rows line up with the
        # score frame when concatenating along the column axis.
        reduced[key] = pd.concat(
            [carried_over.reset_index(), score_frame], axis=1
        )

    return reduced