Coverage for sleapyfaces/io.py: 47% (106 statements)


from dataclasses import dataclass
from io import FileIO
from os import PathLike
import json

import ffmpeg
import h5py as h5
import numpy as np
import pandas as pd

from sleapyfaces.utils import (
    fill_missing,
    json_dumps,
    save_dict_to_hdf5,
    save_dt_to_hdf5,
    tracks_deconstructor,
)


@dataclass(slots=True)
class DAQData:
    """
    Summary:
        Cache for DAQ data.

    Attrs:
        path (str | PathLike[str]): Path to the csv file containing the DAQ data.
        cache (pd.DataFrame): Pandas DataFrame containing the DAQ data.
        columns (list): List of column names in the cache.

    Methods:
        append: Append a column to the cache.
        saveData: Save the cache to a csv file.
    """

    path: str | PathLike[str]
    cache: pd.DataFrame
    columns: list

    def __init__(self, path: str | PathLike[str]):
        self.path = path
        self.cache = pd.read_csv(self.path)
        # Drop the first column name (the csv's index column).
        self.columns = self.cache.columns.to_list()[1:]

    def append(self, name: str, value: list) -> None:
        """Takes in a list with a name and appends it to the cache as a column.

        Args:
            name (str): The column name.
            value (list): The column data.

        Raises:
            ValueError: If the length of the list matches neither the number of rows
                nor the number of columns in the cached data.
        """
        if len(value) == len(self.cache.iloc[:, 0]):
            # Same length as the rows: append the list as a new column.
            self.cache = pd.concat(
                [self.cache, pd.DataFrame(value, columns=[name])], axis=1
            )
        elif len(value) == len(self.cache.iloc[0, :]):
            # Same length as the columns: treat the list as new column names.
            self.cache.columns = value
        else:
            raise ValueError("Length of list does not match length of cached data.")

    def saveData(self, filename: str | PathLike[str] | FileIO) -> None:
        """Saves the cached data to a csv file.

        Args:
            filename (str | PathLike[str] | FileIO): the name of the file to save the data to
        """
        # Check for an open file object first, since FileIO has no endswith().
        if isinstance(filename, FileIO) or str(filename).lower().endswith(".csv"):
            self.cache.to_csv(filename, index=True)
        else:
            self.cache.to_csv(f"{filename}.csv", index=True)

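# A minimal usage sketch for DAQData (the file name and column values below are
# hypothetical, not part of the package):
#
#     daq = DAQData("20211105_daq.csv")
#     daq.append("trial", list(range(len(daq.cache.index))))  # one entry per row
#     daq.saveData("daq_labeled")                             # writes daq_labeled.csv
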

@dataclass(slots=True)
class SLEAPanalysis:
    """
    Summary:
        A class for reading and storing SLEAP analysis files.

    Args:
        path (str | PathLike[str]): path to the SLEAP analysis (HDF5) file

    Attributes:
        data (dict): dictionary of all the data from the SLEAP analysis file
        track_names (list): list of the track names from the SLEAP analysis file
        tracks (pd.DataFrame): a pandas DataFrame containing the tracks from the SLEAP analysis file
            (with missing frames filled in using a linear interpolation method)

    Methods:
        getDatasets: gets the datasets from the SLEAP analysis file
        getTracks: gets the tracks from the SLEAP analysis file
        getTrackNames: gets the track names from the SLEAP analysis file
        append: appends a column to the tracks DataFrame
        saveData: saves the data to an HDF5 file
    """

    path: str | PathLike[str]
    data: dict[str, np.ndarray | pd.DataFrame | list]
    tracks: pd.DataFrame
    track_names: list

    def __init__(self, path: str | PathLike[str]):
        self.path = path
        self.getDatasets()
        self.getTracks()
        self.getTrackNames()

    def getDatasets(self) -> None:
        """gets the datasets from the SLEAP analysis file

        Initializes Attributes:
            data (dict): dictionary of all the data from the SLEAP analysis file
        """
        self.data = {}
        with h5.File(f"{self.path}", "r") as f:
            for dataset in list(f.keys()):
                if len(f[dataset].shape) == 0:
                    # Skip scalar datasets.
                    continue
                elif dataset == "tracks":
                    # Transpose so frames lead, then interpolate missing values.
                    self.data[dataset] = fill_missing(f[dataset][:].T)
                elif "name" in dataset:
                    # Name datasets are byte strings; decode them to str.
                    self.data[dataset] = [n.decode() for n in f[dataset][:].flatten()]
                else:
                    self.data[dataset] = f[dataset][:].T

    def getTracks(self) -> None:
        """gets the tracks from the SLEAP analysis file

        Initializes Attributes:
            tracks (pd.DataFrame): a pandas DataFrame containing the tracks from the SLEAP analysis file
                (with missing frames filled in using a linear interpolation method)
        """
        if len(self.data.values()) == 0:
            raise ValueError("No data has been loaded.")
        self.tracks = tracks_deconstructor(
            self.data["tracks"], self.data["node_names"]
        )

    def getTrackNames(self) -> None:
        """gets the track names from the SLEAP analysis file

        Initializes Attributes:
            track_names (list): list of the track names from the SLEAP analysis file
        """
        # Each node contributes an interleaved pair of columns: "<node>_x", "<node>_y".
        self.track_names = [""] * (len(self.data["node_names"]) * 2)
        for i, name in enumerate(self.data["node_names"]):
            self.track_names[2 * i] = f"{name.replace(' ', '_')}_x"
            self.track_names[2 * i + 1] = f"{name.replace(' ', '_')}_y"

    def append(self, item: pd.Series | pd.DataFrame) -> None:
        """Appends a column to the tracks DataFrame

        Args:
            item (pd.Series | pd.DataFrame): The column to append to the tracks DataFrame

        Raises:
            ValueError: if the length of the column does not match the length of the tracks data columns
                (i.e. if the column is not the same length as the number of frames)

        Updates Attributes:
            tracks (pd.DataFrame): a pandas DataFrame containing the tracks from the SLEAP analysis file
        """
        if len(item.index) == len(self.tracks.index):
            self.tracks = pd.concat([self.tracks, item], axis=1)
        else:
            raise ValueError("Length of item does not match length of tracks data.")

    def saveData(self, filename: str | PathLike[str], path="SLEAP") -> None:
        """saves the SLEAP analysis data to an HDF5 file

        Args:
            filename (str | PathLike[str]): the name of the file to save the data to
            path (str, optional): the internal HDF5 path to save the data to. Defaults to "SLEAP".
        """
        if str(filename).endswith((".h5", ".hdf5")):
            # Open in append mode so existing groups in the file are preserved.
            with h5.File(filename, "a") as f:
                save_dict_to_hdf5(f, path, self.data)
            with pd.HDFStore(filename, mode="a") as store:
                save_dt_to_hdf5(store, self.tracks, f"{path}/tracks")

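# A minimal usage sketch for SLEAPanalysis (the file names are hypothetical):
#
#     sleap = SLEAPanalysis("20211105_analysis.h5")
#     print(sleap.track_names[:4])   # e.g. ["nose_x", "nose_y", ...]
#     sleap.saveData("session.h5")   # writes datasets + tracks under "/SLEAP"
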

@dataclass(slots=True)
class BehMetadata:
    """
    Summary:
        Cache for JSON behavioral metadata.

    Args:
        path (str | PathLike[str]): Path to the file containing the JSON data.
        MetaDataKey (str, optional): Key for the metadata in the JSON data. Defaults to "beh_metadata" based on bruker_control.
        TrialArrayKey (str, optional): Key for the trial array in the JSON data. Defaults to "trialArray" based on bruker_control.
        ITIArrayKey (str, optional): Key for the ITI array in the JSON data. Defaults to "ITIArray" based on bruker_control.

    Bruker Control Repository:
        Link: https://github.com/Tyelab/bruker_control
        Author: Jeremy Delahanty

    Attributes:
        cache (pd.DataFrame): Pandas DataFrame containing the JSON data.
        columns (list): List of column names in the cache.

    Methods:
        saveData: saves the data to a csv file
    """

    path: str | PathLike[str]
    MetaDataKey: str
    TrialArrayKey: str
    ITIArrayKey: str
    cache: pd.DataFrame
    columns: list[str]

    def __init__(
        self,
        path: str | PathLike[str],
        MetaDataKey="beh_metadata",
        TrialArrayKey="trialArray",
        ITIArrayKey="ITIArray",
    ):
        self.path = path
        self.MetaDataKey = MetaDataKey
        self.TrialArrayKey = TrialArrayKey
        self.ITIArrayKey = ITIArrayKey

        with open(self.path, "r") as json_file:
            json_data = json.load(json_file)
        metadata = json_data.get(self.MetaDataKey)
        trialArray = metadata[self.TrialArrayKey]
        ITIArray = metadata[self.ITIArrayKey]
        self.cache = pd.DataFrame(
            {self.TrialArrayKey: trialArray, self.ITIArrayKey: ITIArray},
            columns=[self.TrialArrayKey, self.ITIArrayKey],
        )
        self.columns = self.cache.columns.to_list()

    def saveData(self, filename: str | PathLike[str] | FileIO) -> None:
        """Saves the behavioral metadata to a csv file.

        Args:
            filename (str | PathLike[str] | FileIO): The name and path of the file to save the data to.
        """
        # Check for an open file object first, since FileIO has no endswith().
        if isinstance(filename, FileIO) or str(filename).lower().endswith(".csv"):
            self.cache.to_csv(filename, index=True)
        else:
            self.cache.to_csv(f"{filename}.csv", index=True)

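# A minimal usage sketch for BehMetadata (the file name is hypothetical; the
# default keys match bruker_control's JSON layout):
#
#     beh = BehMetadata("20211105_config.json")
#     print(beh.cache.head())       # trialArray and ITIArray as columns
#     beh.saveData("beh_metadata")  # writes beh_metadata.csv
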

@dataclass(slots=True)
class VideoMetadata:
    """
    Summary:
        Class for caching video metadata.

    Args:
        path (str | PathLike[str]): Path to the video file.

    Attributes:
        cache (dict): Dictionary containing the video metadata from ffmpeg.
        fps (float): Frames per second of the video data.

    Methods:
        saveData: saves the data to a json file
    """

    path: str | PathLike[str]
    cache: dict
    fps: float

    def __init__(self, path: str | PathLike[str]):
        self.path = path
        # Probe once and cache the metadata of the file's last stream.
        probe = ffmpeg.probe(f"{self.path}")
        self.cache = probe["streams"][int(probe["format"]["nb_streams"]) - 1]
        # avg_frame_rate is a ratio string such as "30000/1001"; parse it
        # directly instead of eval()-ing it.
        num, _, den = self.cache.get("avg_frame_rate").partition("/")
        self.fps = float(num) / float(den) if den else float(num)

    def saveData(self, filename: str | PathLike[str] | FileIO) -> None:
        """Saves the video metadata to a json file.

        Args:
            filename (str | PathLike[str] | FileIO): the name and path of the file to save the data to.
        """
        # Check for an open file object first, since FileIO has no endswith().
        if isinstance(filename, FileIO) or str(filename).lower().endswith(".json"):
            json_dumps(self.cache, filename)
        else:
            json_dumps(self.cache, f"{filename}.json")
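
# A minimal usage sketch for VideoMetadata (the file name is hypothetical):
#
#     video = VideoMetadata("20211105_session.mp4")
#     print(video.fps)                  # e.g. 29.97
#     video.saveData("video_metadata")  # writes video_metadata.json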