csi_images.csi_events
Contains the Event class, which represents a single event in a scan. The Event class optionally holds metadata and features. Lists of events with similar metadata or features can be combined into DataFrames for analysis.
The Event class holds the position of the event in the frame, which can be converted to scanner or slide coordinates. See the csi_utils.csi_scans documentation page for more information on the coordinate systems.
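For example, here is a minimal usage sketch of the coordinate conversion. The slide ID, tile number, and pixel position are hypothetical placeholders, and Scan.make_placeholder (referenced by this module) is used only as a stand-in for a real Scan loaded from scan metadata:

from csi_images.csi_scans import Scan
from csi_images.csi_tiles import Tile
from csi_images.csi_events import Event

# Hypothetical inputs; a real Scan would normally be loaded from the scan's metadata
scan = Scan.make_placeholder("EXAMPLE_SLIDE", 100, 0)
tile = Tile(scan, 100, 0)          # tile number 100 in ROI 0
event = Event(tile, x=250, y=300)  # pixel position within the tile's frame

x_um, y_um = event.get_scan_position()     # scanner coordinates, in micrometers
sx_um, sy_um = event.get_slide_position()  # slide coordinates, via SCAN_TO_SLIDE_TRANSFORM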
1""" 2Contains the Event class, which represents a single event in a scan. 3The Event class optionally holds metadata and features. Lists of events with 4similar metadata or features can be combined into DataFrames for analysis. 5 6The Event class holds the position of the event in the frame, which can be converted 7to the position in the scanner or slide coordinate positions. See the 8csi_utils.csi_scans documentation page for more information on the coordinate systems. 9""" 10 11import os 12import glob 13import math 14import warnings 15from typing import Self, Iterable, Hashable, Sequence 16 17import numpy as np 18import pandas as pd 19 20from .csi_scans import Scan 21from .csi_tiles import Tile 22from .csi_frames import Frame 23 24# Optional dependencies; will raise errors in particular functions if not installed 25try: 26 from . import csi_images 27except ImportError: 28 csi_images = None 29try: 30 import imageio.v3 as imageio 31except ImportError: 32 imageio = None 33try: 34 import pyreadr 35except ImportError: 36 pyreadr = None 37 38 39class Event: 40 """ 41 A class that represents a single event in a scan, making it easy to evaluate 42 singular events. Required metadata is exposed as attributes, and optional 43 metadata and features are stored as DataFrames. 44 """ 45 46 SCAN_TO_SLIDE_TRANSFORM = { 47 # Axioscan zero is in the top-right corner instead of top-left 48 Scan.Type.AXIOSCAN7: np.array( 49 [ 50 [1, 0, 75000], 51 [0, 1, 0], 52 [0, 0, 1], 53 ] 54 ), 55 # BZScanner coordinates are a special kind of messed up: 56 # - The slide is upside-down. 57 # - The slide is oriented vertically, with the barcode at the bottom. 58 # - Tiles are numbered from the top-right 59 Scan.Type.BZSCANNER: np.array( 60 [ 61 [0, -1, 75000], 62 [-1, 0, 25000], 63 [0, 0, 1], 64 ] 65 ), 66 } 67 """ 68 Homogeneous transformation matrices for converting between scanner and slide 69 coordinates. The matrices are 3x3, with the final column representing the 70 translation in micrometers (um). For more information, see 71 [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations). 72 73 Transformations are nominal, and accuracy is not guaranteed; this is due to 74 imperfections in slides and alignment in the scanners. Units are in micrometers. 75 """ 76 77 def __init__( 78 self, 79 tile: Tile, 80 x: int, 81 y: int, 82 metadata: pd.Series = None, 83 features: pd.Series = None, 84 ): 85 self.tile = tile 86 self.x = int(x) 87 self.y = int(y) 88 self.metadata = metadata 89 self.features = features 90 91 def __repr__(self) -> str: 92 return f"{self.tile}-{self.x}-{self.y}" 93 94 def __eq__(self, other) -> bool: 95 return self.__repr__() == other.__repr__() 96 97 def __lt__(self, other): 98 return self.__repr__() < other.__repr__() 99 100 def get_scan_position(self) -> tuple[float, float]: 101 """ 102 Get the position of the event in the scanner's coordinate frame. 103 :return: the scan position of the event in micrometers (um). 
104 """ 105 # Get overall pixel position 106 real_tile_height, real_tile_width = self.tile.scan.get_image_size() 107 pixel_x = self.x + (real_tile_width * self.tile.x) 108 pixel_y = self.y + (real_tile_height * self.tile.y) 109 # Convert to micrometers 110 x_um = pixel_x * self.tile.scan.pixel_size_um 111 y_um = pixel_y * self.tile.scan.pixel_size_um 112 # Add the scan's origin in the scanner frame 113 x_um += self.tile.scan.roi[self.tile.n_roi].origin_x_um 114 y_um += self.tile.scan.roi[self.tile.n_roi].origin_y_um 115 return x_um, y_um 116 117 def get_slide_position(self) -> tuple[float, float]: 118 """ 119 Get the slide position of the event in micrometers (um). 120 :return: the slide position of the event. 121 """ 122 # Turn scan_position into a 3x1 vector 123 scan_position = self.get_scan_position() 124 scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]]) 125 126 # Multiply by the appropriate homogeneous matrix 127 if self.tile.scan.scanner_id.startswith(self.tile.scan.Type.AXIOSCAN7.value): 128 transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.AXIOSCAN7] 129 elif self.tile.scan.scanner_id.startswith(self.tile.scan.Type.BZSCANNER.value): 130 transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.BZSCANNER] 131 else: 132 raise ValueError(f"Scanner type {self.tile.scan.scanner_id} not supported.") 133 slide_position = np.matmul(transform, scan_position) 134 return float(slide_position[0][0]), float(slide_position[1][0]) 135 136 def crop( 137 self, images: Iterable[np.ndarray], crop_size: int = 100, in_pixels: bool = True 138 ) -> list[np.ndarray]: 139 """ 140 Crop the event from the provided frame images. Use if you have already gotten 141 frame images; useful for cropping multiple events from the same frame image. 142 :param images: the frame images. 143 :param crop_size: the square size of the image crop to get for this event. 144 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 145 :return: image_size x image_size crops of the event in the provided frames. If 146 the event is too close to the edge, the crop will be smaller and not centered. 
147 """ 148 # Convert a crop size in micrometers to pixels 149 if not in_pixels: 150 crop_size = round(crop_size / self.tile.scan.pixel_size_um) 151 image_height, image_width = 0, 0 152 for image in images: 153 if image_height == 0 and image_width == 0: 154 image_height, image_width = image.shape 155 else: 156 if image_height != image.shape[0] or image_width != image.shape[1]: 157 raise ValueError("All images must be the same size") 158 if image_height == 0 or image_width == 0: 159 raise ValueError("No images provided") 160 161 # Find the crop bounds 162 bounds = [ 163 self.x - (crop_size // 2) + 1, 164 self.y - (crop_size // 2) + 1, 165 self.x + math.ceil(crop_size / 2) + 1, 166 self.y + math.ceil(crop_size / 2) + 1, 167 ] 168 # Determine how much the bounds violate the image size 169 displacements = [ 170 max(0, -bounds[0]), 171 max(0, -bounds[1]), 172 max(0, bounds[2] - image_width), 173 max(0, bounds[3] - image_height), 174 ] 175 # Cap off the bounds 176 bounds = [ 177 max(0, bounds[0]), 178 max(0, bounds[1]), 179 min(image_width, bounds[2]), 180 min(image_height, bounds[3]), 181 ] 182 183 # Crop the images 184 crops = [] 185 for image in images: 186 # Create a blank image of the right size 187 crop = np.zeros((crop_size, crop_size), dtype=image.dtype) 188 189 # Insert the cropped image into the blank image, leaving a black buffer 190 # around the edges if the crop would go beyond the original image bounds 191 crop[ 192 displacements[1] : crop_size - displacements[3], 193 displacements[0] : crop_size - displacements[2], 194 ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]] 195 crops.append(crop) 196 return crops 197 198 def get_crops( 199 self, 200 crop_size: int = 100, 201 in_pixels: bool = True, 202 input_path: str = None, 203 channels: Iterable[int | str] = None, 204 apply_gain: bool | Iterable[bool] = True, 205 ) -> list[np.ndarray]: 206 """ 207 Gets the frame images for this event and then crops the event from the images. 208 Convenient for retrieving a single event's crops, but less efficient when 209 retrieving multiple events from the same tile as it will reread the images. 210 :param crop_size: the square size of the image crop to get for this event. 211 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 212 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 213 :param channels: the channels to extract images for. Defaults to all channels. 214 :param apply_gain: whether to apply scanner-calculated gain to the images, if 215 not already applied. If a list, matches the channels. 216 :return: a list of cropped images from the scan in the order of the channels. 217 """ 218 # This function validates channels 219 frames = Frame.get_frames(self.tile, channels) 220 # Convert individual inputs to lists of appropriate length 221 if isinstance(apply_gain, bool): 222 apply_gain = [apply_gain] * len(frames) 223 images = [f.get_image(input_path, a) for f, a in zip(frames, apply_gain)] 224 return self.crop(images, crop_size, in_pixels) 225 226 def save_crops( 227 self, 228 crops: Sequence[np.ndarray], 229 output_path: str, 230 labels: Sequence[str], 231 ext: str = "auto", 232 ): 233 """ 234 Save the crops to image files. 235 :param crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or 236 grayscale if 1 channel [h, w] or [h, w, 1]. 237 :param labels: the labels to append to the file name, usually the channel names 238 associated with each crop. 
        :param output_path: the folder to save the crops to. Will make if needed.
        :param ext: the file extension to save the crops as. Defaults to "auto", which
        will save as .tif for grayscale images and .jpg for RGB images.
        :return: None
        """
        if len(crops) != len(labels):
            raise ValueError("Crops and labels must be the same length")

        if csi_images is None or imageio is None:
            raise ModuleNotFoundError(
                "imageio libraries not installed! "
                "run `pip install csi_images[imageio]` to resolve."
            )

        os.makedirs(output_path, exist_ok=True)

        for crop, label in zip(crops, labels):
            if ext == "auto":
                if len(crop.shape) == 2 or crop.shape[2] == 1:
                    file_extension = ".tif"
                elif crop.shape[2] == 3:
                    file_extension = ".jpg"
                else:
                    warnings.warn(
                        f"Image shape {crop.shape} not recognized; saving as .tif"
                    )
                    file_extension = ".tif"
            else:
                file_extension = ext
            file = os.path.join(output_path, f"{self}-{label}{file_extension}")
            # TODO: add more file types here
            if file_extension == ".tif":
                imageio.imwrite(file, crop, compression="deflate")
            elif file_extension in [".jpg", ".jpeg"]:
                crop = csi_images.scale_bit_depth(crop, np.uint8)
                imageio.imwrite(file, crop, quality=80)
            else:
                imageio.imwrite(file, crop)

    def load_crops(
        self, input_path: str, labels: list[str] = None
    ) -> dict[str, np.ndarray]:
        """
        Loads previously saved crop files from a folder.
        :param input_path: folder containing crop files.
        :param labels: optional label filter, will only return crops with these labels.
        :return: a dict of crops keyed by their labels.
        """
        crops = {}
        for file in glob.glob(os.path.join(input_path, f"{self}-*")):
            label = os.path.splitext(os.path.basename(file))[0].split("-")[-1]
            # Skip if we have labels to target
            if labels is not None and label not in labels:
                continue
            crops[label] = imageio.imread(file)
        return crops

    def get_montage_channels(
        self,
        channels: Sequence[int | str] | None,
        composites: dict[int | str, tuple[float, float, float]] | None,
    ) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]:
        """
        Get the channel names for the montage from the event's tile.
        :param channels: channel indices or names for grayscale channels
        :param composites: dictionary of channel indices or names and RGB values
        :return: (1) channel indices to retrieve,
        (2) relative grayscale channel indices, and
        (3) composite channel indices and RGB values.
308 """ 309 if channels is None: 310 channels = list(range(len(self.tile.scan.channels))) 311 if (len(channels) == 0) and (composites is None or len(composites) == 0): 312 raise ValueError("Must provide at least one channel type to montage") 313 314 channels_to_get = [] 315 316 # Build the list of channels to retrieve 317 if channels is not None: 318 if isinstance(channels[0], str): 319 channels = self.tile.scan.get_channel_indices(channels) 320 channels_to_get += channels 321 order = list(range(len(channels))) # Always the first n channels 322 else: 323 order = None 324 325 if composites is not None: 326 relative_composites = {} # Relative indices for retrieved channels 327 # Convert to scan indices 328 rgb_channels = list(composites.keys()) 329 if isinstance(rgb_channels[0], str): 330 rgb_channels = self.tile.scan.get_channel_indices(rgb_channels) 331 # Find the index or add to the end 332 for channel, rgb in zip(rgb_channels, composites.values()): 333 if channel not in channels_to_get: 334 channels_to_get.append(channel) 335 relative_composites[channel] = rgb 336 else: 337 relative_composites[channels_to_get.index(channel)] = rgb 338 else: 339 relative_composites = None 340 341 return channels_to_get, order, relative_composites 342 343 def get_montage( 344 self, 345 channels: Sequence[int | str] = None, 346 composites: dict[int | str, tuple[float, float, float]] = None, 347 mask: np.ndarray[np.uint8] = None, 348 labels: Sequence[str] = None, 349 crop_size: int = 100, 350 in_pixels: bool = True, 351 input_path: str = None, 352 apply_gain: bool = True, 353 **kwargs, 354 ) -> np.ndarray: 355 """ 356 Convenience function for getting frame images and creating a montage. Mirrors 357 csi_images.make_montage(). Convenient for a single event's montage, but less 358 efficient when for multiple events from the same tile. 359 :param channels: the channels to use for black-and-white montages. 360 :param composites: dictionary of indices and RGB tuples for a composite. 361 :param mask: a mask to apply to the montage. Must be the same size as the crop. 362 :param crop_size: the square size of the image crop to get for this event. 363 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 364 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 365 :param apply_gain: whether to apply scanner-calculated gain to the images, if 366 not already applied. If a list, matches the channels. 367 :param kwargs: montage options. See csi_images.make_montage() for more details. 368 :return: numpy array representing the montage. 369 """ 370 channels, order, composites = self.get_montage_channels(channels, composites) 371 images = self.get_crops(crop_size, in_pixels, input_path, channels, apply_gain) 372 return csi_images.make_montage( 373 images, order, composites, mask, labels, **kwargs 374 ) 375 376 def save_montage( 377 self, 378 montage: np.ndarray, 379 output_path: str, 380 ocular_names: bool = False, 381 tag: str = "", 382 file_extension: str = ".jpeg", 383 **kwargs, 384 ): 385 """ 386 Save the montage as a JPEG image with a set name. 387 :param montage: the montage to save. 388 :param output_path: the folder to save the montage in. Will make if needed. 389 :param ocular_names: whether to use the OCULAR naming convention. 390 :param tag: a tag to append to the file name. Ignored if ocular_names is True. 391 :param file_extension: the file extension to save the montage as. Defaults to .jpeg. 
        :param kwargs: additional arguments to pass to imageio.imwrite().
        :return: None
        """
        if csi_images is None or imageio is None:
            raise ModuleNotFoundError(
                "imageio libraries not installed! "
                "run `pip install csi_images[imageio]` to resolve."
            )

        montage = csi_images.scale_bit_depth(montage, np.uint8)

        if not file_extension.startswith("."):
            file_extension = f".{file_extension}"

        if ocular_names:
            if "cell_id" not in self.metadata.index:
                raise ValueError(
                    "Event metadata must include 'cell_id' for OCULAR naming."
                )
            file = f"{self.tile.n}-{self.metadata['cell_id']}-{self.x}-{self.y}{file_extension}"
        else:
            file = f"{self}{tag}{file_extension}"

        os.makedirs(output_path, exist_ok=True)
        imageio.imwrite(os.path.join(output_path, file), montage, **kwargs)

    def load_montage(self, input_path: str, tag: str = "") -> np.ndarray:
        """
        Loads the montage from a file saved by Event.save_montage.
        :param input_path: the path to the folder where the montage was saved.
        :param tag: a string to add to the file name, before the extension.
        :return: the montage as a numpy array.
        """
        file = f"{self}{tag}.jpeg"
        return imageio.imread(os.path.join(input_path, file))

    @classmethod
    def get_many_crops(
        cls,
        events: Sequence[Self],
        crop_size: int | Sequence[int] = 100,
        in_pixels: bool = True,
        input_path: str | Sequence[str] = None,
        channels: Sequence[int | str] = None,
        apply_gain: bool | Sequence[bool] = True,
    ) -> list[list[np.ndarray]]:
        """
        Get the crops for a list of events, ensuring that there is no wasteful reading
        of the same tile multiple times. This function is more efficient than calling
        get_crops() for each event.
        :param events: the events to get crops for.
        :param crop_size: the square size of the image crop to get for each event.
        Defaults to 100 pixels.
        :param in_pixels: whether the crop size is in pixels or micrometers.
        Defaults to pixels.
        :param input_path: the path to the input images. Will only work for lists of events
        from the same scan. Defaults to None (uses the scan's path).
        :param channels: the channels to extract images for. Defaults to all channels.
        :param apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True.
        Can be supplied as a list to apply gain to individual channels.
        :return: a list of lists of cropped images for each event.
453 """ 454 if len(events) == 0: 455 return [] 456 # Adapt singular inputs to lists of appropriate length 457 if isinstance(crop_size, int): 458 crop_size = [crop_size] * len(events) 459 if input_path is None or isinstance(input_path, str): 460 input_path = [input_path] * len(events) 461 462 # Get the order of the events when sorted by slide/tile 463 order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 464 465 # Allocate the list to size 466 crops = [[]] * len(events) 467 last_tile = None 468 images = None # Holds large numpy arrays, so expensive to compare 469 # Iterate through in slide/tile sorted order 470 for i in order: 471 if last_tile != events[i].tile: 472 # Gather the frame images, preserving them for the next event 473 frames = Frame.get_frames(events[i].tile, channels) 474 if isinstance(apply_gain, bool): 475 apply = [apply_gain] * len(frames) 476 else: 477 apply = apply_gain 478 images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)] 479 last_tile = events[i].tile 480 # Use the frame images to crop the event images 481 crops[i] = events[i].crop(images, crop_size[i], in_pixels) 482 return crops 483 484 @classmethod 485 def get_many_montages( 486 cls, 487 events: Sequence[Self], 488 channels: Sequence[int | str] = None, 489 composites: dict[int | str, tuple[float, float, float]] = None, 490 masks: Sequence[np.ndarray[np.uint8]] = None, 491 labels: Sequence[str] = None, 492 crop_size: int = 100, 493 in_pixels: bool = True, 494 input_path: str = None, 495 apply_gain: bool | Iterable[bool] = True, 496 **kwargs, 497 ) -> list[np.ndarray]: 498 """ 499 Convenience function for get_montage(), but for a list of events. More efficient 500 than get_montage() when working with multiple events from the same tile. 501 :param events: a list of Event objects. 502 :param channels: the channels to extract images for. Defaults to all channels. 503 :param composites: dictionary of indices and RGB tuples for a composite. 504 :param masks: a list of masks to apply to the montages. Must be the same size as the crops. 505 :param labels: the labels to subtitle montage images, usually the channel names 506 :param crop_size: the square size of the image crop to get for this event. 507 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 508 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 509 :param apply_gain: whether to apply scanner-calculated gain to the images, if 510 not already applied. If a list, matches the channels. 511 :param kwargs: montage options. See csi_images.make_montage() for more details. 512 :return: a list of numpy arrays representing the montages. 
513 """ 514 if len(events) == 0: 515 return [] 516 # Adapt singular inputs to lists of appropriate length 517 if isinstance(crop_size, int): 518 crop_size = [crop_size] * len(events) 519 if input_path is None or isinstance(input_path, str): 520 input_path = [input_path] * len(events) 521 if masks is None or isinstance(masks, np.ndarray): 522 masks = [masks] * len(events) 523 524 # Get the order of the events when sorted by slide/tile 525 event_order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 526 527 # Allocate the list to size 528 montages = [np.empty(0)] * len(events) 529 # Placeholder variables to avoid rereading the same tile 530 images = None # Holds large numpy arrays, so expensive to compare 531 order = None 532 rel_composites = None 533 last_tile = None 534 # Iterate through in slide/tile sorted order 535 for i in event_order: 536 if last_tile != events[i].tile: 537 channels_to_get, order, rel_composites = events[i].get_montage_channels( 538 channels, composites 539 ) 540 # Gather the frame images, preserving them for the next event 541 frames = Frame.get_frames(events[i].tile, channels_to_get) 542 if isinstance(apply_gain, bool): 543 apply = [apply_gain] * len(frames) 544 else: 545 apply = apply_gain 546 images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)] 547 last_tile = events[i].tile 548 # Use the frame images to crop the event images and make montages 549 crops = events[i].crop(images, crop_size[i], in_pixels) 550 montages[i] = csi_images.make_montage( 551 crops, order, rel_composites, masks[i], labels, **kwargs 552 ) 553 554 return montages 555 556 @classmethod 557 def get_and_save_many_crops( 558 cls, 559 events: list[Self], 560 output_path: str, 561 labels: Sequence[str], 562 ext: str = "auto", 563 additional_gain: Sequence[float] = None, 564 **kwargs, 565 ) -> None: 566 """ 567 Get and save the crops for a list of events, ensuring that there is no wasteful 568 reading and limiting the image data in memory to 1 tile at a time. This function 569 is more efficient that chaining get_crops() and save_crops() for each event or 570 get_many_crops() and then save_crops(). 571 :param events: list of events to get, crop, and save. 572 :param output_path: the folder to save the crops in. Will make if needed. 573 :param labels: the labels to save the crops with. See save_crops(). 574 :param ext: the file extension to save the crops as. See save_crops(). 575 :param additional_gain: additional gain to apply to the crops. If not None, must 576 match the length of the number of crop channels. 577 :param kwargs: see get_many_crops() for more parameters. 578 :return: 579 """ 580 unique_tiles = set([event.tile for event in events]) 581 582 for tile in unique_tiles: 583 # Get one tile's worth of event crops 584 tile_events = [e for e in events if e.tile == tile] 585 crops_list = cls.get_many_crops(tile_events, **kwargs) 586 for event, crops in zip(tile_events, crops_list): 587 # Apply any additional gains 588 if additional_gain is not None: 589 crops = [gain * crop for gain, crop in zip(additional_gain, crops)] 590 event.save_crops(crops, output_path, labels, ext) 591 592 @classmethod 593 def get_and_save_many_montages( 594 cls, 595 events: list[Self], 596 output_path: str, 597 ocular_names: bool = False, 598 tag: str = "", 599 **kwargs, 600 ) -> None: 601 """ 602 Save montages of the events to image files. 603 :param events: the events to get, montage, and save. 604 :param output_path: the folder to save the montages to. Will make if needed. 
        :param ocular_names: whether to use the OCULAR naming convention.
        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
        :param kwargs: see get_many_montages() for more parameters.
        """
        unique_tiles = set([event.tile for event in events])

        for tile in unique_tiles:
            # Get one tile's worth of event crops
            tile_events = [e for e in events if e.tile == tile]
            montages = cls.get_many_montages(tile_events, **kwargs)
            for event, montage in zip(tile_events, montages):
                event.save_montage(montage, output_path, ocular_names, tag)


class EventArray:
    """
    A class that holds a large number of events' data, making it easy to analyze and
    manipulate many events at once. A more separated version of the Event class.
    """

    INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y"]

    def __init__(
        self,
        info: pd.DataFrame = None,
        metadata: pd.DataFrame = None,
        features: pd.DataFrame = None,
    ):
        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y"
        if info is not None:
            # Special case: "roi" is often not required, so we'll fill it in if it's missing
            if "roi" not in info.columns:
                info["roi"] = 0
            if set(info.columns) != set(self.INFO_COLUMNS):
                raise ValueError(
                    f"EventArray.info must have columns: "
                    f"{self.INFO_COLUMNS}; had {list(info.columns)}"
                )
            # Copy first to avoid modifying the original
            info = info.copy()
            # Ensure that the columns are the right types
            info["slide_id"] = info["slide_id"].astype(str)
            info["tile"] = info["tile"].astype(np.uint16)
            info["roi"] = info["roi"].astype(np.uint8)
            info["x"] = info["x"].round().astype(np.uint16)
            info["y"] = info["y"].round().astype(np.uint16)
            # Ensure that the columns are in the right order
            info = info[self.INFO_COLUMNS]
        # All DataFrames must have the same number of rows
        if metadata is not None and (info is None or len(info) != len(metadata)):
            raise ValueError(
                "If EventArray.metadata is not None, it should match rows with .info"
            )
        if features is not None and (info is None or len(info) != len(features)):
            raise ValueError(
                "If EventArray.features is not None, it should match rows with .info"
            )
        # No columns named "metadata_", "features_", or "None"
        column_names = []
        if metadata is not None:
            column_names += metadata.columns.tolist()
        if features is not None:
            column_names += features.columns.tolist()
        if any([col.lower().startswith("metadata_") for col in column_names]):
            raise ValueError("EventArray column names cannot start with 'metadata_'")
        if any([col.lower().startswith("features_") for col in column_names]):
            raise ValueError("EventArray column names cannot start with 'features_'")
        if any([col.lower() == "none" for col in column_names]):
            raise ValueError("EventArray column names cannot be 'none'")

        self.info = info
        self.metadata = metadata
        self.features = features

    def __len__(self) -> int:
        # Convenience method to get the number of events
        if self.info is None:
            return 0
        else:
            return len(self.info)

    def __eq__(self, other):
        # Parse all possibilities for info
        if isinstance(self.info, pd.DataFrame):
            if isinstance(other.info, pd.DataFrame):
                if not self.info.equals(other.info):
                    return False
            else:
                return False
        elif self.info is None:
            if other.info is not None:
                return False

        # Parse all possibilities for metadata
        if isinstance(self.metadata, pd.DataFrame):
            if isinstance(other.metadata, pd.DataFrame):
                is_equal = self.metadata.equals(other.metadata)
                if not is_equal:
                    return False
            else:
                return False
        elif self.metadata is None:
            if other.metadata is not None:
                return False

        # Parse all possibilities for features
        if isinstance(self.features, pd.DataFrame):
            if isinstance(other.features, pd.DataFrame):
                is_equal = self.features.equals(other.features)
                if not is_equal:
                    return False
            else:
                return False
        elif self.features is None:
            if other.features is not None:
                return False

        # All attributes matched (or were both None)
        return True

    def get_sort_order(
        self, by: Hashable | Sequence[Hashable], ascending: bool | Sequence[bool] = True
    ):
        """
        Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames.
        :param by: name of the column(s) to sort by.
        :param ascending: whether to sort in ascending order; can be a list to match by
        :return: the order of the indices to sort by.
        """
        columns = self.get(by)
        return columns.sort_values(by=by, ascending=ascending).index

    def sort(
        self,
        by: Hashable | Sequence[Hashable],
        ascending: bool | Sequence[bool] = True,
    ) -> Self:
        """
        Sort the EventArray by column(s) in the info, metadata, or features DataFrames.
        :param by: name of the column(s) to sort by.
        :param ascending: whether to sort in ascending order; can be a list to match by
        :return: a new, sorted EventArray.
        """
        order = self.get_sort_order(by, ascending)
        info = self.info.loc[order].reset_index(drop=True)
        if self.metadata is not None:
            metadata = self.metadata.loc[order].reset_index(drop=True)
        else:
            metadata = None
        if self.features is not None:
            features = self.features.loc[order].reset_index(drop=True)
        else:
            features = None
        return EventArray(info, metadata, features)

    def get(self, column_names: Hashable | Sequence[Hashable]) -> pd.DataFrame:
        """
        Get a DataFrame with the specified columns from the EventArray, by value.
        :param column_names: the names of the columns to get.
        :return: a DataFrame with the specified columns.
        """
        if isinstance(column_names, Hashable):
            column_names = [column_names]  # Drop into a list for the loop
        columns = []
        for column_name in column_names:
            if column_name in self.info.columns:
                columns.append(self.info[column_name])
            elif self.metadata is not None and column_name in self.metadata.columns:
                columns.append(self.metadata[column_name])
            elif self.features is not None and column_name in self.features.columns:
                columns.append(self.features[column_name])
            else:
                raise ValueError(f"Column {column_name} not found in EventArray")
        return pd.concat(columns, axis=1)

    def rows(self, rows: Sequence[Hashable]) -> Self:
        """
        Get a subset of the EventArray rows based on a boolean or integer index, by value.
        :param rows: row labels, indices, or boolean mask; anything for .loc[]
        :return: a new EventArray with the subset of events.
784 """ 785 info = self.info.loc[rows].reset_index(drop=True) 786 if self.metadata is not None: 787 metadata = self.metadata.loc[rows].reset_index(drop=True) 788 else: 789 metadata = None 790 if self.features is not None: 791 features = self.features.loc[rows].reset_index(drop=True) 792 else: 793 features = None 794 return EventArray(info, metadata, features) 795 796 def copy(self) -> Self: 797 """ 798 Create a deep copy of the EventArray. 799 :return: a deep copy of the EventArray. 800 """ 801 return EventArray( 802 info=self.info.copy(), 803 metadata=None if self.metadata is None else self.metadata.copy(), 804 features=None if self.features is None else self.features.copy(), 805 ) 806 807 # TODO: add a "filter" convenience function that takes a column name and values to filter by 808 809 def add_metadata(self, new_metadata: pd.Series | pd.DataFrame) -> None: 810 """ 811 Add metadata to the EventArray. Removes the need to check if metadata is None. 812 Overwrites any existing metadata with the same column names as the new metadata. 813 :param new_metadata: the metadata to add. 814 """ 815 if len(self) != len(new_metadata): 816 raise ValueError("New metadata must match length of existing info") 817 818 if self.metadata is None: 819 self.metadata = new_metadata 820 else: 821 if isinstance(new_metadata, pd.Series): 822 self.metadata[new_metadata.name] = new_metadata 823 else: 824 # It's a DataFrame 825 self.metadata[new_metadata.columns] = new_metadata 826 827 def add_features(self, new_features: pd.Series | pd.DataFrame) -> None: 828 """ 829 Add features to the EventArray. Removes the need to check if features is None. 830 Overwrites any existing features with the same column names as the new features. 831 :param new_features: the features to add. 832 """ 833 if len(self) != len(new_features): 834 raise ValueError("New features must match length of existing info") 835 836 if self.features is None: 837 self.features = new_features 838 else: 839 if isinstance(new_features, pd.Series): 840 self.features[new_features.name] = new_features 841 else: 842 # It's a DataFrame 843 self.features[new_features.columns] = new_features 844 845 @classmethod 846 def merge(cls, events: Iterable[Self]) -> Self: 847 """ 848 Combine EventArrays in a list into a single EventArray. 849 :param events: the new list of events. 850 """ 851 all_info = [] 852 all_metadata = [] 853 all_features = [] 854 for event_array in events: 855 # Skip empty EventArrays 856 if event_array.info is not None: 857 all_info.append(event_array.info) 858 if event_array.metadata is not None: 859 all_metadata.append(event_array.metadata) 860 if event_array.features is not None: 861 all_features.append(event_array.features) 862 if len(all_info) == 0: 863 return EventArray() 864 else: 865 all_info = pd.concat(all_info, ignore_index=True) 866 if len(all_metadata) == 0: 867 all_metadata = None 868 else: 869 all_metadata = pd.concat(all_metadata, ignore_index=True) 870 if len(all_features) == 0: 871 all_features = None 872 else: 873 all_features = pd.concat(all_features, ignore_index=True) 874 875 return EventArray(all_info, all_metadata, all_features) 876 877 def to_events( 878 self, 879 scans: Scan | Iterable[Scan], 880 ignore_missing_scans=True, 881 ignore_metadata=False, 882 ignore_features=False, 883 ) -> list[Event]: 884 """ 885 Get the events in the EventArray as a list of events. Returns [] if empty. 886 :param scans: the scans that the events belong to, auto-matched by slide_id. 
        Pass None if you don't care about scan metadata (pass ignore_missing_scans).
        :param ignore_missing_scans: whether to create blank scans for events without scans.
        :param ignore_metadata: whether to ignore metadata or not
        :param ignore_features: whether to ignore features or not
        :return: a list of Event objects.
        """
        if len(self) == 0:
            return []
        if isinstance(scans, Scan):
            scans = [scans]
        scans = {scan.slide_id: scan for scan in scans}
        events = []
        for i in range(len(self.info)):
            # Determine the associated scan
            slide_id = self.info["slide_id"][i]
            if slide_id not in scans:
                if ignore_missing_scans:
                    # Create a placeholder scan if the scan is missing
                    scan = Scan.make_placeholder(
                        slide_id,
                        self.info["tile"][i],
                        self.info["roi"][i],
                    )
                else:
                    raise ValueError(
                        f"Scan {self.info['slide_id'][i]} not found for event {i}."
                    )
            else:
                scan = scans[slide_id]

            # Prepare the metadata and features
            if ignore_metadata or self.metadata is None:
                metadata = None
            else:
                # This Series creation method is less efficient,
                # but required for preserving dtypes
                metadata = pd.Series(
                    {col: self.metadata.loc[i, col] for col in self.metadata.columns},
                    dtype=object,
                )
            if ignore_features or self.features is None:
                features = None
            else:
                features = pd.Series(
                    {col: self.features.loc[i, col] for col in self.features.columns},
                    dtype=object,
                )
            # Create the event and append it to the list
            events.append(
                Event(
                    Tile(scan, self.info["tile"][i], self.info["roi"][i]),
                    self.info["x"][i],
                    self.info["y"][i],
                    metadata=metadata,
                    features=features,
                )
            )
        return events

    @classmethod
    def from_events(cls, events: Iterable[Event]) -> Self:
        """
        Create an EventArray from a list of events.
        :param events: the list of events to convert.
        """
        info = pd.DataFrame(
            {
                "slide_id": [event.tile.scan.slide_id for event in events],
                "tile": [event.tile.n for event in events],
                "roi": [event.tile.n_roi for event in events],
                "x": [event.x for event in events],
                "y": [event.y for event in events],
            }
        )
        metadata_list = [event.metadata for event in events]
        # Iterate through and ensure that all metadata is the same shape
        for metadata in metadata_list:
            if type(metadata) != type(metadata_list[0]):
                raise ValueError("All metadata must be the same type.")
            if metadata is not None and metadata.shape != metadata_list[0].shape:
                raise ValueError("All metadata must be the same shape.")
        if metadata_list[0] is None:
            metadata = None
        else:
            metadata = pd.DataFrame(metadata_list)
        features_list = [event.features for event in events]
        # Iterate through and ensure that all features are the same shape
        for features in features_list:
            if type(features) != type(features_list[0]):
                raise ValueError("All features must be the same type.")
            if features is not None and features.shape != features_list[0].shape:
                raise ValueError("All features must be the same shape.")
        if features_list[0] is None:
            features = None
        else:
            features = pd.DataFrame(features_list)
        return EventArray(info=info, metadata=metadata, features=features)

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert all the data in the EventArray to a single DataFrame.
        :return: a DataFrame with all the data in the EventArray.
989 """ 990 # Make a copy of the info DataFrame and prepend "info_" to the column names 991 output = self.info.copy() 992 # Combine with the metadata and prepend "metadata_" to the column names 993 if self.metadata is not None: 994 metadata = self.metadata.copy() 995 metadata.columns = [f"metadata_{col}" for col in metadata.columns] 996 output = pd.concat([output, metadata], axis=1) 997 # Combine with the features and prepend "features_" to the column names 998 if self.features is not None: 999 features = self.features.copy() 1000 features.columns = [f"features_{col}" for col in features.columns] 1001 output = pd.concat([output, features], axis=1) 1002 return output 1003 1004 @classmethod 1005 def from_dataframe( 1006 cls, df, metadata_prefix: str = "metadata_", features_prefix: str = "features_" 1007 ) -> Self: 1008 """ 1009 From a single, special DataFrame, create an EventArray. 1010 :param df: the DataFrame to convert to an EventArray. 1011 :param metadata_prefix: the prefix for metadata columns. 1012 :param features_prefix: the prefix for features columns. 1013 :return: a DataFrame with all the data in the EventArray. 1014 """ 1015 # Split the columns into info, metadata, and features and strip prefix 1016 info = df[[col for col in df.columns if col in cls.INFO_COLUMNS]].copy() 1017 if info.size == 0: 1018 info = None 1019 metadata = df[[col for col in df.columns if col.startswith("metadata_")]].copy() 1020 metadata.columns = [ 1021 col.replace(metadata_prefix, "") for col in metadata.columns 1022 ] 1023 if metadata.size == 0: 1024 metadata = None 1025 features = df[[col for col in df.columns if col.startswith("features_")]].copy() 1026 features.columns = [ 1027 col.replace(features_prefix, "") for col in features.columns 1028 ] 1029 if features.size == 0: 1030 features = None 1031 return cls(info=info, metadata=metadata, features=features) 1032 1033 @classmethod 1034 def from_mask( 1035 cls, 1036 mask: np.ndarray, 1037 tile: Tile, 1038 include_cell_id: bool = True, 1039 images: list[np.ndarray] = None, 1040 image_labels: list[str] = None, 1041 properties: list[str] = None, 1042 ) -> Self: 1043 """ 1044 Extract events from a mask DataFrame, including metadata and features. 1045 :param mask: the mask to extract events from. 1046 :param tile: the Tile object associated with this mask. 1047 :param include_cell_id: whether to include the cell_id, or numerical 1048 mask label, as metadata in the EventArray. 1049 :param images: the intensity images to extract features from. 1050 :param image_labels: the labels for the intensity images. 1051 :param properties: list of properties to extract in addition to the defaults: 1052 :return: EventArray corresponding to the mask labels. 1053 """ 1054 if csi_images is None: 1055 raise ModuleNotFoundError( 1056 "imageio libraries not installed! " 1057 "run `pip install csi_images[imageio]` to resolve." 
            )
        # Gather mask_info
        if images is not None and image_labels is not None:
            if len(images) != len(image_labels):
                raise ValueError("Intensity images and labels must match lengths.")

        mask_info = csi_images.extract_mask_info(mask, images, image_labels, properties)

        if len(mask_info) == 0:
            return EventArray()

        # Combine provided info and mask info
        info = pd.DataFrame(
            {
                "slide_id": tile.scan.slide_id,
                "tile": tile.n,
                "roi": tile.n_roi,
                "x": mask_info["x"],
                "y": mask_info["y"],
            },
        )
        # Extract a metadata column if desired
        if include_cell_id:
            metadata = pd.DataFrame({"cell_id": mask_info["id"]})
        else:
            metadata = None
        # If any additional properties were extracted, add them as features
        mask_info = mask_info.drop(columns=["id", "x", "y"], errors="ignore")
        if len(mask_info.columns) > 0:
            features = mask_info
            features.columns = [col.lower() for col in features.columns]
        else:
            features = None
        return EventArray(info, metadata, features)

    def save_csv(self, output_path: str) -> bool:
        """
        Save the events to a CSV file, including metadata and features.
        :param output_path: the path of the CSV file to save to.
        :return: True if the file was saved successfully.
        """
        if not output_path.endswith(".csv"):
            output_path += ".csv"
        self.to_dataframe().to_csv(output_path, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_csv(
        cls,
        input_path: str,
        metadata_prefix: str = "metadata_",
        features_prefix: str = "features_",
    ) -> Self:
        """
        Load the events from a CSV file, including metadata and features.
        :param input_path: the path of the CSV file to load.
        :param metadata_prefix: the prefix for metadata columns.
        :param features_prefix: the prefix for features columns.
        :return: an EventArray with the loaded data.
        """
        # Load the CSV file
        df = pd.read_csv(input_path)
        return cls.from_dataframe(df, metadata_prefix, features_prefix)

    def save_hdf5(self, output_path: str) -> bool:
        """
        Save the events to an HDF5 file, including metadata and features.
        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
        though these files are slightly harder to view in HDFView or similar.
        :param output_path: the path of the HDF5 file to save to.
        :return: True if the file was saved successfully.
        """
        if not output_path.endswith(".hdf5") and not output_path.endswith(".h5"):
            output_path += ".hdf5"
        # Open the output_path as an HDF5 file
        with pd.HDFStore(output_path) as store:
            # Store the dataframes in the HDF5 file
            if self.info is not None:
                store.put("info", self.info, index=False)
            if self.metadata is not None:
                store.put("metadata", self.metadata, index=False)
            if self.features is not None:
                store.put("features", self.features, index=False)
        return os.path.exists(output_path)

    @classmethod
    def load_hdf5(cls, input_path: str) -> Self:
        """
        Load the events from an HDF5 file, including metadata and features.
        :param input_path: the path of the HDF5 file to load.
        :return: an EventArray with the loaded data.
        """
        # Open the input_path as an HDF5 file
        with pd.HDFStore(input_path, "r") as store:
            # Load the dataframes from the HDF5 file
            info = store.get("info") if "info" in store else None
            metadata = store.get("metadata") if "metadata" in store else None
            features = store.get("features") if "features" in store else None
        return cls(info=info, metadata=metadata, features=features)

    def save_ocular(self, output_path: str, event_type: str = "cells"):
        """
        Save the events to an OCULAR file. Relies on the dataframe originating
        from an OCULAR file (same columns; duplicate metadata/info).
        :param output_path: the folder to save the OCULAR files to.
        :param event_type: "cells" or "others".
        :return: None
        """
        if pyreadr is None:
            raise ModuleNotFoundError(
                "pyreadr not installed! Install pyreadr directly "
                "or run `pip install csi-images[rds]` option to resolve."
            )
        if event_type == "cells":
            file_stub = "rc-final"
        elif event_type == "others":
            file_stub = "others-final"
        else:
            raise ValueError("Invalid event type. Must be cells or others.")

        # Ensure good metadata
        metadata = pd.DataFrame(
            {
                "slide_id": self.info["slide_id"],
                "frame_id": self.info["tile"] + 1,  # Convert to 1-indexed for R
                "cell_id": (
                    self.metadata["cell_id"]
                    if "cell_id" in self.metadata.columns
                    else range(len(self.info))
                ),
                "cellx": self.info["x"],
                "celly": self.info["y"],
            }
        )
        if self.metadata is not None:
            metadata[self.metadata.columns] = self.metadata.copy()

        # Check for the "ocular_interesting" column
        if event_type == "cells":
            if "ocular_interesting" in metadata.columns:
                interesting_rows = metadata["ocular_interesting"].to_numpy(dtype=bool)
            elif "hcpc" in metadata.columns:
                # Interesting cells don't get an hcpc designation, leaving them as -1
                interesting_rows = (
                    metadata["hcpc"].to_numpy() == -1
                )  # interesting cells
            else:
                interesting_rows = []
            if sum(interesting_rows) > 0:
                # Split the metadata into interesting and regular
                interesting_events = self.rows(interesting_rows)
                interesting_df = pd.concat(
                    [interesting_events.features, interesting_events.metadata], axis=1
                )
                data_events = self.rows(~interesting_rows)
                data_df = pd.concat(
                    [data_events.features, data_events.metadata], axis=1
                )
                data_df = data_df.drop(columns=["ocular_interesting"], errors="ignore")

                # Drop particular columns for "interesting"
                interesting_df = interesting_df.drop(
                    [
                        "clust",
                        "hcpc",
                        "frame_id",
                        "cell_id",
                        "unique_id",
                        "ocular_interesting",
                    ],
                    axis=1,
                    errors="ignore",
                )
                # Save both .csv and .rds
                interesting_stub = os.path.join(output_path, "ocular_interesting")
                interesting_df.to_csv(f"{interesting_stub}.csv")
                # Suppress pandas FutureWarning
                with warnings.catch_warnings():
                    warnings.simplefilter(action="ignore", category=FutureWarning)
                    pyreadr.write_rds(f"{interesting_stub}.rds", interesting_df)
            else:
                data_df = pd.concat([self.features, metadata], axis=1)
        else:
            # Get all data and reset_index (will copy it)
            data_df = pd.concat([self.features, metadata], axis=1)

        # Split based on cluster number to conform to *-final[1-4].rds
        n_clusters = max(data_df["clust"]) + 1
        split_idx = [round(i * n_clusters / 4) for i in range(5)]
        for i in range(4):
            subset = (split_idx[i] <= data_df["clust"]) & (
                data_df["clust"] < split_idx[i + 1]
            )
            data_df.loc[subset, "hcpc"] = i + 1
            subset = data_df[subset].reset_index(drop=True)
            # Suppress pandas FutureWarning
            with warnings.catch_warnings():
                warnings.simplefilter(action="ignore", category=FutureWarning)
                pyreadr.write_rds(
                    os.path.join(output_path, f"{file_stub}{i+1}.rds"), subset
                )

        # Create new example cell strings
        data_df["example_cell_id"] = (
            data_df["slide_id"]
            + " "
            + data_df["frame_id"].astype(str)
data_df["frame_id"].astype(str) 1265 + " " 1266 + data_df["cell_id"].astype(str) 1267 + " " 1268 + data_df["cellx"].astype(int).astype(str) 1269 + " " 1270 + data_df["celly"].astype(int).astype(str) 1271 ) 1272 # Find averagable data columns 1273 if "cellcluster_id" in data_df.columns: 1274 end_idx = data_df.columns.get_loc("cellcluster_id") 1275 else: 1276 end_idx = data_df.columns.get_loc("slide_id") 1277 avg_cols = data_df.columns[:end_idx].tolist() 1278 # Group by cluster and average 1279 data_df = data_df.groupby("clust").agg( 1280 **{col: (col, "mean") for col in avg_cols}, 1281 count=("clust", "size"), # count rows in each cluster 1282 example_cells=("example_cell_id", lambda x: ",".join(x)), 1283 hcpc=("hcpc", lambda x: x.iloc[0]), 1284 ) 1285 data_df = data_df.reset_index() # Do NOT drop, index is "clust" 1286 # Create new columns 1287 metadata = pd.DataFrame( 1288 { 1289 "count": data_df["count"], 1290 "example_cells": data_df["example_cells"], 1291 "clust": data_df["clust"].astype(int), 1292 "hcpc": data_df["hcpc"].astype(int), 1293 "id": data_df["clust"].astype(int).astype(str), 1294 "cccluster": "0", # Dummy value 1295 "ccdistance": 0.0, # Dummy value 1296 "rownum": list(range(len(data_df))), 1297 "framegroup": 0, # Dummy value 1298 } 1299 ) 1300 # Need to pad the features to 761 columns, as per OCULAR report needs 1301 additional_columns = range(len(avg_cols), 761) 1302 if len(additional_columns) > 0: 1303 padding = pd.DataFrame( 1304 np.zeros((len(data_df), len(additional_columns))), 1305 columns=[f"pad{i}" for i in additional_columns], 1306 ) 1307 data_df = pd.concat([data_df[avg_cols], padding, metadata], axis=1) 1308 else: 1309 data_df = pd.concat([data_df[avg_cols], metadata], axis=1) 1310 1311 # Save the cluster data 1312 data_df.to_csv(os.path.join(output_path, f"{file_stub}.csv")) 1313 # Suppress pandas FutureWarning 1314 with warnings.catch_warnings(): 1315 warnings.simplefilter(action="ignore", category=FutureWarning) 1316 pyreadr.write_rds(os.path.join(output_path, f"{file_stub}.rds"), data_df) 1317 1318 @classmethod 1319 def load_ocular( 1320 cls, 1321 input_path: str, 1322 event_type="cells", 1323 cell_data_files=( 1324 "rc-final1.rds", 1325 "rc-final2.rds", 1326 "rc-final3.rds", 1327 "rc-final4.rds", 1328 "ocular_interesting.rds", 1329 ), 1330 others_data_files=( 1331 "others-final1.rds", 1332 "others-final2.rds", 1333 "others-final3.rds", 1334 "others-final4.rds", 1335 ), 1336 atlas_data_files=( 1337 "ocular_interesting.rds", 1338 "ocular_not_interesting.rds", 1339 ), 1340 drop_common_events=True, 1341 ) -> Self: 1342 """ 1343 1344 :param input_path: 1345 :param event_type: 1346 :param cell_data_files: 1347 :param others_data_files: 1348 :param atlas_data_files: 1349 :param drop_common_events: 1350 :return: 1351 """ 1352 if pyreadr is None: 1353 raise ModuleNotFoundError( 1354 "pyreadr not installed! Install pyreadr directly " 1355 "or run `pip install csi-images[rds]` option to resolve." 
            )
        # Check if the input path is a directory or a file
        if os.path.isfile(input_path):
            data_files = [os.path.basename(input_path)]
            input_path = os.path.dirname(input_path)
        if event_type == "cells":
            data_files = cell_data_files
        elif event_type == "others":
            data_files = others_data_files
        else:
            raise ValueError("Invalid event type.")

        # Load the data from the OCULAR files
        file_data = {}
        for file in data_files:
            file_path = os.path.join(input_path, file)
            if not os.path.isfile(file_path):
                warnings.warn(f"{file} not found in {input_path}")
                continue
            file_data[file] = pyreadr.read_r(file_path)
            # Get the DataFrame associated with None (pyreadr dict quirk)
            file_data[file] = file_data[file][None]
            if len(file_data[file]) == 0:
                # File gets dropped from the dict
                file_data.pop(file)
                warnings.warn(f"{file} has no cells")
                continue

            # Drop common cells if requested and in this file
            if (
                file in atlas_data_files
                and drop_common_events
                and "catalogue_classification" in file_data[file]
            ):
                common_cell_indices = (
                    file_data[file]["catalogue_classification"] == "common_cell"
                )
                file_data[file] = file_data[file][common_cell_indices == False]

            if len(file_data[file]) == 0:
                # File gets dropped from the dict
                file_data.pop(file)
                warnings.warn(f"{file} has no cells after dropping common cells")
                continue

            # Extract frame_id and cell_id
            # DAPI- events already have frame_id cell_id outside rowname
            if event_type == "cells" and "frame_id" not in file_data[file].columns:
                file_data[file]["rowname"] = file_data[file]["rowname"].astype("str")
                # get frame_id cell_id from rownames column and split into two columns
                split_res = file_data[file]["rowname"].str.split(" ", n=1, expand=True)
                if len(split_res.columns) != 2:
                    warnings.warn(
                        f'Expected "frame_id cell_id" but got {file_data[file]["rowname"]}'
                    )
                # then assign it back to the dataframe
                file_data[file][["frame_id", "cell_id"]] = split_res.astype("int")
            # Ensure frame_id and cell_id are integers
            file_data[file]["frame_id"] = file_data[file]["frame_id"].astype("int")
            file_data[file]["cell_id"] = file_data[file]["cell_id"].astype("int")
            # reset indexes since they can cause NaN values in concat
            file_data[file] = file_data[file].reset_index(drop=True)

        # Merge the data from all files
        if len(file_data) == 0:
            return EventArray()
        elif len(file_data) == 1:
            data = [file_data[file] for file in file_data.keys()][0]
        else:
            data = pd.concat(file_data.values())

        # Others is missing the "slide_id". Insert it right before "frame_id" column
        if event_type == "others" and "slide_id" not in data.columns:
            if os.path.basename(input_path) == "ocular":
                slide_id = os.path.basename(os.path.dirname(input_path))
            else:
                slide_id = "UNKNOWN"
            data.insert(data.columns.get_loc("frame_id"), "slide_id", slide_id)

        # Sort according to ascending cell_id to keep the original, which is in manual_df
        data = data.sort_values(by=["cell_id"], ascending=True)
        # Filter out duplicates by x & y
        data = data.assign(
            unique_id=data["slide_id"]
            + "_"
            + data["frame_id"].astype(str)
            + "_"
            + data["cellx"].astype(int).astype(str)
            + "_"
            + data["celly"].astype(int).astype(str)
        )
        data = data.drop_duplicates(subset=["unique_id"], keep="first")
        # Normal unique_id is with cell_id
        data = data.assign(
            unique_id=data["slide_id"]
            + "_"
            + data["frame_id"].astype(str)
            + "_"
            + data["cell_id"].astype(str)
        )
        data = data.reset_index(drop=True)
        # All columns up to "slide_id" are features; drop the "slide_id"
        features = data.loc[:, :"slide_id"].iloc[:, :-1]
        data = data.loc[:, "slide_id":]
        # Grab the info columns
        info = data[["slide_id", "frame_id", "cellx", "celly"]]
        info.columns = ["slide_id", "tile", "x", "y"]
        info = info.assign(roi=0)  # OCULAR only works on 1 ROI, as far as known
        info = info[["slide_id", "tile", "roi", "x", "y"]]
        # Metadata has duplicate columns for later convenience
        metadata = data
        # Certain columns tend to be problematic with mixed data formats...
        for col in ["TRITC", "CY5", "FITC"]:
            if col in metadata:
                labels = {
                    "False": False,
                    "True": True,
                    "FALSE": False,
                    "TRUE": True,
                    False: False,
                    True: True,
                }
                metadata[col] = metadata[col].map(labels).astype(bool)
        for col in ["catalogue_id", "catalogue_distance", "clust", "hcpc"]:
            if col in metadata:
                metadata[col] = metadata[col].fillna(-1).astype(int)
        info["tile"] = info["tile"] - 1  # Convert to 0-based indexing
        return EventArray(info, metadata, features)
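As a rough usage sketch for EventArray (the file name, column names, and values below are hypothetical), a flat DataFrame with prefixed metadata and feature columns can be converted with from_dataframe() and round-tripped through CSV:

import pandas as pd
from csi_images.csi_events import EventArray

# Hypothetical flat table: info columns plus prefixed metadata/feature columns
df = pd.DataFrame(
    {
        "slide_id": ["EXAMPLE_SLIDE", "EXAMPLE_SLIDE"],
        "tile": [10, 11],
        "roi": [0, 0],
        "x": [120, 480],
        "y": [64, 256],
        "metadata_cell_id": [1, 2],
        "features_dapi_mean": [1032.5, 880.0],
    }
)

events = EventArray.from_dataframe(df)    # splits into .info, .metadata, .features
brightest = events.sort("dapi_mean", ascending=False).rows([0])

events.save_csv("events.csv")             # writes info plus prefixed columns
reloaded = EventArray.load_csv("events.csv")
print(reloaded == events)                 # True if dtypes survive the round trip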
287 """ 288 crops = {} 289 for file in glob.glob(os.path.join(input_path, f"{self}-*")): 290 label = os.path.splitext(os.path.basename(file))[0].split("-")[-1] 291 # Skip if we have labels to target 292 if labels is not None and label not in labels: 293 continue 294 crops[label] = imageio.imread(file) 295 return crops 296 297 def get_montage_channels( 298 self, 299 channels: Sequence[int | str] | None, 300 composites: dict[int | str, tuple[float, float, float]] | None, 301 ) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]: 302 """ 303 Get the channel names for the montage from the event's tile. 304 :param channels: channel indices or names for grayscale channels 305 :param composites: dictionary of channel indices or names and RGB values 306 :return: (1) channel indices to retrieve, 307 (2) relative grayscale channel indices, and 308 (3) composite channel indices and RGB values. 309 """ 310 if channels is None: 311 channels = list(range(len(self.tile.scan.channels))) 312 if (len(channels) == 0) and (composites is None or len(composites) == 0): 313 raise ValueError("Must provide at least one channel type to montage") 314 315 channels_to_get = [] 316 317 # Build the list of channels to retrieve 318 if channels is not None: 319 if isinstance(channels[0], str): 320 channels = self.tile.scan.get_channel_indices(channels) 321 channels_to_get += channels 322 order = list(range(len(channels))) # Always the first n channels 323 else: 324 order = None 325 326 if composites is not None: 327 relative_composites = {} # Relative indices for retrieved channels 328 # Convert to scan indices 329 rgb_channels = list(composites.keys()) 330 if isinstance(rgb_channels[0], str): 331 rgb_channels = self.tile.scan.get_channel_indices(rgb_channels) 332 # Find the index or add to the end 333 for channel, rgb in zip(rgb_channels, composites.values()): 334 if channel not in channels_to_get: 335 channels_to_get.append(channel) 336 relative_composites[channel] = rgb 337 else: 338 relative_composites[channels_to_get.index(channel)] = rgb 339 else: 340 relative_composites = None 341 342 return channels_to_get, order, relative_composites 343 344 def get_montage( 345 self, 346 channels: Sequence[int | str] = None, 347 composites: dict[int | str, tuple[float, float, float]] = None, 348 mask: np.ndarray[np.uint8] = None, 349 labels: Sequence[str] = None, 350 crop_size: int = 100, 351 in_pixels: bool = True, 352 input_path: str = None, 353 apply_gain: bool = True, 354 **kwargs, 355 ) -> np.ndarray: 356 """ 357 Convenience function for getting frame images and creating a montage. Mirrors 358 csi_images.make_montage(). Convenient for a single event's montage, but less 359 efficient when for multiple events from the same tile. 360 :param channels: the channels to use for black-and-white montages. 361 :param composites: dictionary of indices and RGB tuples for a composite. 362 :param mask: a mask to apply to the montage. Must be the same size as the crop. 363 :param crop_size: the square size of the image crop to get for this event. 364 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 365 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 366 :param apply_gain: whether to apply scanner-calculated gain to the images, if 367 not already applied. If a list, matches the channels. 368 :param kwargs: montage options. See csi_images.make_montage() for more details. 369 :return: numpy array representing the montage. 
370 """ 371 channels, order, composites = self.get_montage_channels(channels, composites) 372 images = self.get_crops(crop_size, in_pixels, input_path, channels, apply_gain) 373 return csi_images.make_montage( 374 images, order, composites, mask, labels, **kwargs 375 ) 376 377 def save_montage( 378 self, 379 montage: np.ndarray, 380 output_path: str, 381 ocular_names: bool = False, 382 tag: str = "", 383 file_extension: str = ".jpeg", 384 **kwargs, 385 ): 386 """ 387 Save the montage as a JPEG image with a set name. 388 :param montage: the montage to save. 389 :param output_path: the folder to save the montage in. Will make if needed. 390 :param ocular_names: whether to use the OCULAR naming convention. 391 :param tag: a tag to append to the file name. Ignored if ocular_names is True. 392 :param file_extension: the file extension to save the montage as. Defaults to .jpeg. 393 :param kwargs: additional arguments to pass to imageio.imwrite(). 394 :return: None 395 """ 396 if csi_images is None or imageio is None: 397 raise ModuleNotFoundError( 398 "imageio libraries not installed! " 399 "run `pip install csi_images[imageio]` to resolve." 400 ) 401 402 montage = csi_images.scale_bit_depth(montage, np.uint8) 403 404 if not file_extension.startswith("."): 405 file_extension = f".{file_extension}" 406 407 if ocular_names: 408 if "cell_id" not in self.metadata.index: 409 raise ValueError( 410 "Event metadata must include 'cell_id' for OCULAR naming." 411 ) 412 file = f"{self.tile.n}-{self.metadata['cell_id']}-{self.x}-{self.y}{file_extension}" 413 else: 414 file = f"{self}{tag}{file_extension}" 415 416 os.makedirs(output_path, exist_ok=True) 417 imageio.imwrite(os.path.join(output_path, file), montage, **kwargs) 418 419 def load_montage(self, input_path: str, tag: str = "") -> np.ndarray: 420 """ 421 Loads the montage from a file saved by Event.save_montage. 422 :param input_path: the path to the folder where the montage was saved. 423 :param tag: a string to add to the file name, before the extension. 424 :return: 425 """ 426 file = f"{self}{tag}.jpeg" 427 return imageio.imread(os.path.join(input_path, file)) 428 429 @classmethod 430 def get_many_crops( 431 cls, 432 events: Sequence[Self], 433 crop_size: int | Sequence[int] = 100, 434 in_pixels: bool = True, 435 input_path: str | Sequence[str] = None, 436 channels: Sequence[int | str] = None, 437 apply_gain: bool | Sequence[bool] = True, 438 ) -> list[list[np.ndarray]]: 439 """ 440 Get the crops for a list of events, ensuring that there is no wasteful reading 441 of the same tile multiple times. This function is more efficient than calling 442 get_crops() for each event. 443 :param events: the events to get crops for. 444 :param crop_size: the square size of the image crop to get for this event. 445 Defaults to four times the size of the event. 446 :param in_pixels: whether the crop size is in pixels or micrometers. 447 Defaults to pixels, and is ignored if crop_size is None. 448 :param input_path: the path to the input images. Will only work for lists of events 449 from the same scan. Defaults to None (uses the scan's path). 450 :param channels: the channels to extract images for. Defaults to all channels. 451 :param apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True. 452 Can be supplied as a list to apply gain to individual channels. 453 :return: a list of lists of cropped images for each event. 
454 """ 455 if len(events) == 0: 456 return [] 457 # Adapt singular inputs to lists of appropriate length 458 if isinstance(crop_size, int): 459 crop_size = [crop_size] * len(events) 460 if input_path is None or isinstance(input_path, str): 461 input_path = [input_path] * len(events) 462 463 # Get the order of the events when sorted by slide/tile 464 order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 465 466 # Allocate the list to size 467 crops = [[]] * len(events) 468 last_tile = None 469 images = None # Holds large numpy arrays, so expensive to compare 470 # Iterate through in slide/tile sorted order 471 for i in order: 472 if last_tile != events[i].tile: 473 # Gather the frame images, preserving them for the next event 474 frames = Frame.get_frames(events[i].tile, channels) 475 if isinstance(apply_gain, bool): 476 apply = [apply_gain] * len(frames) 477 else: 478 apply = apply_gain 479 images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)] 480 last_tile = events[i].tile 481 # Use the frame images to crop the event images 482 crops[i] = events[i].crop(images, crop_size[i], in_pixels) 483 return crops 484 485 @classmethod 486 def get_many_montages( 487 cls, 488 events: Sequence[Self], 489 channels: Sequence[int | str] = None, 490 composites: dict[int | str, tuple[float, float, float]] = None, 491 masks: Sequence[np.ndarray[np.uint8]] = None, 492 labels: Sequence[str] = None, 493 crop_size: int = 100, 494 in_pixels: bool = True, 495 input_path: str = None, 496 apply_gain: bool | Iterable[bool] = True, 497 **kwargs, 498 ) -> list[np.ndarray]: 499 """ 500 Convenience function for get_montage(), but for a list of events. More efficient 501 than get_montage() when working with multiple events from the same tile. 502 :param events: a list of Event objects. 503 :param channels: the channels to extract images for. Defaults to all channels. 504 :param composites: dictionary of indices and RGB tuples for a composite. 505 :param masks: a list of masks to apply to the montages. Must be the same size as the crops. 506 :param labels: the labels to subtitle montage images, usually the channel names 507 :param crop_size: the square size of the image crop to get for this event. 508 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 509 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 510 :param apply_gain: whether to apply scanner-calculated gain to the images, if 511 not already applied. If a list, matches the channels. 512 :param kwargs: montage options. See csi_images.make_montage() for more details. 513 :return: a list of numpy arrays representing the montages. 
514 """ 515 if len(events) == 0: 516 return [] 517 # Adapt singular inputs to lists of appropriate length 518 if isinstance(crop_size, int): 519 crop_size = [crop_size] * len(events) 520 if input_path is None or isinstance(input_path, str): 521 input_path = [input_path] * len(events) 522 if masks is None or isinstance(masks, np.ndarray): 523 masks = [masks] * len(events) 524 525 # Get the order of the events when sorted by slide/tile 526 event_order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 527 528 # Allocate the list to size 529 montages = [np.empty(0)] * len(events) 530 # Placeholder variables to avoid rereading the same tile 531 images = None # Holds large numpy arrays, so expensive to compare 532 order = None 533 rel_composites = None 534 last_tile = None 535 # Iterate through in slide/tile sorted order 536 for i in event_order: 537 if last_tile != events[i].tile: 538 channels_to_get, order, rel_composites = events[i].get_montage_channels( 539 channels, composites 540 ) 541 # Gather the frame images, preserving them for the next event 542 frames = Frame.get_frames(events[i].tile, channels_to_get) 543 if isinstance(apply_gain, bool): 544 apply = [apply_gain] * len(frames) 545 else: 546 apply = apply_gain 547 images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)] 548 last_tile = events[i].tile 549 # Use the frame images to crop the event images and make montages 550 crops = events[i].crop(images, crop_size[i], in_pixels) 551 montages[i] = csi_images.make_montage( 552 crops, order, rel_composites, masks[i], labels, **kwargs 553 ) 554 555 return montages 556 557 @classmethod 558 def get_and_save_many_crops( 559 cls, 560 events: list[Self], 561 output_path: str, 562 labels: Sequence[str], 563 ext: str = "auto", 564 additional_gain: Sequence[float] = None, 565 **kwargs, 566 ) -> None: 567 """ 568 Get and save the crops for a list of events, ensuring that there is no wasteful 569 reading and limiting the image data in memory to 1 tile at a time. This function 570 is more efficient that chaining get_crops() and save_crops() for each event or 571 get_many_crops() and then save_crops(). 572 :param events: list of events to get, crop, and save. 573 :param output_path: the folder to save the crops in. Will make if needed. 574 :param labels: the labels to save the crops with. See save_crops(). 575 :param ext: the file extension to save the crops as. See save_crops(). 576 :param additional_gain: additional gain to apply to the crops. If not None, must 577 match the length of the number of crop channels. 578 :param kwargs: see get_many_crops() for more parameters. 579 :return: 580 """ 581 unique_tiles = set([event.tile for event in events]) 582 583 for tile in unique_tiles: 584 # Get one tile's worth of event crops 585 tile_events = [e for e in events if e.tile == tile] 586 crops_list = cls.get_many_crops(tile_events, **kwargs) 587 for event, crops in zip(tile_events, crops_list): 588 # Apply any additional gains 589 if additional_gain is not None: 590 crops = [gain * crop for gain, crop in zip(additional_gain, crops)] 591 event.save_crops(crops, output_path, labels, ext) 592 593 @classmethod 594 def get_and_save_many_montages( 595 cls, 596 events: list[Self], 597 output_path: str, 598 ocular_names: bool = False, 599 tag: str = "", 600 **kwargs, 601 ) -> None: 602 """ 603 Save montages of the events to image files. 604 :param events: the events to get, montage, and save. 605 :param output_path: the folder to save the montages to. Will make if needed. 
606 :param ocular_names: whether to use the OCULAR naming convention. 607 :param tag: a tag to append to the file name. Ignored if ocular_names is True. 608 :param kwargs: see get_many_montages() for more parameters. 609 """ 610 unique_tiles = set([event.tile for event in events]) 611 612 for tile in unique_tiles: 613 # Get one tile's worth of event crops 614 tile_events = [e for e in events if e.tile == tile] 615 montages = cls.get_many_montages(tile_events, **kwargs) 616 for event, montage in zip(tile_events, montages): 617 event.save_montage(montage, output_path, ocular_names, tag)
A class that represents a single event in a scan, making it easy to evaluate singular events. Required metadata is exposed as attributes, and optional metadata and features are stored as DataFrames.
Homogeneous transformation matrices for converting between scanner and slide coordinates. The matrices are 3x3, with the final column representing the translation in micrometers (um). For more information, see affine transformations.
Transformations are nominal, and accuracy is not guaranteed; this is due to imperfections in slides and alignment in the scanners. Units are in micrometers.
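As a quick illustration of how these matrices are applied (the same arithmetic used by get_slide_position() below), here is a minimal sketch with an arbitrary scanner-frame point; only numpy is required:

import numpy as np

# Arbitrary example: a point at (12_000 um, 3_000 um) in the scanner frame,
# expressed as a homogeneous column vector
scan_xy = np.array([[12_000.0], [3_000.0], [1.0]])

# BZScanner matrix from SCAN_TO_SLIDE_TRANSFORM: swap/flip the axes, then translate
transform = np.array([
    [0, -1, 75000],
    [-1, 0, 25000],
    [0, 0, 1],
])

slide_xy = transform @ scan_xy
print(float(slide_xy[0][0]), float(slide_xy[1][0]))  # -> 72000.0 13000.0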
101 def get_scan_position(self) -> tuple[float, float]: 102 """ 103 Get the position of the event in the scanner's coordinate frame. 104 :return: the scan position of the event in micrometers (um). 105 """ 106 # Get overall pixel position 107 real_tile_height, real_tile_width = self.tile.scan.get_image_size() 108 pixel_x = self.x + (real_tile_width * self.tile.x) 109 pixel_y = self.y + (real_tile_height * self.tile.y) 110 # Convert to micrometers 111 x_um = pixel_x * self.tile.scan.pixel_size_um 112 y_um = pixel_y * self.tile.scan.pixel_size_um 113 # Add the scan's origin in the scanner frame 114 x_um += self.tile.scan.roi[self.tile.n_roi].origin_x_um 115 y_um += self.tile.scan.roi[self.tile.n_roi].origin_y_um 116 return x_um, y_um
Get the position of the event in the scanner's coordinate frame.
Returns
the scan position of the event in micrometers (um).
118 def get_slide_position(self) -> tuple[float, float]: 119 """ 120 Get the slide position of the event in micrometers (um). 121 :return: the slide position of the event. 122 """ 123 # Turn scan_position into a 3x1 vector 124 scan_position = self.get_scan_position() 125 scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]]) 126 127 # Multiply by the appropriate homogeneous matrix 128 if self.tile.scan.scanner_id.startswith(self.tile.scan.Type.AXIOSCAN7.value): 129 transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.AXIOSCAN7] 130 elif self.tile.scan.scanner_id.startswith(self.tile.scan.Type.BZSCANNER.value): 131 transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.BZSCANNER] 132 else: 133 raise ValueError(f"Scanner type {self.tile.scan.scanner_id} not supported.") 134 slide_position = np.matmul(transform, scan_position) 135 return float(slide_position[0][0]), float(slide_position[1][0])
Get the slide position of the event in micrometers (um).
Returns
the slide position of the event.
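A minimal usage sketch, assuming `event` is an Event built from a real Scan and Tile:

x_scan, y_scan = event.get_scan_position()     # micrometers in the scanner frame
x_slide, y_slide = event.get_slide_position()  # micrometers in the slide frame
print(f"scan ({x_scan:.1f}, {y_scan:.1f}) um -> slide ({x_slide:.1f}, {y_slide:.1f}) um")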
137 def crop( 138 self, images: Iterable[np.ndarray], crop_size: int = 100, in_pixels: bool = True 139 ) -> list[np.ndarray]: 140 """ 141 Crop the event from the provided frame images. Use if you have already gotten 142 frame images; useful for cropping multiple events from the same frame image. 143 :param images: the frame images. 144 :param crop_size: the square size of the image crop to get for this event. 145 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 146 :return: image_size x image_size crops of the event in the provided frames. If 147 the event is too close to the edge, the crop will be smaller and not centered. 148 """ 149 # Convert a crop size in micrometers to pixels 150 if not in_pixels: 151 crop_size = round(crop_size / self.tile.scan.pixel_size_um) 152 image_height, image_width = 0, 0 153 for image in images: 154 if image_height == 0 and image_width == 0: 155 image_height, image_width = image.shape 156 else: 157 if image_height != image.shape[0] or image_width != image.shape[1]: 158 raise ValueError("All images must be the same size") 159 if image_height == 0 or image_width == 0: 160 raise ValueError("No images provided") 161 162 # Find the crop bounds 163 bounds = [ 164 self.x - (crop_size // 2) + 1, 165 self.y - (crop_size // 2) + 1, 166 self.x + math.ceil(crop_size / 2) + 1, 167 self.y + math.ceil(crop_size / 2) + 1, 168 ] 169 # Determine how much the bounds violate the image size 170 displacements = [ 171 max(0, -bounds[0]), 172 max(0, -bounds[1]), 173 max(0, bounds[2] - image_width), 174 max(0, bounds[3] - image_height), 175 ] 176 # Cap off the bounds 177 bounds = [ 178 max(0, bounds[0]), 179 max(0, bounds[1]), 180 min(image_width, bounds[2]), 181 min(image_height, bounds[3]), 182 ] 183 184 # Crop the images 185 crops = [] 186 for image in images: 187 # Create a blank image of the right size 188 crop = np.zeros((crop_size, crop_size), dtype=image.dtype) 189 190 # Insert the cropped image into the blank image, leaving a black buffer 191 # around the edges if the crop would go beyond the original image bounds 192 crop[ 193 displacements[1] : crop_size - displacements[3], 194 displacements[0] : crop_size - displacements[2], 195 ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]] 196 crops.append(crop) 197 return crops
Crop the event from the provided frame images. Use if you have already gotten frame images; useful for cropping multiple events from the same frame image.
Parameters
- images: the frame images.
- crop_size: the square size of the image crop to get for this event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
Returns
crop_size x crop_size crops of the event in the provided frames. If the event is too close to the edge, the out-of-bounds region is zero-padded (black) and the event will not be centered in the crop.
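A self-contained sketch of crop() using a placeholder scan and synthetic frame images; the slide ID, tile number, and image sizes are arbitrary:

import numpy as np
from csi_images.csi_scans import Scan
from csi_images.csi_tiles import Tile
from csi_images.csi_events import Event

# Placeholder scan and tile; a real workflow would use an actual scan from disk
scan = Scan.make_placeholder("EXAMPLE_SLIDE", 10, 0)
tile = Tile(scan, 10, 0)
event = Event(tile, x=512, y=384)

# Two same-sized, single-channel frame images (synthetic here)
images = [np.random.randint(0, 65535, (1040, 1392), dtype=np.uint16) for _ in range(2)]
crops = event.crop(images, crop_size=50)  # two 50x50 crops centered on (512, 384)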
199 def get_crops( 200 self, 201 crop_size: int = 100, 202 in_pixels: bool = True, 203 input_path: str = None, 204 channels: Iterable[int | str] = None, 205 apply_gain: bool | Iterable[bool] = True, 206 ) -> list[np.ndarray]: 207 """ 208 Gets the frame images for this event and then crops the event from the images. 209 Convenient for retrieving a single event's crops, but less efficient when 210 retrieving multiple events from the same tile as it will reread the images. 211 :param crop_size: the square size of the image crop to get for this event. 212 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 213 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 214 :param channels: the channels to extract images for. Defaults to all channels. 215 :param apply_gain: whether to apply scanner-calculated gain to the images, if 216 not already applied. If a list, matches the channels. 217 :return: a list of cropped images from the scan in the order of the channels. 218 """ 219 # This function validates channels 220 frames = Frame.get_frames(self.tile, channels) 221 # Convert individual inputs to lists of appropriate length 222 if isinstance(apply_gain, bool): 223 apply_gain = [apply_gain] * len(frames) 224 images = [f.get_image(input_path, a) for f, a in zip(frames, apply_gain)] 225 return self.crop(images, crop_size, in_pixels)
Gets the frame images for this event and then crops the event from the images. Convenient for retrieving a single event's crops, but less efficient when retrieving multiple events from the same tile as it will reread the images.
Parameters
- crop_size: the square size of the image crop to get for this event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
- input_path: the path to the input images. Defaults to None (uses the scan's path).
- channels: the channels to extract images for. Defaults to all channels.
- apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. If a list, matches the channels.
Returns
a list of cropped images from the scan in the order of the channels.
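A usage sketch, assuming `event` belongs to a scan whose images are available on disk and that "DAPI" and "TRITC" are channel names present in that scan (both assumptions are illustrative):

crops = event.get_crops(crop_size=100, channels=["DAPI", "TRITC"])
dapi_crop, tritc_crop = crops  # numpy arrays, in the order of the requested channels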
227 def save_crops( 228 self, 229 crops: Sequence[np.ndarray], 230 output_path: str, 231 labels: Sequence[str], 232 ext: str = "auto", 233 ): 234 """ 235 Save the crops to image files. 236 :param crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or 237 grayscale if 1 channel [h, w] or [h, w, 1]. 238 :param labels: the labels to append to the file name, usually the channel names 239 associated with each crop. 240 :param output_path: the folder to save the crops to. Will make if needed. 241 :param ext: the file extension to save the crops as. Defaults to "auto", which 242 will save as .tif for grayscale images and .jpg for RGB images. 243 :return: None 244 """ 245 if len(crops) != len(labels): 246 raise ValueError("Crops and labels must be the same length") 247 248 if csi_images is None or imageio is None: 249 raise ModuleNotFoundError( 250 "imageio libraries not installed! " 251 "run `pip install csi_images[imageio]` to resolve." 252 ) 253 254 os.makedirs(output_path, exist_ok=True) 255 256 for crop, label in zip(crops, labels): 257 if ext == "auto": 258 if len(crop.shape) == 2 or crop.shape[2] == 1: 259 file_extension = ".tif" 260 elif crop.shape[2] == 3: 261 file_extension = ".jpg" 262 else: 263 warnings.warn( 264 f"Image shape {crop.shape} not recognized; saving as .tif" 265 ) 266 file_extension = ".tif" 267 else: 268 file_extension = ext 269 file = os.path.join(output_path, f"{self}-{label}{file_extension}") 270 # TODO: add more file types here 271 if file_extension == ".tif": 272 imageio.imwrite(file, crop, compression="deflate") 273 elif file_extension in [".jpg", ".jpeg"]: 274 crop = csi_images.scale_bit_depth(crop, np.uint8) 275 imageio.imwrite(file, crop, quality=80) 276 else: 277 imageio.imwrite(file, crop)
Save the crops to image files.
Parameters
- crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or grayscale if 1 channel [h, w] or [h, w, 1].
- labels: the labels to append to the file name, usually the channel names associated with each crop.
- output_path: the folder to save the crops to. Will make if needed.
- ext: the file extension to save the crops as. Defaults to "auto", which will save as .tif for grayscale images and .jpg for RGB images.
Returns
None
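A usage sketch, assuming the imageio extra is installed (pip install csi_images[imageio]) and the same illustrative channel names as above:

crops = event.get_crops(channels=["DAPI", "TRITC"])
event.save_crops(crops, output_path="out/crops", labels=["DAPI", "TRITC"])
# Grayscale crops are written as "<event>-DAPI.tif" and "<event>-TRITC.tif" under out/crops/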
279 def load_crops( 280 self, input_path: str, labels: list[str] = None 281 ) -> dict[str, np.ndarray]: 282 """ 283 Loads previously saved crop files from a folder. 284 :param input_path: folder containing crop files. 285 :param labels: optional label filter, will only return crops with these labels. 286 :return: a tuple of lists containing the crops and their labels. 287 """ 288 crops = {} 289 for file in glob.glob(os.path.join(input_path, f"{self}-*")): 290 label = os.path.splitext(os.path.basename(file))[0].split("-")[-1] 291 # Skip if we have labels to target 292 if labels is not None and label not in labels: 293 continue 294 crops[label] = imageio.imread(file) 295 return crops
Loads previously saved crop files from a folder.
Parameters
- input_path: folder containing crop files.
- labels: optional label filter, will only return crops with these labels.
Returns
a dictionary mapping labels to the loaded crops.
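A usage sketch that reloads crops written by save_crops(); the folder and labels are illustrative:

all_crops = event.load_crops("out/crops")                   # {"DAPI": ndarray, "TRITC": ndarray, ...}
dapi_only = event.load_crops("out/crops", labels=["DAPI"])  # filtered to a single label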
297 def get_montage_channels( 298 self, 299 channels: Sequence[int | str] | None, 300 composites: dict[int | str, tuple[float, float, float]] | None, 301 ) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]: 302 """ 303 Get the channel names for the montage from the event's tile. 304 :param channels: channel indices or names for grayscale channels 305 :param composites: dictionary of channel indices or names and RGB values 306 :return: (1) channel indices to retrieve, 307 (2) relative grayscale channel indices, and 308 (3) composite channel indices and RGB values. 309 """ 310 if channels is None: 311 channels = list(range(len(self.tile.scan.channels))) 312 if (len(channels) == 0) and (composites is None or len(composites) == 0): 313 raise ValueError("Must provide at least one channel type to montage") 314 315 channels_to_get = [] 316 317 # Build the list of channels to retrieve 318 if channels is not None: 319 if isinstance(channels[0], str): 320 channels = self.tile.scan.get_channel_indices(channels) 321 channels_to_get += channels 322 order = list(range(len(channels))) # Always the first n channels 323 else: 324 order = None 325 326 if composites is not None: 327 relative_composites = {} # Relative indices for retrieved channels 328 # Convert to scan indices 329 rgb_channels = list(composites.keys()) 330 if isinstance(rgb_channels[0], str): 331 rgb_channels = self.tile.scan.get_channel_indices(rgb_channels) 332 # Find the index or add to the end 333 for channel, rgb in zip(rgb_channels, composites.values()): 334 if channel not in channels_to_get: 335 channels_to_get.append(channel) 336 relative_composites[channel] = rgb 337 else: 338 relative_composites[channels_to_get.index(channel)] = rgb 339 else: 340 relative_composites = None 341 342 return channels_to_get, order, relative_composites
Get the channel indices needed for the montage from the event's tile.
Parameters
- channels: channel indices or names for grayscale channels
- composites: dictionary of channel indices or names and RGB values
Returns
(1) channel indices to retrieve, (2) relative grayscale channel indices, and (3) composite channel indices and RGB values.
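A usage sketch, assuming the scan has channels named "DAPI", "TRITC", and "CY5" (illustrative names):

to_get, gray_order, rel_composites = event.get_montage_channels(
    channels=["DAPI", "TRITC"],
    composites={"CY5": (1.0, 0.0, 0.0)},  # render CY5 in red
)
# to_get: scan channel indices to read; gray_order: positions of the grayscale
# channels within to_get; rel_composites: composite channel indices with RGB values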
344 def get_montage( 345 self, 346 channels: Sequence[int | str] = None, 347 composites: dict[int | str, tuple[float, float, float]] = None, 348 mask: np.ndarray[np.uint8] = None, 349 labels: Sequence[str] = None, 350 crop_size: int = 100, 351 in_pixels: bool = True, 352 input_path: str = None, 353 apply_gain: bool = True, 354 **kwargs, 355 ) -> np.ndarray: 356 """ 357 Convenience function for getting frame images and creating a montage. Mirrors 358 csi_images.make_montage(). Convenient for a single event's montage, but less 359 efficient when for multiple events from the same tile. 360 :param channels: the channels to use for black-and-white montages. 361 :param composites: dictionary of indices and RGB tuples for a composite. 362 :param mask: a mask to apply to the montage. Must be the same size as the crop. 363 :param crop_size: the square size of the image crop to get for this event. 364 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 365 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 366 :param apply_gain: whether to apply scanner-calculated gain to the images, if 367 not already applied. If a list, matches the channels. 368 :param kwargs: montage options. See csi_images.make_montage() for more details. 369 :return: numpy array representing the montage. 370 """ 371 channels, order, composites = self.get_montage_channels(channels, composites) 372 images = self.get_crops(crop_size, in_pixels, input_path, channels, apply_gain) 373 return csi_images.make_montage( 374 images, order, composites, mask, labels, **kwargs 375 )
Convenience function for getting frame images and creating a montage. Mirrors csi_images.make_montage(). Convenient for a single event's montage, but less efficient for multiple events from the same tile.
Parameters
- channels: the channels to use for black-and-white montages.
- composites: dictionary of indices and RGB tuples for a composite.
- mask: a mask to apply to the montage. Must be the same size as the crop.
- labels: the labels to subtitle the montage images, usually the channel names.
- crop_size: the square size of the image crop to get for this event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
- input_path: the path to the input images. Defaults to None (uses the scan's path).
- apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. If a list, matches the channels.
- kwargs: montage options. See csi_images.make_montage() for more details.
Returns
numpy array representing the montage.
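A usage sketch, assuming the imageio extra and the same illustrative channel names; the RGB weights are arbitrary:

montage = event.get_montage(
    channels=["DAPI", "TRITC"],
    composites={"DAPI": (0.0, 0.0, 1.0), "TRITC": (1.0, 0.0, 0.0)},
    crop_size=100,
)
# `montage` is a numpy array that can be displayed or passed to save_montage()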
377 def save_montage( 378 self, 379 montage: np.ndarray, 380 output_path: str, 381 ocular_names: bool = False, 382 tag: str = "", 383 file_extension: str = ".jpeg", 384 **kwargs, 385 ): 386 """ 387 Save the montage as a JPEG image with a set name. 388 :param montage: the montage to save. 389 :param output_path: the folder to save the montage in. Will make if needed. 390 :param ocular_names: whether to use the OCULAR naming convention. 391 :param tag: a tag to append to the file name. Ignored if ocular_names is True. 392 :param file_extension: the file extension to save the montage as. Defaults to .jpeg. 393 :param kwargs: additional arguments to pass to imageio.imwrite(). 394 :return: None 395 """ 396 if csi_images is None or imageio is None: 397 raise ModuleNotFoundError( 398 "imageio libraries not installed! " 399 "run `pip install csi_images[imageio]` to resolve." 400 ) 401 402 montage = csi_images.scale_bit_depth(montage, np.uint8) 403 404 if not file_extension.startswith("."): 405 file_extension = f".{file_extension}" 406 407 if ocular_names: 408 if "cell_id" not in self.metadata.index: 409 raise ValueError( 410 "Event metadata must include 'cell_id' for OCULAR naming." 411 ) 412 file = f"{self.tile.n}-{self.metadata['cell_id']}-{self.x}-{self.y}{file_extension}" 413 else: 414 file = f"{self}{tag}{file_extension}" 415 416 os.makedirs(output_path, exist_ok=True) 417 imageio.imwrite(os.path.join(output_path, file), montage, **kwargs)
Save the montage as an image file (JPEG by default) with a standardized name.
Parameters
- montage: the montage to save.
- output_path: the folder to save the montage in. Will make if needed.
- ocular_names: whether to use the OCULAR naming convention.
- tag: a tag to append to the file name. Ignored if ocular_names is True.
- file_extension: the file extension to save the montage as. Defaults to .jpeg.
- kwargs: additional arguments to pass to imageio.imwrite().
Returns
None
419 def load_montage(self, input_path: str, tag: str = "") -> np.ndarray: 420 """ 421 Loads the montage from a file saved by Event.save_montage. 422 :param input_path: the path to the folder where the montage was saved. 423 :param tag: a string to add to the file name, before the extension. 424 :return: 425 """ 426 file = f"{self}{tag}.jpeg" 427 return imageio.imread(os.path.join(input_path, file))
Loads the montage from a file saved by Event.save_montage.
Parameters
- input_path: the path to the folder where the montage was saved.
- tag: a string to add to the file name, before the extension.
Returns
the montage image as a numpy array.
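A round-trip sketch pairing save_montage() with load_montage(); the folder and tag are illustrative, and the default .jpeg extension is assumed so the two calls agree:

event.save_montage(montage, output_path="out/montages", tag="_example")
reloaded = event.load_montage("out/montages", tag="_example")  # numpy array read back from disk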
429 @classmethod 430 def get_many_crops( 431 cls, 432 events: Sequence[Self], 433 crop_size: int | Sequence[int] = 100, 434 in_pixels: bool = True, 435 input_path: str | Sequence[str] = None, 436 channels: Sequence[int | str] = None, 437 apply_gain: bool | Sequence[bool] = True, 438 ) -> list[list[np.ndarray]]: 439 """ 440 Get the crops for a list of events, ensuring that there is no wasteful reading 441 of the same tile multiple times. This function is more efficient than calling 442 get_crops() for each event. 443 :param events: the events to get crops for. 444 :param crop_size: the square size of the image crop to get for this event. 445 Defaults to four times the size of the event. 446 :param in_pixels: whether the crop size is in pixels or micrometers. 447 Defaults to pixels, and is ignored if crop_size is None. 448 :param input_path: the path to the input images. Will only work for lists of events 449 from the same scan. Defaults to None (uses the scan's path). 450 :param channels: the channels to extract images for. Defaults to all channels. 451 :param apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True. 452 Can be supplied as a list to apply gain to individual channels. 453 :return: a list of lists of cropped images for each event. 454 """ 455 if len(events) == 0: 456 return [] 457 # Adapt singular inputs to lists of appropriate length 458 if isinstance(crop_size, int): 459 crop_size = [crop_size] * len(events) 460 if input_path is None or isinstance(input_path, str): 461 input_path = [input_path] * len(events) 462 463 # Get the order of the events when sorted by slide/tile 464 order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 465 466 # Allocate the list to size 467 crops = [[]] * len(events) 468 last_tile = None 469 images = None # Holds large numpy arrays, so expensive to compare 470 # Iterate through in slide/tile sorted order 471 for i in order: 472 if last_tile != events[i].tile: 473 # Gather the frame images, preserving them for the next event 474 frames = Frame.get_frames(events[i].tile, channels) 475 if isinstance(apply_gain, bool): 476 apply = [apply_gain] * len(frames) 477 else: 478 apply = apply_gain 479 images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)] 480 last_tile = events[i].tile 481 # Use the frame images to crop the event images 482 crops[i] = events[i].crop(images, crop_size[i], in_pixels) 483 return crops
Get the crops for a list of events, ensuring that there is no wasteful reading of the same tile multiple times. This function is more efficient than calling get_crops() for each event.
Parameters
- events: the events to get crops for.
- crop_size: the square size of the image crop to get for each event; either a single value or a sequence matching the events. Defaults to 100.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
- input_path: the path to the input images. Will only work for lists of events from the same scan. Defaults to None (uses the scan's path).
- channels: the channels to extract images for. Defaults to all channels.
- apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True. Can be supplied as a list to apply gain to individual channels.
Returns
a list of lists of cropped images for each event.
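A usage sketch, assuming `events` is a list of Event objects from the same scan and "DAPI" is an illustrative channel name:

all_crops = Event.get_many_crops(events, crop_size=100, channels=["DAPI"])
first_event_dapi = all_crops[0][0]  # results are returned in the same order as `events`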
485 @classmethod 486 def get_many_montages( 487 cls, 488 events: Sequence[Self], 489 channels: Sequence[int | str] = None, 490 composites: dict[int | str, tuple[float, float, float]] = None, 491 masks: Sequence[np.ndarray[np.uint8]] = None, 492 labels: Sequence[str] = None, 493 crop_size: int = 100, 494 in_pixels: bool = True, 495 input_path: str = None, 496 apply_gain: bool | Iterable[bool] = True, 497 **kwargs, 498 ) -> list[np.ndarray]: 499 """ 500 Convenience function for get_montage(), but for a list of events. More efficient 501 than get_montage() when working with multiple events from the same tile. 502 :param events: a list of Event objects. 503 :param channels: the channels to extract images for. Defaults to all channels. 504 :param composites: dictionary of indices and RGB tuples for a composite. 505 :param masks: a list of masks to apply to the montages. Must be the same size as the crops. 506 :param labels: the labels to subtitle montage images, usually the channel names 507 :param crop_size: the square size of the image crop to get for this event. 508 :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels. 509 :param input_path: the path to the input images. Defaults to None (uses the scan's path). 510 :param apply_gain: whether to apply scanner-calculated gain to the images, if 511 not already applied. If a list, matches the channels. 512 :param kwargs: montage options. See csi_images.make_montage() for more details. 513 :return: a list of numpy arrays representing the montages. 514 """ 515 if len(events) == 0: 516 return [] 517 # Adapt singular inputs to lists of appropriate length 518 if isinstance(crop_size, int): 519 crop_size = [crop_size] * len(events) 520 if input_path is None or isinstance(input_path, str): 521 input_path = [input_path] * len(events) 522 if masks is None or isinstance(masks, np.ndarray): 523 masks = [masks] * len(events) 524 525 # Get the order of the events when sorted by slide/tile 526 event_order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__())) 527 528 # Allocate the list to size 529 montages = [np.empty(0)] * len(events) 530 # Placeholder variables to avoid rereading the same tile 531 images = None # Holds large numpy arrays, so expensive to compare 532 order = None 533 rel_composites = None 534 last_tile = None 535 # Iterate through in slide/tile sorted order 536 for i in event_order: 537 if last_tile != events[i].tile: 538 channels_to_get, order, rel_composites = events[i].get_montage_channels( 539 channels, composites 540 ) 541 # Gather the frame images, preserving them for the next event 542 frames = Frame.get_frames(events[i].tile, channels_to_get) 543 if isinstance(apply_gain, bool): 544 apply = [apply_gain] * len(frames) 545 else: 546 apply = apply_gain 547 images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)] 548 last_tile = events[i].tile 549 # Use the frame images to crop the event images and make montages 550 crops = events[i].crop(images, crop_size[i], in_pixels) 551 montages[i] = csi_images.make_montage( 552 crops, order, rel_composites, masks[i], labels, **kwargs 553 ) 554 555 return montages
Convenience function for get_montage(), but for a list of events. More efficient than get_montage() when working with multiple events from the same tile.
Parameters
- events: a list of Event objects.
- channels: the channels to extract images for. Defaults to all channels.
- composites: dictionary of indices and RGB tuples for a composite.
- masks: a list of masks to apply to the montages. Must be the same size as the crops.
- labels: the labels to subtitle montage images, usually the channel names
- crop_size: the square size of the image crop to get for this event.
- in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
- input_path: the path to the input images. Defaults to None (uses the scan's path).
- apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. If a list, matches the channels.
- kwargs: montage options. See csi_images.make_montage() for more details.
Returns
a list of numpy arrays representing the montages.
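A usage sketch with the same assumptions as above (illustrative channel names, scan images available on disk):

montages = Event.get_many_montages(
    events,
    channels=["DAPI", "TRITC"],
    composites={"CY5": (1.0, 0.0, 0.0)},
    crop_size=100,
)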
557 @classmethod 558 def get_and_save_many_crops( 559 cls, 560 events: list[Self], 561 output_path: str, 562 labels: Sequence[str], 563 ext: str = "auto", 564 additional_gain: Sequence[float] = None, 565 **kwargs, 566 ) -> None: 567 """ 568 Get and save the crops for a list of events, ensuring that there is no wasteful 569 reading and limiting the image data in memory to 1 tile at a time. This function 570 is more efficient that chaining get_crops() and save_crops() for each event or 571 get_many_crops() and then save_crops(). 572 :param events: list of events to get, crop, and save. 573 :param output_path: the folder to save the crops in. Will make if needed. 574 :param labels: the labels to save the crops with. See save_crops(). 575 :param ext: the file extension to save the crops as. See save_crops(). 576 :param additional_gain: additional gain to apply to the crops. If not None, must 577 match the length of the number of crop channels. 578 :param kwargs: see get_many_crops() for more parameters. 579 :return: 580 """ 581 unique_tiles = set([event.tile for event in events]) 582 583 for tile in unique_tiles: 584 # Get one tile's worth of event crops 585 tile_events = [e for e in events if e.tile == tile] 586 crops_list = cls.get_many_crops(tile_events, **kwargs) 587 for event, crops in zip(tile_events, crops_list): 588 # Apply any additional gains 589 if additional_gain is not None: 590 crops = [gain * crop for gain, crop in zip(additional_gain, crops)] 591 event.save_crops(crops, output_path, labels, ext)
Get and save the crops for a list of events, ensuring that there is no wasteful reading and limiting the image data in memory to 1 tile at a time. This function is more efficient than chaining get_crops() and save_crops() for each event or get_many_crops() and then save_crops().
Parameters
- events: list of events to get, crop, and save.
- output_path: the folder to save the crops in. Will make if needed.
- labels: the labels to save the crops with. See save_crops().
- ext: the file extension to save the crops as. See save_crops().
- additional_gain: additional gain to apply to the crops. If not None, must match the length of the number of crop channels.
- kwargs: see get_many_crops() for more parameters.
Returns
None
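A usage sketch; keyword arguments such as channels are forwarded to get_many_crops(), and the paths and channel labels are illustrative:

Event.get_and_save_many_crops(
    events,
    output_path="out/crops",
    labels=["DAPI", "TRITC"],
    channels=["DAPI", "TRITC"],
)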
593 @classmethod 594 def get_and_save_many_montages( 595 cls, 596 events: list[Self], 597 output_path: str, 598 ocular_names: bool = False, 599 tag: str = "", 600 **kwargs, 601 ) -> None: 602 """ 603 Save montages of the events to image files. 604 :param events: the events to get, montage, and save. 605 :param output_path: the folder to save the montages to. Will make if needed. 606 :param ocular_names: whether to use the OCULAR naming convention. 607 :param tag: a tag to append to the file name. Ignored if ocular_names is True. 608 :param kwargs: see get_many_montages() for more parameters. 609 """ 610 unique_tiles = set([event.tile for event in events]) 611 612 for tile in unique_tiles: 613 # Get one tile's worth of event crops 614 tile_events = [e for e in events if e.tile == tile] 615 montages = cls.get_many_montages(tile_events, **kwargs) 616 for event, montage in zip(tile_events, montages): 617 event.save_montage(montage, output_path, ocular_names, tag)
Save montages of the events to image files.
Parameters
- events: the events to get, montage, and save.
- output_path: the folder to save the montages to. Will make if needed.
- ocular_names: whether to use the OCULAR naming convention.
- tag: a tag to append to the file name. Ignored if ocular_names is True.
- kwargs: see get_many_montages() for more parameters.
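A usage sketch; montage-related keyword arguments are forwarded to get_many_montages(), and the path, tag, and channel names are illustrative:

Event.get_and_save_many_montages(
    events,
    output_path="out/montages",
    tag="_gallery",
    channels=["DAPI", "TRITC"],
)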
620class EventArray: 621 """ 622 A class that holds a large number of events' data, making it easy to analyze and 623 manipulate many events at once. A more separated version of the Event class. 624 """ 625 626 INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y"] 627 628 def __init__( 629 self, 630 info: pd.DataFrame = None, 631 metadata: pd.DataFrame = None, 632 features: pd.DataFrame = None, 633 ): 634 # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y" 635 if info is not None: 636 # Special case: "roi" is often not required, so we'll fill in if its missing 637 if "roi" not in info.columns: 638 info["roi"] = 0 639 if set(info.columns) != set(self.INFO_COLUMNS): 640 raise ValueError( 641 f"EventArray.info must have columns:" 642 f"{self.INFO_COLUMNS}; had {list(info.columns)}" 643 ) 644 # Copy first to avoid modifying the original 645 info = info.copy() 646 # Ensure that the columns are the right types 647 info["slide_id"] = info["slide_id"].astype(str) 648 info["tile"] = info["tile"].astype(np.uint16) 649 info["roi"] = info["roi"].astype(np.uint8) 650 info["x"] = info["x"].round().astype(np.uint16) 651 info["y"] = info["y"].round().astype(np.uint16) 652 # Ensure that the columns are in the right order 653 info = info[self.INFO_COLUMNS] 654 # All DataFrames must all have the same number of rows 655 if metadata is not None and (info is None or len(info) != len(metadata)): 656 raise ValueError( 657 "If EventArray.metadata is not None, it should match rows with .info" 658 ) 659 if features is not None and (info is None or len(info) != len(features)): 660 raise ValueError( 661 "If EventArray.features is not None, it should match rows with .info" 662 ) 663 # No columns named "metadata_", "features_", or "None" 664 column_names = [] 665 if metadata is not None: 666 column_names += metadata.columns.tolist() 667 if features is not None: 668 column_names += features.columns.tolist() 669 if any([col.lower().startswith("metadata_") for col in column_names]): 670 raise ValueError("EventArray column names cannot start with 'metadata_'") 671 if any([col.lower().startswith("features_") for col in column_names]): 672 raise ValueError("EventArray column names cannot start with 'features_'") 673 if any([col.lower() == "none" for col in column_names]): 674 raise ValueError("EventArray column names cannot be 'none'") 675 676 self.info = info 677 self.metadata = metadata 678 self.features = features 679 680 def __len__(self) -> int: 681 # Convenience method to get the number of events 682 if self.info is None: 683 return 0 684 else: 685 return len(self.info) 686 687 def __eq__(self, other): 688 # Parse all possibilities for info 689 if isinstance(self.info, pd.DataFrame): 690 if isinstance(other.info, pd.DataFrame): 691 if not self.info.equals(other.info): 692 return False 693 else: 694 return False 695 elif self.info is None: 696 if other.info is not None: 697 return False 698 699 # Parse all possibilities for metadata 700 if isinstance(self.metadata, pd.DataFrame): 701 if isinstance(other.metadata, pd.DataFrame): 702 is_equal = self.metadata.equals(other.metadata) 703 if not is_equal: 704 return False 705 else: 706 return False 707 elif self.metadata is None: 708 if other.metadata is not None: 709 return False 710 711 # Parse all possibilities for features 712 if isinstance(self.features, pd.DataFrame): 713 if isinstance(other.features, pd.DataFrame): 714 is_equal = self.features.equals(other.features) 715 if not is_equal: 716 return False 717 else: 718 return False 719 elif 
self.features is None: 720 if other.features is not None: 721 return False 722 723 return is_equal 724 725 def get_sort_order( 726 self, by: Hashable | Sequence[Hashable], ascending: bool | Sequence[bool] = True 727 ): 728 """ 729 Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames. 730 :param by: name of the column(s) to sort by. 731 :param ascending: whether to sort in ascending order; can be a list to match by 732 :return: the order of the indices to sort by. 733 """ 734 columns = self.get(by) 735 return columns.sort_values(by=by, ascending=ascending).index 736 737 def sort( 738 self, 739 by: Hashable | Sequence[Hashable], 740 ascending: bool | Sequence[bool] = True, 741 ) -> Self: 742 """ 743 Sort the EventArray by column(s) in the info, metadata, or features DataFrames. 744 :param by: name of the column(s) to sort by. 745 :param ascending: whether to sort in ascending order; can be a list to match by 746 :return: a new, sorted EventArray. 747 """ 748 order = self.get_sort_order(by, ascending) 749 info = self.info.loc[order].reset_index(drop=True) 750 if self.metadata is not None: 751 metadata = self.metadata.loc[order].reset_index(drop=True) 752 else: 753 metadata = None 754 if self.features is not None: 755 features = self.features.loc[order].reset_index(drop=True) 756 else: 757 features = None 758 return EventArray(info, metadata, features) 759 760 def get(self, column_names: Hashable | Sequence[Hashable]) -> pd.DataFrame: 761 """ 762 Get a DataFrame with the specified columns from the EventArray, by value. 763 :param column_names: the names of the columns to get. 764 :return: a DataFrame with the specified columns. 765 """ 766 if isinstance(column_names, Hashable): 767 column_names = [column_names] # Drop into a list for the loop 768 columns = [] 769 for column_name in column_names: 770 if column_name in self.info.columns: 771 columns.append(self.info[column_name]) 772 elif self.metadata is not None and column_name in self.metadata.columns: 773 columns.append(self.metadata[column_name]) 774 elif self.features is not None and column_name in self.features.columns: 775 columns.append(self.features[column_name]) 776 else: 777 raise ValueError(f"Column {column_name} not found in EventArray") 778 return pd.concat(columns, axis=1) 779 780 def rows(self, rows: Sequence[Hashable]) -> Self: 781 """ 782 Get a subset of the EventArray rows based on a boolean or integer index, by value. 783 :param rows: row labels, indices, or boolean mask; anything for .loc[] 784 :return: a new EventArray with the subset of events. 785 """ 786 info = self.info.loc[rows].reset_index(drop=True) 787 if self.metadata is not None: 788 metadata = self.metadata.loc[rows].reset_index(drop=True) 789 else: 790 metadata = None 791 if self.features is not None: 792 features = self.features.loc[rows].reset_index(drop=True) 793 else: 794 features = None 795 return EventArray(info, metadata, features) 796 797 def copy(self) -> Self: 798 """ 799 Create a deep copy of the EventArray. 800 :return: a deep copy of the EventArray. 801 """ 802 return EventArray( 803 info=self.info.copy(), 804 metadata=None if self.metadata is None else self.metadata.copy(), 805 features=None if self.features is None else self.features.copy(), 806 ) 807 808 # TODO: add a "filter" convenience function that takes a column name and values to filter by 809 810 def add_metadata(self, new_metadata: pd.Series | pd.DataFrame) -> None: 811 """ 812 Add metadata to the EventArray. 
Removes the need to check if metadata is None. 813 Overwrites any existing metadata with the same column names as the new metadata. 814 :param new_metadata: the metadata to add. 815 """ 816 if len(self) != len(new_metadata): 817 raise ValueError("New metadata must match length of existing info") 818 819 if self.metadata is None: 820 self.metadata = new_metadata 821 else: 822 if isinstance(new_metadata, pd.Series): 823 self.metadata[new_metadata.name] = new_metadata 824 else: 825 # It's a DataFrame 826 self.metadata[new_metadata.columns] = new_metadata 827 828 def add_features(self, new_features: pd.Series | pd.DataFrame) -> None: 829 """ 830 Add features to the EventArray. Removes the need to check if features is None. 831 Overwrites any existing features with the same column names as the new features. 832 :param new_features: the features to add. 833 """ 834 if len(self) != len(new_features): 835 raise ValueError("New features must match length of existing info") 836 837 if self.features is None: 838 self.features = new_features 839 else: 840 if isinstance(new_features, pd.Series): 841 self.features[new_features.name] = new_features 842 else: 843 # It's a DataFrame 844 self.features[new_features.columns] = new_features 845 846 @classmethod 847 def merge(cls, events: Iterable[Self]) -> Self: 848 """ 849 Combine EventArrays in a list into a single EventArray. 850 :param events: the new list of events. 851 """ 852 all_info = [] 853 all_metadata = [] 854 all_features = [] 855 for event_array in events: 856 # Skip empty EventArrays 857 if event_array.info is not None: 858 all_info.append(event_array.info) 859 if event_array.metadata is not None: 860 all_metadata.append(event_array.metadata) 861 if event_array.features is not None: 862 all_features.append(event_array.features) 863 if len(all_info) == 0: 864 return EventArray() 865 else: 866 all_info = pd.concat(all_info, ignore_index=True) 867 if len(all_metadata) == 0: 868 all_metadata = None 869 else: 870 all_metadata = pd.concat(all_metadata, ignore_index=True) 871 if len(all_features) == 0: 872 all_features = None 873 else: 874 all_features = pd.concat(all_features, ignore_index=True) 875 876 return EventArray(all_info, all_metadata, all_features) 877 878 def to_events( 879 self, 880 scans: Scan | Iterable[Scan], 881 ignore_missing_scans=True, 882 ignore_metadata=False, 883 ignore_features=False, 884 ) -> list[Event]: 885 """ 886 Get the events in the EventArray as a list of events. Returns [] if empty. 887 :param scans: the scans that the events belong to, auto-matched by slide_id. 888 Pass None if you don't care about scan metadata (pass ignore_missing_scans). 889 :param ignore_missing_scans: whether to create blank scans for events without scans. 890 :param ignore_metadata: whether to ignore metadata or not 891 :param ignore_features: whether to ignore features or not 892 :return: 893 """ 894 if len(self) == 0: 895 return [] 896 if isinstance(scans, Scan): 897 scans = [scans] 898 scans = {scan.slide_id: scan for scan in scans} 899 events = [] 900 for i in range(len(self.info)): 901 # Determine the associated scan 902 slide_id = self.info["slide_id"][i] 903 if slide_id not in scans: 904 if ignore_missing_scans: 905 # Create a placeholder scan if the scan is missing 906 scan = Scan.make_placeholder( 907 slide_id, 908 self.info["tile"][i], 909 self.info["roi"][i], 910 ) 911 else: 912 raise ValueError( 913 f"Scan {self.info['slide_id'][i]} not found for event {i}." 
914 ) 915 else: 916 scan = scans[slide_id] 917 918 # Prepare the metadata and features 919 if ignore_metadata or self.metadata is None: 920 metadata = None 921 else: 922 # This Series creation method is less efficient, 923 # but required for preserving dtypes 924 metadata = pd.Series( 925 {col: self.metadata.loc[i, col] for col in self.metadata.columns}, 926 dtype=object, 927 ) 928 if ignore_features or self.features is None: 929 features = None 930 else: 931 features = pd.Series( 932 {col: self.features.loc[i, col] for col in self.features.columns}, 933 dtype=object, 934 ) 935 # Create the event and append it to the list 936 events.append( 937 Event( 938 Tile(scan, self.info["tile"][i], self.info["roi"][i]), 939 self.info["x"][i], 940 self.info["y"][i], 941 metadata=metadata, 942 features=features, 943 ) 944 ) 945 return events 946 947 @classmethod 948 def from_events(cls, events: Iterable[Event]) -> Self: 949 """ 950 Set the events in the EventArray to a new list of events. 951 :param events: the new list of events. 952 """ 953 info = pd.DataFrame( 954 { 955 "slide_id": [event.tile.scan.slide_id for event in events], 956 "tile": [event.tile.n for event in events], 957 "roi": [event.tile.n_roi for event in events], 958 "x": [event.x for event in events], 959 "y": [event.y for event in events], 960 } 961 ) 962 metadata_list = [event.metadata for event in events] 963 # Iterate through and ensure that all metadata is the same shape 964 for metadata in metadata_list: 965 if type(metadata) != type(metadata_list[0]): 966 raise ValueError("All metadata must be the same type.") 967 if metadata is not None and metadata.shape != metadata_list[0].shape: 968 raise ValueError("All metadata must be the same shape.") 969 if metadata_list[0] is None: 970 metadata = None 971 else: 972 metadata = pd.DataFrame(metadata_list) 973 features_list = [event.features for event in events] 974 # Iterate through and ensure that all features are the same shape 975 for features in features_list: 976 if type(features) != type(features_list[0]): 977 raise ValueError("All features must be the same type.") 978 if features is not None and features.shape != features_list[0].shape: 979 raise ValueError("All features must be the same shape.") 980 if features_list[0] is None: 981 features = None 982 else: 983 features = pd.DataFrame(features_list) 984 return EventArray(info=info, metadata=metadata, features=features) 985 986 def to_dataframe(self) -> pd.DataFrame: 987 """ 988 Convert all the data in the EventArray to a single DataFrame. 989 :return: a DataFrame with all the data in the EventArray. 990 """ 991 # Make a copy of the info DataFrame and prepend "info_" to the column names 992 output = self.info.copy() 993 # Combine with the metadata and prepend "metadata_" to the column names 994 if self.metadata is not None: 995 metadata = self.metadata.copy() 996 metadata.columns = [f"metadata_{col}" for col in metadata.columns] 997 output = pd.concat([output, metadata], axis=1) 998 # Combine with the features and prepend "features_" to the column names 999 if self.features is not None: 1000 features = self.features.copy() 1001 features.columns = [f"features_{col}" for col in features.columns] 1002 output = pd.concat([output, features], axis=1) 1003 return output 1004 1005 @classmethod 1006 def from_dataframe( 1007 cls, df, metadata_prefix: str = "metadata_", features_prefix: str = "features_" 1008 ) -> Self: 1009 """ 1010 From a single, special DataFrame, create an EventArray. 
1011 :param df: the DataFrame to convert to an EventArray. 1012 :param metadata_prefix: the prefix for metadata columns. 1013 :param features_prefix: the prefix for features columns. 1014 :return: a DataFrame with all the data in the EventArray. 1015 """ 1016 # Split the columns into info, metadata, and features and strip prefix 1017 info = df[[col for col in df.columns if col in cls.INFO_COLUMNS]].copy() 1018 if info.size == 0: 1019 info = None 1020 metadata = df[[col for col in df.columns if col.startswith("metadata_")]].copy() 1021 metadata.columns = [ 1022 col.replace(metadata_prefix, "") for col in metadata.columns 1023 ] 1024 if metadata.size == 0: 1025 metadata = None 1026 features = df[[col for col in df.columns if col.startswith("features_")]].copy() 1027 features.columns = [ 1028 col.replace(features_prefix, "") for col in features.columns 1029 ] 1030 if features.size == 0: 1031 features = None 1032 return cls(info=info, metadata=metadata, features=features) 1033 1034 @classmethod 1035 def from_mask( 1036 cls, 1037 mask: np.ndarray, 1038 tile: Tile, 1039 include_cell_id: bool = True, 1040 images: list[np.ndarray] = None, 1041 image_labels: list[str] = None, 1042 properties: list[str] = None, 1043 ) -> Self: 1044 """ 1045 Extract events from a mask DataFrame, including metadata and features. 1046 :param mask: the mask to extract events from. 1047 :param tile: the Tile object associated with this mask. 1048 :param include_cell_id: whether to include the cell_id, or numerical 1049 mask label, as metadata in the EventArray. 1050 :param images: the intensity images to extract features from. 1051 :param image_labels: the labels for the intensity images. 1052 :param properties: list of properties to extract in addition to the defaults: 1053 :return: EventArray corresponding to the mask labels. 1054 """ 1055 if csi_images is None: 1056 raise ModuleNotFoundError( 1057 "imageio libraries not installed! " 1058 "run `pip install csi_images[imageio]` to resolve." 1059 ) 1060 # Gather mask_info 1061 if images is not None and image_labels is not None: 1062 if len(images) != len(image_labels): 1063 raise ValueError("Intensity images and labels must match lengths.") 1064 1065 mask_info = csi_images.extract_mask_info(mask, images, image_labels, properties) 1066 1067 if len(mask_info) == 0: 1068 return EventArray() 1069 1070 # Combine provided info and mask info 1071 info = pd.DataFrame( 1072 { 1073 "slide_id": tile.scan.slide_id, 1074 "tile": tile.n, 1075 "roi": tile.n_roi, 1076 "x": mask_info["x"], 1077 "y": mask_info["y"], 1078 }, 1079 ) 1080 # Extract a metadata column if desired 1081 if include_cell_id: 1082 metadata = pd.DataFrame({"cell_id": mask_info["id"]}) 1083 else: 1084 metadata = None 1085 # If any additional properties were extracted, add them as features 1086 mask_info = mask_info.drop(columns=["id", "x", "y"], errors="ignore") 1087 if len(mask_info.columns) > 0: 1088 features = mask_info 1089 features.columns = [col.lower() for col in features.columns] 1090 else: 1091 features = None 1092 return EventArray(info, metadata, features) 1093 1094 def save_csv(self, output_path: str) -> bool: 1095 """ 1096 Save the events to an CSV file, including metadata and features. 
1097 :param output_path: 1098 :return: 1099 """ 1100 if not output_path.endswith(".csv"): 1101 output_path += ".csv" 1102 self.to_dataframe().to_csv(output_path, index=False) 1103 return os.path.exists(output_path) 1104 1105 @classmethod 1106 def load_csv( 1107 cls, 1108 input_path: str, 1109 metadata_prefix: str = "metadata_", 1110 features_prefix: str = "features_", 1111 ) -> Self: 1112 """ 1113 Load the events from an CSV file, including metadata and features. 1114 :param input_path: 1115 :param metadata_prefix: 1116 :param features_prefix: 1117 :return: 1118 """ 1119 # Load the CSV file 1120 df = pd.read_csv(input_path) 1121 return cls.from_dataframe(df, metadata_prefix, features_prefix) 1122 1123 def save_hdf5(self, output_path: str) -> bool: 1124 """ 1125 Save the events to an HDF5 file, including metadata and features. 1126 Uses the pandas-provided HDF5 functions for ease, and external compatibility, 1127 though these files are slightly harder to view in HDFView or similar. 1128 :param output_path: 1129 :return: 1130 """ 1131 if not output_path.endswith(".hdf5") and not output_path.endswith(".h5"): 1132 output_path += ".hdf5" 1133 # Open the output_path as an HDF5 file 1134 with pd.HDFStore(output_path) as store: 1135 # Store the dataframes in the HDF5 file 1136 if self.info is not None: 1137 store.put("info", self.info, index=False) 1138 if self.metadata is not None: 1139 store.put("metadata", self.metadata, index=False) 1140 if self.features is not None: 1141 store.put("features", self.features, index=False) 1142 return os.path.exists(output_path) 1143 1144 @classmethod 1145 def load_hdf5(cls, input_path: str) -> Self: 1146 """ 1147 Load the events from an HDF5 file, including metadata and features. 1148 :param input_path: 1149 :return: 1150 """ 1151 # Open the input_path as an HDF5 file 1152 with pd.HDFStore(input_path, "r") as store: 1153 # Load the dataframes from the HDF5 file 1154 info = store.get("info") if "info" in store else None 1155 metadata = store.get("metadata") if "metadata" in store else None 1156 features = store.get("features") if "features" in store else None 1157 return cls(info=info, metadata=metadata, features=features) 1158 1159 def save_ocular(self, output_path: str, event_type: str = "cells"): 1160 """ 1161 Save the events to an OCULAR file. Relies on the dataframe originating 1162 from an OCULAR file (same columns; duplicate metadata/info). 1163 :param output_path: 1164 :param event_type: 1165 :return: 1166 """ 1167 if pyreadr is None: 1168 raise ModuleNotFoundError( 1169 "pyreadr not installed! Install pyreadr directly " 1170 "or run `pip install csi-images[rds]` option to resolve." 1171 ) 1172 if event_type == "cells": 1173 file_stub = "rc-final" 1174 elif event_type == "others": 1175 file_stub = "others-final" 1176 else: 1177 raise ValueError("Invalid event type. 
Must be cells or others.") 1178 1179 # Ensure good metadata 1180 metadata = pd.DataFrame( 1181 { 1182 "slide_id": self.info["slide_id"], 1183 "frame_id": self.info["tile"] + 1, # Convert to 1-indexed for R 1184 "cell_id": ( 1185 self.metadata["cell_id"] 1186 if "cell_id" in self.metadata.columns 1187 else range(len(self.info)) 1188 ), 1189 "cellx": self.info["x"], 1190 "celly": self.info["y"], 1191 } 1192 ) 1193 if self.metadata is not None: 1194 metadata[self.metadata.columns] = self.metadata.copy() 1195 1196 # Check for the "ocular_interesting" column 1197 if event_type == "cells": 1198 if "ocular_interesting" in metadata.columns: 1199 interesting_rows = metadata["ocular_interesting"].to_numpy(dtype=bool) 1200 elif "hcpc" in metadata.columns: 1201 # Interesting cells don't get an hcpc designation, leaving them as -1 1202 interesting_rows = ( 1203 metadata["hcpc"].to_numpy() == -1 1204 ) # interesting cells 1205 else: 1206 interesting_rows = [] 1207 if sum(interesting_rows) > 0: 1208 # Split the metadata into interesting and regular 1209 interesting_events = self.rows(interesting_rows) 1210 interesting_df = pd.concat( 1211 [interesting_events.features, interesting_events.metadata], axis=1 1212 ) 1213 data_events = self.rows(~interesting_rows) 1214 data_df = pd.concat( 1215 [data_events.features, data_events.metadata], axis=1 1216 ) 1217 data_df = data_df.drop(columns=["ocular_interesting"], errors="ignore") 1218 1219 # Drop particular columns for "interesting" 1220 interesting_df = interesting_df.drop( 1221 [ 1222 "clust", 1223 "hcpc", 1224 "frame_id", 1225 "cell_id", 1226 "unique_id", 1227 "ocular_interesting", 1228 ], 1229 axis=1, 1230 errors="ignore", 1231 ) 1232 # Save both .csv and .rds 1233 interesting_stub = os.path.join(output_path, "ocular_interesting") 1234 interesting_df.to_csv(f"{interesting_stub}.csv") 1235 # Suppress pandas FutureWarning 1236 with warnings.catch_warnings(): 1237 warnings.simplefilter(action="ignore", category=FutureWarning) 1238 pyreadr.write_rds(f"{interesting_stub}.rds", interesting_df) 1239 else: 1240 data_df = pd.concat([self.features, metadata], axis=1) 1241 else: 1242 # Get all data and reset_index (will copy it) 1243 data_df = pd.concat([self.features, metadata], axis=1) 1244 1245 # Split based on cluster number to conform to *-final[1-4].rds 1246 n_clusters = max(data_df["clust"]) + 1 1247 split_idx = [round(i * n_clusters / 4) for i in range(5)] 1248 for i in range(4): 1249 subset = (split_idx[i] <= data_df["clust"]) & ( 1250 data_df["clust"] < split_idx[i + 1] 1251 ) 1252 data_df.loc[subset, "hcpc"] = i + 1 1253 subset = data_df[subset].reset_index(drop=True) 1254 # Suppress pandas FutureWarning 1255 with warnings.catch_warnings(): 1256 warnings.simplefilter(action="ignore", category=FutureWarning) 1257 pyreadr.write_rds( 1258 os.path.join(output_path, f"{file_stub}{i+1}.rds"), subset 1259 ) 1260 1261 # Create new example cell strings 1262 data_df["example_cell_id"] = ( 1263 data_df["slide_id"] 1264 + " " 1265 + data_df["frame_id"].astype(str) 1266 + " " 1267 + data_df["cell_id"].astype(str) 1268 + " " 1269 + data_df["cellx"].astype(int).astype(str) 1270 + " " 1271 + data_df["celly"].astype(int).astype(str) 1272 ) 1273 # Find averagable data columns 1274 if "cellcluster_id" in data_df.columns: 1275 end_idx = data_df.columns.get_loc("cellcluster_id") 1276 else: 1277 end_idx = data_df.columns.get_loc("slide_id") 1278 avg_cols = data_df.columns[:end_idx].tolist() 1279 # Group by cluster and average 1280 data_df = data_df.groupby("clust").agg( 1281 
**{col: (col, "mean") for col in avg_cols}, 1282 count=("clust", "size"), # count rows in each cluster 1283 example_cells=("example_cell_id", lambda x: ",".join(x)), 1284 hcpc=("hcpc", lambda x: x.iloc[0]), 1285 ) 1286 data_df = data_df.reset_index() # Do NOT drop, index is "clust" 1287 # Create new columns 1288 metadata = pd.DataFrame( 1289 { 1290 "count": data_df["count"], 1291 "example_cells": data_df["example_cells"], 1292 "clust": data_df["clust"].astype(int), 1293 "hcpc": data_df["hcpc"].astype(int), 1294 "id": data_df["clust"].astype(int).astype(str), 1295 "cccluster": "0", # Dummy value 1296 "ccdistance": 0.0, # Dummy value 1297 "rownum": list(range(len(data_df))), 1298 "framegroup": 0, # Dummy value 1299 } 1300 ) 1301 # Need to pad the features to 761 columns, as per OCULAR report needs 1302 additional_columns = range(len(avg_cols), 761) 1303 if len(additional_columns) > 0: 1304 padding = pd.DataFrame( 1305 np.zeros((len(data_df), len(additional_columns))), 1306 columns=[f"pad{i}" for i in additional_columns], 1307 ) 1308 data_df = pd.concat([data_df[avg_cols], padding, metadata], axis=1) 1309 else: 1310 data_df = pd.concat([data_df[avg_cols], metadata], axis=1) 1311 1312 # Save the cluster data 1313 data_df.to_csv(os.path.join(output_path, f"{file_stub}.csv")) 1314 # Suppress pandas FutureWarning 1315 with warnings.catch_warnings(): 1316 warnings.simplefilter(action="ignore", category=FutureWarning) 1317 pyreadr.write_rds(os.path.join(output_path, f"{file_stub}.rds"), data_df) 1318 1319 @classmethod 1320 def load_ocular( 1321 cls, 1322 input_path: str, 1323 event_type="cells", 1324 cell_data_files=( 1325 "rc-final1.rds", 1326 "rc-final2.rds", 1327 "rc-final3.rds", 1328 "rc-final4.rds", 1329 "ocular_interesting.rds", 1330 ), 1331 others_data_files=( 1332 "others-final1.rds", 1333 "others-final2.rds", 1334 "others-final3.rds", 1335 "others-final4.rds", 1336 ), 1337 atlas_data_files=( 1338 "ocular_interesting.rds", 1339 "ocular_not_interesting.rds", 1340 ), 1341 drop_common_events=True, 1342 ) -> Self: 1343 """ 1344 1345 :param input_path: 1346 :param event_type: 1347 :param cell_data_files: 1348 :param others_data_files: 1349 :param atlas_data_files: 1350 :param drop_common_events: 1351 :return: 1352 """ 1353 if pyreadr is None: 1354 raise ModuleNotFoundError( 1355 "pyreadr not installed! Install pyreadr directly " 1356 "or run `pip install csi-images[rds]` option to resolve." 
1357 ) 1358 # Check if the input path is a directory or a file 1359 if os.path.isfile(input_path): 1360 data_files = [os.path.basename(input_path)] 1361 input_path = os.path.dirname(input_path) 1362 if event_type == "cells": 1363 data_files = cell_data_files 1364 elif event_type == "others": 1365 data_files = others_data_files 1366 else: 1367 raise ValueError("Invalid event type.") 1368 1369 # Load the data from the OCULAR files 1370 file_data = {} 1371 for file in data_files: 1372 file_path = os.path.join(input_path, file) 1373 if not os.path.isfile(file_path): 1374 warnings.warn(f"{file} not found for in {input_path}") 1375 continue 1376 file_data[file] = pyreadr.read_r(file_path) 1377 # Get the DataFrame associated with None (pyreadr dict quirk) 1378 file_data[file] = file_data[file][None] 1379 if len(file_data[file]) == 0: 1380 # File gets dropped from the dict 1381 file_data.pop(file) 1382 warnings.warn(f"{file} has no cells") 1383 continue 1384 1385 # Drop common cells if requested and in this file 1386 if ( 1387 file in atlas_data_files 1388 and drop_common_events 1389 and "catalogue_classification" in file_data[file] 1390 ): 1391 common_cell_indices = ( 1392 file_data[file]["catalogue_classification"] == "common_cell" 1393 ) 1394 file_data[file] = file_data[file][common_cell_indices == False] 1395 1396 if len(file_data[file]) == 0: 1397 # File gets dropped from the dict 1398 file_data.pop(file) 1399 warnings.warn(f"{file} has no cells after dropping common cells") 1400 continue 1401 1402 # Extract frame_id and cell_id 1403 # DAPI- events already have frame_id cell_id outside rowname 1404 if event_type == "cells" and "frame_id" not in file_data[file].columns: 1405 file_data[file]["rowname"] = file_data[file]["rowname"].astype("str") 1406 # get frame_id cell_id from rownames column and split into two columns 1407 split_res = file_data[file]["rowname"].str.split(" ", n=1, expand=True) 1408 if len(split_res.columns) != 2: 1409 warnings.warn( 1410 f'Expected "frame_id cell_id" but got {file_data[file]["rowname"]}' 1411 ) 1412 # then assign it back to the dataframe 1413 file_data[file][["frame_id", "cell_id"]] = split_res.astype("int") 1414 # Ensure frame_id and cell_id are integers 1415 file_data[file]["frame_id"] = file_data[file]["frame_id"].astype("int") 1416 file_data[file]["cell_id"] = file_data[file]["cell_id"].astype("int") 1417 # reset indexes since they can cause NaN values in concat 1418 file_data[file] = file_data[file].reset_index(drop=True) 1419 1420 # Merge the data from all files 1421 if len(file_data) == 0: 1422 return EventArray() 1423 elif len(file_data) == 1: 1424 data = [file_data[file] for file in file_data.keys()][0] 1425 else: 1426 data = pd.concat(file_data.values()) 1427 1428 # Others is missing the "slide_id". 
Insert it right before "frame_id" column 1429 if event_type == "others" and "slide_id" not in data.columns: 1430 if os.path.basename(input_path) == "ocular": 1431 slide_id = os.path.basename(os.path.dirname(input_path)) 1432 else: 1433 slide_id = "UNKNOWN" 1434 data.insert(data.columns.get_loc("frame_id"), "slide_id", slide_id) 1435 1436 # Sort according to ascending cell_id to keep the original, which is in manual_df 1437 data = data.sort_values(by=["cell_id"], ascending=True) 1438 # Filter out duplicates by x & y 1439 data = data.assign( 1440 unique_id=data["slide_id"] 1441 + "_" 1442 + data["frame_id"].astype(str) 1443 + "_" 1444 + data["cellx"].astype(int).astype(str) 1445 + "_" 1446 + data["celly"].astype(int).astype(str) 1447 ) 1448 data = data.drop_duplicates(subset=["unique_id"], keep="first") 1449 # Normal unique_id is with cell_id 1450 data = data.assign( 1451 unique_id=data["slide_id"] 1452 + "_" 1453 + data["frame_id"].astype(str) 1454 + "_" 1455 + data["cell_id"].astype(str) 1456 ) 1457 data = data.reset_index(drop=True) 1458 # All columns up to "slide_id" are features; drop the "slide_id" 1459 features = data.loc[:, :"slide_id"].iloc[:, :-1] 1460 data = data.loc[:, "slide_id":] 1461 # Grab the info columns 1462 info = data[["slide_id", "frame_id", "cellx", "celly"]] 1463 info.columns = ["slide_id", "tile", "x", "y"] 1464 info = info.assign(roi=0) # OCULAR only works on 1 ROI, as far as known 1465 info = info[["slide_id", "tile", "roi", "x", "y"]] 1466 # Metadata has duplicate columns for later convenience 1467 metadata = data 1468 # Certain columns tend to be problematic with mixed data formats... 1469 for col in ["TRITC", "CY5", "FITC"]: 1470 if col in metadata: 1471 labels = { 1472 "False": False, 1473 "True": True, 1474 "FALSE": False, 1475 "TRUE": True, 1476 False: False, 1477 True: True, 1478 } 1479 metadata[col] = metadata[col].map(labels).astype(bool) 1480 for col in ["catalogue_id", "catalogue_distance", "clust", "hcpc"]: 1481 if col in metadata: 1482 metadata[col] = metadata[col].fillna(-1).astype(int) 1483 info["tile"] = info["tile"] - 1 # Convert to 0-based indexing 1484 return EventArray(info, metadata, features)
A class that holds the data for a large number of events, making it easy to analyze and manipulate many events at once. Rather than storing one Event object per event, data is kept in aligned info, metadata, and features DataFrames.
628 def __init__( 629 self, 630 info: pd.DataFrame = None, 631 metadata: pd.DataFrame = None, 632 features: pd.DataFrame = None, 633 ): 634 # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y" 635 if info is not None: 636 # Special case: "roi" is often not required, so we'll fill in if its missing 637 if "roi" not in info.columns: 638 info["roi"] = 0 639 if set(info.columns) != set(self.INFO_COLUMNS): 640 raise ValueError( 641 f"EventArray.info must have columns:" 642 f"{self.INFO_COLUMNS}; had {list(info.columns)}" 643 ) 644 # Copy first to avoid modifying the original 645 info = info.copy() 646 # Ensure that the columns are the right types 647 info["slide_id"] = info["slide_id"].astype(str) 648 info["tile"] = info["tile"].astype(np.uint16) 649 info["roi"] = info["roi"].astype(np.uint8) 650 info["x"] = info["x"].round().astype(np.uint16) 651 info["y"] = info["y"].round().astype(np.uint16) 652 # Ensure that the columns are in the right order 653 info = info[self.INFO_COLUMNS] 654 # All DataFrames must all have the same number of rows 655 if metadata is not None and (info is None or len(info) != len(metadata)): 656 raise ValueError( 657 "If EventArray.metadata is not None, it should match rows with .info" 658 ) 659 if features is not None and (info is None or len(info) != len(features)): 660 raise ValueError( 661 "If EventArray.features is not None, it should match rows with .info" 662 ) 663 # No columns named "metadata_", "features_", or "None" 664 column_names = [] 665 if metadata is not None: 666 column_names += metadata.columns.tolist() 667 if features is not None: 668 column_names += features.columns.tolist() 669 if any([col.lower().startswith("metadata_") for col in column_names]): 670 raise ValueError("EventArray column names cannot start with 'metadata_'") 671 if any([col.lower().startswith("features_") for col in column_names]): 672 raise ValueError("EventArray column names cannot start with 'features_'") 673 if any([col.lower() == "none" for col in column_names]): 674 raise ValueError("EventArray column names cannot be 'none'") 675 676 self.info = info 677 self.metadata = metadata 678 self.features = features
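As a minimal construction sketch (the slide ID, coordinates, and the cell_id / dapi_mean columns are made-up illustrations), an EventArray can be built directly from aligned DataFrames; metadata and features are optional, and "roi" is filled with 0 if the column is omitted:

import pandas as pd
from csi_images.csi_events import EventArray

info = pd.DataFrame({
    "slide_id": ["SLIDE01", "SLIDE01"],  # required
    "tile": [0, 1],
    "roi": [0, 0],                       # optional; defaults to 0 if missing
    "x": [120, 348],
    "y": [96, 210],
})
metadata = pd.DataFrame({"cell_id": [1, 2]})              # optional
features = pd.DataFrame({"dapi_mean": [1032.5, 988.1]})   # optional
events = EventArray(info=info, metadata=metadata, features=features)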
725 def get_sort_order( 726 self, by: Hashable | Sequence[Hashable], ascending: bool | Sequence[bool] = True 727 ): 728 """ 729 Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames. 730 :param by: name of the column(s) to sort by. 731 :param ascending: whether to sort in ascending order; can be a list to match by 732 :return: the order of the indices to sort by. 733 """ 734 columns = self.get(by) 735 return columns.sort_values(by=by, ascending=ascending).index
Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames.
Parameters
- by: name of the column(s) to sort by.
- ascending: whether to sort in ascending order; may be a list of booleans matching the columns in by.
Returns
the order of the indices to sort by.
737 def sort( 738 self, 739 by: Hashable | Sequence[Hashable], 740 ascending: bool | Sequence[bool] = True, 741 ) -> Self: 742 """ 743 Sort the EventArray by column(s) in the info, metadata, or features DataFrames. 744 :param by: name of the column(s) to sort by. 745 :param ascending: whether to sort in ascending order; can be a list to match by 746 :return: a new, sorted EventArray. 747 """ 748 order = self.get_sort_order(by, ascending) 749 info = self.info.loc[order].reset_index(drop=True) 750 if self.metadata is not None: 751 metadata = self.metadata.loc[order].reset_index(drop=True) 752 else: 753 metadata = None 754 if self.features is not None: 755 features = self.features.loc[order].reset_index(drop=True) 756 else: 757 features = None 758 return EventArray(info, metadata, features)
Sort the EventArray by column(s) in the info, metadata, or features DataFrames.
Parameters
- by: name of the column(s) to sort by.
- ascending: whether to sort in ascending order; may be a list of booleans matching the columns in by.
Returns
a new, sorted EventArray.
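For example, reusing the events object and the illustrative dapi_mean column from the construction sketch above, sort() returns a new EventArray and leaves the original untouched, while get_sort_order() exposes the row order itself:

# Sort by a feature column, brightest first; the original EventArray is unchanged
by_brightness = events.sort("dapi_mean", ascending=False)

# Or compute the order separately, e.g. to reuse it for other data
order = events.get_sort_order(["slide_id", "dapi_mean"], ascending=[True, False])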
760 def get(self, column_names: Hashable | Sequence[Hashable]) -> pd.DataFrame: 761 """ 762 Get a DataFrame with the specified columns from the EventArray, by value. 763 :param column_names: the names of the columns to get. 764 :return: a DataFrame with the specified columns. 765 """ 766 if isinstance(column_names, Hashable): 767 column_names = [column_names] # Drop into a list for the loop 768 columns = [] 769 for column_name in column_names: 770 if column_name in self.info.columns: 771 columns.append(self.info[column_name]) 772 elif self.metadata is not None and column_name in self.metadata.columns: 773 columns.append(self.metadata[column_name]) 774 elif self.features is not None and column_name in self.features.columns: 775 columns.append(self.features[column_name]) 776 else: 777 raise ValueError(f"Column {column_name} not found in EventArray") 778 return pd.concat(columns, axis=1)
Get a DataFrame with the specified columns from the EventArray, by value.
Parameters
- column_names: the names of the columns to get.
Returns
a DataFrame with the specified columns.
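A short illustration, again assuming the columns from the construction sketch: get() pulls each requested column from whichever of the three DataFrames holds it and returns them as one copied DataFrame:

# Mixes info columns ("x", "y") with a feature column ("dapi_mean")
positions_and_signal = events.get(["x", "y", "dapi_mean"])
print(positions_and_signal.head())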
780 def rows(self, rows: Sequence[Hashable]) -> Self: 781 """ 782 Get a subset of the EventArray rows based on a boolean or integer index, by value. 783 :param rows: row labels, indices, or boolean mask; anything for .loc[] 784 :return: a new EventArray with the subset of events. 785 """ 786 info = self.info.loc[rows].reset_index(drop=True) 787 if self.metadata is not None: 788 metadata = self.metadata.loc[rows].reset_index(drop=True) 789 else: 790 metadata = None 791 if self.features is not None: 792 features = self.features.loc[rows].reset_index(drop=True) 793 else: 794 features = None 795 return EventArray(info, metadata, features)
Get a subset of the EventArray rows based on a boolean or integer index, by value.
Parameters
- rows: row labels, label lists, or a boolean mask; anything accepted by .loc[]
Returns
a new EventArray with the subset of events.
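A hypothetical filtering sketch (the threshold is arbitrary): because rows() passes its argument to .loc[], a boolean mask built from get() works directly, as do row labels:

# Keep only events with a bright DAPI signal
bright_mask = events.get("dapi_mean")["dapi_mean"] > 1000
bright_events = events.rows(bright_mask)

# Row labels also work, since .loc[] is used internally
first_two = events.rows([0, 1])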
797 def copy(self) -> Self: 798 """ 799 Create a deep copy of the EventArray. 800 :return: a deep copy of the EventArray. 801 """ 802 return EventArray( 803 info=self.info.copy(), 804 metadata=None if self.metadata is None else self.metadata.copy(), 805 features=None if self.features is None else self.features.copy(), 806 )
Create a deep copy of the EventArray.
Returns
a deep copy of the EventArray.
810 def add_metadata(self, new_metadata: pd.Series | pd.DataFrame) -> None: 811 """ 812 Add metadata to the EventArray. Removes the need to check if metadata is None. 813 Overwrites any existing metadata with the same column names as the new metadata. 814 :param new_metadata: the metadata to add. 815 """ 816 if len(self) != len(new_metadata): 817 raise ValueError("New metadata must match length of existing info") 818 819 if self.metadata is None: 820 self.metadata = new_metadata 821 else: 822 if isinstance(new_metadata, pd.Series): 823 self.metadata[new_metadata.name] = new_metadata 824 else: 825 # It's a DataFrame 826 self.metadata[new_metadata.columns] = new_metadata
Add metadata to the EventArray. Removes the need to check if metadata is None. Overwrites any existing metadata with the same column names as the new metadata.
Parameters
- new_metadata: the metadata to add.
828 def add_features(self, new_features: pd.Series | pd.DataFrame) -> None: 829 """ 830 Add features to the EventArray. Removes the need to check if features is None. 831 Overwrites any existing features with the same column names as the new features. 832 :param new_features: the features to add. 833 """ 834 if len(self) != len(new_features): 835 raise ValueError("New features must match length of existing info") 836 837 if self.features is None: 838 self.features = new_features 839 else: 840 if isinstance(new_features, pd.Series): 841 self.features[new_features.name] = new_features 842 else: 843 # It's a DataFrame 844 self.features[new_features.columns] = new_features
Add features to the EventArray. Removes the need to check if features is None. Overwrites any existing features with the same column names as the new features.
Parameters
- new_features: the features to add.
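A brief sketch continuing the two-event example above (the column names are illustrative): both methods accept a Series or DataFrame with one entry per event and overwrite any existing columns of the same name:

import pandas as pd

events.add_metadata(pd.Series(["auto"] * len(events), name="classifier"))
events.add_features(pd.DataFrame({
    "area_px": [412, 377],
    "eccentricity": [0.41, 0.58],
}))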
846 @classmethod 847 def merge(cls, events: Iterable[Self]) -> Self: 848 """ 849 Combine EventArrays in a list into a single EventArray. 850 :param events: the new list of events. 851 """ 852 all_info = [] 853 all_metadata = [] 854 all_features = [] 855 for event_array in events: 856 # Skip empty EventArrays 857 if event_array.info is not None: 858 all_info.append(event_array.info) 859 if event_array.metadata is not None: 860 all_metadata.append(event_array.metadata) 861 if event_array.features is not None: 862 all_features.append(event_array.features) 863 if len(all_info) == 0: 864 return EventArray() 865 else: 866 all_info = pd.concat(all_info, ignore_index=True) 867 if len(all_metadata) == 0: 868 all_metadata = None 869 else: 870 all_metadata = pd.concat(all_metadata, ignore_index=True) 871 if len(all_features) == 0: 872 all_features = None 873 else: 874 all_features = pd.concat(all_features, ignore_index=True) 875 876 return EventArray(all_info, all_metadata, all_features)
Combine EventArrays in a list into a single EventArray.
Parameters
- events: the EventArrays to combine; empty ones are skipped.
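For instance, EventArrays produced per tile (or per file) can be folded into one; this sketch simply reuses events and a copy of it:

per_tile = [events, events.copy()]  # any iterable of EventArray objects
combined = EventArray.merge(per_tile)
print(len(combined))  # sum of the individual lengths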
878 def to_events( 879 self, 880 scans: Scan | Iterable[Scan], 881 ignore_missing_scans=True, 882 ignore_metadata=False, 883 ignore_features=False, 884 ) -> list[Event]: 885 """ 886 Get the events in the EventArray as a list of events. Returns [] if empty. 887 :param scans: the scans that the events belong to, auto-matched by slide_id. 888 Pass None if you don't care about scan metadata (pass ignore_missing_scans). 889 :param ignore_missing_scans: whether to create blank scans for events without scans. 890 :param ignore_metadata: whether to ignore metadata or not 891 :param ignore_features: whether to ignore features or not 892 :return: 893 """ 894 if len(self) == 0: 895 return [] 896 if isinstance(scans, Scan): 897 scans = [scans] 898 scans = {scan.slide_id: scan for scan in scans} 899 events = [] 900 for i in range(len(self.info)): 901 # Determine the associated scan 902 slide_id = self.info["slide_id"][i] 903 if slide_id not in scans: 904 if ignore_missing_scans: 905 # Create a placeholder scan if the scan is missing 906 scan = Scan.make_placeholder( 907 slide_id, 908 self.info["tile"][i], 909 self.info["roi"][i], 910 ) 911 else: 912 raise ValueError( 913 f"Scan {self.info['slide_id'][i]} not found for event {i}." 914 ) 915 else: 916 scan = scans[slide_id] 917 918 # Prepare the metadata and features 919 if ignore_metadata or self.metadata is None: 920 metadata = None 921 else: 922 # This Series creation method is less efficient, 923 # but required for preserving dtypes 924 metadata = pd.Series( 925 {col: self.metadata.loc[i, col] for col in self.metadata.columns}, 926 dtype=object, 927 ) 928 if ignore_features or self.features is None: 929 features = None 930 else: 931 features = pd.Series( 932 {col: self.features.loc[i, col] for col in self.features.columns}, 933 dtype=object, 934 ) 935 # Create the event and append it to the list 936 events.append( 937 Event( 938 Tile(scan, self.info["tile"][i], self.info["roi"][i]), 939 self.info["x"][i], 940 self.info["y"][i], 941 metadata=metadata, 942 features=features, 943 ) 944 ) 945 return events
Get the events in the EventArray as a list of events. Returns [] if empty.
Parameters
- scans: the scans that the events belong to, auto-matched by slide_id. Pass an empty list (with ignore_missing_scans=True) if you do not need real scan metadata.
- ignore_missing_scans: whether to create placeholder scans for events whose scan was not provided; if False, a missing scan raises a ValueError.
- ignore_metadata: whether to skip attaching metadata to the returned events.
- ignore_features: whether to skip attaching features to the returned events.
Returns
a list of Event objects; [] if the EventArray is empty.
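A minimal sketch of converting back to Event objects. Passing an empty list with ignore_missing_scans=True creates placeholder scans, which carry no real scanner metadata; pass real Scan objects when positions need to be converted to scanner or slide coordinates:

# Placeholder scans are created because no matching Scan is supplied
event_list = events.to_events([], ignore_missing_scans=True)
print(event_list[0].x, event_list[0].y)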
947 @classmethod 948 def from_events(cls, events: Iterable[Event]) -> Self: 949 """ 950 Set the events in the EventArray to a new list of events. 951 :param events: the new list of events. 952 """ 953 info = pd.DataFrame( 954 { 955 "slide_id": [event.tile.scan.slide_id for event in events], 956 "tile": [event.tile.n for event in events], 957 "roi": [event.tile.n_roi for event in events], 958 "x": [event.x for event in events], 959 "y": [event.y for event in events], 960 } 961 ) 962 metadata_list = [event.metadata for event in events] 963 # Iterate through and ensure that all metadata is the same shape 964 for metadata in metadata_list: 965 if type(metadata) != type(metadata_list[0]): 966 raise ValueError("All metadata must be the same type.") 967 if metadata is not None and metadata.shape != metadata_list[0].shape: 968 raise ValueError("All metadata must be the same shape.") 969 if metadata_list[0] is None: 970 metadata = None 971 else: 972 metadata = pd.DataFrame(metadata_list) 973 features_list = [event.features for event in events] 974 # Iterate through and ensure that all features are the same shape 975 for features in features_list: 976 if type(features) != type(features_list[0]): 977 raise ValueError("All features must be the same type.") 978 if features is not None and features.shape != features_list[0].shape: 979 raise ValueError("All features must be the same shape.") 980 if features_list[0] is None: 981 features = None 982 else: 983 features = pd.DataFrame(features_list) 984 return EventArray(info=info, metadata=metadata, features=features)
Create an EventArray from a list of events.
Parameters
- events: the Event objects to convert into an EventArray; all must carry metadata and features of the same shape (or none at all).
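Conversely, a list of Event objects, such as event_list from the sketch above, can be folded back into an EventArray:

rebuilt = EventArray.from_events(event_list)
print(len(rebuilt.info))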
986 def to_dataframe(self) -> pd.DataFrame: 987 """ 988 Convert all the data in the EventArray to a single DataFrame. 989 :return: a DataFrame with all the data in the EventArray. 990 """ 991 # Make a copy of the info DataFrame and prepend "info_" to the column names 992 output = self.info.copy() 993 # Combine with the metadata and prepend "metadata_" to the column names 994 if self.metadata is not None: 995 metadata = self.metadata.copy() 996 metadata.columns = [f"metadata_{col}" for col in metadata.columns] 997 output = pd.concat([output, metadata], axis=1) 998 # Combine with the features and prepend "features_" to the column names 999 if self.features is not None: 1000 features = self.features.copy() 1001 features.columns = [f"features_{col}" for col in features.columns] 1002 output = pd.concat([output, features], axis=1) 1003 return output
Convert all the data in the EventArray to a single DataFrame; metadata and feature columns are prefixed with "metadata_" and "features_".
Returns
a DataFrame with all the data in the EventArray.
1005 @classmethod 1006 def from_dataframe( 1007 cls, df, metadata_prefix: str = "metadata_", features_prefix: str = "features_" 1008 ) -> Self: 1009 """ 1010 From a single, special DataFrame, create an EventArray. 1011 :param df: the DataFrame to convert to an EventArray. 1012 :param metadata_prefix: the prefix for metadata columns. 1013 :param features_prefix: the prefix for features columns. 1014 :return: a DataFrame with all the data in the EventArray. 1015 """ 1016 # Split the columns into info, metadata, and features and strip prefix 1017 info = df[[col for col in df.columns if col in cls.INFO_COLUMNS]].copy() 1018 if info.size == 0: 1019 info = None 1020 metadata = df[[col for col in df.columns if col.startswith("metadata_")]].copy() 1021 metadata.columns = [ 1022 col.replace(metadata_prefix, "") for col in metadata.columns 1023 ] 1024 if metadata.size == 0: 1025 metadata = None 1026 features = df[[col for col in df.columns if col.startswith("features_")]].copy() 1027 features.columns = [ 1028 col.replace(features_prefix, "") for col in features.columns 1029 ] 1030 if features.size == 0: 1031 features = None 1032 return cls(info=info, metadata=metadata, features=features)
Create an EventArray from a single combined DataFrame, such as one produced by to_dataframe().
Parameters
- df: the DataFrame to convert to an EventArray.
- metadata_prefix: the prefix for metadata columns.
- features_prefix: the prefix for features columns.
Returns
an EventArray populated from the DataFrame's info, metadata, and features columns.
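The two methods are designed to round-trip, as in this minimal sketch (reusing events from above):

df = events.to_dataframe()                # columns: slide_id, tile, roi, x, y, metadata_*, features_*
restored = EventArray.from_dataframe(df)  # prefixes are stripped back off into metadata/features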
1034 @classmethod 1035 def from_mask( 1036 cls, 1037 mask: np.ndarray, 1038 tile: Tile, 1039 include_cell_id: bool = True, 1040 images: list[np.ndarray] = None, 1041 image_labels: list[str] = None, 1042 properties: list[str] = None, 1043 ) -> Self: 1044 """ 1045 Extract events from a mask DataFrame, including metadata and features. 1046 :param mask: the mask to extract events from. 1047 :param tile: the Tile object associated with this mask. 1048 :param include_cell_id: whether to include the cell_id, or numerical 1049 mask label, as metadata in the EventArray. 1050 :param images: the intensity images to extract features from. 1051 :param image_labels: the labels for the intensity images. 1052 :param properties: list of properties to extract in addition to the defaults: 1053 :return: EventArray corresponding to the mask labels. 1054 """ 1055 if csi_images is None: 1056 raise ModuleNotFoundError( 1057 "imageio libraries not installed! " 1058 "run `pip install csi_images[imageio]` to resolve." 1059 ) 1060 # Gather mask_info 1061 if images is not None and image_labels is not None: 1062 if len(images) != len(image_labels): 1063 raise ValueError("Intensity images and labels must match lengths.") 1064 1065 mask_info = csi_images.extract_mask_info(mask, images, image_labels, properties) 1066 1067 if len(mask_info) == 0: 1068 return EventArray() 1069 1070 # Combine provided info and mask info 1071 info = pd.DataFrame( 1072 { 1073 "slide_id": tile.scan.slide_id, 1074 "tile": tile.n, 1075 "roi": tile.n_roi, 1076 "x": mask_info["x"], 1077 "y": mask_info["y"], 1078 }, 1079 ) 1080 # Extract a metadata column if desired 1081 if include_cell_id: 1082 metadata = pd.DataFrame({"cell_id": mask_info["id"]}) 1083 else: 1084 metadata = None 1085 # If any additional properties were extracted, add them as features 1086 mask_info = mask_info.drop(columns=["id", "x", "y"], errors="ignore") 1087 if len(mask_info.columns) > 0: 1088 features = mask_info 1089 features.columns = [col.lower() for col in features.columns] 1090 else: 1091 features = None 1092 return EventArray(info, metadata, features)
Extract events from a labeled segmentation mask, including metadata and features.
Parameters
- mask: the mask to extract events from.
- tile: the Tile object associated with this mask.
- include_cell_id: whether to include the cell_id, or numerical mask label, as metadata in the EventArray.
- images: the intensity images to extract features from.
- image_labels: the labels for the intensity images.
- properties: list of additional region properties to extract beyond the defaults.
Returns
EventArray corresponding to the mask labels.
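A hedged sketch of extracting events from a labeled mask. It uses a placeholder Scan and a tiny synthetic mask purely so the example stands alone; in practice the Tile would come from a real scan, and this method needs the optional imaging dependency (pip install csi_images[imageio]):

import numpy as np
from csi_images.csi_scans import Scan
from csi_images.csi_tiles import Tile
from csi_images.csi_events import EventArray

# Integer-labeled segmentation mask for one tile; 0 is background
mask = np.zeros((1000, 1000), dtype=np.uint16)
mask[100:120, 200:220] = 1
mask[500:530, 640:660] = 2

scan = Scan.make_placeholder("SLIDE01", 0, 0)  # placeholder scan, for illustration only
tile = Tile(scan, 0, 0)
events = EventArray.from_mask(mask, tile)
print(events.metadata["cell_id"])  # mask labels 1 and 2, since include_cell_id defaults to True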
1094 def save_csv(self, output_path: str) -> bool: 1095 """ 1096 Save the events to an CSV file, including metadata and features. 1097 :param output_path: 1098 :return: 1099 """ 1100 if not output_path.endswith(".csv"): 1101 output_path += ".csv" 1102 self.to_dataframe().to_csv(output_path, index=False) 1103 return os.path.exists(output_path)
Save the events to a CSV file, including metadata and features.
Parameters
- output_path: the path to write the CSV file to; ".csv" is appended if missing.
Returns
True if the file exists after writing, False otherwise.
1105 @classmethod 1106 def load_csv( 1107 cls, 1108 input_path: str, 1109 metadata_prefix: str = "metadata_", 1110 features_prefix: str = "features_", 1111 ) -> Self: 1112 """ 1113 Load the events from an CSV file, including metadata and features. 1114 :param input_path: 1115 :param metadata_prefix: 1116 :param features_prefix: 1117 :return: 1118 """ 1119 # Load the CSV file 1120 df = pd.read_csv(input_path) 1121 return cls.from_dataframe(df, metadata_prefix, features_prefix)
Load the events from a CSV file, including metadata and features.
Parameters
- input_path: the path of the CSV file to load.
- metadata_prefix: the prefix identifying metadata columns.
- features_prefix: the prefix identifying feature columns.
Returns
an EventArray with the loaded info, metadata, and features.
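A short round-trip sketch; the file path is arbitrary:

import os, tempfile

csv_path = os.path.join(tempfile.gettempdir(), "slide01_events.csv")
assert events.save_csv(csv_path)          # True once the file exists
reloaded = EventArray.load_csv(csv_path)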
1123 def save_hdf5(self, output_path: str) -> bool: 1124 """ 1125 Save the events to an HDF5 file, including metadata and features. 1126 Uses the pandas-provided HDF5 functions for ease, and external compatibility, 1127 though these files are slightly harder to view in HDFView or similar. 1128 :param output_path: 1129 :return: 1130 """ 1131 if not output_path.endswith(".hdf5") and not output_path.endswith(".h5"): 1132 output_path += ".hdf5" 1133 # Open the output_path as an HDF5 file 1134 with pd.HDFStore(output_path) as store: 1135 # Store the dataframes in the HDF5 file 1136 if self.info is not None: 1137 store.put("info", self.info, index=False) 1138 if self.metadata is not None: 1139 store.put("metadata", self.metadata, index=False) 1140 if self.features is not None: 1141 store.put("features", self.features, index=False) 1142 return os.path.exists(output_path)
Save the events to an HDF5 file, including metadata and features. Uses the pandas-provided HDF5 functions for ease and external compatibility, though the resulting files are slightly harder to view in HDFView or similar tools.
Parameters
- output_path: the path to write the HDF5 file to; ".hdf5" is appended if no HDF5 extension is present.
Returns
True if the file exists after writing, False otherwise.
1144 @classmethod 1145 def load_hdf5(cls, input_path: str) -> Self: 1146 """ 1147 Load the events from an HDF5 file, including metadata and features. 1148 :param input_path: 1149 :return: 1150 """ 1151 # Open the input_path as an HDF5 file 1152 with pd.HDFStore(input_path, "r") as store: 1153 # Load the dataframes from the HDF5 file 1154 info = store.get("info") if "info" in store else None 1155 metadata = store.get("metadata") if "metadata" in store else None 1156 features = store.get("features") if "features" in store else None 1157 return cls(info=info, metadata=metadata, features=features)
Load the events from an HDF5 file, including metadata and features.
Parameters
- input_path: the path of the HDF5 file to load.
Returns
an EventArray with the loaded info, metadata, and features.
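The HDF5 pair round-trips the same way; note that pandas' HDFStore needs the PyTables package (tables) to be installed. A minimal sketch:

import os, tempfile

h5_path = os.path.join(tempfile.gettempdir(), "slide01_events.h5")
events.save_hdf5(h5_path)
reloaded = EventArray.load_hdf5(h5_path)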
1159 def save_ocular(self, output_path: str, event_type: str = "cells"): 1160 """ 1161 Save the events to an OCULAR file. Relies on the dataframe originating 1162 from an OCULAR file (same columns; duplicate metadata/info). 1163 :param output_path: 1164 :param event_type: 1165 :return: 1166 """ 1167 if pyreadr is None: 1168 raise ModuleNotFoundError( 1169 "pyreadr not installed! Install pyreadr directly " 1170 "or run `pip install csi-images[rds]` option to resolve." 1171 ) 1172 if event_type == "cells": 1173 file_stub = "rc-final" 1174 elif event_type == "others": 1175 file_stub = "others-final" 1176 else: 1177 raise ValueError("Invalid event type. Must be cells or others.") 1178 1179 # Ensure good metadata 1180 metadata = pd.DataFrame( 1181 { 1182 "slide_id": self.info["slide_id"], 1183 "frame_id": self.info["tile"] + 1, # Convert to 1-indexed for R 1184 "cell_id": ( 1185 self.metadata["cell_id"] 1186 if "cell_id" in self.metadata.columns 1187 else range(len(self.info)) 1188 ), 1189 "cellx": self.info["x"], 1190 "celly": self.info["y"], 1191 } 1192 ) 1193 if self.metadata is not None: 1194 metadata[self.metadata.columns] = self.metadata.copy() 1195 1196 # Check for the "ocular_interesting" column 1197 if event_type == "cells": 1198 if "ocular_interesting" in metadata.columns: 1199 interesting_rows = metadata["ocular_interesting"].to_numpy(dtype=bool) 1200 elif "hcpc" in metadata.columns: 1201 # Interesting cells don't get an hcpc designation, leaving them as -1 1202 interesting_rows = ( 1203 metadata["hcpc"].to_numpy() == -1 1204 ) # interesting cells 1205 else: 1206 interesting_rows = [] 1207 if sum(interesting_rows) > 0: 1208 # Split the metadata into interesting and regular 1209 interesting_events = self.rows(interesting_rows) 1210 interesting_df = pd.concat( 1211 [interesting_events.features, interesting_events.metadata], axis=1 1212 ) 1213 data_events = self.rows(~interesting_rows) 1214 data_df = pd.concat( 1215 [data_events.features, data_events.metadata], axis=1 1216 ) 1217 data_df = data_df.drop(columns=["ocular_interesting"], errors="ignore") 1218 1219 # Drop particular columns for "interesting" 1220 interesting_df = interesting_df.drop( 1221 [ 1222 "clust", 1223 "hcpc", 1224 "frame_id", 1225 "cell_id", 1226 "unique_id", 1227 "ocular_interesting", 1228 ], 1229 axis=1, 1230 errors="ignore", 1231 ) 1232 # Save both .csv and .rds 1233 interesting_stub = os.path.join(output_path, "ocular_interesting") 1234 interesting_df.to_csv(f"{interesting_stub}.csv") 1235 # Suppress pandas FutureWarning 1236 with warnings.catch_warnings(): 1237 warnings.simplefilter(action="ignore", category=FutureWarning) 1238 pyreadr.write_rds(f"{interesting_stub}.rds", interesting_df) 1239 else: 1240 data_df = pd.concat([self.features, metadata], axis=1) 1241 else: 1242 # Get all data and reset_index (will copy it) 1243 data_df = pd.concat([self.features, metadata], axis=1) 1244 1245 # Split based on cluster number to conform to *-final[1-4].rds 1246 n_clusters = max(data_df["clust"]) + 1 1247 split_idx = [round(i * n_clusters / 4) for i in range(5)] 1248 for i in range(4): 1249 subset = (split_idx[i] <= data_df["clust"]) & ( 1250 data_df["clust"] < split_idx[i + 1] 1251 ) 1252 data_df.loc[subset, "hcpc"] = i + 1 1253 subset = data_df[subset].reset_index(drop=True) 1254 # Suppress pandas FutureWarning 1255 with warnings.catch_warnings(): 1256 warnings.simplefilter(action="ignore", category=FutureWarning) 1257 pyreadr.write_rds( 1258 os.path.join(output_path, f"{file_stub}{i+1}.rds"), subset 1259 ) 1260 1261 # 
Create new example cell strings 1262 data_df["example_cell_id"] = ( 1263 data_df["slide_id"] 1264 + " " 1265 + data_df["frame_id"].astype(str) 1266 + " " 1267 + data_df["cell_id"].astype(str) 1268 + " " 1269 + data_df["cellx"].astype(int).astype(str) 1270 + " " 1271 + data_df["celly"].astype(int).astype(str) 1272 ) 1273 # Find averagable data columns 1274 if "cellcluster_id" in data_df.columns: 1275 end_idx = data_df.columns.get_loc("cellcluster_id") 1276 else: 1277 end_idx = data_df.columns.get_loc("slide_id") 1278 avg_cols = data_df.columns[:end_idx].tolist() 1279 # Group by cluster and average 1280 data_df = data_df.groupby("clust").agg( 1281 **{col: (col, "mean") for col in avg_cols}, 1282 count=("clust", "size"), # count rows in each cluster 1283 example_cells=("example_cell_id", lambda x: ",".join(x)), 1284 hcpc=("hcpc", lambda x: x.iloc[0]), 1285 ) 1286 data_df = data_df.reset_index() # Do NOT drop, index is "clust" 1287 # Create new columns 1288 metadata = pd.DataFrame( 1289 { 1290 "count": data_df["count"], 1291 "example_cells": data_df["example_cells"], 1292 "clust": data_df["clust"].astype(int), 1293 "hcpc": data_df["hcpc"].astype(int), 1294 "id": data_df["clust"].astype(int).astype(str), 1295 "cccluster": "0", # Dummy value 1296 "ccdistance": 0.0, # Dummy value 1297 "rownum": list(range(len(data_df))), 1298 "framegroup": 0, # Dummy value 1299 } 1300 ) 1301 # Need to pad the features to 761 columns, as per OCULAR report needs 1302 additional_columns = range(len(avg_cols), 761) 1303 if len(additional_columns) > 0: 1304 padding = pd.DataFrame( 1305 np.zeros((len(data_df), len(additional_columns))), 1306 columns=[f"pad{i}" for i in additional_columns], 1307 ) 1308 data_df = pd.concat([data_df[avg_cols], padding, metadata], axis=1) 1309 else: 1310 data_df = pd.concat([data_df[avg_cols], metadata], axis=1) 1311 1312 # Save the cluster data 1313 data_df.to_csv(os.path.join(output_path, f"{file_stub}.csv")) 1314 # Suppress pandas FutureWarning 1315 with warnings.catch_warnings(): 1316 warnings.simplefilter(action="ignore", category=FutureWarning) 1317 pyreadr.write_rds(os.path.join(output_path, f"{file_stub}.rds"), data_df)
Save the events to OCULAR output files (.csv and .rds). Relies on the DataFrames originating from an OCULAR file (same columns; duplicated metadata/info).
Parameters
- output_path: the directory to write the OCULAR files into.
- event_type: "cells" or "others", which determines the output file names.
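A hedged sketch of writing OCULAR output. It assumes the EventArray was originally loaded from OCULAR results (so columns such as clust and hcpc are present), that the rds extra (pyreadr) is installed, and that the output directory already exists; the path is illustrative:

ocular_dir = "/path/to/slide/ocular"
events.save_ocular(ocular_dir, event_type="cells")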
1319 @classmethod 1320 def load_ocular( 1321 cls, 1322 input_path: str, 1323 event_type="cells", 1324 cell_data_files=( 1325 "rc-final1.rds", 1326 "rc-final2.rds", 1327 "rc-final3.rds", 1328 "rc-final4.rds", 1329 "ocular_interesting.rds", 1330 ), 1331 others_data_files=( 1332 "others-final1.rds", 1333 "others-final2.rds", 1334 "others-final3.rds", 1335 "others-final4.rds", 1336 ), 1337 atlas_data_files=( 1338 "ocular_interesting.rds", 1339 "ocular_not_interesting.rds", 1340 ), 1341 drop_common_events=True, 1342 ) -> Self: 1343 """ 1344 1345 :param input_path: 1346 :param event_type: 1347 :param cell_data_files: 1348 :param others_data_files: 1349 :param atlas_data_files: 1350 :param drop_common_events: 1351 :return: 1352 """ 1353 if pyreadr is None: 1354 raise ModuleNotFoundError( 1355 "pyreadr not installed! Install pyreadr directly " 1356 "or run `pip install csi-images[rds]` option to resolve." 1357 ) 1358 # Check if the input path is a directory or a file 1359 if os.path.isfile(input_path): 1360 data_files = [os.path.basename(input_path)] 1361 input_path = os.path.dirname(input_path) 1362 if event_type == "cells": 1363 data_files = cell_data_files 1364 elif event_type == "others": 1365 data_files = others_data_files 1366 else: 1367 raise ValueError("Invalid event type.") 1368 1369 # Load the data from the OCULAR files 1370 file_data = {} 1371 for file in data_files: 1372 file_path = os.path.join(input_path, file) 1373 if not os.path.isfile(file_path): 1374 warnings.warn(f"{file} not found for in {input_path}") 1375 continue 1376 file_data[file] = pyreadr.read_r(file_path) 1377 # Get the DataFrame associated with None (pyreadr dict quirk) 1378 file_data[file] = file_data[file][None] 1379 if len(file_data[file]) == 0: 1380 # File gets dropped from the dict 1381 file_data.pop(file) 1382 warnings.warn(f"{file} has no cells") 1383 continue 1384 1385 # Drop common cells if requested and in this file 1386 if ( 1387 file in atlas_data_files 1388 and drop_common_events 1389 and "catalogue_classification" in file_data[file] 1390 ): 1391 common_cell_indices = ( 1392 file_data[file]["catalogue_classification"] == "common_cell" 1393 ) 1394 file_data[file] = file_data[file][common_cell_indices == False] 1395 1396 if len(file_data[file]) == 0: 1397 # File gets dropped from the dict 1398 file_data.pop(file) 1399 warnings.warn(f"{file} has no cells after dropping common cells") 1400 continue 1401 1402 # Extract frame_id and cell_id 1403 # DAPI- events already have frame_id cell_id outside rowname 1404 if event_type == "cells" and "frame_id" not in file_data[file].columns: 1405 file_data[file]["rowname"] = file_data[file]["rowname"].astype("str") 1406 # get frame_id cell_id from rownames column and split into two columns 1407 split_res = file_data[file]["rowname"].str.split(" ", n=1, expand=True) 1408 if len(split_res.columns) != 2: 1409 warnings.warn( 1410 f'Expected "frame_id cell_id" but got {file_data[file]["rowname"]}' 1411 ) 1412 # then assign it back to the dataframe 1413 file_data[file][["frame_id", "cell_id"]] = split_res.astype("int") 1414 # Ensure frame_id and cell_id are integers 1415 file_data[file]["frame_id"] = file_data[file]["frame_id"].astype("int") 1416 file_data[file]["cell_id"] = file_data[file]["cell_id"].astype("int") 1417 # reset indexes since they can cause NaN values in concat 1418 file_data[file] = file_data[file].reset_index(drop=True) 1419 1420 # Merge the data from all files 1421 if len(file_data) == 0: 1422 return EventArray() 1423 elif len(file_data) == 1: 1424 data = 
[file_data[file] for file in file_data.keys()][0] 1425 else: 1426 data = pd.concat(file_data.values()) 1427 1428 # Others is missing the "slide_id". Insert it right before "frame_id" column 1429 if event_type == "others" and "slide_id" not in data.columns: 1430 if os.path.basename(input_path) == "ocular": 1431 slide_id = os.path.basename(os.path.dirname(input_path)) 1432 else: 1433 slide_id = "UNKNOWN" 1434 data.insert(data.columns.get_loc("frame_id"), "slide_id", slide_id) 1435 1436 # Sort according to ascending cell_id to keep the original, which is in manual_df 1437 data = data.sort_values(by=["cell_id"], ascending=True) 1438 # Filter out duplicates by x & y 1439 data = data.assign( 1440 unique_id=data["slide_id"] 1441 + "_" 1442 + data["frame_id"].astype(str) 1443 + "_" 1444 + data["cellx"].astype(int).astype(str) 1445 + "_" 1446 + data["celly"].astype(int).astype(str) 1447 ) 1448 data = data.drop_duplicates(subset=["unique_id"], keep="first") 1449 # Normal unique_id is with cell_id 1450 data = data.assign( 1451 unique_id=data["slide_id"] 1452 + "_" 1453 + data["frame_id"].astype(str) 1454 + "_" 1455 + data["cell_id"].astype(str) 1456 ) 1457 data = data.reset_index(drop=True) 1458 # All columns up to "slide_id" are features; drop the "slide_id" 1459 features = data.loc[:, :"slide_id"].iloc[:, :-1] 1460 data = data.loc[:, "slide_id":] 1461 # Grab the info columns 1462 info = data[["slide_id", "frame_id", "cellx", "celly"]] 1463 info.columns = ["slide_id", "tile", "x", "y"] 1464 info = info.assign(roi=0) # OCULAR only works on 1 ROI, as far as known 1465 info = info[["slide_id", "tile", "roi", "x", "y"]] 1466 # Metadata has duplicate columns for later convenience 1467 metadata = data 1468 # Certain columns tend to be problematic with mixed data formats... 1469 for col in ["TRITC", "CY5", "FITC"]: 1470 if col in metadata: 1471 labels = { 1472 "False": False, 1473 "True": True, 1474 "FALSE": False, 1475 "TRUE": True, 1476 False: False, 1477 True: True, 1478 } 1479 metadata[col] = metadata[col].map(labels).astype(bool) 1480 for col in ["catalogue_id", "catalogue_distance", "clust", "hcpc"]: 1481 if col in metadata: 1482 metadata[col] = metadata[col].fillna(-1).astype(int) 1483 info["tile"] = info["tile"] - 1 # Convert to 0-based indexing 1484 return EventArray(info, metadata, features)
Load events from OCULAR output files (.rds), including metadata and features.
Parameters
- input_path: the OCULAR output directory, or the path to a single .rds file.
- event_type: "cells" or "others", which determines the default set of files to load.
- cell_data_files: the file names to load when event_type is "cells".
- others_data_files: the file names to load when event_type is "others".
- atlas_data_files: file names treated as atlas data, from which common events may be dropped.
- drop_common_events: whether to drop events classified as "common_cell" in atlas files.
Returns
an EventArray with the loaded info, metadata, and features.
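For example, loading both the cell and non-cell ("others") results from an OCULAR output directory (the path is illustrative; requires pyreadr):

from csi_images.csi_events import EventArray

ocular_dir = "/path/to/slide/ocular"
cells = EventArray.load_ocular(ocular_dir)                       # event_type defaults to "cells"
others = EventArray.load_ocular(ocular_dir, event_type="others")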