csi_images.csi_events

Contains the Event class, which represents a single event in a scan. The Event class optionally holds metadata and features. Lists of events with similar metadata or features can be combined into DataFrames for analysis.
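
A minimal usage sketch, assuming a placeholder Scan is acceptable: Scan.make_placeholder and the Tile constructor are used here with the same arguments as in EventArray.to_events() below, and with real data you would load the Scan from its scan metadata instead.

    from csi_images.csi_scans import Scan
    from csi_images.csi_tiles import Tile
    from csi_images.csi_events import Event, EventArray

    # Placeholder scan and tile (slide id, tile number, and ROI are illustrative)
    scan = Scan.make_placeholder("EXAMPLE_SLIDE", 0, 0)
    tile = Tile(scan, 0, 0)

    # Two events at (x, y) pixel positions within the tile's frame
    events = [Event(tile, 100, 200), Event(tile, 300, 400)]

    # Combine into an EventArray and flatten to a single DataFrame for analysis
    event_array = EventArray.from_events(events)
    df = event_array.to_dataframe()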

The Event class holds the position of the event in the frame, which can be converted to scanner or slide coordinates. See the csi_images.csi_scans documentation page for more information on the coordinate systems.
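
Continuing the sketch above, an event's in-frame pixel position can be converted to micrometer positions. This assumes the Scan provides a pixel size, image size, and ROI origins, which a placeholder scan may not fully populate; get_slide_position() also requires a supported scanner type (Axioscan 7 or BZScanner) and raises ValueError otherwise.

    event = Event(tile, 100, 200)
    x_um, y_um = event.get_scan_position()                # scanner coordinates, in um
    slide_x_um, slide_y_um = event.get_slide_position()   # slide coordinates, in um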

   1"""
   2Contains the Event class, which represents a single event in a scan.
   3The Event class optionally holds metadata and features. Lists of events with
   4similar metadata or features can be combined into DataFrames for analysis.
   5
   6The Event class holds the position of the event in the frame, which can be converted
   7to scanner or slide coordinates. See the
   8csi_images.csi_scans documentation page for more information on the coordinate systems.
   9"""
  10
  11import os
  12import glob
  13import math
  14import warnings
  15from typing import Self, Iterable, Hashable, Sequence
  16
  17import numpy as np
  18import pandas as pd
  19
  20from .csi_scans import Scan
  21from .csi_tiles import Tile
  22from .csi_frames import Frame
  23
  24# Optional dependencies; will raise errors in particular functions if not installed
  25try:
  26    from . import csi_images
  27except ImportError:
  28    csi_images = None
  29try:
  30    import imageio.v3 as imageio
  31except ImportError:
  32    imageio = None
  33try:
  34    import pyreadr
  35except ImportError:
  36    pyreadr = None
  37
  38
  39class Event:
  40    """
  41    A class that represents a single event in a scan, making it easy to evaluate
  42    singular events. Required metadata is exposed as attributes, and optional
  43    metadata and features are stored as DataFrames.
  44    """
  45
  46    SCAN_TO_SLIDE_TRANSFORM = {
  47        # Axioscan zero is in the top-right corner instead of top-left
  48        Scan.Type.AXIOSCAN7: np.array(
  49            [
  50                [1, 0, 75000],
  51                [0, 1, 0],
  52                [0, 0, 1],
  53            ]
  54        ),
  55        # BZScanner coordinates are a special kind of messed up:
  56        # - The slide is upside-down.
  57        # - The slide is oriented vertically, with the barcode at the bottom.
  58        # - Tiles are numbered from the top-right
  59        Scan.Type.BZSCANNER: np.array(
  60            [
  61                [0, -1, 75000],
  62                [-1, 0, 25000],
  63                [0, 0, 1],
  64            ]
  65        ),
  66    }
  67    """
  68    Homogeneous transformation matrices for converting between scanner and slide
  69    coordinates. The matrices are 3x3, with the final column representing the
  70    translation in micrometers (um). For more information, see 
  71    [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations).
  72    
  73    Transformations are nominal, and accuracy is not guaranteed; this is due to 
  74    imperfections in slides and alignment in the scanners. Units are in micrometers.
  75    """
  76
  77    def __init__(
  78        self,
  79        tile: Tile,
  80        x: int,
  81        y: int,
  82        metadata: pd.Series = None,
  83        features: pd.Series = None,
  84    ):
  85        self.tile = tile
  86        self.x = int(x)
  87        self.y = int(y)
  88        self.metadata = metadata
  89        self.features = features
  90
  91    def __repr__(self) -> str:
  92        return f"{self.tile}-{self.x}-{self.y}"
  93
  94    def __eq__(self, other) -> bool:
  95        return self.__repr__() == other.__repr__()
  96
  97    def __lt__(self, other):
  98        return self.__repr__() < other.__repr__()
  99
 100    def get_scan_position(self) -> tuple[float, float]:
 101        """
 102        Get the position of the event in the scanner's coordinate frame.
 103        :return: the scan position of the event in micrometers (um).
 104        """
 105        # Get overall pixel position
 106        real_tile_height, real_tile_width = self.tile.scan.get_image_size()
 107        pixel_x = self.x + (real_tile_width * self.tile.x)
 108        pixel_y = self.y + (real_tile_height * self.tile.y)
 109        # Convert to micrometers
 110        x_um = pixel_x * self.tile.scan.pixel_size_um
 111        y_um = pixel_y * self.tile.scan.pixel_size_um
 112        # Add the scan's origin in the scanner frame
 113        x_um += self.tile.scan.roi[self.tile.n_roi].origin_x_um
 114        y_um += self.tile.scan.roi[self.tile.n_roi].origin_y_um
 115        return x_um, y_um
 116
 117    def get_slide_position(self) -> tuple[float, float]:
 118        """
 119        Get the slide position of the event in micrometers (um).
 120        :return: the slide position of the event.
 121        """
 122        # Turn scan_position into a 3x1 vector
 123        scan_position = self.get_scan_position()
 124        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])
 125
 126        # Multiply by the appropriate homogeneous matrix
 127        if self.tile.scan.scanner_id.startswith(self.tile.scan.Type.AXIOSCAN7.value):
 128            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.AXIOSCAN7]
 129        elif self.tile.scan.scanner_id.startswith(self.tile.scan.Type.BZSCANNER.value):
 130            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.BZSCANNER]
 131        else:
 132            raise ValueError(f"Scanner type {self.tile.scan.scanner_id} not supported.")
 133        slide_position = np.matmul(transform, scan_position)
 134        return float(slide_position[0][0]), float(slide_position[1][0])
 135
 136    def crop(
 137        self, images: Iterable[np.ndarray], crop_size: int = 100, in_pixels: bool = True
 138    ) -> list[np.ndarray]:
 139        """
 140        Crop the event from the provided frame images. Use if you have already gotten
 141        frame images; useful for cropping multiple events from the same frame image.
 142        :param images: the frame images.
 143        :param crop_size: the square size of the image crop to get for this event.
 144        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
 145        :return: crop_size x crop_size crops of the event in the provided frames. If
 146        the event is too close to the edge, the crop is zero-padded (black) and not centered.
 147        """
 148        # Convert a crop size in micrometers to pixels
 149        if not in_pixels:
 150            crop_size = round(crop_size / self.tile.scan.pixel_size_um)
 151        image_height, image_width = 0, 0
 152        for image in images:
 153            if image_height == 0 and image_width == 0:
 154                image_height, image_width = image.shape
 155            else:
 156                if image_height != image.shape[0] or image_width != image.shape[1]:
 157                    raise ValueError("All images must be the same size")
 158        if image_height == 0 or image_width == 0:
 159            raise ValueError("No images provided")
 160
 161        # Find the crop bounds
 162        bounds = [
 163            self.x - (crop_size // 2) + 1,
 164            self.y - (crop_size // 2) + 1,
 165            self.x + math.ceil(crop_size / 2) + 1,
 166            self.y + math.ceil(crop_size / 2) + 1,
 167        ]
 168        # Determine how much the bounds violate the image size
 169        displacements = [
 170            max(0, -bounds[0]),
 171            max(0, -bounds[1]),
 172            max(0, bounds[2] - image_width),
 173            max(0, bounds[3] - image_height),
 174        ]
 175        # Cap off the bounds
 176        bounds = [
 177            max(0, bounds[0]),
 178            max(0, bounds[1]),
 179            min(image_width, bounds[2]),
 180            min(image_height, bounds[3]),
 181        ]
 182
 183        # Crop the images
 184        crops = []
 185        for image in images:
 186            # Create a blank image of the right size
 187            crop = np.zeros((crop_size, crop_size), dtype=image.dtype)
 188
 189            # Insert the cropped image into the blank image, leaving a black buffer
 190            # around the edges if the crop would go beyond the original image bounds
 191            crop[
 192                displacements[1] : crop_size - displacements[3],
 193                displacements[0] : crop_size - displacements[2],
 194            ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]]
 195            crops.append(crop)
 196        return crops
 197
 198    def get_crops(
 199        self,
 200        crop_size: int = 100,
 201        in_pixels: bool = True,
 202        input_path: str = None,
 203        channels: Iterable[int | str] = None,
 204        apply_gain: bool | Iterable[bool] = True,
 205    ) -> list[np.ndarray]:
 206        """
 207        Gets the frame images for this event and then crops the event from the images.
 208        Convenient for retrieving a single event's crops, but less efficient when
 209        retrieving multiple events from the same tile as it will reread the images.
 210        :param crop_size: the square size of the image crop to get for this event.
 211        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
 212        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
 213        :param channels: the channels to extract images for. Defaults to all channels.
 214        :param apply_gain: whether to apply scanner-calculated gain to the images, if
 215        not already applied. If a list, matches the channels.
 216        :return: a list of cropped images from the scan in the order of the channels.
 217        """
 218        # This function validates channels
 219        frames = Frame.get_frames(self.tile, channels)
 220        # Convert individual inputs to lists of appropriate length
 221        if isinstance(apply_gain, bool):
 222            apply_gain = [apply_gain] * len(frames)
 223        images = [f.get_image(input_path, a) for f, a in zip(frames, apply_gain)]
 224        return self.crop(images, crop_size, in_pixels)
 225
 226    def save_crops(
 227        self,
 228        crops: Sequence[np.ndarray],
 229        output_path: str,
 230        labels: Sequence[str],
 231        ext: str = "auto",
 232    ):
 233        """
 234        Save the crops to image files.
 235        :param crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or
 236        grayscale if 1 channel [h, w] or [h, w, 1].
 237        :param output_path: the folder to save the crops to. Will make if needed.
 238        :param labels: the labels to append to the file name, usually the channel names
 239        associated with each crop.
 240        :param ext: the file extension to save the crops as. Defaults to "auto", which
 241        will save as .tif for grayscale images and .jpg for RGB images.
 242        :return: None
 243        """
 244        if len(crops) != len(labels):
 245            raise ValueError("Crops and labels must be the same length")
 246
 247        if csi_images is None or imageio is None:
 248            raise ModuleNotFoundError(
 249                "imageio libraries not installed! "
 250                "run `pip install csi_images[imageio]` to resolve."
 251            )
 252
 253        os.makedirs(output_path, exist_ok=True)
 254
 255        for crop, label in zip(crops, labels):
 256            if ext == "auto":
 257                if len(crop.shape) == 2 or crop.shape[2] == 1:
 258                    file_extension = ".tif"
 259                elif crop.shape[2] == 3:
 260                    file_extension = ".jpg"
 261                else:
 262                    warnings.warn(
 263                        f"Image shape {crop.shape} not recognized; saving as .tif"
 264                    )
 265                    file_extension = ".tif"
 266            else:
 267                file_extension = ext
 268            file = os.path.join(output_path, f"{self}-{label}{file_extension}")
 269            # TODO: add more file types here
 270            if file_extension == ".tif":
 271                imageio.imwrite(file, crop, compression="deflate")
 272            elif file_extension in [".jpg", ".jpeg"]:
 273                crop = csi_images.scale_bit_depth(crop, np.uint8)
 274                imageio.imwrite(file, crop, quality=80)
 275            else:
 276                imageio.imwrite(file, crop)
 277
 278    def load_crops(
 279        self, input_path: str, labels: list[str] = None
 280    ) -> dict[str, np.ndarray]:
 281        """
 282        Loads previously saved crop files from a folder.
 283        :param input_path: folder containing crop files.
 284        :param labels: optional label filter, will only return crops with these labels.
 285        :return: a dictionary mapping labels to their crop images.
 286        """
 287        crops = {}
 288        for file in glob.glob(os.path.join(input_path, f"{self}-*")):
 289            label = os.path.splitext(os.path.basename(file))[0].split("-")[-1]
 290            # Skip if we have labels to target
 291            if labels is not None and label not in labels:
 292                continue
 293            crops[label] = imageio.imread(file)
 294        return crops
 295
 296    def get_montage_channels(
 297        self,
 298        channels: Sequence[int | str] | None,
 299        composites: dict[int | str, tuple[float, float, float]] | None,
 300    ) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]:
 301        """
 302        Get the channel indices needed for the montage from the event's scan.
 303        :param channels: channel indices or names for grayscale channels
 304        :param composites: dictionary of channel indices or names and RGB values
 305        :return: (1) channel indices to retrieve,
 306                 (2) relative grayscale channel indices, and
 307                 (3) composite channel indices and RGB values.
 308        """
 309        if channels is None:
 310            channels = list(range(len(self.tile.scan.channels)))
 311        if (len(channels) == 0) and (composites is None or len(composites) == 0):
 312            raise ValueError("Must provide at least one channel type to montage")
 313
 314        channels_to_get = []
 315
 316        # Build the list of channels to retrieve
 317        if channels is not None:
 318            if isinstance(channels[0], str):
 319                channels = self.tile.scan.get_channel_indices(channels)
 320            channels_to_get += channels
 321            order = list(range(len(channels)))  # Always the first n channels
 322        else:
 323            order = None
 324
 325        if composites is not None:
 326            relative_composites = {}  # Relative indices for retrieved channels
 327            # Convert to scan indices
 328            rgb_channels = list(composites.keys())
 329            if isinstance(rgb_channels[0], str):
 330                rgb_channels = self.tile.scan.get_channel_indices(rgb_channels)
 331            # Find the index or add to the end
 332            for channel, rgb in zip(rgb_channels, composites.values()):
 333                if channel not in channels_to_get:
 334                    channels_to_get.append(channel)
 335                    relative_composites[len(channels_to_get) - 1] = rgb
 336                else:
 337                    relative_composites[channels_to_get.index(channel)] = rgb
 338        else:
 339            relative_composites = None
 340
 341        return channels_to_get, order, relative_composites
 342
 343    def get_montage(
 344        self,
 345        channels: Sequence[int | str] = None,
 346        composites: dict[int | str, tuple[float, float, float]] = None,
 347        mask: np.ndarray[np.uint8] = None,
 348        labels: Sequence[str] = None,
 349        crop_size: int = 100,
 350        in_pixels: bool = True,
 351        input_path: str = None,
 352        apply_gain: bool = True,
 353        **kwargs,
 354    ) -> np.ndarray:
 355        """
 356        Convenience function for getting frame images and creating a montage. Mirrors
 357        csi_images.make_montage(). Convenient for a single event's montage, but less
 358        efficient when creating montages for multiple events from the same tile.
 359        :param channels: the channels to use for black-and-white montages.
 360        :param composites: dictionary of indices and RGB tuples for a composite.
 361        :param mask: a mask to apply to the montage. Must be the same size as the crop.
 362        :param crop_size: the square size of the image crop to get for this event.
 363        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
 364        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
 365        :param apply_gain: whether to apply scanner-calculated gain to the images, if
 366        not already applied. If a list, matches the channels.
 367        :param kwargs: montage options. See csi_images.make_montage() for more details.
 368        :return: numpy array representing the montage.
 369        """
 370        channels, order, composites = self.get_montage_channels(channels, composites)
 371        images = self.get_crops(crop_size, in_pixels, input_path, channels, apply_gain)
 372        return csi_images.make_montage(
 373            images, order, composites, mask, labels, **kwargs
 374        )
 375
 376    def save_montage(
 377        self,
 378        montage: np.ndarray,
 379        output_path: str,
 380        ocular_names: bool = False,
 381        tag: str = "",
 382        file_extension: str = ".jpeg",
 383        **kwargs,
 384    ):
 385        """
 386        Save the montage as a JPEG image with a set name.
 387        :param montage: the montage to save.
 388        :param output_path: the folder to save the montage in. Will make if needed.
 389        :param ocular_names: whether to use the OCULAR naming convention.
 390        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
 391        :param file_extension: the file extension to save the montage as. Defaults to .jpeg.
 392        :param kwargs: additional arguments to pass to imageio.imwrite().
 393        :return: None
 394        """
 395        if csi_images is None or imageio is None:
 396            raise ModuleNotFoundError(
 397                "imageio libraries not installed! "
 398                "run `pip install csi_images[imageio]` to resolve."
 399            )
 400
 401        montage = csi_images.scale_bit_depth(montage, np.uint8)
 402
 403        if not file_extension.startswith("."):
 404            file_extension = f".{file_extension}"
 405
 406        if ocular_names:
 407            if "cell_id" not in self.metadata.index:
 408                raise ValueError(
 409                    "Event metadata must include 'cell_id' for OCULAR naming."
 410                )
 411            file = f"{self.tile.n}-{self.metadata['cell_id']}-{self.x}-{self.y}{file_extension}"
 412        else:
 413            file = f"{self}{tag}{file_extension}"
 414
 415        os.makedirs(output_path, exist_ok=True)
 416        imageio.imwrite(os.path.join(output_path, file), montage, **kwargs)
 417
 418    def load_montage(self, input_path: str, tag: str = "") -> np.ndarray:
 419        """
 420        Loads the montage from a file saved by Event.save_montage.
 421        :param input_path: the path to the folder where the montage was saved.
 422        :param tag: a string to add to the file name, before the extension.
 423        :return: numpy array of the loaded montage.
 424        """
 425        file = f"{self}{tag}.jpeg"
 426        return imageio.imread(os.path.join(input_path, file))
 427
 428    @classmethod
 429    def get_many_crops(
 430        cls,
 431        events: Sequence[Self],
 432        crop_size: int | Sequence[int] = 100,
 433        in_pixels: bool = True,
 434        input_path: str | Sequence[str] = None,
 435        channels: Sequence[int | str] = None,
 436        apply_gain: bool | Sequence[bool] = True,
 437    ) -> list[list[np.ndarray]]:
 438        """
 439        Get the crops for a list of events, ensuring that there is no wasteful reading
 440        of the same tile multiple times. This function is more efficient than calling
 441        get_crops() for each event.
 442        :param events: the events to get crops for.
 443        :param crop_size: the square size of the image crop to get for each event.
 444                          Defaults to 100. Can be a sequence with one size per event.
 445        :param in_pixels: whether the crop size is in pixels or micrometers.
 446                          Defaults to pixels.
 447        :param input_path: the path to the input images. Will only work for lists of events
 448                           from the same scan. Defaults to None (uses the scan's path).
 449        :param channels: the channels to extract images for. Defaults to all channels.
 450        :param apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True.
 451                           Can be supplied as a list to apply gain to individual channels.
 452        :return: a list of lists of cropped images for each event.
 453        """
 454        if len(events) == 0:
 455            return []
 456        # Adapt singular inputs to lists of appropriate length
 457        if isinstance(crop_size, int):
 458            crop_size = [crop_size] * len(events)
 459        if input_path is None or isinstance(input_path, str):
 460            input_path = [input_path] * len(events)
 461
 462        # Get the order of the events when sorted by slide/tile
 463        order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
 464
 465        # Allocate the list to size
 466        crops = [[]] * len(events)
 467        last_tile = None
 468        images = None  # Holds large numpy arrays, so expensive to compare
 469        # Iterate through in slide/tile sorted order
 470        for i in order:
 471            if last_tile != events[i].tile:
 472                # Gather the frame images, preserving them for the next event
 473                frames = Frame.get_frames(events[i].tile, channels)
 474                if isinstance(apply_gain, bool):
 475                    apply = [apply_gain] * len(frames)
 476                else:
 477                    apply = apply_gain
 478                images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)]
 479                last_tile = events[i].tile
 480            # Use the frame images to crop the event images
 481            crops[i] = events[i].crop(images, crop_size[i], in_pixels)
 482        return crops
 483
 484    @classmethod
 485    def get_many_montages(
 486        cls,
 487        events: Sequence[Self],
 488        channels: Sequence[int | str] = None,
 489        composites: dict[int | str, tuple[float, float, float]] = None,
 490        masks: Sequence[np.ndarray[np.uint8]] = None,
 491        labels: Sequence[str] = None,
 492        crop_size: int = 100,
 493        in_pixels: bool = True,
 494        input_path: str = None,
 495        apply_gain: bool | Iterable[bool] = True,
 496        **kwargs,
 497    ) -> list[np.ndarray]:
 498        """
 499        Convenience function for get_montage(), but for a list of events. More efficient
 500        than get_montage() when working with multiple events from the same tile.
 501        :param events: a list of Event objects.
 502        :param channels: the channels to extract images for. Defaults to all channels.
 503        :param composites: dictionary of indices and RGB tuples for a composite.
 504        :param masks: a list of masks to apply to the montages. Must be the same size as the crops.
 505        :param labels: the labels to subtitle montage images, usually the channel names
 506        :param crop_size: the square size of the image crop to get for each event.
 507        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
 508        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
 509        :param apply_gain: whether to apply scanner-calculated gain to the images, if
 510        not already applied. If a list, matches the channels.
 511        :param kwargs: montage options. See csi_images.make_montage() for more details.
 512        :return: a list of numpy arrays representing the montages.
 513        """
 514        if len(events) == 0:
 515            return []
 516        # Adapt singular inputs to lists of appropriate length
 517        if isinstance(crop_size, int):
 518            crop_size = [crop_size] * len(events)
 519        if input_path is None or isinstance(input_path, str):
 520            input_path = [input_path] * len(events)
 521        if masks is None or isinstance(masks, np.ndarray):
 522            masks = [masks] * len(events)
 523
 524        # Get the order of the events when sorted by slide/tile
 525        event_order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
 526
 527        # Allocate the list to size
 528        montages = [np.empty(0)] * len(events)
 529        # Placeholder variables to avoid rereading the same tile
 530        images = None  # Holds large numpy arrays, so expensive to compare
 531        order = None
 532        rel_composites = None
 533        last_tile = None
 534        # Iterate through in slide/tile sorted order
 535        for i in event_order:
 536            if last_tile != events[i].tile:
 537                channels_to_get, order, rel_composites = events[i].get_montage_channels(
 538                    channels, composites
 539                )
 540                # Gather the frame images, preserving them for the next event
 541                frames = Frame.get_frames(events[i].tile, channels_to_get)
 542                if isinstance(apply_gain, bool):
 543                    apply = [apply_gain] * len(frames)
 544                else:
 545                    apply = apply_gain
 546                images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)]
 547                last_tile = events[i].tile
 548            # Use the frame images to crop the event images and make montages
 549            crops = events[i].crop(images, crop_size[i], in_pixels)
 550            montages[i] = csi_images.make_montage(
 551                crops, order, rel_composites, masks[i], labels, **kwargs
 552            )
 553
 554        return montages
 555
 556    @classmethod
 557    def get_and_save_many_crops(
 558        cls,
 559        events: list[Self],
 560        output_path: str,
 561        labels: Sequence[str],
 562        ext: str = "auto",
 563        additional_gain: Sequence[float] = None,
 564        **kwargs,
 565    ) -> None:
 566        """
 567        Get and save the crops for a list of events, ensuring that there is no wasteful
 568        reading and limiting the image data in memory to 1 tile at a time. This function
 569        is more efficient than chaining get_crops() and save_crops() for each event or
 570        get_many_crops() and then save_crops().
 571        :param events: list of events to get, crop, and save.
 572        :param output_path: the folder to save the crops in. Will make if needed.
 573        :param labels: the labels to save the crops with. See save_crops().
 574        :param ext: the file extension to save the crops as. See save_crops().
 575        :param additional_gain: additional gain to apply to the crops. If not None, must
 576        match the length of the number of crop channels.
 577        :param kwargs: see get_many_crops() for more parameters.
 578        :return: None
 579        """
 580        unique_tiles = set([event.tile for event in events])
 581
 582        for tile in unique_tiles:
 583            # Get one tile's worth of event crops
 584            tile_events = [e for e in events if e.tile == tile]
 585            crops_list = cls.get_many_crops(tile_events, **kwargs)
 586            for event, crops in zip(tile_events, crops_list):
 587                # Apply any additional gains
 588                if additional_gain is not None:
 589                    crops = [gain * crop for gain, crop in zip(additional_gain, crops)]
 590                event.save_crops(crops, output_path, labels, ext)
 591
 592    @classmethod
 593    def get_and_save_many_montages(
 594        cls,
 595        events: list[Self],
 596        output_path: str,
 597        ocular_names: bool = False,
 598        tag: str = "",
 599        **kwargs,
 600    ) -> None:
 601        """
 602        Save montages of the events to image files.
 603        :param events: the events to get, montage, and save.
 604        :param output_path: the folder to save the montages to. Will make if needed.
 605        :param ocular_names: whether to use the OCULAR naming convention.
 606        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
 607        :param kwargs: see get_many_montages() for more parameters.
 608        """
 609        unique_tiles = set([event.tile for event in events])
 610
 611        for tile in unique_tiles:
 612            # Get one tile's worth of event crops
 613            tile_events = [e for e in events if e.tile == tile]
 614            montages = cls.get_many_montages(tile_events, **kwargs)
 615            for event, montage in zip(tile_events, montages):
 616                event.save_montage(montage, output_path, ocular_names, tag)
 617
 618
 619class EventArray:
 620    """
 621    A class that holds a large number of events' data, making it easy to analyze and
 622    manipulate many events at once. A more separated version of the Event class.
 623    """
 624
 625    INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y"]
 626
 627    def __init__(
 628        self,
 629        info: pd.DataFrame = None,
 630        metadata: pd.DataFrame = None,
 631        features: pd.DataFrame = None,
 632    ):
 633        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y"
 634        if info is not None:
 635            # Special case: "roi" is often not required, so we'll fill it in if it's missing
 636            if "roi" not in info.columns:
 637                info["roi"] = 0
 638            if set(info.columns) != set(self.INFO_COLUMNS):
 639                raise ValueError(
 640                    f"EventArray.info must have columns:"
 641                    f"{self.INFO_COLUMNS}; had {list(info.columns)}"
 642                )
 643            # Copy first to avoid modifying the original
 644            info = info.copy()
 645            # Ensure that the columns are the right types
 646            info["slide_id"] = info["slide_id"].astype(str)
 647            info["tile"] = info["tile"].astype(np.uint16)
 648            info["roi"] = info["roi"].astype(np.uint8)
 649            info["x"] = info["x"].round().astype(np.uint16)
 650            info["y"] = info["y"].round().astype(np.uint16)
 651            # Ensure that the columns are in the right order
 652            info = info[self.INFO_COLUMNS]
 653        # All DataFrames must all have the same number of rows
 654        if metadata is not None and (info is None or len(info) != len(metadata)):
 655            raise ValueError(
 656                "If EventArray.metadata is not None, it should match rows with .info"
 657            )
 658        if features is not None and (info is None or len(info) != len(features)):
 659            raise ValueError(
 660                "If EventArray.features is not None, it should match rows with .info"
 661            )
 662        # No columns named "metadata_", "features_", or "None"
 663        column_names = []
 664        if metadata is not None:
 665            column_names += metadata.columns.tolist()
 666        if features is not None:
 667            column_names += features.columns.tolist()
 668        if any([col.lower().startswith("metadata_") for col in column_names]):
 669            raise ValueError("EventArray column names cannot start with 'metadata_'")
 670        if any([col.lower().startswith("features_") for col in column_names]):
 671            raise ValueError("EventArray column names cannot start with 'features_'")
 672        if any([col.lower() == "none" for col in column_names]):
 673            raise ValueError("EventArray column names cannot be 'none'")
 674
 675        self.info = info
 676        self.metadata = metadata
 677        self.features = features
 678
 679    def __len__(self) -> int:
 680        # Convenience method to get the number of events
 681        if self.info is None:
 682            return 0
 683        else:
 684            return len(self.info)
 685
 686    def __eq__(self, other):
 687        # Parse all possibilities for info
 688        if isinstance(self.info, pd.DataFrame):
 689            if isinstance(other.info, pd.DataFrame):
 690                if not self.info.equals(other.info):
 691                    return False
 692            else:
 693                return False
 694        elif self.info is None:
 695            if other.info is not None:
 696                return False
 697
 698        # Parse all possibilities for metadata
 699        if isinstance(self.metadata, pd.DataFrame):
 700            if isinstance(other.metadata, pd.DataFrame):
 701                is_equal = self.metadata.equals(other.metadata)
 702                if not is_equal:
 703                    return False
 704            else:
 705                return False
 706        elif self.metadata is None:
 707            if other.metadata is not None:
 708                return False
 709
 710        # Parse all possibilities for features
 711        if isinstance(self.features, pd.DataFrame):
 712            if isinstance(other.features, pd.DataFrame):
 713                is_equal = self.features.equals(other.features)
 714                if not is_equal:
 715                    return False
 716            else:
 717                return False
 718        elif self.features is None:
 719            if other.features is not None:
 720                return False
 721
 722        return True
 723
 724    def get_sort_order(
 725        self, by: Hashable | Sequence[Hashable], ascending: bool | Sequence[bool] = True
 726    ):
 727        """
 728        Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames.
 729        :param by: name of the column(s) to sort by.
 730        :param ascending: whether to sort in ascending order; can be a list to match by
 731        :return: the order of the indices to sort by.
 732        """
 733        columns = self.get(by)
 734        return columns.sort_values(by=by, ascending=ascending).index
 735
 736    def sort(
 737        self,
 738        by: Hashable | Sequence[Hashable],
 739        ascending: bool | Sequence[bool] = True,
 740    ) -> Self:
 741        """
 742        Sort the EventArray by column(s) in the info, metadata, or features DataFrames.
 743        :param by: name of the column(s) to sort by.
 744        :param ascending: whether to sort in ascending order; can be a list to match by
 745        :return: a new, sorted EventArray.
 746        """
 747        order = self.get_sort_order(by, ascending)
 748        info = self.info.loc[order].reset_index(drop=True)
 749        if self.metadata is not None:
 750            metadata = self.metadata.loc[order].reset_index(drop=True)
 751        else:
 752            metadata = None
 753        if self.features is not None:
 754            features = self.features.loc[order].reset_index(drop=True)
 755        else:
 756            features = None
 757        return EventArray(info, metadata, features)
 758
 759    def get(self, column_names: Hashable | Sequence[Hashable]) -> pd.DataFrame:
 760        """
 761        Get a DataFrame with the specified columns from the EventArray, by value.
 762        :param column_names: the names of the columns to get.
 763        :return: a DataFrame with the specified columns.
 764        """
 765        if isinstance(column_names, Hashable):
 766            column_names = [column_names]  # Drop into a list for the loop
 767        columns = []
 768        for column_name in column_names:
 769            if column_name in self.info.columns:
 770                columns.append(self.info[column_name])
 771            elif self.metadata is not None and column_name in self.metadata.columns:
 772                columns.append(self.metadata[column_name])
 773            elif self.features is not None and column_name in self.features.columns:
 774                columns.append(self.features[column_name])
 775            else:
 776                raise ValueError(f"Column {column_name} not found in EventArray")
 777        return pd.concat(columns, axis=1)
 778
 779    def rows(self, rows: Sequence[Hashable]) -> Self:
 780        """
 781        Get a subset of the EventArray rows based on a boolean or integer index, by value.
 782        :param rows: row labels, indices, or boolean mask; anything for .loc[]
 783        :return: a new EventArray with the subset of events.
 784        """
 785        info = self.info.loc[rows].reset_index(drop=True)
 786        if self.metadata is not None:
 787            metadata = self.metadata.loc[rows].reset_index(drop=True)
 788        else:
 789            metadata = None
 790        if self.features is not None:
 791            features = self.features.loc[rows].reset_index(drop=True)
 792        else:
 793            features = None
 794        return EventArray(info, metadata, features)
 795
 796    def copy(self) -> Self:
 797        """
 798        Create a deep copy of the EventArray.
 799        :return: a deep copy of the EventArray.
 800        """
 801        return EventArray(
 802            info=self.info.copy(),
 803            metadata=None if self.metadata is None else self.metadata.copy(),
 804            features=None if self.features is None else self.features.copy(),
 805        )
 806
 807    # TODO: add a "filter" convenience function that takes a column name and values to filter by
 808
 809    def add_metadata(self, new_metadata: pd.Series | pd.DataFrame) -> None:
 810        """
 811        Add metadata to the EventArray. Removes the need to check if metadata is None.
 812        Overwrites any existing metadata with the same column names as the new metadata.
 813        :param new_metadata: the metadata to add.
 814        """
 815        if len(self) != len(new_metadata):
 816            raise ValueError("New metadata must match length of existing info")
 817
 818        if self.metadata is None:
 819            self.metadata = new_metadata
 820        else:
 821            if isinstance(new_metadata, pd.Series):
 822                self.metadata[new_metadata.name] = new_metadata
 823            else:
 824                # It's a DataFrame
 825                self.metadata[new_metadata.columns] = new_metadata
 826
 827    def add_features(self, new_features: pd.Series | pd.DataFrame) -> None:
 828        """
 829        Add features to the EventArray. Removes the need to check if features is None.
 830        Overwrites any existing features with the same column names as the new features.
 831        :param new_features: the features to add.
 832        """
 833        if len(self) != len(new_features):
 834            raise ValueError("New features must match length of existing info")
 835
 836        if self.features is None:
 837            self.features = new_features
 838        else:
 839            if isinstance(new_features, pd.Series):
 840                self.features[new_features.name] = new_features
 841            else:
 842                # It's a DataFrame
 843                self.features[new_features.columns] = new_features
 844
 845    @classmethod
 846    def merge(cls, events: Iterable[Self]) -> Self:
 847        """
 848        Combine EventArrays in a list into a single EventArray.
 849        :param events: the EventArrays to combine.
 850        """
 851        all_info = []
 852        all_metadata = []
 853        all_features = []
 854        for event_array in events:
 855            # Skip empty EventArrays
 856            if event_array.info is not None:
 857                all_info.append(event_array.info)
 858            if event_array.metadata is not None:
 859                all_metadata.append(event_array.metadata)
 860            if event_array.features is not None:
 861                all_features.append(event_array.features)
 862        if len(all_info) == 0:
 863            return EventArray()
 864        else:
 865            all_info = pd.concat(all_info, ignore_index=True)
 866        if len(all_metadata) == 0:
 867            all_metadata = None
 868        else:
 869            all_metadata = pd.concat(all_metadata, ignore_index=True)
 870        if len(all_features) == 0:
 871            all_features = None
 872        else:
 873            all_features = pd.concat(all_features, ignore_index=True)
 874
 875        return EventArray(all_info, all_metadata, all_features)
 876
 877    def to_events(
 878        self,
 879        scans: Scan | Iterable[Scan] | None,
 880        ignore_missing_scans=True,
 881        ignore_metadata=False,
 882        ignore_features=False,
 883    ) -> list[Event]:
 884        """
 885        Get the events in the EventArray as a list of events. Returns [] if empty.
 886        :param scans: the scans that the events belong to, auto-matched by slide_id.
 887        Pass None if you don't care about scan metadata (requires ignore_missing_scans=True).
 888        :param ignore_missing_scans: whether to create blank scans for events without scans.
 889        :param ignore_metadata: whether to ignore metadata or not
 890        :param ignore_features: whether to ignore features or not
 891        :return:
 892        """
 893        if len(self) == 0:
 894            return []
 895        if scans is None or isinstance(scans, Scan):
 896            scans = [] if scans is None else [scans]
 897        scans = {scan.slide_id: scan for scan in scans}
 898        events = []
 899        for i in range(len(self.info)):
 900            # Determine the associated scan
 901            slide_id = self.info["slide_id"][i]
 902            if slide_id not in scans:
 903                if ignore_missing_scans:
 904                    # Create a placeholder scan if the scan is missing
 905                    scan = Scan.make_placeholder(
 906                        slide_id,
 907                        self.info["tile"][i],
 908                        self.info["roi"][i],
 909                    )
 910                else:
 911                    raise ValueError(
 912                        f"Scan {self.info['slide_id'][i]} not found for event {i}."
 913                    )
 914            else:
 915                scan = scans[slide_id]
 916
 917            # Prepare the metadata and features
 918            if ignore_metadata or self.metadata is None:
 919                metadata = None
 920            else:
 921                # This Series creation method is less efficient,
 922                # but required for preserving dtypes
 923                metadata = pd.Series(
 924                    {col: self.metadata.loc[i, col] for col in self.metadata.columns},
 925                    dtype=object,
 926                )
 927            if ignore_features or self.features is None:
 928                features = None
 929            else:
 930                features = pd.Series(
 931                    {col: self.features.loc[i, col] for col in self.features.columns},
 932                    dtype=object,
 933                )
 934            # Create the event and append it to the list
 935            events.append(
 936                Event(
 937                    Tile(scan, self.info["tile"][i], self.info["roi"][i]),
 938                    self.info["x"][i],
 939                    self.info["y"][i],
 940                    metadata=metadata,
 941                    features=features,
 942                )
 943            )
 944        return events
 945
 946    @classmethod
 947    def from_events(cls, events: Iterable[Event]) -> Self:
 948        """
 949        Create an EventArray from a list of events.
 950        :param events: the events to gather data from.
 951        """
 952        info = pd.DataFrame(
 953            {
 954                "slide_id": [event.tile.scan.slide_id for event in events],
 955                "tile": [event.tile.n for event in events],
 956                "roi": [event.tile.n_roi for event in events],
 957                "x": [event.x for event in events],
 958                "y": [event.y for event in events],
 959            }
 960        )
 961        metadata_list = [event.metadata for event in events]
 962        # Iterate through and ensure that all metadata is the same shape
 963        for metadata in metadata_list:
 964            if type(metadata) != type(metadata_list[0]):
 965                raise ValueError("All metadata must be the same type.")
 966            if metadata is not None and metadata.shape != metadata_list[0].shape:
 967                raise ValueError("All metadata must be the same shape.")
 968        if metadata_list[0] is None:
 969            metadata = None
 970        else:
 971            metadata = pd.DataFrame(metadata_list)
 972        features_list = [event.features for event in events]
 973        # Iterate through and ensure that all features are the same shape
 974        for features in features_list:
 975            if type(features) != type(features_list[0]):
 976                raise ValueError("All features must be the same type.")
 977            if features is not None and features.shape != features_list[0].shape:
 978                raise ValueError("All features must be the same shape.")
 979        if features_list[0] is None:
 980            features = None
 981        else:
 982            features = pd.DataFrame(features_list)
 983        return EventArray(info=info, metadata=metadata, features=features)
 984
 985    def to_dataframe(self) -> pd.DataFrame:
 986        """
 987        Convert all the data in the EventArray to a single DataFrame.
 988        :return: a DataFrame with all the data in the EventArray.
 989        """
 990        # Make a copy of the info DataFrame and prepend "info_" to the column names
 991        output = self.info.copy()
 992        # Combine with the metadata and prepend "metadata_" to the column names
 993        if self.metadata is not None:
 994            metadata = self.metadata.copy()
 995            metadata.columns = [f"metadata_{col}" for col in metadata.columns]
 996            output = pd.concat([output, metadata], axis=1)
 997        # Combine with the features and prepend "features_" to the column names
 998        if self.features is not None:
 999            features = self.features.copy()
1000            features.columns = [f"features_{col}" for col in features.columns]
1001            output = pd.concat([output, features], axis=1)
1002        return output
1003
1004    @classmethod
1005    def from_dataframe(
1006        cls, df, metadata_prefix: str = "metadata_", features_prefix: str = "features_"
1007    ) -> Self:
1008        """
1009        Create an EventArray from a single DataFrame, as produced by to_dataframe().
1010        :param df: the DataFrame to convert to an EventArray.
1011        :param metadata_prefix: the prefix for metadata columns.
1012        :param features_prefix: the prefix for features columns.
1013        :return: an EventArray built from the DataFrame's columns.
1014        """
1015        # Split the columns into info, metadata, and features and strip prefix
1016        info = df[[col for col in df.columns if col in cls.INFO_COLUMNS]].copy()
1017        if info.size == 0:
1018            info = None
1019        metadata = df[[col for col in df.columns if col.startswith(metadata_prefix)]].copy()
1020        metadata.columns = [
1021            col.replace(metadata_prefix, "") for col in metadata.columns
1022        ]
1023        if metadata.size == 0:
1024            metadata = None
1025        features = df[[col for col in df.columns if col.startswith(features_prefix)]].copy()
1026        features.columns = [
1027            col.replace(features_prefix, "") for col in features.columns
1028        ]
1029        if features.size == 0:
1030            features = None
1031        return cls(info=info, metadata=metadata, features=features)
1032
1033    @classmethod
1034    def from_mask(
1035        cls,
1036        mask: np.ndarray,
1037        tile: Tile,
1038        include_cell_id: bool = True,
1039        images: list[np.ndarray] = None,
1040        image_labels: list[str] = None,
1041        properties: list[str] = None,
1042    ) -> Self:
1043        """
1044        Extract events from a labeled mask image, including metadata and features.
1045        :param mask: the mask to extract events from.
1046        :param tile: the Tile object associated with this mask.
1047        :param include_cell_id: whether to include the cell_id, or numerical
1048        mask label, as metadata in the EventArray.
1049        :param images: the intensity images to extract features from.
1050        :param image_labels: the labels for the intensity images.
1051        :param properties: additional region properties to extract beyond the defaults.
1052        :return: EventArray corresponding to the mask labels.
1053        """
1054        if csi_images is None:
1055            raise ModuleNotFoundError(
1056                "imageio libraries not installed! "
1057                "run `pip install csi_images[imageio]` to resolve."
1058            )
1059        # Gather mask_info
1060        if images is not None and image_labels is not None:
1061            if len(images) != len(image_labels):
1062                raise ValueError("Intensity images and labels must match lengths.")
1063
1064        mask_info = csi_images.extract_mask_info(mask, images, image_labels, properties)
1065
1066        if len(mask_info) == 0:
1067            return EventArray()
1068
1069        # Combine provided info and mask info
1070        info = pd.DataFrame(
1071            {
1072                "slide_id": tile.scan.slide_id,
1073                "tile": tile.n,
1074                "roi": tile.n_roi,
1075                "x": mask_info["x"],
1076                "y": mask_info["y"],
1077            },
1078        )
1079        # Extract a metadata column if desired
1080        if include_cell_id:
1081            metadata = pd.DataFrame({"cell_id": mask_info["id"]})
1082        else:
1083            metadata = None
1084        # If any additional properties were extracted, add them as features
1085        mask_info = mask_info.drop(columns=["id", "x", "y"], errors="ignore")
1086        if len(mask_info.columns) > 0:
1087            features = mask_info
1088            features.columns = [col.lower() for col in features.columns]
1089        else:
1090            features = None
1091        return EventArray(info, metadata, features)
1092
1093    def save_csv(self, output_path: str) -> bool:
1094        """
1095        Save the events to a CSV file, including metadata and features.
1096        :param output_path: the file path to save to; ".csv" is appended if missing.
1097        :return: True if the file exists after saving.
1098        """
1099        if not output_path.endswith(".csv"):
1100            output_path += ".csv"
1101        self.to_dataframe().to_csv(output_path, index=False)
1102        return os.path.exists(output_path)
1103
1104    @classmethod
1105    def load_csv(
1106        cls,
1107        input_path: str,
1108        metadata_prefix: str = "metadata_",
1109        features_prefix: str = "features_",
1110    ) -> Self:
1111        """
1112        Load the events from a CSV file, including metadata and features.
1113        :param input_path: the path to the CSV file to load.
1114        :param metadata_prefix: the prefix for metadata columns.
1115        :param features_prefix: the prefix for features columns.
1116        :return: an EventArray with the loaded data.
1117        """
1118        # Load the CSV file
1119        df = pd.read_csv(input_path)
1120        return cls.from_dataframe(df, metadata_prefix, features_prefix)
1121
1122    def save_hdf5(self, output_path: str) -> bool:
1123        """
1124        Save the events to an HDF5 file, including metadata and features.
1125        Uses the pandas-provided HDF5 functions for ease and external compatibility,
1126        though these files are slightly harder to view in HDFView or similar.
1127        :param output_path: the file path to save to; ".hdf5" is appended if needed.
1128        :return: True if the file exists after saving.
1129        """
1130        if not output_path.endswith(".hdf5") and not output_path.endswith(".h5"):
1131            output_path += ".hdf5"
1132        # Open the output_path as an HDF5 file
1133        with pd.HDFStore(output_path) as store:
1134            # Store the dataframes in the HDF5 file
1135            if self.info is not None:
1136                store.put("info", self.info, index=False)
1137            if self.metadata is not None:
1138                store.put("metadata", self.metadata, index=False)
1139            if self.features is not None:
1140                store.put("features", self.features, index=False)
1141        return os.path.exists(output_path)
1142
1143    @classmethod
1144    def load_hdf5(cls, input_path: str) -> Self:
1145        """
1146        Load the events from an HDF5 file, including metadata and features.
1147        :param input_path: the path to the HDF5 file to load.
1148        :return: an EventArray with the loaded data.
1149        """
1150        # Open the input_path as an HDF5 file
1151        with pd.HDFStore(input_path, "r") as store:
1152            # Load the dataframes from the HDF5 file
1153            info = store.get("info") if "info" in store else None
1154            metadata = store.get("metadata") if "metadata" in store else None
1155            features = store.get("features") if "features" in store else None
1156        return cls(info=info, metadata=metadata, features=features)
1157
1158    def save_ocular(self, output_path: str, event_type: str = "cells"):
1159        """
1160        Save the events to an OCULAR file. Relies on the dataframe originating
1161        from an OCULAR file (same columns; duplicate metadata/info).
1162        :param output_path: the folder to save the OCULAR .rds and .csv files in.
1163        :param event_type: "cells" or "others"; determines the output file names.
1164        :return: None
1165        """
1166        if pyreadr is None:
1167            raise ModuleNotFoundError(
1168                "pyreadr not installed! Install pyreadr directly "
1169                "or run `pip install csi-images[rds]` option to resolve."
1170            )
1171        if event_type == "cells":
1172            file_stub = "rc-final"
1173        elif event_type == "others":
1174            file_stub = "others-final"
1175        else:
1176            raise ValueError("Invalid event type. Must be cells or others.")
1177
1178        # Ensure good metadata
1179        metadata = pd.DataFrame(
1180            {
1181                "slide_id": self.info["slide_id"],
1182                "frame_id": self.info["tile"] + 1,  # Convert to 1-indexed for R
1183                "cell_id": (
1184                    self.metadata["cell_id"]
1185                    if "cell_id" in self.metadata.columns
1186                    else range(len(self.info))
1187                ),
1188                "cellx": self.info["x"],
1189                "celly": self.info["y"],
1190            }
1191        )
1192        if self.metadata is not None:
1193            metadata[self.metadata.columns] = self.metadata.copy()
1194
1195        # Check for the "ocular_interesting" column
1196        if event_type == "cells":
1197            if "ocular_interesting" in metadata.columns:
1198                interesting_rows = metadata["ocular_interesting"].to_numpy(dtype=bool)
1199            elif "hcpc" in metadata.columns:
1200                # Interesting cells don't get an hcpc designation, leaving them as -1
1201                interesting_rows = (
1202                    metadata["hcpc"].to_numpy() == -1
1203                )  # interesting cells
1204            else:
1205                interesting_rows = []
1206            if sum(interesting_rows) > 0:
1207                # Split the metadata into interesting and regular
1208                interesting_events = self.rows(interesting_rows)
1209                interesting_df = pd.concat(
1210                    [interesting_events.features, interesting_events.metadata], axis=1
1211                )
1212                data_events = self.rows(~interesting_rows)
1213                data_df = pd.concat(
1214                    [data_events.features, data_events.metadata], axis=1
1215                )
1216                data_df = data_df.drop(columns=["ocular_interesting"], errors="ignore")
1217
1218                # Drop particular columns for "interesting"
1219                interesting_df = interesting_df.drop(
1220                    [
1221                        "clust",
1222                        "hcpc",
1223                        "frame_id",
1224                        "cell_id",
1225                        "unique_id",
1226                        "ocular_interesting",
1227                    ],
1228                    axis=1,
1229                    errors="ignore",
1230                )
1231                # Save both .csv and .rds
1232                interesting_stub = os.path.join(output_path, "ocular_interesting")
1233                interesting_df.to_csv(f"{interesting_stub}.csv")
1234                # Suppress pandas FutureWarning
1235                with warnings.catch_warnings():
1236                    warnings.simplefilter(action="ignore", category=FutureWarning)
1237                    pyreadr.write_rds(f"{interesting_stub}.rds", interesting_df)
1238            else:
1239                data_df = pd.concat([self.features, metadata], axis=1)
1240        else:
1241            # Get all data and reset_index (will copy it)
1242            data_df = pd.concat([self.features, metadata], axis=1)
1243
1244        # Split based on cluster number to conform to *-final[1-4].rds
1245        n_clusters = max(data_df["clust"]) + 1
1246        split_idx = [round(i * n_clusters / 4) for i in range(5)]
1247        for i in range(4):
1248            subset = (split_idx[i] <= data_df["clust"]) & (
1249                data_df["clust"] < split_idx[i + 1]
1250            )
1251            data_df.loc[subset, "hcpc"] = i + 1
1252            subset = data_df[subset].reset_index(drop=True)
1253            # Suppress pandas FutureWarning
1254            with warnings.catch_warnings():
1255                warnings.simplefilter(action="ignore", category=FutureWarning)
1256                pyreadr.write_rds(
1257                    os.path.join(output_path, f"{file_stub}{i+1}.rds"), subset
1258                )
1259
1260        # Create new example cell strings
1261        data_df["example_cell_id"] = (
1262            data_df["slide_id"]
1263            + " "
1264            + data_df["frame_id"].astype(str)
1265            + " "
1266            + data_df["cell_id"].astype(str)
1267            + " "
1268            + data_df["cellx"].astype(int).astype(str)
1269            + " "
1270            + data_df["celly"].astype(int).astype(str)
1271        )
1272        # Find averagable data columns
1273        if "cellcluster_id" in data_df.columns:
1274            end_idx = data_df.columns.get_loc("cellcluster_id")
1275        else:
1276            end_idx = data_df.columns.get_loc("slide_id")
1277        avg_cols = data_df.columns[:end_idx].tolist()
1278        # Group by cluster and average
1279        data_df = data_df.groupby("clust").agg(
1280            **{col: (col, "mean") for col in avg_cols},
1281            count=("clust", "size"),  # count rows in each cluster
1282            example_cells=("example_cell_id", lambda x: ",".join(x)),
1283            hcpc=("hcpc", lambda x: x.iloc[0]),
1284        )
1285        data_df = data_df.reset_index()  # Do NOT drop, index is "clust"
1286        # Create new columns
1287        metadata = pd.DataFrame(
1288            {
1289                "count": data_df["count"],
1290                "example_cells": data_df["example_cells"],
1291                "clust": data_df["clust"].astype(int),
1292                "hcpc": data_df["hcpc"].astype(int),
1293                "id": data_df["clust"].astype(int).astype(str),
1294                "cccluster": "0",  # Dummy value
1295                "ccdistance": 0.0,  # Dummy value
1296                "rownum": list(range(len(data_df))),
1297                "framegroup": 0,  # Dummy value
1298            }
1299        )
1300        # Pad the features to 761 columns, as required by the OCULAR report format
1301        additional_columns = range(len(avg_cols), 761)
1302        if len(additional_columns) > 0:
1303            padding = pd.DataFrame(
1304                np.zeros((len(data_df), len(additional_columns))),
1305                columns=[f"pad{i}" for i in additional_columns],
1306            )
1307            data_df = pd.concat([data_df[avg_cols], padding, metadata], axis=1)
1308        else:
1309            data_df = pd.concat([data_df[avg_cols], metadata], axis=1)
1310
1311        # Save the cluster data
1312        data_df.to_csv(os.path.join(output_path, f"{file_stub}.csv"))
1313        # Suppress pandas FutureWarning
1314        with warnings.catch_warnings():
1315            warnings.simplefilter(action="ignore", category=FutureWarning)
1316            pyreadr.write_rds(os.path.join(output_path, f"{file_stub}.rds"), data_df)
1317
1318    @classmethod
1319    def load_ocular(
1320        cls,
1321        input_path: str,
1322        event_type="cells",
1323        cell_data_files=(
1324            "rc-final1.rds",
1325            "rc-final2.rds",
1326            "rc-final3.rds",
1327            "rc-final4.rds",
1328            "ocular_interesting.rds",
1329        ),
1330        others_data_files=(
1331            "others-final1.rds",
1332            "others-final2.rds",
1333            "others-final3.rds",
1334            "others-final4.rds",
1335        ),
1336        atlas_data_files=(
1337            "ocular_interesting.rds",
1338            "ocular_not_interesting.rds",
1339        ),
1340        drop_common_events=True,
1341    ) -> Self:
1342        """
1343        Load events from OCULAR output files (.rds), including metadata and features.
1344        :param input_path: path to the OCULAR output folder, or to a single .rds file.
1345        :param event_type: "cells" or "others"; selects which data files to load.
1346        :param cell_data_files: file names to load when event_type is "cells".
1347        :param others_data_files: file names to load when event_type is "others".
1348        :param atlas_data_files: file names that may contain catalogued common events.
1349        :param drop_common_events: whether to drop events classified as "common_cell".
1350        :return: an EventArray with the loaded info, metadata, and features.
1351        """
1352        if pyreadr is None:
1353            raise ModuleNotFoundError(
1354                "pyreadr not installed! Install pyreadr directly "
1355                "or run `pip install csi-images[rds]` to resolve."
1356            )
1357        # Check if the input path is a directory or a file
1358        if os.path.isfile(input_path):
1359            data_files = [os.path.basename(input_path)]
1360            input_path = os.path.dirname(input_path)
1361        if event_type == "cells":
1362            data_files = cell_data_files
1363        elif event_type == "others":
1364            data_files = others_data_files
1365        else:
1366            raise ValueError("Invalid event type.")
1367
1368        # Load the data from the OCULAR files
1369        file_data = {}
1370        for file in data_files:
1371            file_path = os.path.join(input_path, file)
1372            if not os.path.isfile(file_path):
1373            warnings.warn(f"{file} not found in {input_path}")
1374                continue
1375            file_data[file] = pyreadr.read_r(file_path)
1376            # Get the DataFrame associated with None (pyreadr dict quirk)
1377            file_data[file] = file_data[file][None]
1378            if len(file_data[file]) == 0:
1379                # File gets dropped from the dict
1380                file_data.pop(file)
1381                warnings.warn(f"{file} has no cells")
1382                continue
1383
1384            # Drop common cells if requested and in this file
1385            if (
1386                file in atlas_data_files
1387                and drop_common_events
1388                and "catalogue_classification" in file_data[file]
1389            ):
1390                common_cell_indices = (
1391                    file_data[file]["catalogue_classification"] == "common_cell"
1392                )
1393                file_data[file] = file_data[file][~common_cell_indices]
1394
1395            if len(file_data[file]) == 0:
1396                # File gets dropped from the dict
1397                file_data.pop(file)
1398                warnings.warn(f"{file} has no cells after dropping common cells")
1399                continue
1400
1401            # Extract frame_id and cell_id
1402            # DAPI- events already have frame_id cell_id outside rowname
1403            if event_type == "cells" and "frame_id" not in file_data[file].columns:
1404                file_data[file]["rowname"] = file_data[file]["rowname"].astype("str")
1405                # get frame_id cell_id from rownames column and split into two columns
1406                split_res = file_data[file]["rowname"].str.split(" ", n=1, expand=True)
1407                if len(split_res.columns) != 2:
1408                    warnings.warn(
1409                        f'Expected "frame_id cell_id" but got {file_data[file]["rowname"]}'
1410                    )
1411                # then assign it back to the dataframe
1412                file_data[file][["frame_id", "cell_id"]] = split_res.astype("int")
1413            # Ensure frame_id and cell_id are integers
1414            file_data[file]["frame_id"] = file_data[file]["frame_id"].astype("int")
1415            file_data[file]["cell_id"] = file_data[file]["cell_id"].astype("int")
1416            # reset indexes since they can cause NaN values in concat
1417            file_data[file] = file_data[file].reset_index(drop=True)
1418
1419        # Merge the data from all files
1420        if len(file_data) == 0:
1421            return EventArray()
1422        elif len(file_data) == 1:
1423            data = [file_data[file] for file in file_data.keys()][0]
1424        else:
1425            data = pd.concat(file_data.values())
1426
1427        # Others is missing the "slide_id". Insert it right before "frame_id" column
1428        if event_type == "others" and "slide_id" not in data.columns:
1429            if os.path.basename(input_path) == "ocular":
1430                slide_id = os.path.basename(os.path.dirname(input_path))
1431            else:
1432                slide_id = "UNKNOWN"
1433            data.insert(data.columns.get_loc("frame_id"), "slide_id", slide_id)
1434
1435        # Sort by ascending cell_id so that drop_duplicates keeps the original entries
1436        data = data.sort_values(by=["cell_id"], ascending=True)
1437        # Filter out duplicates by x & y
1438        data = data.assign(
1439            unique_id=data["slide_id"]
1440            + "_"
1441            + data["frame_id"].astype(str)
1442            + "_"
1443            + data["cellx"].astype(int).astype(str)
1444            + "_"
1445            + data["celly"].astype(int).astype(str)
1446        )
1447        data = data.drop_duplicates(subset=["unique_id"], keep="first")
1448        # Normal unique_id is with cell_id
1449        data = data.assign(
1450            unique_id=data["slide_id"]
1451            + "_"
1452            + data["frame_id"].astype(str)
1453            + "_"
1454            + data["cell_id"].astype(str)
1455        )
1456        data = data.reset_index(drop=True)
1457        # All columns up to "slide_id" are features; drop the "slide_id"
1458        features = data.loc[:, :"slide_id"].iloc[:, :-1]
1459        data = data.loc[:, "slide_id":]
1460        # Grab the info columns
1461        info = data[["slide_id", "frame_id", "cellx", "celly"]]
1462        info.columns = ["slide_id", "tile", "x", "y"]
1463        info = info.assign(roi=0)  # OCULAR only works on 1 ROI, as far as we know
1464        info = info[["slide_id", "tile", "roi", "x", "y"]]
1465        # Metadata has duplicate columns for later convenience
1466        metadata = data
1467        # Certain columns tend to be problematic with mixed data formats...
1468        for col in ["TRITC", "CY5", "FITC"]:
1469            if col in metadata:
1470                labels = {
1471                    "False": False,
1472                    "True": True,
1473                    "FALSE": False,
1474                    "TRUE": True,
1475                    False: False,
1476                    True: True,
1477                }
1478                metadata[col] = metadata[col].map(labels).astype(bool)
1479        for col in ["catalogue_id", "catalogue_distance", "clust", "hcpc"]:
1480            if col in metadata:
1481                metadata[col] = metadata[col].fillna(-1).astype(int)
1482        info["tile"] = info["tile"] - 1  # Convert to 0-based indexing
1483        return EventArray(info, metadata, features)
class Event:
 40class Event:
 41    """
 42    A class that represents a single event in a scan, making it easy to evaluate
 43    singular events. Required metadata is exposed as attributes, and optional
 44    metadata and features are stored as DataFrames.
 45    """
 46
 47    SCAN_TO_SLIDE_TRANSFORM = {
 48        # Axioscan zero is in the top-right corner instead of top-left
 49        Scan.Type.AXIOSCAN7: np.array(
 50            [
 51                [1, 0, 75000],
 52                [0, 1, 0],
 53                [0, 0, 1],
 54            ]
 55        ),
 56        # BZScanner coordinates are a special kind of messed up:
 57        # - The slide is upside-down.
 58        # - The slide is oriented vertically, with the barcode at the bottom.
 59        # - Tiles are numbered from the top-right
 60        Scan.Type.BZSCANNER: np.array(
 61            [
 62                [0, -1, 75000],
 63                [-1, 0, 25000],
 64                [0, 0, 1],
 65            ]
 66        ),
 67    }
 68    """
 69    Homogeneous transformation matrices for converting between scanner and slide
 70    coordinates. The matrices are 3x3, with the final column representing the
 71    translation in micrometers (um). For more information, see 
 72    [affine transformations](https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations).
 73    
 74    Transformations are nominal, and accuracy is not guaranteed; this is due to 
 75    imperfections in slides and alignment in the scanners. Units are in micrometers.
 76    """
 77
 78    def __init__(
 79        self,
 80        tile: Tile,
 81        x: int,
 82        y: int,
 83        metadata: pd.Series = None,
 84        features: pd.Series = None,
 85    ):
 86        self.tile = tile
 87        self.x = int(x)
 88        self.y = int(y)
 89        self.metadata = metadata
 90        self.features = features
 91
 92    def __repr__(self) -> str:
 93        return f"{self.tile}-{self.x}-{self.y}"
 94
 95    def __eq__(self, other) -> bool:
 96        return self.__repr__() == other.__repr__()
 97
 98    def __lt__(self, other):
 99        return self.__repr__() < other.__repr__()
100
101    def get_scan_position(self) -> tuple[float, float]:
102        """
103        Get the position of the event in the scanner's coordinate frame.
104        :return: the scan position of the event in micrometers (um).
105        """
106        # Get overall pixel position
107        real_tile_height, real_tile_width = self.tile.scan.get_image_size()
108        pixel_x = self.x + (real_tile_width * self.tile.x)
109        pixel_y = self.y + (real_tile_height * self.tile.y)
110        # Convert to micrometers
111        x_um = pixel_x * self.tile.scan.pixel_size_um
112        y_um = pixel_y * self.tile.scan.pixel_size_um
113        # Add the scan's origin in the scanner frame
114        x_um += self.tile.scan.roi[self.tile.n_roi].origin_x_um
115        y_um += self.tile.scan.roi[self.tile.n_roi].origin_y_um
116        return x_um, y_um
117
118    def get_slide_position(self) -> tuple[float, float]:
119        """
120        Get the slide position of the event in micrometers (um).
121        :return: the slide position of the event.
122        """
123        # Turn scan_position into a 3x1 vector
124        scan_position = self.get_scan_position()
125        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])
126
127        # Multiply by the appropriate homogeneous matrix
128        if self.tile.scan.scanner_id.startswith(self.tile.scan.Type.AXIOSCAN7.value):
129            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.AXIOSCAN7]
130        elif self.tile.scan.scanner_id.startswith(self.tile.scan.Type.BZSCANNER.value):
131            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.BZSCANNER]
132        else:
133            raise ValueError(f"Scanner type {self.tile.scan.scanner_id} not supported.")
134        slide_position = np.matmul(transform, scan_position)
135        return float(slide_position[0][0]), float(slide_position[1][0])
136
137    def crop(
138        self, images: Iterable[np.ndarray], crop_size: int = 100, in_pixels: bool = True
139    ) -> list[np.ndarray]:
140        """
141        Crop the event from the provided frame images. Use if you have already gotten
142        frame images; useful for cropping multiple events from the same frame image.
143        :param images: the frame images.
144        :param crop_size: the square size of the image crop to get for this event.
145        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
146        :return: crop_size x crop_size crops of the event in the provided frames. If
147        the event is too close to the edge, the crop is zero-padded and not centered.
148        """
149        # Convert a crop size in micrometers to pixels
150        if not in_pixels:
151            crop_size = round(crop_size / self.tile.scan.pixel_size_um)
152        image_height, image_width = 0, 0
153        for image in images:
154            if image_height == 0 and image_width == 0:
155                image_height, image_width = image.shape
156            else:
157                if image_height != image.shape[0] or image_width != image.shape[1]:
158                    raise ValueError("All images must be the same size")
159        if image_height == 0 or image_width == 0:
160            raise ValueError("No images provided")
161
162        # Find the crop bounds
163        bounds = [
164            self.x - (crop_size // 2) + 1,
165            self.y - (crop_size // 2) + 1,
166            self.x + math.ceil(crop_size / 2) + 1,
167            self.y + math.ceil(crop_size / 2) + 1,
168        ]
169        # Determine how much the bounds violate the image size
170        displacements = [
171            max(0, -bounds[0]),
172            max(0, -bounds[1]),
173            max(0, bounds[2] - image_width),
174            max(0, bounds[3] - image_height),
175        ]
176        # Cap off the bounds
177        bounds = [
178            max(0, bounds[0]),
179            max(0, bounds[1]),
180            min(image_width, bounds[2]),
181            min(image_height, bounds[3]),
182        ]
183
184        # Crop the images
185        crops = []
186        for image in images:
187            # Create a blank image of the right size
188            crop = np.zeros((crop_size, crop_size), dtype=image.dtype)
189
190            # Insert the cropped image into the blank image, leaving a black buffer
191            # around the edges if the crop would go beyond the original image bounds
192            crop[
193                displacements[1] : crop_size - displacements[3],
194                displacements[0] : crop_size - displacements[2],
195            ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]]
196            crops.append(crop)
197        return crops
198
199    def get_crops(
200        self,
201        crop_size: int = 100,
202        in_pixels: bool = True,
203        input_path: str = None,
204        channels: Iterable[int | str] = None,
205        apply_gain: bool | Iterable[bool] = True,
206    ) -> list[np.ndarray]:
207        """
208        Gets the frame images for this event and then crops the event from the images.
209        Convenient for retrieving a single event's crops, but less efficient when
210        retrieving multiple events from the same tile as it will reread the images.
211        :param crop_size: the square size of the image crop to get for this event.
212        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
213        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
214        :param channels: the channels to extract images for. Defaults to all channels.
215        :param apply_gain: whether to apply scanner-calculated gain to the images, if
216        not already applied. If a list, matches the channels.
217        :return: a list of cropped images from the scan in the order of the channels.
218        """
219        # This function validates channels
220        frames = Frame.get_frames(self.tile, channels)
221        # Convert individual inputs to lists of appropriate length
222        if isinstance(apply_gain, bool):
223            apply_gain = [apply_gain] * len(frames)
224        images = [f.get_image(input_path, a) for f, a in zip(frames, apply_gain)]
225        return self.crop(images, crop_size, in_pixels)
226
227    def save_crops(
228        self,
229        crops: Sequence[np.ndarray],
230        output_path: str,
231        labels: Sequence[str],
232        ext: str = "auto",
233    ):
234        """
235        Save the crops to image files.
236        :param crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or
237        grayscale if 1 channel [h, w] or [h, w, 1].
238        :param labels: the labels to append to the file name, usually the channel names
239        associated with each crop.
240        :param output_path: the folder to save the crops to. Will make if needed.
241        :param ext: the file extension to save the crops as. Defaults to "auto", which
242        will save as .tif for grayscale images and .jpg for RGB images.
243        :return: None
244        """
245        if len(crops) != len(labels):
246            raise ValueError("Crops and labels must be the same length")
247
248        if csi_images is None or imageio is None:
249            raise ModuleNotFoundError(
250                "imageio libraries not installed! "
251                "run `pip install csi_images[imageio]` to resolve."
252            )
253
254        os.makedirs(output_path, exist_ok=True)
255
256        for crop, label in zip(crops, labels):
257            if ext == "auto":
258                if len(crop.shape) == 2 or crop.shape[2] == 1:
259                    file_extension = ".tif"
260                elif crop.shape[2] == 3:
261                    file_extension = ".jpg"
262                else:
263                    warnings.warn(
264                        f"Image shape {crop.shape} not recognized; saving as .tif"
265                    )
266                    file_extension = ".tif"
267            else:
268                file_extension = ext
269            file = os.path.join(output_path, f"{self}-{label}{file_extension}")
270            # TODO: add more file types here
271            if file_extension == ".tif":
272                imageio.imwrite(file, crop, compression="deflate")
273            elif file_extension in [".jpg", ".jpeg"]:
274                crop = csi_images.scale_bit_depth(crop, np.uint8)
275                imageio.imwrite(file, crop, quality=80)
276            else:
277                imageio.imwrite(file, crop)
278
279    def load_crops(
280        self, input_path: str, labels: list[str] = None
281    ) -> dict[str, np.ndarray]:
282        """
283        Loads previously saved crop files from a folder.
284        :param input_path: folder containing crop files.
285        :param labels: optional label filter, will only return crops with these labels.
286        :return: a dict mapping labels to the loaded crops.
287        """
288        crops = {}
289        for file in glob.glob(os.path.join(input_path, f"{self}-*")):
290            label = os.path.splitext(os.path.basename(file))[0].split("-")[-1]
291            # Skip if we have labels to target
292            if labels is not None and label not in labels:
293                continue
294            crops[label] = imageio.imread(file)
295        return crops
296
297    def get_montage_channels(
298        self,
299        channels: Sequence[int | str] | None,
300        composites: dict[int | str, tuple[float, float, float]] | None,
301    ) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]:
302        """
303        Get the channel indices for the montage from the event's tile.
304        :param channels: channel indices or names for grayscale channels
305        :param composites: dictionary of channel indices or names and RGB values
306        :return: (1) channel indices to retrieve,
307                 (2) relative grayscale channel indices, and
308                 (3) composite channel indices and RGB values.
309        """
310        if channels is None:
311            channels = list(range(len(self.tile.scan.channels)))
312        if (len(channels) == 0) and (composites is None or len(composites) == 0):
313            raise ValueError("Must provide at least one channel type to montage")
314
315        channels_to_get = []
316
317        # Build the list of channels to retrieve
318        if channels is not None:
319            if isinstance(channels[0], str):
320                channels = self.tile.scan.get_channel_indices(channels)
321            channels_to_get += channels
322            order = list(range(len(channels)))  # Always the first n channels
323        else:
324            order = None
325
326        if composites is not None:
327            relative_composites = {}  # Relative indices for retrieved channels
328            # Convert to scan indices
329            rgb_channels = list(composites.keys())
330            if isinstance(rgb_channels[0], str):
331                rgb_channels = self.tile.scan.get_channel_indices(rgb_channels)
332            # Find the index or add to the end
333            for channel, rgb in zip(rgb_channels, composites.values()):
334                if channel not in channels_to_get:
335                    channels_to_get.append(channel)
336                    relative_composites[channel] = rgb
337                else:
338                    relative_composites[channels_to_get.index(channel)] = rgb
339        else:
340            relative_composites = None
341
342        return channels_to_get, order, relative_composites
343
344    def get_montage(
345        self,
346        channels: Sequence[int | str] = None,
347        composites: dict[int | str, tuple[float, float, float]] = None,
348        mask: np.ndarray[np.uint8] = None,
349        labels: Sequence[str] = None,
350        crop_size: int = 100,
351        in_pixels: bool = True,
352        input_path: str = None,
353        apply_gain: bool = True,
354        **kwargs,
355    ) -> np.ndarray:
356        """
357        Convenience function for getting frame images and creating a montage. Mirrors
358        csi_images.make_montage(). Convenient for a single event's montage, but less
359        efficient when used for multiple events from the same tile.
360        :param channels: the channels to use for black-and-white montages.
361        :param composites: dictionary of indices and RGB tuples for a composite.
362        :param mask: a mask to apply to the montage. Must be the same size as the crop.
363        :param crop_size: the square size of the image crop to get for this event.
364        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
365        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
366        :param apply_gain: whether to apply scanner-calculated gain to the images, if
367        not already applied. If a list, matches the channels.
368        :param kwargs: montage options. See csi_images.make_montage() for more details.
369        :return: numpy array representing the montage.
370        """
371        channels, order, composites = self.get_montage_channels(channels, composites)
372        images = self.get_crops(crop_size, in_pixels, input_path, channels, apply_gain)
373        return csi_images.make_montage(
374            images, order, composites, mask, labels, **kwargs
375        )
376
377    def save_montage(
378        self,
379        montage: np.ndarray,
380        output_path: str,
381        ocular_names: bool = False,
382        tag: str = "",
383        file_extension: str = ".jpeg",
384        **kwargs,
385    ):
386        """
387        Save the montage as a JPEG image with a set name.
388        :param montage: the montage to save.
389        :param output_path: the folder to save the montage in. Will make if needed.
390        :param ocular_names: whether to use the OCULAR naming convention.
391        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
392        :param file_extension: the file extension to save the montage as. Defaults to .jpeg.
393        :param kwargs: additional arguments to pass to imageio.imwrite().
394        :return: None
395        """
396        if csi_images is None or imageio is None:
397            raise ModuleNotFoundError(
398                "imageio libraries not installed! "
399                "run `pip install csi_images[imageio]` to resolve."
400            )
401
402        montage = csi_images.scale_bit_depth(montage, np.uint8)
403
404        if not file_extension.startswith("."):
405            file_extension = f".{file_extension}"
406
407        if ocular_names:
408            if "cell_id" not in self.metadata.index:
409                raise ValueError(
410                    "Event metadata must include 'cell_id' for OCULAR naming."
411                )
412            file = f"{self.tile.n}-{self.metadata['cell_id']}-{self.x}-{self.y}{file_extension}"
413        else:
414            file = f"{self}{tag}{file_extension}"
415
416        os.makedirs(output_path, exist_ok=True)
417        imageio.imwrite(os.path.join(output_path, file), montage, **kwargs)
418
419    def load_montage(self, input_path: str, tag: str = "") -> np.ndarray:
420        """
421        Loads the montage from a file saved by Event.save_montage.
422        :param input_path: the path to the folder where the montage was saved.
423        :param tag: a string to add to the file name, before the extension.
424        :return: numpy array of the loaded montage image.
425        """
426        file = f"{self}{tag}.jpeg"
427        return imageio.imread(os.path.join(input_path, file))
428
429    @classmethod
430    def get_many_crops(
431        cls,
432        events: Sequence[Self],
433        crop_size: int | Sequence[int] = 100,
434        in_pixels: bool = True,
435        input_path: str | Sequence[str] = None,
436        channels: Sequence[int | str] = None,
437        apply_gain: bool | Sequence[bool] = True,
438    ) -> list[list[np.ndarray]]:
439        """
440        Get the crops for a list of events, ensuring that there is no wasteful reading
441        of the same tile multiple times. This function is more efficient than calling
442        get_crops() for each event.
443        :param events: the events to get crops for.
444        :param crop_size: the square size of the image crop to get for each event.
445                          Defaults to 100; may be a per-event sequence.
446        :param in_pixels: whether the crop size is in pixels or micrometers.
447                          Defaults to pixels.
448        :param input_path: the path to the input images. Will only work for lists of events
449                           from the same scan. Defaults to None (uses the scan's path).
450        :param channels: the channels to extract images for. Defaults to all channels.
451        :param apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True.
452                           Can be supplied as a list to apply gain to individual channels.
453        :return: a list of lists of cropped images for each event.
454        """
455        if len(events) == 0:
456            return []
457        # Adapt singular inputs to lists of appropriate length
458        if isinstance(crop_size, int):
459            crop_size = [crop_size] * len(events)
460        if input_path is None or isinstance(input_path, str):
461            input_path = [input_path] * len(events)
462
463        # Get the order of the events when sorted by slide/tile
464        order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
465
466        # Allocate the list to size
467        crops = [[]] * len(events)
468        last_tile = None
469        images = None  # Holds large numpy arrays, so expensive to compare
470        # Iterate through in slide/tile sorted order
471        for i in order:
472            if last_tile != events[i].tile:
473                # Gather the frame images, preserving them for the next event
474                frames = Frame.get_frames(events[i].tile, channels)
475                if isinstance(apply_gain, bool):
476                    apply = [apply_gain] * len(frames)
477                else:
478                    apply = apply_gain
479                images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)]
480                last_tile = events[i].tile
481            # Use the frame images to crop the event images
482            crops[i] = events[i].crop(images, crop_size[i], in_pixels)
483        return crops
484
485    @classmethod
486    def get_many_montages(
487        cls,
488        events: Sequence[Self],
489        channels: Sequence[int | str] = None,
490        composites: dict[int | str, tuple[float, float, float]] = None,
491        masks: Sequence[np.ndarray[np.uint8]] = None,
492        labels: Sequence[str] = None,
493        crop_size: int = 100,
494        in_pixels: bool = True,
495        input_path: str = None,
496        apply_gain: bool | Iterable[bool] = True,
497        **kwargs,
498    ) -> list[np.ndarray]:
499        """
500        Convenience function for get_montage(), but for a list of events. More efficient
501        than get_montage() when working with multiple events from the same tile.
502        :param events: a list of Event objects.
503        :param channels: the channels to extract images for. Defaults to all channels.
504        :param composites: dictionary of indices and RGB tuples for a composite.
505        :param masks: a list of masks to apply to the montages. Must be the same size as the crops.
506        :param labels: the labels to subtitle montage images, usually the channel names
507        :param crop_size: the square size of the image crop to get for this event.
508        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
509        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
510        :param apply_gain: whether to apply scanner-calculated gain to the images, if
511        not already applied. If a list, matches the channels.
512        :param kwargs: montage options. See csi_images.make_montage() for more details.
513        :return: a list of numpy arrays representing the montages.
514        """
515        if len(events) == 0:
516            return []
517        # Adapt singular inputs to lists of appropriate length
518        if isinstance(crop_size, int):
519            crop_size = [crop_size] * len(events)
520        if input_path is None or isinstance(input_path, str):
521            input_path = [input_path] * len(events)
522        if masks is None or isinstance(masks, np.ndarray):
523            masks = [masks] * len(events)
524
525        # Get the order of the events when sorted by slide/tile
526        event_order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
527
528        # Allocate the list to size
529        montages = [np.empty(0)] * len(events)
530        # Placeholder variables to avoid rereading the same tile
531        images = None  # Holds large numpy arrays, so expensive to compare
532        order = None
533        rel_composites = None
534        last_tile = None
535        # Iterate through in slide/tile sorted order
536        for i in event_order:
537            if last_tile != events[i].tile:
538                channels_to_get, order, rel_composites = events[i].get_montage_channels(
539                    channels, composites
540                )
541                # Gather the frame images, preserving them for the next event
542                frames = Frame.get_frames(events[i].tile, channels_to_get)
543                if isinstance(apply_gain, bool):
544                    apply = [apply_gain] * len(frames)
545                else:
546                    apply = apply_gain
547                images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)]
548                last_tile = events[i].tile
549            # Use the frame images to crop the event images and make montages
550            crops = events[i].crop(images, crop_size[i], in_pixels)
551            montages[i] = csi_images.make_montage(
552                crops, order, rel_composites, masks[i], labels, **kwargs
553            )
554
555        return montages
556
557    @classmethod
558    def get_and_save_many_crops(
559        cls,
560        events: list[Self],
561        output_path: str,
562        labels: Sequence[str],
563        ext: str = "auto",
564        additional_gain: Sequence[float] = None,
565        **kwargs,
566    ) -> None:
567        """
568        Get and save the crops for a list of events, ensuring that there is no wasteful
569        reading and limiting the image data in memory to 1 tile at a time. This function
570        is more efficient than chaining get_crops() and save_crops() for each event or
571        get_many_crops() and then save_crops().
572        :param events: list of events to get, crop, and save.
573        :param output_path: the folder to save the crops in. Will make if needed.
574        :param labels: the labels to save the crops with. See save_crops().
575        :param ext: the file extension to save the crops as. See save_crops().
576        :param additional_gain: additional gain to apply to the crops. If not None, must
577        match the length of the number of crop channels.
578        :param kwargs: see get_many_crops() for more parameters.
579        :return:
580        """
581        unique_tiles = set([event.tile for event in events])
582
583        for tile in unique_tiles:
584            # Get one tile's worth of event crops
585            tile_events = [e for e in events if e.tile == tile]
586            crops_list = cls.get_many_crops(tile_events, **kwargs)
587            for event, crops in zip(tile_events, crops_list):
588                # Apply any additional gains
589                if additional_gain is not None:
590                    crops = [gain * crop for gain, crop in zip(additional_gain, crops)]
591                event.save_crops(crops, output_path, labels, ext)
592
593    @classmethod
594    def get_and_save_many_montages(
595        cls,
596        events: list[Self],
597        output_path: str,
598        ocular_names: bool = False,
599        tag: str = "",
600        **kwargs,
601    ) -> None:
602        """
603        Save montages of the events to image files.
604        :param events: the events to get, montage, and save.
605        :param output_path: the folder to save the montages to. Will make if needed.
606        :param ocular_names: whether to use the OCULAR naming convention.
607        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
608        :param kwargs: see get_many_montages() for more parameters.
609        """
610        unique_tiles = set([event.tile for event in events])
611
612        for tile in unique_tiles:
613            # Get one tile's worth of event crops
614            tile_events = [e for e in events if e.tile == tile]
615            montages = cls.get_many_montages(tile_events, **kwargs)
616            for event, montage in zip(tile_events, montages):
617                event.save_montage(montage, output_path, ocular_names, tag)

A class that represents a single event in a scan, making it easy to evaluate singular events. Required metadata is exposed as attributes, and optional metadata and features are stored as DataFrames.

Event( tile: csi_images.csi_tiles.Tile, x: int, y: int, metadata: pandas.core.series.Series = None, features: pandas.core.series.Series = None)
78    def __init__(
79        self,
80        tile: Tile,
81        x: int,
82        y: int,
83        metadata: pd.Series = None,
84        features: pd.Series = None,
85    ):
86        self.tile = tile
87        self.x = int(x)
88        self.y = int(y)
89        self.metadata = metadata
90        self.features = features
SCAN_TO_SLIDE_TRANSFORM = {<Type.AXIOSCAN7: 'axioscan7'>: array([[1, 0, 75000], [0, 1, 0], [0, 0, 1]]), <Type.BZSCANNER: 'bzscanner'>: array([[0, -1, 75000], [-1, 0, 25000], [0, 0, 1]])}

Homogeneous transformation matrices for converting between scanner and slide coordinates. The matrices are 3x3, with the final column representing the translation in micrometers (um). For more information, see affine transformations.

Transformations are nominal, and accuracy is not guaranteed; this is due to imperfections in slides and alignment in the scanners. Units are in micrometers.
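
As a sketch of how these matrices are applied (mirroring get_slide_position() below): the scanner position in micrometers is lifted to a homogeneous 3x1 vector and multiplied by the 3x3 matrix. The coordinates here are arbitrary sample values:

    import numpy as np

    # Axioscan 7 matrix from the table above: translate x by 75,000 um
    transform = np.array([
        [1, 0, 75000],
        [0, 1, 0],
        [0, 0, 1],
    ])

    scan_x_um, scan_y_um = 12000.0, 3500.0  # arbitrary scanner-frame position
    scan_position = np.array([[scan_x_um], [scan_y_um], [1]])  # homogeneous column vector

    slide_position = transform @ scan_position
    print(float(slide_position[0][0]), float(slide_position[1][0]))  # 87000.0 3500.0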

tile
x
y
metadata
features
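
A minimal construction sketch; tile is assumed to be an already-built csi_images.csi_tiles.Tile (see the csi_tiles documentation), and the metadata/features values are placeholders:

    import pandas as pd
    from csi_images.csi_events import Event

    # tile is assumed to exist already (a csi_images.csi_tiles.Tile for the scan)
    event = Event(
        tile,
        x=512,                                     # pixel position within the frame
        y=384,
        metadata=pd.Series({"cell_id": 7}),        # optional per-event metadata
        features=pd.Series({"dapi_mean": 123.4}),  # optional per-event features
    )
    print(event)  # repr is "<tile>-<x>-<y>", also used in crop/montage file names
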
def get_scan_position(self) -> tuple[float, float]:
101    def get_scan_position(self) -> tuple[float, float]:
102        """
103        Get the position of the event in the scanner's coordinate frame.
104        :return: the scan position of the event in micrometers (um).
105        """
106        # Get overall pixel position
107        real_tile_height, real_tile_width = self.tile.scan.get_image_size()
108        pixel_x = self.x + (real_tile_width * self.tile.x)
109        pixel_y = self.y + (real_tile_height * self.tile.y)
110        # Convert to micrometers
111        x_um = pixel_x * self.tile.scan.pixel_size_um
112        y_um = pixel_y * self.tile.scan.pixel_size_um
113        # Add the scan's origin in the scanner frame
114        x_um += self.tile.scan.roi[self.tile.n_roi].origin_x_um
115        y_um += self.tile.scan.roi[self.tile.n_roi].origin_y_um
116        return x_um, y_um

Get the position of the event in the scanner's coordinate frame.

Returns

the scan position of the event in micrometers (um).
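
A usage sketch, assuming event is an Event (as above) whose scan metadata (image size, pixel_size_um, and ROI origin) is available:

    # Tile-relative pixel position converted to scanner-frame micrometers
    x_um, y_um = event.get_scan_position()
    print(f"scanner frame: ({x_um:.1f}, {y_um:.1f}) um")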

def get_slide_position(self) -> tuple[float, float]:
118    def get_slide_position(self) -> tuple[float, float]:
119        """
120        Get the slide position of the event in micrometers (um).
121        :return: the slide position of the event.
122        """
123        # Turn scan_position into a 3x1 vector
124        scan_position = self.get_scan_position()
125        scan_position = np.array([[scan_position[0]], [scan_position[1]], [1]])
126
127        # Multiply by the appropriate homogeneous matrix
128        if self.tile.scan.scanner_id.startswith(self.tile.scan.Type.AXIOSCAN7.value):
129            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.AXIOSCAN7]
130        elif self.tile.scan.scanner_id.startswith(self.tile.scan.Type.BZSCANNER.value):
131            transform = self.SCAN_TO_SLIDE_TRANSFORM[self.tile.scan.Type.BZSCANNER]
132        else:
133            raise ValueError(f"Scanner type {self.tile.scan.scanner_id} not supported.")
134        slide_position = np.matmul(transform, scan_position)
135        return float(slide_position[0][0]), float(slide_position[1][0])

Get the slide position of the event in micrometers (um).

Returns

the slide position of the event.
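
A follow-on usage sketch; the scanner type is inferred from the scan's scanner_id, and unsupported scanners raise a ValueError:

    # Scanner-frame position mapped through SCAN_TO_SLIDE_TRANSFORM
    slide_x_um, slide_y_um = event.get_slide_position()
    print(f"slide frame: ({slide_x_um:.1f}, {slide_y_um:.1f}) um")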

def crop( self, images: Iterable[numpy.ndarray], crop_size: int = 100, in_pixels: bool = True) -> list[numpy.ndarray]:
137    def crop(
138        self, images: Iterable[np.ndarray], crop_size: int = 100, in_pixels: bool = True
139    ) -> list[np.ndarray]:
140        """
141        Crop the event from the provided frame images. Use if you have already gotten
142        frame images; useful for cropping multiple events from the same frame image.
143        :param images: the frame images.
144        :param crop_size: the square size of the image crop to get for this event.
145        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
146        :return: crop_size x crop_size crops of the event in the provided frames. If
147        the event is too close to the edge, the crop is zero-padded and not centered.
148        """
149        # Convert a crop size in micrometers to pixels
150        if not in_pixels:
151            crop_size = round(crop_size / self.tile.scan.pixel_size_um)
152        image_height, image_width = 0, 0
153        for image in images:
154            if image_height == 0 and image_width == 0:
155                image_height, image_width = image.shape
156            else:
157                if image_height != image.shape[0] or image_width != image.shape[1]:
158                    raise ValueError("All images must be the same size")
159        if image_height == 0 or image_width == 0:
160            raise ValueError("No images provided")
161
162        # Find the crop bounds
163        bounds = [
164            self.x - (crop_size // 2) + 1,
165            self.y - (crop_size // 2) + 1,
166            self.x + math.ceil(crop_size / 2) + 1,
167            self.y + math.ceil(crop_size / 2) + 1,
168        ]
169        # Determine how much the bounds violate the image size
170        displacements = [
171            max(0, -bounds[0]),
172            max(0, -bounds[1]),
173            max(0, bounds[2] - image_width),
174            max(0, bounds[3] - image_height),
175        ]
176        # Cap off the bounds
177        bounds = [
178            max(0, bounds[0]),
179            max(0, bounds[1]),
180            min(image_width, bounds[2]),
181            min(image_height, bounds[3]),
182        ]
183
184        # Crop the images
185        crops = []
186        for image in images:
187            # Create a blank image of the right size
188            crop = np.zeros((crop_size, crop_size), dtype=image.dtype)
189
190            # Insert the cropped image into the blank image, leaving a black buffer
191            # around the edges if the crop would go beyond the original image bounds
192            crop[
193                displacements[1] : crop_size - displacements[3],
194                displacements[0] : crop_size - displacements[2],
195            ] = image[bounds[1] : bounds[3], bounds[0] : bounds[2]]
196            crops.append(crop)
197        return crops

Crop the event from the provided frame images. Use if you have already gotten frame images; useful for cropping multiple events from the same frame image.

Parameters
  • images: the frame images.
  • crop_size: the square size of the image crop to get for this event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
Returns

crop_size x crop_size crops of the event in the provided frames. If the event is too close to the edge, the crop is zero-padded and not centered.
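
A sketch of cropping several events from one set of frame images; the images below are synthetic zero arrays standing in for real frames, and events is assumed to be a list of Events from the same tile:

    import numpy as np

    # Stand-in frame images (one per channel); real ones come from Frame.get_image()
    images = [np.zeros((1000, 1200), dtype=np.uint16) for _ in range(4)]

    for event in events:  # events assumed to share the tile the images came from
        crops = event.crop(images, crop_size=100, in_pixels=True)
        # crops: list of 100x100 arrays, one per input image, zero-padded at the edges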

def get_crops( self, crop_size: int = 100, in_pixels: bool = True, input_path: str = None, channels: Iterable[int | str] = None, apply_gain: Union[bool, Iterable[bool]] = True) -> list[numpy.ndarray]:
199    def get_crops(
200        self,
201        crop_size: int = 100,
202        in_pixels: bool = True,
203        input_path: str = None,
204        channels: Iterable[int | str] = None,
205        apply_gain: bool | Iterable[bool] = True,
206    ) -> list[np.ndarray]:
207        """
208        Gets the frame images for this event and then crops the event from the images.
209        Convenient for retrieving a single event's crops, but less efficient when
210        retrieving multiple events from the same tile as it will reread the images.
211        :param crop_size: the square size of the image crop to get for this event.
212        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
213        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
214        :param channels: the channels to extract images for. Defaults to all channels.
215        :param apply_gain: whether to apply scanner-calculated gain to the images, if
216        not already applied. If a list, matches the channels.
217        :return: a list of cropped images from the scan in the order of the channels.
218        """
219        # This function validates channels
220        frames = Frame.get_frames(self.tile, channels)
221        # Convert individual inputs to lists of appropriate length
222        if isinstance(apply_gain, bool):
223            apply_gain = [apply_gain] * len(frames)
224        images = [f.get_image(input_path, a) for f, a in zip(frames, apply_gain)]
225        return self.crop(images, crop_size, in_pixels)

Gets the frame images for this event and then crops the event from the images. Convenient for retrieving a single event's crops, but less efficient when retrieving multiple events from the same tile as it will reread the images.

Parameters
  • crop_size: the square size of the image crop to get for this event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
  • input_path: the path to the input images. Defaults to None (uses the scan's path).
  • channels: the channels to extract images for. Defaults to all channels.
  • apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. If a list, matches the channels.
Returns

a list of cropped images from the scan in the order of the channels.
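
A usage sketch; the channel name "DAPI" is a placeholder and must match one of the scan's actual channel names:

    # 100 px square crops across all channels, with scanner gain applied
    crops = event.get_crops()

    # A 50 um square crop of a single named channel, without applying gain
    dapi_crop = event.get_crops(
        crop_size=50,
        in_pixels=False,
        channels=["DAPI"],   # placeholder channel name
        apply_gain=False,
    )[0]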

def save_crops( self, crops: Sequence[numpy.ndarray], output_path: str, labels: Sequence[str], ext: str = 'auto'):
227    def save_crops(
228        self,
229        crops: Sequence[np.ndarray],
230        output_path: str,
231        labels: Sequence[str],
232        ext: str = "auto",
233    ):
234        """
235        Save the crops to image files.
236        :param crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or
237        grayscale if 1 channel [h, w] or [h, w, 1].
238        :param labels: the labels to append to the file name, usually the channel names
239        associated with each crop.
240        :param output_path: the folder to save the crops to. Will make if needed.
241        :param ext: the file extension to save the crops as. Defaults to "auto", which
242        will save as .tif for grayscale images and .jpg for RGB images.
243        :return: None
244        """
245        if len(crops) != len(labels):
246            raise ValueError("Crops and labels must be the same length")
247
248        if csi_images is None or imageio is None:
249            raise ModuleNotFoundError(
250                "imageio libraries not installed! "
251                "run `pip install csi_images[imageio]` to resolve."
252            )
253
254        os.makedirs(output_path, exist_ok=True)
255
256        for crop, label in zip(crops, labels):
257            if ext == "auto":
258                if len(crop.shape) == 2 or crop.shape[2] == 1:
259                    file_extension = ".tif"
260                elif crop.shape[2] == 3:
261                    file_extension = ".jpg"
262                else:
263                    warnings.warn(
264                        f"Image shape {crop.shape} not recognized; saving as .tif"
265                    )
266                    file_extension = ".tif"
267            else:
268                file_extension = ext
269            file = os.path.join(output_path, f"{self}-{label}{file_extension}")
270            # TODO: add more file types here
271            if file_extension == ".tif":
272                imageio.imwrite(file, crop, compression="deflate")
273            elif file_extension in [".jpg", ".jpeg"]:
274                crop = csi_images.scale_bit_depth(crop, np.uint8)
275                imageio.imwrite(file, crop, quality=80)
276            else:
277                imageio.imwrite(file, crop)

Save the crops to image files.

Parameters
  • crops: the crops to save. Will save as RGB if 3 channel [h, w, 3] or grayscale if 1 channel [h, w] or [h, w, 1].
  • labels: the labels to append to the file name, usually the channel names associated with each crop.
  • output_path: the folder to save the crops to. Will make if needed.
  • ext: the file extension to save the crops as. Defaults to "auto", which will save as .tif for grayscale images and .jpg for RGB images.
Returns

None
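
A usage sketch continuing from get_crops(); the channel labels are placeholders and should match the order of the crops:

    crops = event.get_crops(channels=["DAPI", "TRITC", "CY5"])   # placeholder names
    event.save_crops(
        crops,
        output_path="out/crops",          # folder is created if needed
        labels=["DAPI", "TRITC", "CY5"],  # appended to the "<tile>-<x>-<y>-" file stem
        ext="auto",                       # .tif for grayscale, .jpg for RGB
    )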

def load_crops( self, input_path: str, labels: list[str] = None) -> dict[str, numpy.ndarray]:
279    def load_crops(
280        self, input_path: str, labels: list[str] = None
281    ) -> dict[str, np.ndarray]:
282        """
283        Loads previously saved crop files from a folder.
284        :param input_path: folder containing crop files.
285        :param labels: optional label filter, will only return crops with these labels.
286        :return: a dict mapping labels to the loaded crops.
287        """
288        crops = {}
289        for file in glob.glob(os.path.join(input_path, f"{self}-*")):
290            label = os.path.splitext(os.path.basename(file))[0].split("-")[-1]
291            # Skip if we have labels to target
292            if labels is not None and label not in labels:
293                continue
294            crops[label] = imageio.imread(file)
295        return crops

Loads previously saved crop files from a folder.

Parameters
  • input_path: folder containing crop files.
  • labels: optional label filter, will only return crops with these labels.
Returns

a dict mapping labels to the loaded crops.
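
A usage sketch reloading what save_crops() wrote above:

    crops_by_label = event.load_crops("out/crops")
    dapi = crops_by_label.get("DAPI")        # None if that label was never saved

    # Or filter to specific labels while loading
    subset = event.load_crops("out/crops", labels=["DAPI", "CY5"])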

def get_montage_channels( self, channels: Optional[Sequence[int | str]], composites: dict[int | str, tuple[float, float, float]] | None) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]:
297    def get_montage_channels(
298        self,
299        channels: Sequence[int | str] | None,
300        composites: dict[int | str, tuple[float, float, float]] | None,
301    ) -> tuple[list[int], list[int], dict[int, tuple[float, float, float]]]:
302        """
303        Get the channel indices for the montage from the event's tile.
304        :param channels: channel indices or names for grayscale channels
305        :param composites: dictionary of channel indices or names and RGB values
306        :return: (1) channel indices to retrieve,
307                 (2) relative grayscale channel indices, and
308                 (3) composite channel indices and RGB values.
309        """
310        if channels is None:
311            channels = list(range(len(self.tile.scan.channels)))
312        if (len(channels) == 0) and (composites is None or len(composites) == 0):
313            raise ValueError("Must provide at least one channel type to montage")
314
315        channels_to_get = []
316
317        # Build the list of channels to retrieve
318        if channels is not None:
319            if isinstance(channels[0], str):
320                channels = self.tile.scan.get_channel_indices(channels)
321            channels_to_get += channels
322            order = list(range(len(channels)))  # Always the first n channels
323        else:
324            order = None
325
326        if composites is not None:
327            relative_composites = {}  # Relative indices for retrieved channels
328            # Convert to scan indices
329            rgb_channels = list(composites.keys())
330            if isinstance(rgb_channels[0], str):
331                rgb_channels = self.tile.scan.get_channel_indices(rgb_channels)
332            # Find the index or add to the end
333            for channel, rgb in zip(rgb_channels, composites.values()):
334                if channel not in channels_to_get:
335                    channels_to_get.append(channel)
336                    relative_composites[channel] = rgb
337                else:
338                    relative_composites[channels_to_get.index(channel)] = rgb
339        else:
340            relative_composites = None
341
342        return channels_to_get, order, relative_composites

Get the channel indices for the montage from the event's tile.

Parameters
  • channels: channel indices or names for grayscale channels
  • composites: dictionary of channel indices or names and RGB values
Returns

(1) channel indices to retrieve, (2) relative grayscale channel indices, and (3) composite channel indices and RGB values.
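
A minimal sketch of how the channel arguments resolve, assuming `event` is an Event whose scan has channels named "DAPI", "TRITC", and "CY5" (hypothetical names):

    channels_to_get, order, rel_composites = event.get_montage_channels(
        channels=["DAPI"],                       # one grayscale panel
        composites={"TRITC": (1.0, 0.0, 0.0),    # red overlay
                    "CY5": (0.0, 0.0, 1.0)},     # blue overlay
    )
    # channels_to_get: scan channel indices to read (grayscale panels first)
    # order: relative indices of the grayscale panels within the retrieved images
    # rel_composites: relative image indices mapped to RGB tuples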

def get_montage( self, channels: Sequence[int | str] = None, composites: dict[int | str, tuple[float, float, float]] = None, mask: numpy.ndarray[numpy.uint8] = None, labels: Sequence[str] = None, crop_size: int = 100, in_pixels: bool = True, input_path: str = None, apply_gain: bool = True, **kwargs) -> numpy.ndarray:
344    def get_montage(
345        self,
346        channels: Sequence[int | str] = None,
347        composites: dict[int | str, tuple[float, float, float]] = None,
348        mask: np.ndarray[np.uint8] = None,
349        labels: Sequence[str] = None,
350        crop_size: int = 100,
351        in_pixels: bool = True,
352        input_path: str = None,
353        apply_gain: bool = True,
354        **kwargs,
355    ) -> np.ndarray:
356        """
357        Convenience function for getting frame images and creating a montage. Mirrors
358        csi_images.make_montage(). Convenient for a single event's montage, but less
359        efficient than get_many_montages() for multiple events from the same tile.
360        :param channels: the channels to use for black-and-white montages.
361        :param composites: dictionary of indices and RGB tuples for a composite.
362        :param mask: a mask to apply to the montage. Must be the same size as the crop.
363        :param crop_size: the square size of the image crop to get for this event.
364        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
365        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
366        :param apply_gain: whether to apply scanner-calculated gain to the images, if
367        not already applied. If a list, matches the channels.
368        :param kwargs: montage options. See csi_images.make_montage() for more details.
369        :return: numpy array representing the montage.
370        """
371        channels, order, composites = self.get_montage_channels(channels, composites)
372        images = self.get_crops(crop_size, in_pixels, input_path, channels, apply_gain)
373        return csi_images.make_montage(
374            images, order, composites, mask, labels, **kwargs
375        )

Convenience function for getting frame images and creating a montage. Mirrors csi_images.make_montage(). Convenient for a single event's montage, but less efficient than get_many_montages() for multiple events from the same tile.

Parameters
  • channels: the channels to use for black-and-white montages.
  • composites: dictionary of indices and RGB tuples for a composite.
  • mask: a mask to apply to the montage. Must be the same size as the crop.
  • labels: the labels to subtitle montage images, usually the channel names.
  • crop_size: the square size of the image crop to get for this event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
  • input_path: the path to the input images. Defaults to None (uses the scan's path).
  • apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. If a list, matches the channels.
  • kwargs: montage options. See csi_images.make_montage() for more details.
Returns

numpy array representing the montage.
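
A minimal sketch, assuming `event` is an Event and "DAPI"/"TRITC" are hypothetical channel names; layout options are forwarded to csi_images.make_montage():

    montage = event.get_montage(
        channels=["DAPI", "TRITC"],              # two grayscale panels
        composites={"DAPI": (0.0, 0.0, 1.0),     # blue in the composite panel
                    "TRITC": (1.0, 0.0, 0.0)},   # red in the composite panel
        crop_size=100,                           # 100 px square around the event
    )
    print(montage.shape, montage.dtype)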

def save_montage( self, montage: numpy.ndarray, output_path: str, ocular_names: bool = False, tag: str = '', file_extension: str = '.jpeg', **kwargs):
377    def save_montage(
378        self,
379        montage: np.ndarray,
380        output_path: str,
381        ocular_names: bool = False,
382        tag: str = "",
383        file_extension: str = ".jpeg",
384        **kwargs,
385    ):
386        """
387        Save the montage as a JPEG image with a set name.
388        :param montage: the montage to save.
389        :param output_path: the folder to save the montage in. Will make if needed.
390        :param ocular_names: whether to use the OCULAR naming convention.
391        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
392        :param file_extension: the file extension to save the montage as. Defaults to .jpeg.
393        :param kwargs: additional arguments to pass to imageio.imwrite().
394        :return: None
395        """
396        if csi_images is None or imageio is None:
397            raise ModuleNotFoundError(
398                "imageio libraries not installed! "
399                "run `pip install csi_images[imageio]` to resolve."
400            )
401
402        montage = csi_images.scale_bit_depth(montage, np.uint8)
403
404        if not file_extension.startswith("."):
405            file_extension = f".{file_extension}"
406
407        if ocular_names:
408            if "cell_id" not in self.metadata.index:
409                raise ValueError(
410                    "Event metadata must include 'cell_id' for OCULAR naming."
411                )
412            file = f"{self.tile.n}-{self.metadata['cell_id']}-{self.x}-{self.y}{file_extension}"
413        else:
414            file = f"{self}{tag}{file_extension}"
415
416        os.makedirs(output_path, exist_ok=True)
417        imageio.imwrite(os.path.join(output_path, file), montage, **kwargs)

Save the montage as a JPEG image with a set name.

Parameters
  • montage: the montage to save.
  • output_path: the folder to save the montage in. Will make if needed.
  • ocular_names: whether to use the OCULAR naming convention.
  • tag: a tag to append to the file name. Ignored if ocular_names is True.
  • file_extension: the file extension to save the montage as. Defaults to .jpeg.
  • kwargs: additional arguments to pass to imageio.imwrite().
Returns

None
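
A minimal sketch, assuming `montage` came from get_montage() and the output folder is hypothetical:

    # Default naming is "{event}{tag}{file_extension}" inside the output folder
    event.save_montage(montage, "/tmp/montages", tag="-qc")
    # OCULAR naming requires a "cell_id" entry in the event's metadata
    # event.save_montage(montage, "/tmp/montages", ocular_names=True)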

def load_montage(self, input_path: str, tag: str = '') -> numpy.ndarray:
419    def load_montage(self, input_path: str, tag: str = "") -> np.ndarray:
420        """
421        Loads the montage from a file saved by Event.save_montage.
422        :param input_path: the path to the folder where the montage was saved.
423        :param tag: a string to add to the file name, before the extension.
424        :return: the montage image as a numpy array.
425        """
426        file = f"{self}{tag}.jpeg"
427        return imageio.imread(os.path.join(input_path, file))

Loads the montage from a file saved by Event.save_montage.

Parameters
  • input_path: the path to the folder where the montage was saved.
  • tag: a string to add to the file name, before the extension.
Returns

the montage image as a numpy array.
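
A minimal sketch of the matching read-back, assuming the montage above was saved with the default ".jpeg" extension (the only extension this loader looks for) and the same tag:

    montage = event.load_montage("/tmp/montages", tag="-qc")
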
@classmethod
def get_many_crops( cls, events: Sequence[Self], crop_size: Union[int, Sequence[int]] = 100, in_pixels: bool = True, input_path: Union[str, Sequence[str]] = None, channels: Sequence[int | str] = None, apply_gain: Union[bool, Sequence[bool]] = True) -> list[list[numpy.ndarray]]:
429    @classmethod
430    def get_many_crops(
431        cls,
432        events: Sequence[Self],
433        crop_size: int | Sequence[int] = 100,
434        in_pixels: bool = True,
435        input_path: str | Sequence[str] = None,
436        channels: Sequence[int | str] = None,
437        apply_gain: bool | Sequence[bool] = True,
438    ) -> list[list[np.ndarray]]:
439        """
440        Get the crops for a list of events, ensuring that there is no wasteful reading
441        of the same tile multiple times. This function is more efficient than calling
442        get_crops() for each event.
443        :param events: the events to get crops for.
444        :param crop_size: the square size of the image crop to get for each event.
445                          Defaults to 100; can be a sequence to set a size per event.
446        :param in_pixels: whether the crop size is in pixels or micrometers.
447                          Defaults to pixels.
448        :param input_path: the path to the input images. Will only work for lists of events
449                           from the same scan. Defaults to None (uses the scan's path).
450        :param channels: the channels to extract images for. Defaults to all channels.
451        :param apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True.
452                           Can be supplied as a list to apply gain to individual channels.
453        :return: a list of lists of cropped images for each event.
454        """
455        if len(events) == 0:
456            return []
457        # Adapt singular inputs to lists of appropriate length
458        if isinstance(crop_size, int):
459            crop_size = [crop_size] * len(events)
460        if input_path is None or isinstance(input_path, str):
461            input_path = [input_path] * len(events)
462
463        # Get the order of the events when sorted by slide/tile
464        order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
465
466        # Allocate the list to size
467        crops = [[]] * len(events)
468        last_tile = None
469        images = None  # Holds large numpy arrays, so expensive to compare
470        # Iterate through in slide/tile sorted order
471        for i in order:
472            if last_tile != events[i].tile:
473                # Gather the frame images, preserving them for the next event
474                frames = Frame.get_frames(events[i].tile, channels)
475                if isinstance(apply_gain, bool):
476                    apply = [apply_gain] * len(frames)
477                else:
478                    apply = apply_gain
479                images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)]
480                last_tile = events[i].tile
481            # Use the frame images to crop the event images
482            crops[i] = events[i].crop(images, crop_size[i], in_pixels)
483        return crops

Get the crops for a list of events, ensuring that there is no wasteful reading of the same tile multiple times. This function is more efficient than calling get_crops() for each event.

Parameters
  • events: the events to get crops for.
  • crop_size: the square size of the image crop to get for each event. Defaults to 100; can be a sequence to set a size per event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
  • input_path: the path to the input images. Will only work for lists of events from the same scan. Defaults to None (uses the scan's path).
  • channels: the channels to extract images for. Defaults to all channels.
  • apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. Defaults to True. Can be supplied as a list to apply gain to individual channels.
Returns

a list of lists of cropped images for each event.
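
A minimal sketch, assuming `events` is a list of Event objects from the same scan and the channel names are hypothetical:

    from csi_images.csi_events import Event

    crops_per_event = Event.get_many_crops(
        events,
        crop_size=50,                   # 50 px square crops
        channels=["DAPI", "TRITC"],     # restrict to two channels
    )
    for event, crops in zip(events, crops_per_event):
        # one np.ndarray per requested channel
        print(event, [c.shape for c in crops])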

@classmethod
def get_many_montages( cls, events: Sequence[Self], channels: Sequence[int | str] = None, composites: dict[int | str, tuple[float, float, float]] = None, masks: Sequence[numpy.ndarray[numpy.uint8]] = None, labels: Sequence[str] = None, crop_size: int = 100, in_pixels: bool = True, input_path: str = None, apply_gain: Union[bool, Iterable[bool]] = True, **kwargs) -> list[numpy.ndarray]:
485    @classmethod
486    def get_many_montages(
487        cls,
488        events: Sequence[Self],
489        channels: Sequence[int | str] = None,
490        composites: dict[int | str, tuple[float, float, float]] = None,
491        masks: Sequence[np.ndarray[np.uint8]] = None,
492        labels: Sequence[str] = None,
493        crop_size: int = 100,
494        in_pixels: bool = True,
495        input_path: str = None,
496        apply_gain: bool | Iterable[bool] = True,
497        **kwargs,
498    ) -> list[np.ndarray]:
499        """
500        Convenience function for get_montage(), but for a list of events. More efficient
501        than get_montage() when working with multiple events from the same tile.
502        :param events: a list of Event objects.
503        :param channels: the channels to extract images for. Defaults to all channels.
504        :param composites: dictionary of indices and RGB tuples for a composite.
505        :param masks: a list of masks to apply to the montages. Must be the same size as the crops.
506        :param labels: the labels to subtitle montage images, usually the channel names
507        :param crop_size: the square size of the image crop to get for this event.
508        :param in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
509        :param input_path: the path to the input images. Defaults to None (uses the scan's path).
510        :param apply_gain: whether to apply scanner-calculated gain to the images, if
511        not already applied. If a list, matches the channels.
512        :param kwargs: montage options. See csi_images.make_montage() for more details.
513        :return: a list of numpy arrays representing the montages.
514        """
515        if len(events) == 0:
516            return []
517        # Adapt singular inputs to lists of appropriate length
518        if isinstance(crop_size, int):
519            crop_size = [crop_size] * len(events)
520        if input_path is None or isinstance(input_path, str):
521            input_path = [input_path] * len(events)
522        if masks is None or isinstance(masks, np.ndarray):
523            masks = [masks] * len(events)
524
525        # Get the order of the events when sorted by slide/tile
526        event_order, _ = zip(*sorted(enumerate(events), key=lambda x: x[1].__repr__()))
527
528        # Allocate the list to size
529        montages = [np.empty(0)] * len(events)
530        # Placeholder variables to avoid rereading the same tile
531        images = None  # Holds large numpy arrays, so expensive to compare
532        order = None
533        rel_composites = None
534        last_tile = None
535        # Iterate through in slide/tile sorted order
536        for i in event_order:
537            if last_tile != events[i].tile:
538                channels_to_get, order, rel_composites = events[i].get_montage_channels(
539                    channels, composites
540                )
541                # Gather the frame images, preserving them for the next event
542                frames = Frame.get_frames(events[i].tile, channels_to_get)
543                if isinstance(apply_gain, bool):
544                    apply = [apply_gain] * len(frames)
545                else:
546                    apply = apply_gain
547                images = [f.get_image(input_path[i], a) for f, a in zip(frames, apply)]
548                last_tile = events[i].tile
549            # Use the frame images to crop the event images and make montages
550            crops = events[i].crop(images, crop_size[i], in_pixels)
551            montages[i] = csi_images.make_montage(
552                crops, order, rel_composites, masks[i], labels, **kwargs
553            )
554
555        return montages

Convenience function for get_montage(), but for a list of events. More efficient than get_montage() when working with multiple events from the same tile.

Parameters
  • events: a list of Event objects.
  • channels: the channels to extract images for. Defaults to all channels.
  • composites: dictionary of indices and RGB tuples for a composite.
  • masks: a list of masks to apply to the montages. Must be the same size as the crops.
  • labels: the labels to subtitle montage images, usually the channel names
  • crop_size: the square size of the image crop to get for this event.
  • in_pixels: whether the crop size is in pixels or micrometers. Defaults to pixels.
  • input_path: the path to the input images. Defaults to None (uses the scan's path).
  • apply_gain: whether to apply scanner-calculated gain to the images, if not already applied. If a list, matches the channels.
  • kwargs: montage options. See csi_images.make_montage() for more details.
Returns

a list of numpy arrays representing the montages.
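
A minimal sketch, assuming `events` is a list of Event objects and the channel names are hypothetical:

    from csi_images.csi_events import Event

    montages = Event.get_many_montages(
        events,
        channels=["DAPI"],                       # grayscale panel
        composites={"TRITC": (1.0, 0.0, 0.0),    # red overlay
                    "CY5": (0.0, 0.0, 1.0)},     # blue overlay
        crop_size=100,
    )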

@classmethod
def get_and_save_many_crops( cls, events: list[typing.Self], output_path: str, labels: Sequence[str], ext: str = 'auto', additional_gain: Sequence[float] = None, **kwargs) -> None:
557    @classmethod
558    def get_and_save_many_crops(
559        cls,
560        events: list[Self],
561        output_path: str,
562        labels: Sequence[str],
563        ext: str = "auto",
564        additional_gain: Sequence[float] = None,
565        **kwargs,
566    ) -> None:
567        """
568        Get and save the crops for a list of events, ensuring that there is no wasteful
569        reading and limiting the image data in memory to 1 tile at a time. This function
570        is more efficient than chaining get_crops() and save_crops() for each event or
571        get_many_crops() and then save_crops().
572        :param events: list of events to get, crop, and save.
573        :param output_path: the folder to save the crops in. Will make if needed.
574        :param labels: the labels to save the crops with. See save_crops().
575        :param ext: the file extension to save the crops as. See save_crops().
576        :param additional_gain: additional gain to apply to the crops. If not None, must
577        match the length of the number of crop channels.
578        :param kwargs: see get_many_crops() for more parameters.
579        :return: None
580        """
581        unique_tiles = set([event.tile for event in events])
582
583        for tile in unique_tiles:
584            # Get one tile's worth of event crops
585            tile_events = [e for e in events if e.tile == tile]
586            crops_list = cls.get_many_crops(tile_events, **kwargs)
587            for event, crops in zip(tile_events, crops_list):
588                # Apply any additional gains
589                if additional_gain is not None:
590                    crops = [gain * crop for gain, crop in zip(additional_gain, crops)]
591                event.save_crops(crops, output_path, labels, ext)

Get and save the crops for a list of events, ensuring that there is no wasteful reading and limiting the image data in memory to 1 tile at a time. This function is more efficient than chaining get_crops() and save_crops() for each event or get_many_crops() and then save_crops().

Parameters
  • events: list of events to get, crop, and save.
  • output_path: the folder to save the crops in. Will make if needed.
  • labels: the labels to save the crops with. See save_crops().
  • ext: the file extension to save the crops as. See save_crops().
  • additional_gain: additional gain to apply to the crops. If not None, must match the length of the number of crop channels.
  • kwargs: see get_many_crops() for more parameters.
Returns

None
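
A minimal sketch, assuming `events` is a list of Event objects and that the output folder and channel labels are hypothetical; extra keyword arguments are forwarded to get_many_crops():

    from csi_images.csi_events import Event

    Event.get_and_save_many_crops(
        events,
        output_path="/tmp/crops",
        labels=["DAPI", "TRITC", "CY5"],    # one label per crop channel; see save_crops()
        channels=["DAPI", "TRITC", "CY5"],  # forwarded to get_many_crops()
        crop_size=50,                       # forwarded to get_many_crops()
    )
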
@classmethod
def get_and_save_many_montages( cls, events: list[typing.Self], output_path: str, ocular_names: bool = False, tag: str = '', **kwargs) -> None:
593    @classmethod
594    def get_and_save_many_montages(
595        cls,
596        events: list[Self],
597        output_path: str,
598        ocular_names: bool = False,
599        tag: str = "",
600        **kwargs,
601    ) -> None:
602        """
603        Save montages of the events to image files.
604        :param events: the events to get, montage, and save.
605        :param output_path: the folder to save the montages to. Will make if needed.
606        :param ocular_names: whether to use the OCULAR naming convention.
607        :param tag: a tag to append to the file name. Ignored if ocular_names is True.
608        :param kwargs: see get_many_montages() for more parameters.
609        """
610        unique_tiles = set([event.tile for event in events])
611
612        for tile in unique_tiles:
613            # Get one tile's worth of event crops
614            tile_events = [e for e in events if e.tile == tile]
615            montages = cls.get_many_montages(tile_events, **kwargs)
616            for event, montage in zip(tile_events, montages):
617                event.save_montage(montage, output_path, ocular_names, tag)

Save montages of the events to image files.

Parameters
  • events: the events to get, montage, and save.
  • output_path: the folder to save the montages to. Will make if needed.
  • ocular_names: whether to use the OCULAR naming convention.
  • tag: a tag to append to the file name. Ignored if ocular_names is True.
  • kwargs: see get_many_montages() for more parameters.
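
A minimal sketch, assuming `events` is a list of Event objects and that the output folder and channel names are hypothetical; extra keyword arguments are forwarded to get_many_montages():

    from csi_images.csi_events import Event

    Event.get_and_save_many_montages(
        events,
        output_path="/tmp/montages",
        tag="-qc",
        channels=["DAPI", "TRITC"],    # forwarded to get_many_montages()
    )
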
class EventArray:
 620class EventArray:
 621    """
 622    A class that holds a large number of events' data, making it easy to analyze and
 623    manipulate many events at once. A more separated version of the Event class.
 624    """
 625
 626    INFO_COLUMNS = ["slide_id", "tile", "roi", "x", "y"]
 627
 628    def __init__(
 629        self,
 630        info: pd.DataFrame = None,
 631        metadata: pd.DataFrame = None,
 632        features: pd.DataFrame = None,
 633    ):
 634        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y"
 635        if info is not None:
 636            # Special case: "roi" is often not required, so we'll fill in if it's missing
 637            if "roi" not in info.columns:
 638                info["roi"] = 0
 639            if set(info.columns) != set(self.INFO_COLUMNS):
 640                raise ValueError(
 641                    f"EventArray.info must have columns:"
 642                    f"{self.INFO_COLUMNS}; had {list(info.columns)}"
 643                )
 644            # Copy first to avoid modifying the original
 645            info = info.copy()
 646            # Ensure that the columns are the right types
 647            info["slide_id"] = info["slide_id"].astype(str)
 648            info["tile"] = info["tile"].astype(np.uint16)
 649            info["roi"] = info["roi"].astype(np.uint8)
 650            info["x"] = info["x"].round().astype(np.uint16)
 651            info["y"] = info["y"].round().astype(np.uint16)
 652            # Ensure that the columns are in the right order
 653            info = info[self.INFO_COLUMNS]
 654        # All DataFrames must all have the same number of rows
 655        if metadata is not None and (info is None or len(info) != len(metadata)):
 656            raise ValueError(
 657                "If EventArray.metadata is not None, it should match rows with .info"
 658            )
 659        if features is not None and (info is None or len(info) != len(features)):
 660            raise ValueError(
 661                "If EventArray.features is not None, it should match rows with .info"
 662            )
 663        # No columns named "metadata_", "features_", or "None"
 664        column_names = []
 665        if metadata is not None:
 666            column_names += metadata.columns.tolist()
 667        if features is not None:
 668            column_names += features.columns.tolist()
 669        if any([col.lower().startswith("metadata_") for col in column_names]):
 670            raise ValueError("EventArray column names cannot start with 'metadata_'")
 671        if any([col.lower().startswith("features_") for col in column_names]):
 672            raise ValueError("EventArray column names cannot start with 'features_'")
 673        if any([col.lower() == "none" for col in column_names]):
 674            raise ValueError("EventArray column names cannot be 'none'")
 675
 676        self.info = info
 677        self.metadata = metadata
 678        self.features = features
 679
 680    def __len__(self) -> int:
 681        # Convenience method to get the number of events
 682        if self.info is None:
 683            return 0
 684        else:
 685            return len(self.info)
 686
 687    def __eq__(self, other):
 688        # Parse all possibilities for info
 689        if isinstance(self.info, pd.DataFrame):
 690            if isinstance(other.info, pd.DataFrame):
 691                if not self.info.equals(other.info):
 692                    return False
 693            else:
 694                return False
 695        elif self.info is None:
 696            if other.info is not None:
 697                return False
 698
 699        # Parse all possibilities for metadata
 700        if isinstance(self.metadata, pd.DataFrame):
 701            if isinstance(other.metadata, pd.DataFrame):
 702                is_equal = self.metadata.equals(other.metadata)
 703                if not is_equal:
 704                    return False
 705            else:
 706                return False
 707        elif self.metadata is None:
 708            if other.metadata is not None:
 709                return False
 710
 711        # Parse all possibilities for features
 712        if isinstance(self.features, pd.DataFrame):
 713            if isinstance(other.features, pd.DataFrame):
 714                is_equal = self.features.equals(other.features)
 715                if not is_equal:
 716                    return False
 717            else:
 718                return False
 719        elif self.features is None:
 720            if other.features is not None:
 721                return False
 722
 723        return True
 724
 725    def get_sort_order(
 726        self, by: Hashable | Sequence[Hashable], ascending: bool | Sequence[bool] = True
 727    ):
 728        """
 729        Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames.
 730        :param by: name of the column(s) to sort by.
 731        :param ascending: whether to sort in ascending order; can be a list to match by
 732        :return: the order of the indices to sort by.
 733        """
 734        columns = self.get(by)
 735        return columns.sort_values(by=by, ascending=ascending).index
 736
 737    def sort(
 738        self,
 739        by: Hashable | Sequence[Hashable],
 740        ascending: bool | Sequence[bool] = True,
 741    ) -> Self:
 742        """
 743        Sort the EventArray by column(s) in the info, metadata, or features DataFrames.
 744        :param by: name of the column(s) to sort by.
 745        :param ascending: whether to sort in ascending order; can be a list to match by
 746        :return: a new, sorted EventArray.
 747        """
 748        order = self.get_sort_order(by, ascending)
 749        info = self.info.loc[order].reset_index(drop=True)
 750        if self.metadata is not None:
 751            metadata = self.metadata.loc[order].reset_index(drop=True)
 752        else:
 753            metadata = None
 754        if self.features is not None:
 755            features = self.features.loc[order].reset_index(drop=True)
 756        else:
 757            features = None
 758        return EventArray(info, metadata, features)
 759
 760    def get(self, column_names: Hashable | Sequence[Hashable]) -> pd.DataFrame:
 761        """
 762        Get a DataFrame with the specified columns from the EventArray, by value.
 763        :param column_names: the names of the columns to get.
 764        :return: a DataFrame with the specified columns.
 765        """
 766        if isinstance(column_names, Hashable):
 767            column_names = [column_names]  # Drop into a list for the loop
 768        columns = []
 769        for column_name in column_names:
 770            if column_name in self.info.columns:
 771                columns.append(self.info[column_name])
 772            elif self.metadata is not None and column_name in self.metadata.columns:
 773                columns.append(self.metadata[column_name])
 774            elif self.features is not None and column_name in self.features.columns:
 775                columns.append(self.features[column_name])
 776            else:
 777                raise ValueError(f"Column {column_name} not found in EventArray")
 778        return pd.concat(columns, axis=1)
 779
 780    def rows(self, rows: Sequence[Hashable]) -> Self:
 781        """
 782        Get a subset of the EventArray rows based on a boolean or integer index, by value.
 783        :param rows: row labels, indices, or boolean mask; anything for .loc[]
 784        :return: a new EventArray with the subset of events.
 785        """
 786        info = self.info.loc[rows].reset_index(drop=True)
 787        if self.metadata is not None:
 788            metadata = self.metadata.loc[rows].reset_index(drop=True)
 789        else:
 790            metadata = None
 791        if self.features is not None:
 792            features = self.features.loc[rows].reset_index(drop=True)
 793        else:
 794            features = None
 795        return EventArray(info, metadata, features)
 796
 797    def copy(self) -> Self:
 798        """
 799        Create a deep copy of the EventArray.
 800        :return: a deep copy of the EventArray.
 801        """
 802        return EventArray(
 803            info=self.info.copy(),
 804            metadata=None if self.metadata is None else self.metadata.copy(),
 805            features=None if self.features is None else self.features.copy(),
 806        )
 807
 808    # TODO: add a "filter" convenience function that takes a column name and values to filter by
 809
 810    def add_metadata(self, new_metadata: pd.Series | pd.DataFrame) -> None:
 811        """
 812        Add metadata to the EventArray. Removes the need to check if metadata is None.
 813        Overwrites any existing metadata with the same column names as the new metadata.
 814        :param new_metadata: the metadata to add.
 815        """
 816        if len(self) != len(new_metadata):
 817            raise ValueError("New metadata must match length of existing info")
 818
 819        if self.metadata is None:
 820            self.metadata = new_metadata
 821        else:
 822            if isinstance(new_metadata, pd.Series):
 823                self.metadata[new_metadata.name] = new_metadata
 824            else:
 825                # It's a DataFrame
 826                self.metadata[new_metadata.columns] = new_metadata
 827
 828    def add_features(self, new_features: pd.Series | pd.DataFrame) -> None:
 829        """
 830        Add features to the EventArray. Removes the need to check if features is None.
 831        Overwrites any existing features with the same column names as the new features.
 832        :param new_features: the features to add.
 833        """
 834        if len(self) != len(new_features):
 835            raise ValueError("New features must match length of existing info")
 836
 837        if self.features is None:
 838            self.features = new_features
 839        else:
 840            if isinstance(new_features, pd.Series):
 841                self.features[new_features.name] = new_features
 842            else:
 843                # It's a DataFrame
 844                self.features[new_features.columns] = new_features
 845
 846    @classmethod
 847    def merge(cls, events: Iterable[Self]) -> Self:
 848        """
 849        Combine EventArrays in a list into a single EventArray.
 850        :param events: the EventArrays to merge.
 851        """
 852        all_info = []
 853        all_metadata = []
 854        all_features = []
 855        for event_array in events:
 856            # Skip empty EventArrays
 857            if event_array.info is not None:
 858                all_info.append(event_array.info)
 859            if event_array.metadata is not None:
 860                all_metadata.append(event_array.metadata)
 861            if event_array.features is not None:
 862                all_features.append(event_array.features)
 863        if len(all_info) == 0:
 864            return EventArray()
 865        else:
 866            all_info = pd.concat(all_info, ignore_index=True)
 867        if len(all_metadata) == 0:
 868            all_metadata = None
 869        else:
 870            all_metadata = pd.concat(all_metadata, ignore_index=True)
 871        if len(all_features) == 0:
 872            all_features = None
 873        else:
 874            all_features = pd.concat(all_features, ignore_index=True)
 875
 876        return EventArray(all_info, all_metadata, all_features)
 877
 878    def to_events(
 879        self,
 880        scans: Scan | Iterable[Scan],
 881        ignore_missing_scans=True,
 882        ignore_metadata=False,
 883        ignore_features=False,
 884    ) -> list[Event]:
 885        """
 886        Get the events in the EventArray as a list of events. Returns [] if empty.
 887        :param scans: the scans that the events belong to, auto-matched by slide_id.
 888        Pass None if you don't care about scan metadata (and leave ignore_missing_scans=True).
 889        :param ignore_missing_scans: whether to create blank scans for events without scans.
 890        :param ignore_metadata: whether to ignore metadata or not
 891        :param ignore_features: whether to ignore features or not
 892        :return: a list of Event objects.
 893        """
 894        if len(self) == 0:
 895            return []
 896        if isinstance(scans, Scan):
 897            scans = [scans]
 898        scans = {scan.slide_id: scan for scan in (scans or [])}
 899        events = []
 900        for i in range(len(self.info)):
 901            # Determine the associated scan
 902            slide_id = self.info["slide_id"][i]
 903            if slide_id not in scans:
 904                if ignore_missing_scans:
 905                    # Create a placeholder scan if the scan is missing
 906                    scan = Scan.make_placeholder(
 907                        slide_id,
 908                        self.info["tile"][i],
 909                        self.info["roi"][i],
 910                    )
 911                else:
 912                    raise ValueError(
 913                        f"Scan {self.info['slide_id'][i]} not found for event {i}."
 914                    )
 915            else:
 916                scan = scans[slide_id]
 917
 918            # Prepare the metadata and features
 919            if ignore_metadata or self.metadata is None:
 920                metadata = None
 921            else:
 922                # This Series creation method is less efficient,
 923                # but required for preserving dtypes
 924                metadata = pd.Series(
 925                    {col: self.metadata.loc[i, col] for col in self.metadata.columns},
 926                    dtype=object,
 927                )
 928            if ignore_features or self.features is None:
 929                features = None
 930            else:
 931                features = pd.Series(
 932                    {col: self.features.loc[i, col] for col in self.features.columns},
 933                    dtype=object,
 934                )
 935            # Create the event and append it to the list
 936            events.append(
 937                Event(
 938                    Tile(scan, self.info["tile"][i], self.info["roi"][i]),
 939                    self.info["x"][i],
 940                    self.info["y"][i],
 941                    metadata=metadata,
 942                    features=features,
 943                )
 944            )
 945        return events
 946
 947    @classmethod
 948    def from_events(cls, events: Iterable[Event]) -> Self:
 949        """
 950        Create an EventArray from a list of Event objects.
 951        :param events: the events to convert into an EventArray.
 952        """
 953        info = pd.DataFrame(
 954            {
 955                "slide_id": [event.tile.scan.slide_id for event in events],
 956                "tile": [event.tile.n for event in events],
 957                "roi": [event.tile.n_roi for event in events],
 958                "x": [event.x for event in events],
 959                "y": [event.y for event in events],
 960            }
 961        )
 962        metadata_list = [event.metadata for event in events]
 963        # Iterate through and ensure that all metadata is the same shape
 964        for metadata in metadata_list:
 965            if type(metadata) != type(metadata_list[0]):
 966                raise ValueError("All metadata must be the same type.")
 967            if metadata is not None and metadata.shape != metadata_list[0].shape:
 968                raise ValueError("All metadata must be the same shape.")
 969        if metadata_list[0] is None:
 970            metadata = None
 971        else:
 972            metadata = pd.DataFrame(metadata_list)
 973        features_list = [event.features for event in events]
 974        # Iterate through and ensure that all features are the same shape
 975        for features in features_list:
 976            if type(features) != type(features_list[0]):
 977                raise ValueError("All features must be the same type.")
 978            if features is not None and features.shape != features_list[0].shape:
 979                raise ValueError("All features must be the same shape.")
 980        if features_list[0] is None:
 981            features = None
 982        else:
 983            features = pd.DataFrame(features_list)
 984        return EventArray(info=info, metadata=metadata, features=features)
 985
 986    def to_dataframe(self) -> pd.DataFrame:
 987        """
 988        Convert all the data in the EventArray to a single DataFrame.
 989        :return: a DataFrame with all the data in the EventArray.
 990        """
 991        # Start from a copy of the info DataFrame (info columns keep their names)
 992        output = self.info.copy()
 993        # Combine with the metadata and prepend "metadata_" to the column names
 994        if self.metadata is not None:
 995            metadata = self.metadata.copy()
 996            metadata.columns = [f"metadata_{col}" for col in metadata.columns]
 997            output = pd.concat([output, metadata], axis=1)
 998        # Combine with the features and prepend "features_" to the column names
 999        if self.features is not None:
1000            features = self.features.copy()
1001            features.columns = [f"features_{col}" for col in features.columns]
1002            output = pd.concat([output, features], axis=1)
1003        return output
1004
1005    @classmethod
1006    def from_dataframe(
1007        cls, df, metadata_prefix: str = "metadata_", features_prefix: str = "features_"
1008    ) -> Self:
1009        """
1010        From a single, special DataFrame, create an EventArray.
1011        :param df: the DataFrame to convert to an EventArray.
1012        :param metadata_prefix: the prefix for metadata columns.
1013        :param features_prefix: the prefix for features columns.
1014        :return: an EventArray built from the DataFrame.
1015        """
1016        # Split the columns into info, metadata, and features and strip prefix
1017        info = df[[col for col in df.columns if col in cls.INFO_COLUMNS]].copy()
1018        if info.size == 0:
1019            info = None
1020        metadata = df[[col for col in df.columns if col.startswith(metadata_prefix)]].copy()
1021        metadata.columns = [
1022            col.replace(metadata_prefix, "") for col in metadata.columns
1023        ]
1024        if metadata.size == 0:
1025            metadata = None
1026        features = df[[col for col in df.columns if col.startswith(features_prefix)]].copy()
1027        features.columns = [
1028            col.replace(features_prefix, "") for col in features.columns
1029        ]
1030        if features.size == 0:
1031            features = None
1032        return cls(info=info, metadata=metadata, features=features)
1033
1034    @classmethod
1035    def from_mask(
1036        cls,
1037        mask: np.ndarray,
1038        tile: Tile,
1039        include_cell_id: bool = True,
1040        images: list[np.ndarray] = None,
1041        image_labels: list[str] = None,
1042        properties: list[str] = None,
1043    ) -> Self:
1044        """
1045        Extract events from a mask DataFrame, including metadata and features.
1046        :param mask: the mask to extract events from.
1047        :param tile: the Tile object associated with this mask.
1048        :param include_cell_id: whether to include the cell_id, or numerical
1049        mask label, as metadata in the EventArray.
1050        :param images: the intensity images to extract features from.
1051        :param image_labels: the labels for the intensity images.
1052        :param properties: list of properties to extract in addition to the defaults:
1053        :return: EventArray corresponding to the mask labels.
1054        """
1055        if csi_images is None:
1056            raise ModuleNotFoundError(
1057                "imageio libraries not installed! "
1058                "run `pip install csi_images[imageio]` to resolve."
1059            )
1060        # Gather mask_info
1061        if images is not None and image_labels is not None:
1062            if len(images) != len(image_labels):
1063                raise ValueError("Intensity images and labels must match lengths.")
1064
1065        mask_info = csi_images.extract_mask_info(mask, images, image_labels, properties)
1066
1067        if len(mask_info) == 0:
1068            return EventArray()
1069
1070        # Combine provided info and mask info
1071        info = pd.DataFrame(
1072            {
1073                "slide_id": tile.scan.slide_id,
1074                "tile": tile.n,
1075                "roi": tile.n_roi,
1076                "x": mask_info["x"],
1077                "y": mask_info["y"],
1078            },
1079        )
1080        # Extract a metadata column if desired
1081        if include_cell_id:
1082            metadata = pd.DataFrame({"cell_id": mask_info["id"]})
1083        else:
1084            metadata = None
1085        # If any additional properties were extracted, add them as features
1086        mask_info = mask_info.drop(columns=["id", "x", "y"], errors="ignore")
1087        if len(mask_info.columns) > 0:
1088            features = mask_info
1089            features.columns = [col.lower() for col in features.columns]
1090        else:
1091            features = None
1092        return EventArray(info, metadata, features)
1093
1094    def save_csv(self, output_path: str) -> bool:
1095        """
1096        Save the events to a CSV file, including metadata and features.
1097        :param output_path: the path to save the CSV file to; ".csv" is appended if missing.
1098        :return: True if the file exists after writing.
1099        """
1100        if not output_path.endswith(".csv"):
1101            output_path += ".csv"
1102        self.to_dataframe().to_csv(output_path, index=False)
1103        return os.path.exists(output_path)
1104
1105    @classmethod
1106    def load_csv(
1107        cls,
1108        input_path: str,
1109        metadata_prefix: str = "metadata_",
1110        features_prefix: str = "features_",
1111    ) -> Self:
1112        """
1113        Load the events from a CSV file, including metadata and features.
1114        :param input_path: the path to the CSV file.
1115        :param metadata_prefix: the prefix for metadata columns.
1116        :param features_prefix: the prefix for features columns.
1117        :return: the loaded EventArray.
1118        """
1119        # Load the CSV file
1120        df = pd.read_csv(input_path)
1121        return cls.from_dataframe(df, metadata_prefix, features_prefix)
1122
1123    def save_hdf5(self, output_path: str) -> bool:
1124        """
1125        Save the events to an HDF5 file, including metadata and features.
1126        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
1127        though these files are slightly harder to view in HDFView or similar.
1128        :param output_path: the path to save the HDF5 file to; ".hdf5" is appended if needed.
1129        :return: True if the file exists after writing.
1130        """
1131        if not output_path.endswith(".hdf5") and not output_path.endswith(".h5"):
1132            output_path += ".hdf5"
1133        # Open the output_path as an HDF5 file
1134        with pd.HDFStore(output_path) as store:
1135            # Store the dataframes in the HDF5 file
1136            if self.info is not None:
1137                store.put("info", self.info, index=False)
1138            if self.metadata is not None:
1139                store.put("metadata", self.metadata, index=False)
1140            if self.features is not None:
1141                store.put("features", self.features, index=False)
1142        return os.path.exists(output_path)
1143
1144    @classmethod
1145    def load_hdf5(cls, input_path: str) -> Self:
1146        """
1147        Load the events from an HDF5 file, including metadata and features.
1148        :param input_path: the path to the HDF5 file.
1149        :return: the loaded EventArray.
1150        """
1151        # Open the input_path as an HDF5 file
1152        with pd.HDFStore(input_path, "r") as store:
1153            # Load the dataframes from the HDF5 file
1154            info = store.get("info") if "info" in store else None
1155            metadata = store.get("metadata") if "metadata" in store else None
1156            features = store.get("features") if "features" in store else None
1157        return cls(info=info, metadata=metadata, features=features)
1158
1159    def save_ocular(self, output_path: str, event_type: str = "cells"):
1160        """
1161        Save the events to an OCULAR file. Relies on the dataframe originating
1162        from an OCULAR file (same columns; duplicate metadata/info).
1163        :param output_path: the folder to save the OCULAR files in.
1164        :param event_type: "cells" or "others"; determines the output file names.
1165        :return: None
1166        """
1167        if pyreadr is None:
1168            raise ModuleNotFoundError(
1169                "pyreadr not installed! Install pyreadr directly "
1170                "or run `pip install csi-images[rds]` option to resolve."
1171            )
1172        if event_type == "cells":
1173            file_stub = "rc-final"
1174        elif event_type == "others":
1175            file_stub = "others-final"
1176        else:
1177            raise ValueError("Invalid event type. Must be cells or others.")
1178
1179        # Ensure good metadata
1180        metadata = pd.DataFrame(
1181            {
1182                "slide_id": self.info["slide_id"],
1183                "frame_id": self.info["tile"] + 1,  # Convert to 1-indexed for R
1184                "cell_id": (
1185                    self.metadata["cell_id"]
1186                    if "cell_id" in self.metadata.columns
1187                    else range(len(self.info))
1188                ),
1189                "cellx": self.info["x"],
1190                "celly": self.info["y"],
1191            }
1192        )
1193        if self.metadata is not None:
1194            metadata[self.metadata.columns] = self.metadata.copy()
1195
1196        # Check for the "ocular_interesting" column
1197        if event_type == "cells":
1198            if "ocular_interesting" in metadata.columns:
1199                interesting_rows = metadata["ocular_interesting"].to_numpy(dtype=bool)
1200            elif "hcpc" in metadata.columns:
1201                # Interesting cells don't get an hcpc designation, leaving them as -1
1202                interesting_rows = (
1203                    metadata["hcpc"].to_numpy() == -1
1204                )  # interesting cells
1205            else:
1206                interesting_rows = []
1207            if sum(interesting_rows) > 0:
1208                # Split the metadata into interesting and regular
1209                interesting_events = self.rows(interesting_rows)
1210                interesting_df = pd.concat(
1211                    [interesting_events.features, interesting_events.metadata], axis=1
1212                )
1213                data_events = self.rows(~interesting_rows)
1214                data_df = pd.concat(
1215                    [data_events.features, data_events.metadata], axis=1
1216                )
1217                data_df = data_df.drop(columns=["ocular_interesting"], errors="ignore")
1218
1219                # Drop particular columns for "interesting"
1220                interesting_df = interesting_df.drop(
1221                    [
1222                        "clust",
1223                        "hcpc",
1224                        "frame_id",
1225                        "cell_id",
1226                        "unique_id",
1227                        "ocular_interesting",
1228                    ],
1229                    axis=1,
1230                    errors="ignore",
1231                )
1232                # Save both .csv and .rds
1233                interesting_stub = os.path.join(output_path, "ocular_interesting")
1234                interesting_df.to_csv(f"{interesting_stub}.csv")
1235                # Suppress pandas FutureWarning
1236                with warnings.catch_warnings():
1237                    warnings.simplefilter(action="ignore", category=FutureWarning)
1238                    pyreadr.write_rds(f"{interesting_stub}.rds", interesting_df)
1239            else:
1240                data_df = pd.concat([self.features, metadata], axis=1)
1241        else:
1242            # Get all data and reset_index (will copy it)
1243            data_df = pd.concat([self.features, metadata], axis=1)
1244
1245        # Split based on cluster number to conform to *-final[1-4].rds
1246        n_clusters = max(data_df["clust"]) + 1
1247        split_idx = [round(i * n_clusters / 4) for i in range(5)]
1248        for i in range(4):
1249            subset = (split_idx[i] <= data_df["clust"]) & (
1250                data_df["clust"] < split_idx[i + 1]
1251            )
1252            data_df.loc[subset, "hcpc"] = i + 1
1253            subset = data_df[subset].reset_index(drop=True)
1254            # Suppress pandas FutureWarning
1255            with warnings.catch_warnings():
1256                warnings.simplefilter(action="ignore", category=FutureWarning)
1257                pyreadr.write_rds(
1258                    os.path.join(output_path, f"{file_stub}{i+1}.rds"), subset
1259                )
1260
1261        # Create new example cell strings
1262        data_df["example_cell_id"] = (
1263            data_df["slide_id"]
1264            + " "
1265            + data_df["frame_id"].astype(str)
1266            + " "
1267            + data_df["cell_id"].astype(str)
1268            + " "
1269            + data_df["cellx"].astype(int).astype(str)
1270            + " "
1271            + data_df["celly"].astype(int).astype(str)
1272        )
1273        # Find averagable data columns
1274        if "cellcluster_id" in data_df.columns:
1275            end_idx = data_df.columns.get_loc("cellcluster_id")
1276        else:
1277            end_idx = data_df.columns.get_loc("slide_id")
1278        avg_cols = data_df.columns[:end_idx].tolist()
1279        # Group by cluster and average
1280        data_df = data_df.groupby("clust").agg(
1281            **{col: (col, "mean") for col in avg_cols},
1282            count=("clust", "size"),  # count rows in each cluster
1283            example_cells=("example_cell_id", lambda x: ",".join(x)),
1284            hcpc=("hcpc", lambda x: x.iloc[0]),
1285        )
1286        data_df = data_df.reset_index()  # Do NOT drop, index is "clust"
1287        # Create new columns
1288        metadata = pd.DataFrame(
1289            {
1290                "count": data_df["count"],
1291                "example_cells": data_df["example_cells"],
1292                "clust": data_df["clust"].astype(int),
1293                "hcpc": data_df["hcpc"].astype(int),
1294                "id": data_df["clust"].astype(int).astype(str),
1295                "cccluster": "0",  # Dummy value
1296                "ccdistance": 0.0,  # Dummy value
1297                "rownum": list(range(len(data_df))),
1298                "framegroup": 0,  # Dummy value
1299            }
1300        )
1301        # Need to pad the features to 761 columns, as per OCULAR report needs
1302        additional_columns = range(len(avg_cols), 761)
1303        if len(additional_columns) > 0:
1304            padding = pd.DataFrame(
1305                np.zeros((len(data_df), len(additional_columns))),
1306                columns=[f"pad{i}" for i in additional_columns],
1307            )
1308            data_df = pd.concat([data_df[avg_cols], padding, metadata], axis=1)
1309        else:
1310            data_df = pd.concat([data_df[avg_cols], metadata], axis=1)
1311
1312        # Save the cluster data
1313        data_df.to_csv(os.path.join(output_path, f"{file_stub}.csv"))
1314        # Suppress pandas FutureWarning
1315        with warnings.catch_warnings():
1316            warnings.simplefilter(action="ignore", category=FutureWarning)
1317            pyreadr.write_rds(os.path.join(output_path, f"{file_stub}.rds"), data_df)
1318
1319    @classmethod
1320    def load_ocular(
1321        cls,
1322        input_path: str,
1323        event_type="cells",
1324        cell_data_files=(
1325            "rc-final1.rds",
1326            "rc-final2.rds",
1327            "rc-final3.rds",
1328            "rc-final4.rds",
1329            "ocular_interesting.rds",
1330        ),
1331        others_data_files=(
1332            "others-final1.rds",
1333            "others-final2.rds",
1334            "others-final3.rds",
1335            "others-final4.rds",
1336        ),
1337        atlas_data_files=(
1338            "ocular_interesting.rds",
1339            "ocular_not_interesting.rds",
1340        ),
1341        drop_common_events=True,
1342    ) -> Self:
1343        """
1344        Load events from OCULAR .rds files, including metadata and features.
1345        :param input_path: the OCULAR output folder, or a path to a single .rds file.
1346        :param event_type: "cells" or "others"; determines which files to load.
1347        :param cell_data_files: file names to load when event_type is "cells".
1348        :param others_data_files: file names to load when event_type is "others".
1349        :param atlas_data_files: file names that may contain catalogued (atlas) events.
1350        :param drop_common_events: whether to drop events classified as common cells.
1351        :return: the loaded EventArray.
1352        """
1353        if pyreadr is None:
1354            raise ModuleNotFoundError(
1355                "pyreadr not installed! Install pyreadr directly "
1356                "or run `pip install csi-images[rds]` option to resolve."
1357            )
1358        # Check if the input path is a directory or a file
1359        if os.path.isfile(input_path):
1360            data_files = [os.path.basename(input_path)]
1361            input_path = os.path.dirname(input_path)
1362        elif event_type == "cells":
1363            data_files = cell_data_files
1364        elif event_type == "others":
1365            data_files = others_data_files
1366        else:
1367            raise ValueError("Invalid event type.")
1368
1369        # Load the data from the OCULAR files
1370        file_data = {}
1371        for file in data_files:
1372            file_path = os.path.join(input_path, file)
1373            if not os.path.isfile(file_path):
1374                warnings.warn(f"{file} not found for in {input_path}")
1375                continue
1376            file_data[file] = pyreadr.read_r(file_path)
1377            # Get the DataFrame associated with None (pyreadr dict quirk)
1378            file_data[file] = file_data[file][None]
1379            if len(file_data[file]) == 0:
1380                # File gets dropped from the dict
1381                file_data.pop(file)
1382                warnings.warn(f"{file} has no cells")
1383                continue
1384
1385            # Drop common cells if requested and in this file
1386            if (
1387                file in atlas_data_files
1388                and drop_common_events
1389                and "catalogue_classification" in file_data[file]
1390            ):
1391                common_cell_indices = (
1392                    file_data[file]["catalogue_classification"] == "common_cell"
1393                )
1394                file_data[file] = file_data[file][common_cell_indices == False]
1395
1396            if len(file_data[file]) == 0:
1397                # File gets dropped from the dict
1398                file_data.pop(file)
1399                warnings.warn(f"{file} has no cells after dropping common cells")
1400                continue
1401
1402            # Extract frame_id and cell_id
1403            # DAPI- events already have frame_id cell_id outside rowname
1404            if event_type == "cells" and "frame_id" not in file_data[file].columns:
1405                file_data[file]["rowname"] = file_data[file]["rowname"].astype("str")
1406                # get frame_id cell_id from rownames column and split into two columns
1407                split_res = file_data[file]["rowname"].str.split(" ", n=1, expand=True)
1408                if len(split_res.columns) != 2:
1409                    warnings.warn(
1410                        f'Expected "frame_id cell_id" but got {file_data[file]["rowname"]}'
1411                    )
1412                # then assign it back to the dataframe
1413                file_data[file][["frame_id", "cell_id"]] = split_res.astype("int")
1414            # Ensure frame_id and cell_id are integers
1415            file_data[file]["frame_id"] = file_data[file]["frame_id"].astype("int")
1416            file_data[file]["cell_id"] = file_data[file]["cell_id"].astype("int")
1417            # reset indexes since they can cause NaN values in concat
1418            file_data[file] = file_data[file].reset_index(drop=True)
1419
1420        # Merge the data from all files
1421        if len(file_data) == 0:
1422            return EventArray()
1423        elif len(file_data) == 1:
1424            data = [file_data[file] for file in file_data.keys()][0]
1425        else:
1426            data = pd.concat(file_data.values())
1427
1428        # Others is missing the "slide_id". Insert it right before "frame_id" column
1429        if event_type == "others" and "slide_id" not in data.columns:
1430            if os.path.basename(input_path) == "ocular":
1431                slide_id = os.path.basename(os.path.dirname(input_path))
1432            else:
1433                slide_id = "UNKNOWN"
1434            data.insert(data.columns.get_loc("frame_id"), "slide_id", slide_id)
1435
1436        # Sort according to ascending cell_id to keep the original, which is in manual_df
1437        data = data.sort_values(by=["cell_id"], ascending=True)
1438        # Filter out duplicates by x & y
1439        data = data.assign(
1440            unique_id=data["slide_id"]
1441            + "_"
1442            + data["frame_id"].astype(str)
1443            + "_"
1444            + data["cellx"].astype(int).astype(str)
1445            + "_"
1446            + data["celly"].astype(int).astype(str)
1447        )
1448        data = data.drop_duplicates(subset=["unique_id"], keep="first")
1449        # Normal unique_id is with cell_id
1450        data = data.assign(
1451            unique_id=data["slide_id"]
1452            + "_"
1453            + data["frame_id"].astype(str)
1454            + "_"
1455            + data["cell_id"].astype(str)
1456        )
1457        data = data.reset_index(drop=True)
1458        # All columns up to "slide_id" are features; drop the "slide_id"
1459        features = data.loc[:, :"slide_id"].iloc[:, :-1]
1460        data = data.loc[:, "slide_id":]
1461        # Grab the info columns
1462        info = data[["slide_id", "frame_id", "cellx", "celly"]]
1463        info.columns = ["slide_id", "tile", "x", "y"]
1464        info = info.assign(roi=0)  # OCULAR only works on 1 ROI, as far as known
1465        info = info[["slide_id", "tile", "roi", "x", "y"]]
1466        # Metadata has duplicate columns for later convenience
1467        metadata = data
1468        # Certain columns tend to be problematic with mixed data formats...
1469        for col in ["TRITC", "CY5", "FITC"]:
1470            if col in metadata:
1471                labels = {
1472                    "False": False,
1473                    "True": True,
1474                    "FALSE": False,
1475                    "TRUE": True,
1476                    False: False,
1477                    True: True,
1478                }
1479                metadata[col] = metadata[col].map(labels).astype(bool)
1480        for col in ["catalogue_id", "catalogue_distance", "clust", "hcpc"]:
1481            if col in metadata:
1482                metadata[col] = metadata[col].fillna(-1).astype(int)
1483        info["tile"] = info["tile"] - 1  # Convert to 0-based indexing
1484        return EventArray(info, metadata, features)

A class that holds the data for many events in row-aligned DataFrames, making it easy to analyze and manipulate many events at once. It is the DataFrame-based counterpart to the Event class, with positional info, metadata, and features kept in separate, aligned tables.

EventArray( info: pandas.core.frame.DataFrame = None, metadata: pandas.core.frame.DataFrame = None, features: pandas.core.frame.DataFrame = None)
628    def __init__(
629        self,
630        info: pd.DataFrame = None,
631        metadata: pd.DataFrame = None,
632        features: pd.DataFrame = None,
633    ):
634        # Info must be a DataFrame with columns "slide_id", "tile", "roi", "x", "y"
635        if info is not None:
636            # Special case: "roi" is often not required, so we'll fill it in if it's missing
637            if "roi" not in info.columns:
638                info["roi"] = 0
639            if set(info.columns) != set(self.INFO_COLUMNS):
640                raise ValueError(
641                    f"EventArray.info must have columns:"
642                    f"{self.INFO_COLUMNS}; had {list(info.columns)}"
643                )
644            # Copy first to avoid modifying the original
645            info = info.copy()
646            # Ensure that the columns are the right types
647            info["slide_id"] = info["slide_id"].astype(str)
648            info["tile"] = info["tile"].astype(np.uint16)
649            info["roi"] = info["roi"].astype(np.uint8)
650            info["x"] = info["x"].round().astype(np.uint16)
651            info["y"] = info["y"].round().astype(np.uint16)
652            # Ensure that the columns are in the right order
653            info = info[self.INFO_COLUMNS]
654        # All DataFrames must all have the same number of rows
655        if metadata is not None and (info is None or len(info) != len(metadata)):
656            raise ValueError(
657                "If EventArray.metadata is not None, it should match rows with .info"
658            )
659        if features is not None and (info is None or len(info) != len(features)):
660            raise ValueError(
661                "If EventArray.features is not None, it should match rows with .info"
662            )
663        # No columns named "metadata_", "features_", or "None"
664        column_names = []
665        if metadata is not None:
666            column_names += metadata.columns.tolist()
667        if features is not None:
668            column_names += features.columns.tolist()
669        if any([col.lower().startswith("metadata_") for col in column_names]):
670            raise ValueError("EventArray column names cannot start with 'metadata_'")
671        if any([col.lower().startswith("features_") for col in column_names]):
672            raise ValueError("EventArray column names cannot start with 'features_'")
673        if any([col.lower() == "none" for col in column_names]):
674            raise ValueError("EventArray column names cannot be 'none'")
675
676        self.info = info
677        self.metadata = metadata
678        self.features = features
INFO_COLUMNS = ['slide_id', 'tile', 'roi', 'x', 'y']
info
metadata
features
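
For orientation, here is a minimal construction sketch. The slide ID, tile numbers, and feature values are made-up placeholders; "roi" may be omitted from info and defaults to 0.

    import pandas as pd
    from csi_images.csi_events import EventArray

    # Positional info for two hypothetical events
    info = pd.DataFrame({
        "slide_id": ["SLIDE001", "SLIDE001"],
        "tile": [4, 7],
        "x": [120, 980],
        "y": [64, 512],
    })
    # Optional metadata and features, row-aligned with info
    metadata = pd.DataFrame({"cell_id": [1, 2]})
    features = pd.DataFrame({"dapi_mean": [0.42, 0.87]})

    events = EventArray(info, metadata, features)
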
def get_sort_order( self, by: Union[Hashable, Sequence[Hashable]], ascending: Union[bool, Sequence[bool]] = True):
725    def get_sort_order(
726        self, by: Hashable | Sequence[Hashable], ascending: bool | Sequence[bool] = True
727    ):
728        """
729        Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames.
730        :param by: name of the column(s) to sort by.
731        :param ascending: whether to sort in ascending order; can be a list to match by
732        :return: the order of the indices to sort by.
733        """
734        columns = self.get(by)
735        return columns.sort_values(by=by, ascending=ascending).index

Get the sort order for the EventArray by a column in the info, metadata, or features DataFrames.

Parameters
  • by: name of the column(s) to sort by.
  • ascending: whether to sort in ascending order; can be a list to match by
Returns

the order of the indices to sort by.

def sort( self, by: Union[Hashable, Sequence[Hashable]], ascending: Union[bool, Sequence[bool]] = True) -> Self:
737    def sort(
738        self,
739        by: Hashable | Sequence[Hashable],
740        ascending: bool | Sequence[bool] = True,
741    ) -> Self:
742        """
743        Sort the EventArray by column(s) in the info, metadata, or features DataFrames.
744        :param by: name of the column(s) to sort by.
745        :param ascending: whether to sort in ascending order; can be a list to match by
746        :return: a new, sorted EventArray.
747        """
748        order = self.get_sort_order(by, ascending)
749        info = self.info.loc[order].reset_index(drop=True)
750        if self.metadata is not None:
751            metadata = self.metadata.loc[order].reset_index(drop=True)
752        else:
753            metadata = None
754        if self.features is not None:
755            features = self.features.loc[order].reset_index(drop=True)
756        else:
757            features = None
758        return EventArray(info, metadata, features)

Sort the EventArray by column(s) in the info, metadata, or features DataFrames.

Parameters
  • by: name of the column(s) to sort by.
  • ascending: whether to sort in ascending order; can be a list to match by
Returns

a new, sorted EventArray.
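
As a usage sketch, continuing the hypothetical events object from the construction example above, sort() accepts columns from any of the three DataFrames:

    # Sort by tile, then by descending dapi_mean (a feature column in the example)
    ordered = events.sort(["tile", "dapi_mean"], ascending=[True, False])
    # get_sort_order() returns only the row order, without building a new EventArray
    order = events.get_sort_order("tile")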

def get( self, column_names: Union[Hashable, Sequence[Hashable]]) -> pandas.core.frame.DataFrame:
760    def get(self, column_names: Hashable | Sequence[Hashable]) -> pd.DataFrame:
761        """
762        Get a DataFrame with the specified columns from the EventArray, by value.
763        :param column_names: the names of the columns to get.
764        :return: a DataFrame with the specified columns.
765        """
766        if isinstance(column_names, Hashable):
767            column_names = [column_names]  # Drop into a list for the loop
768        columns = []
769        for column_name in column_names:
770            if column_name in self.info.columns:
771                columns.append(self.info[column_name])
772            elif self.metadata is not None and column_name in self.metadata.columns:
773                columns.append(self.metadata[column_name])
774            elif self.features is not None and column_name in self.features.columns:
775                columns.append(self.features[column_name])
776            else:
777                raise ValueError(f"Column {column_name} not found in EventArray")
778        return pd.concat(columns, axis=1)

Get a DataFrame with the specified columns from the EventArray, by value.

Parameters
  • column_names: the names of the columns to get.
Returns

a DataFrame with the specified columns.
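
A short sketch of get(), continuing the same hypothetical events object; it pulls columns by name regardless of whether they live in info, metadata, or features:

    # One column from info and one from features, returned together as a copy
    subset = events.get(["slide_id", "dapi_mean"])
    # A single column name also works
    tiles = events.get("tile")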

def rows(self, rows: Sequence[Hashable]) -> Self:
780    def rows(self, rows: Sequence[Hashable]) -> Self:
781        """
782        Get a subset of the EventArray rows based on a boolean or integer index, by value.
783        :param rows: row labels, indices, or boolean mask; anything for .loc[]
784        :return: a new EventArray with the subset of events.
785        """
786        info = self.info.loc[rows].reset_index(drop=True)
787        if self.metadata is not None:
788            metadata = self.metadata.loc[rows].reset_index(drop=True)
789        else:
790            metadata = None
791        if self.features is not None:
792            features = self.features.loc[rows].reset_index(drop=True)
793        else:
794            features = None
795        return EventArray(info, metadata, features)

Get a subset of the EventArray rows based on a boolean or integer index, by value.

Parameters
  • rows: row labels, indices, or boolean mask; anything for .loc[]
Returns

a new EventArray with the subset of events.
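
A sketch of row selection, continuing the same hypothetical events object; the mask condition is illustrative:

    # Keep only events on tile 4, using a boolean mask
    mask = events.get("tile")["tile"] == 4
    on_tile_4 = events.rows(mask)
    # Row labels also work (here, the default 0-based index labels)
    first_two = events.rows([0, 1])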

def copy(self) -> Self:
797    def copy(self) -> Self:
798        """
799        Create a deep copy of the EventArray.
800        :return: a deep copy of the EventArray.
801        """
802        return EventArray(
803            info=self.info.copy(),
804            metadata=None if self.metadata is None else self.metadata.copy(),
805            features=None if self.features is None else self.features.copy(),
806        )

Create a deep copy of the EventArray.

Returns

a deep copy of the EventArray.

def add_metadata( self, new_metadata: pandas.core.series.Series | pandas.core.frame.DataFrame) -> None:
810    def add_metadata(self, new_metadata: pd.Series | pd.DataFrame) -> None:
811        """
812        Add metadata to the EventArray. Removes the need to check if metadata is None.
813        Overwrites any existing metadata with the same column names as the new metadata.
814        :param new_metadata: the metadata to add.
815        """
816        if len(self) != len(new_metadata):
817            raise ValueError("New metadata must match length of existing info")
818
819        if self.metadata is None:
820            self.metadata = new_metadata
821        else:
822            if isinstance(new_metadata, pd.Series):
823                self.metadata[new_metadata.name] = new_metadata
824            else:
825                # It's a DataFrame
826                self.metadata[new_metadata.columns] = new_metadata

Add metadata to the EventArray. Removes the need to check if metadata is None. Overwrites any existing metadata with the same column names as the new metadata.

Parameters
  • new_metadata: the metadata to add.
def add_features( self, new_features: pandas.core.series.Series | pandas.core.frame.DataFrame) -> None:
828    def add_features(self, new_features: pd.Series | pd.DataFrame) -> None:
829        """
830        Add features to the EventArray. Removes the need to check if features is None.
831        Overwrites any existing features with the same column names as the new features.
832        :param new_features: the features to add.
833        """
834        if len(self) != len(new_features):
835            raise ValueError("New features must match length of existing info")
836
837        if self.features is None:
838            self.features = new_features
839        else:
840            if isinstance(new_features, pd.Series):
841                self.features[new_features.name] = new_features
842            else:
843                # It's a DataFrame
844                self.features[new_features.columns] = new_features

Add features to the EventArray. Removes the need to check if features is None. Overwrites any existing features with the same column names as the new features.

Parameters
  • new_features: the features to add.
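
A sketch of attaching new columns after construction, continuing the same hypothetical events object; the column names are invented for illustration:

    import pandas as pd

    # A named Series becomes a single new metadata column
    events.add_metadata(pd.Series([True, False], name="manually_reviewed"))
    # A DataFrame adds (or overwrites) several feature columns at once
    events.add_features(pd.DataFrame({"ck_mean": [0.1, 0.9], "cd45_mean": [0.3, 0.2]}))
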
@classmethod
def merge(cls, events: Iterable[Self]) -> Self:
846    @classmethod
847    def merge(cls, events: Iterable[Self]) -> Self:
848        """
849        Combine EventArrays in a list into a single EventArray.
850        :param events: the EventArrays to combine.
851        """
852        all_info = []
853        all_metadata = []
854        all_features = []
855        for event_array in events:
856            # Skip empty EventArrays
857            if event_array.info is not None:
858                all_info.append(event_array.info)
859            if event_array.metadata is not None:
860                all_metadata.append(event_array.metadata)
861            if event_array.features is not None:
862                all_features.append(event_array.features)
863        if len(all_info) == 0:
864            return EventArray()
865        else:
866            all_info = pd.concat(all_info, ignore_index=True)
867        if len(all_metadata) == 0:
868            all_metadata = None
869        else:
870            all_metadata = pd.concat(all_metadata, ignore_index=True)
871        if len(all_features) == 0:
872            all_features = None
873        else:
874            all_features = pd.concat(all_features, ignore_index=True)
875
876        return EventArray(all_info, all_metadata, all_features)

Combine EventArrays in a list into a single EventArray.

Parameters
  • events: the EventArrays to combine.
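
merge() is useful for combining per-tile or per-slide results; a sketch, assuming per_tile_arrays is a list of EventArray objects produced elsewhere (for example, one per tile):

    combined = EventArray.merge(per_tile_arrays)

Empty EventArrays in the list are skipped, and the rows of the result are concatenated in list order.
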
def to_events( self, scans: Union[csi_images.csi_scans.Scan, Iterable[csi_images.csi_scans.Scan]], ignore_missing_scans=True, ignore_metadata=False, ignore_features=False) -> list[Event]:
878    def to_events(
879        self,
880        scans: Scan | Iterable[Scan],
881        ignore_missing_scans=True,
882        ignore_metadata=False,
883        ignore_features=False,
884    ) -> list[Event]:
885        """
886        Get the events in the EventArray as a list of events. Returns [] if empty.
887        :param scans: the scans that the events belong to, auto-matched by slide_id.
888        Pass an empty list (and set ignore_missing_scans=True) if you don't care about scan metadata.
889        :param ignore_missing_scans: whether to create placeholder scans for events without scans.
890        :param ignore_metadata: whether to ignore metadata or not
891        :param ignore_features: whether to ignore features or not
892        :return: a list of Event objects; an empty list if the EventArray is empty.
893        """
894        if len(self) == 0:
895            return []
896        if isinstance(scans, Scan):
897            scans = [scans]
898        scans = {scan.slide_id: scan for scan in scans}
899        events = []
900        for i in range(len(self.info)):
901            # Determine the associated scan
902            slide_id = self.info["slide_id"][i]
903            if slide_id not in scans:
904                if ignore_missing_scans:
905                    # Create a placeholder scan if the scan is missing
906                    scan = Scan.make_placeholder(
907                        slide_id,
908                        self.info["tile"][i],
909                        self.info["roi"][i],
910                    )
911                else:
912                    raise ValueError(
913                        f"Scan {self.info['slide_id'][i]} not found for event {i}."
914                    )
915            else:
916                scan = scans[slide_id]
917
918            # Prepare the metadata and features
919            if ignore_metadata or self.metadata is None:
920                metadata = None
921            else:
922                # This Series creation method is less efficient,
923                # but required for preserving dtypes
924                metadata = pd.Series(
925                    {col: self.metadata.loc[i, col] for col in self.metadata.columns},
926                    dtype=object,
927                )
928            if ignore_features or self.features is None:
929                features = None
930            else:
931                features = pd.Series(
932                    {col: self.features.loc[i, col] for col in self.features.columns},
933                    dtype=object,
934                )
935            # Create the event and append it to the list
936            events.append(
937                Event(
938                    Tile(scan, self.info["tile"][i], self.info["roi"][i]),
939                    self.info["x"][i],
940                    self.info["y"][i],
941                    metadata=metadata,
942                    features=features,
943                )
944            )
945        return events

Get the events in the EventArray as a list of events. Returns [] if empty.

Parameters
  • scans: the scans that the events belong to, auto-matched by slide_id. Pass an empty list (and set ignore_missing_scans=True) if you don't care about scan metadata.
  • ignore_missing_scans: whether to create placeholder scans for events without scans.
  • ignore_metadata: whether to ignore metadata or not
  • ignore_features: whether to ignore features or not
Returns

a list of Event objects; an empty list if the EventArray is empty.
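
A sketch of expanding the array back into Event objects, continuing the same hypothetical events object. With an empty scans list and ignore_missing_scans=True, placeholder scans are created for events whose scan is not supplied; pass real Scan objects (from csi_images.csi_scans) to attach actual scan metadata:

    single_events = events.to_events([], ignore_missing_scans=True)
    first = single_events[0]
    print(first.tile.n, first.x, first.y)
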
@classmethod
def from_events(cls, events: Iterable[Event]) -> Self:
947    @classmethod
948    def from_events(cls, events: Iterable[Event]) -> Self:
949        """
950        Create an EventArray from a list of Event objects.
951        :param events: the events to convert into an EventArray.
952        """
953        info = pd.DataFrame(
954            {
955                "slide_id": [event.tile.scan.slide_id for event in events],
956                "tile": [event.tile.n for event in events],
957                "roi": [event.tile.n_roi for event in events],
958                "x": [event.x for event in events],
959                "y": [event.y for event in events],
960            }
961        )
962        metadata_list = [event.metadata for event in events]
963        # Iterate through and ensure that all metadata is the same shape
964        for metadata in metadata_list:
965            if type(metadata) != type(metadata_list[0]):
966                raise ValueError("All metadata must be the same type.")
967            if metadata is not None and metadata.shape != metadata_list[0].shape:
968                raise ValueError("All metadata must be the same shape.")
969        if metadata_list[0] is None:
970            metadata = None
971        else:
972            metadata = pd.DataFrame(metadata_list)
973        features_list = [event.features for event in events]
974        # Iterate through and ensure that all features are the same shape
975        for features in features_list:
976            if type(features) != type(features_list[0]):
977                raise ValueError("All features must be the same type.")
978            if features is not None and features.shape != features_list[0].shape:
979                raise ValueError("All features must be the same shape.")
980        if features_list[0] is None:
981            features = None
982        else:
983            features = pd.DataFrame(features_list)
984        return EventArray(info=info, metadata=metadata, features=features)

Create an EventArray from a list of Event objects.

Parameters
  • events: the events to convert into an EventArray.
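
And the reverse direction, a round-trip sketch continuing from the to_events example above:

    # Rebuild an EventArray from the list of Event objects
    round_tripped = EventArray.from_events(single_events)
    assert len(round_tripped.info) == len(events.info)
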
def to_dataframe(self) -> pandas.core.frame.DataFrame:
 986    def to_dataframe(self) -> pd.DataFrame:
 987        """
 988        Convert all the data in the EventArray to a single DataFrame.
 989        :return: a DataFrame with all the data in the EventArray.
 990        """
 991        # Make a copy of the info DataFrame and prepend "info_" to the column names
 992        output = self.info.copy()
 993        # Combine with the metadata and prepend "metadata_" to the column names
 994        if self.metadata is not None:
 995            metadata = self.metadata.copy()
 996            metadata.columns = [f"metadata_{col}" for col in metadata.columns]
 997            output = pd.concat([output, metadata], axis=1)
 998        # Combine with the features and prepend "features_" to the column names
 999        if self.features is not None:
1000            features = self.features.copy()
1001            features.columns = [f"features_{col}" for col in features.columns]
1002            output = pd.concat([output, features], axis=1)
1003        return output

Convert all the data in the EventArray to a single DataFrame.

Returns

a DataFrame with all the data in the EventArray.

@classmethod
def from_dataframe( cls, df, metadata_prefix: str = 'metadata_', features_prefix: str = 'features_') -> Self:
1005    @classmethod
1006    def from_dataframe(
1007        cls, df, metadata_prefix: str = "metadata_", features_prefix: str = "features_"
1008    ) -> Self:
1009        """
1010        From a single, special DataFrame, create an EventArray.
1011        :param df: the DataFrame to convert to an EventArray.
1012        :param metadata_prefix: the prefix for metadata columns.
1013        :param features_prefix: the prefix for features columns.
1014        :return: an EventArray built from the prefixed columns of the DataFrame.
1015        """
1016        # Split the columns into info, metadata, and features and strip prefix
1017        info = df[[col for col in df.columns if col in cls.INFO_COLUMNS]].copy()
1018        if info.size == 0:
1019            info = None
1020        metadata = df[[col for col in df.columns if col.startswith(metadata_prefix)]].copy()
1021        metadata.columns = [
1022            col.replace(metadata_prefix, "") for col in metadata.columns
1023        ]
1024        if metadata.size == 0:
1025            metadata = None
1026        features = df[[col for col in df.columns if col.startswith(features_prefix)]].copy()
1027        features.columns = [
1028            col.replace(features_prefix, "") for col in features.columns
1029        ]
1030        if features.size == 0:
1031            features = None
1032        return cls(info=info, metadata=metadata, features=features)

From a single, special DataFrame, create an EventArray.

Parameters
  • df: the DataFrame to convert to an EventArray.
  • metadata_prefix: the prefix for metadata columns.
  • features_prefix: the prefix for features columns.
Returns

an EventArray built from the prefixed columns of the DataFrame.
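
A sketch of the flat-DataFrame round trip, continuing the same hypothetical events object; in the flat form, metadata and feature columns carry the "metadata_" and "features_" prefixes:

    flat = events.to_dataframe()
    # e.g. ['slide_id', 'tile', 'roi', 'x', 'y', 'metadata_cell_id', 'features_dapi_mean', ...]
    print(list(flat.columns))
    restored = EventArray.from_dataframe(flat)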

@classmethod
def from_mask( cls, mask: numpy.ndarray, tile: csi_images.csi_tiles.Tile, include_cell_id: bool = True, images: list[numpy.ndarray] = None, image_labels: list[str] = None, properties: list[str] = None) -> Self:
1034    @classmethod
1035    def from_mask(
1036        cls,
1037        mask: np.ndarray,
1038        tile: Tile,
1039        include_cell_id: bool = True,
1040        images: list[np.ndarray] = None,
1041        image_labels: list[str] = None,
1042        properties: list[str] = None,
1043    ) -> Self:
1044        """
1045        Extract events from a labeled mask array, including metadata and features.
1046        :param mask: the mask to extract events from.
1047        :param tile: the Tile object associated with this mask.
1048        :param include_cell_id: whether to include the cell_id, or numerical
1049        mask label, as metadata in the EventArray.
1050        :param images: the intensity images to extract features from.
1051        :param image_labels: the labels for the intensity images.
1052        :param properties: list of properties to extract in addition to the defaults.
1053        :return: EventArray corresponding to the mask labels.
1054        """
1055        if csi_images is None:
1056            raise ModuleNotFoundError(
1057                "imageio libraries not installed! "
1058                "run `pip install csi_images[imageio]` to resolve."
1059            )
1060        # Gather mask_info
1061        if images is not None and image_labels is not None:
1062            if len(images) != len(image_labels):
1063                raise ValueError("Intensity images and labels must match lengths.")
1064
1065        mask_info = csi_images.extract_mask_info(mask, images, image_labels, properties)
1066
1067        if len(mask_info) == 0:
1068            return EventArray()
1069
1070        # Combine provided info and mask info
1071        info = pd.DataFrame(
1072            {
1073                "slide_id": tile.scan.slide_id,
1074                "tile": tile.n,
1075                "roi": tile.n_roi,
1076                "x": mask_info["x"],
1077                "y": mask_info["y"],
1078            },
1079        )
1080        # Extract a metadata column if desired
1081        if include_cell_id:
1082            metadata = pd.DataFrame({"cell_id": mask_info["id"]})
1083        else:
1084            metadata = None
1085        # If any additional properties were extracted, add them as features
1086        mask_info = mask_info.drop(columns=["id", "x", "y"], errors="ignore")
1087        if len(mask_info.columns) > 0:
1088            features = mask_info
1089            features.columns = [col.lower() for col in features.columns]
1090        else:
1091            features = None
1092        return EventArray(info, metadata, features)

Extract events from a labeled mask array, including metadata and features.

Parameters
  • mask: the mask to extract events from.
  • tile: the Tile object associated with this mask.
  • include_cell_id: whether to include the cell_id, or numerical mask label, as metadata in the EventArray.
  • images: the intensity images to extract features from.
  • image_labels: the labels for the intensity images.
  • properties: list of properties to extract in addition to the defaults.
Returns

EventArray corresponding to the mask labels.
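
A sketch of building an EventArray from a labeled segmentation mask. It assumes the optional imaging dependencies are installed, that dapi is a single-channel image array for one tile, and that tile is the matching csi_images.csi_tiles.Tile; the thresholding and labeling shown here (via scikit-image) are just one way to produce a mask:

    from skimage.measure import label

    # Hypothetical segmentation: threshold the DAPI image, then label connected components
    binary = dapi > dapi.mean()
    mask = label(binary)  # integer labels, 0 = background

    tile_events = EventArray.from_mask(
        mask,
        tile,
        include_cell_id=True,
        images=[dapi],
        image_labels=["DAPI"],
    )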

def save_csv(self, output_path: str) -> bool:
1094    def save_csv(self, output_path: str) -> bool:
1095        """
1096        Save the events to a CSV file, including metadata and features.
1097        :param output_path: the path to save the CSV file to; ".csv" is appended if missing.
1098        :return: whether the file exists after saving.
1099        """
1100        if not output_path.endswith(".csv"):
1101            output_path += ".csv"
1102        self.to_dataframe().to_csv(output_path, index=False)
1103        return os.path.exists(output_path)

Save the events to a CSV file, including metadata and features.

Parameters
  • output_path: the path to save the CSV file to; ".csv" is appended if missing.
Returns

whether the file exists after saving.
@classmethod
def load_csv( cls, input_path: str, metadata_prefix: str = 'metadata_', features_prefix: str = 'features_') -> Self:
1105    @classmethod
1106    def load_csv(
1107        cls,
1108        input_path: str,
1109        metadata_prefix: str = "metadata_",
1110        features_prefix: str = "features_",
1111    ) -> Self:
1112        """
1113        Load the events from a CSV file, including metadata and features.
1114        :param input_path: the path of the CSV file to load.
1115        :param metadata_prefix: the column-name prefix marking metadata columns.
1116        :param features_prefix: the column-name prefix marking feature columns.
1117        :return: the EventArray loaded from the CSV file.
1118        """
1119        # Load the CSV file
1120        df = pd.read_csv(input_path)
1121        return cls.from_dataframe(df, metadata_prefix, features_prefix)

Load the events from a CSV file, including metadata and features.

Parameters
  • input_path: the path of the CSV file to load.
  • metadata_prefix: the column-name prefix marking metadata columns.
  • features_prefix: the column-name prefix marking feature columns.
Returns

the EventArray loaded from the CSV file.
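
A sketch of persisting to CSV and reading it back, continuing the same hypothetical events object; the file path is illustrative:

    events.save_csv("/tmp/events.csv")
    reloaded = EventArray.load_csv("/tmp/events.csv")
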
def save_hdf5(self, output_path: str) -> bool:
1123    def save_hdf5(self, output_path: str) -> bool:
1124        """
1125        Save the events to an HDF5 file, including metadata and features.
1126        Uses the pandas-provided HDF5 functions for ease, and external compatibility,
1127        though these files are slightly harder to view in HDFView or similar.
1128        :param output_path: the path to save the HDF5 file to; ".hdf5" is appended if needed.
1129        :return: whether the file exists after saving.
1130        """
1131        if not output_path.endswith(".hdf5") and not output_path.endswith(".h5"):
1132            output_path += ".hdf5"
1133        # Open the output_path as an HDF5 file
1134        with pd.HDFStore(output_path) as store:
1135            # Store the dataframes in the HDF5 file
1136            if self.info is not None:
1137                store.put("info", self.info, index=False)
1138            if self.metadata is not None:
1139                store.put("metadata", self.metadata, index=False)
1140            if self.features is not None:
1141                store.put("features", self.features, index=False)
1142        return os.path.exists(output_path)

Save the events to an HDF5 file, including metadata and features. Uses the pandas-provided HDF5 functions for ease, and external compatibility, though these files are slightly harder to view in HDFView or similar.

Parameters
  • output_path: the path to save the HDF5 file to; ".hdf5" is appended if needed.
Returns

whether the file exists after saving.
@classmethod
def load_hdf5(cls, input_path: str) -> Self:
1144    @classmethod
1145    def load_hdf5(cls, input_path: str) -> Self:
1146        """
1147        Load the events from an HDF5 file, including metadata and features.
1148        :param input_path: the path of the HDF5 file to load.
1149        :return: the EventArray loaded from the HDF5 file.
1150        """
1151        # Open the input_path as an HDF5 file
1152        with pd.HDFStore(input_path, "r") as store:
1153            # Load the dataframes from the HDF5 file
1154            info = store.get("info") if "info" in store else None
1155            metadata = store.get("metadata") if "metadata" in store else None
1156            features = store.get("features") if "features" in store else None
1157        return cls(info=info, metadata=metadata, features=features)

Load the events from an HDF5 file, including metadata and features.

Parameters
  • input_path: the path of the HDF5 file to load.
Returns

the EventArray loaded from the HDF5 file.
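
The HDF5 path works the same way; a sketch, continuing the same hypothetical events object (pandas' HDFStore requires the optional PyTables package):

    events.save_hdf5("/tmp/events.hdf5")
    reloaded = EventArray.load_hdf5("/tmp/events.hdf5")
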
def save_ocular(self, output_path: str, event_type: str = 'cells'):
1159    def save_ocular(self, output_path: str, event_type: str = "cells"):
1160        """
1161        Save the events to an OCULAR file. Relies on the dataframe originating
1162        from an OCULAR file (same columns; duplicate metadata/info).
1163        :param output_path: the directory to write the OCULAR .csv and .rds files to.
1164        :param event_type: "cells" or "others"; determines the output file stub.
1165        :return:
1166        """
1167        if pyreadr is None:
1168            raise ModuleNotFoundError(
1169                "pyreadr not installed! Install pyreadr directly "
1170                "or run `pip install csi-images[rds]` to resolve."
1171            )
1172        if event_type == "cells":
1173            file_stub = "rc-final"
1174        elif event_type == "others":
1175            file_stub = "others-final"
1176        else:
1177            raise ValueError("Invalid event type. Must be cells or others.")
1178
1179        # Ensure good metadata
1180        metadata = pd.DataFrame(
1181            {
1182                "slide_id": self.info["slide_id"],
1183                "frame_id": self.info["tile"] + 1,  # Convert to 1-indexed for R
1184                "cell_id": (
1185                    self.metadata["cell_id"]
1186                    if self.metadata is not None and "cell_id" in self.metadata.columns
1187                    else range(len(self.info))
1188                ),
1189                "cellx": self.info["x"],
1190                "celly": self.info["y"],
1191            }
1192        )
1193        if self.metadata is not None:
1194            metadata[self.metadata.columns] = self.metadata.copy()
1195
1196        # Check for the "ocular_interesting" column
1197        if event_type == "cells":
1198            if "ocular_interesting" in metadata.columns:
1199                interesting_rows = metadata["ocular_interesting"].to_numpy(dtype=bool)
1200            elif "hcpc" in metadata.columns:
1201                # Interesting cells don't get an hcpc designation, leaving them as -1
1202                interesting_rows = (
1203                    metadata["hcpc"].to_numpy() == -1
1204                )  # interesting cells
1205            else:
1206                interesting_rows = []
1207            if sum(interesting_rows) > 0:
1208                # Split the metadata into interesting and regular
1209                interesting_events = self.rows(interesting_rows)
1210                interesting_df = pd.concat(
1211                    [interesting_events.features, interesting_events.metadata], axis=1
1212                )
1213                data_events = self.rows(~interesting_rows)
1214                data_df = pd.concat(
1215                    [data_events.features, data_events.metadata], axis=1
1216                )
1217                data_df = data_df.drop(columns=["ocular_interesting"], errors="ignore")
1218
1219                # Drop particular columns for "interesting"
1220                interesting_df = interesting_df.drop(
1221                    [
1222                        "clust",
1223                        "hcpc",
1224                        "frame_id",
1225                        "cell_id",
1226                        "unique_id",
1227                        "ocular_interesting",
1228                    ],
1229                    axis=1,
1230                    errors="ignore",
1231                )
1232                # Save both .csv and .rds
1233                interesting_stub = os.path.join(output_path, "ocular_interesting")
1234                interesting_df.to_csv(f"{interesting_stub}.csv")
1235                # Suppress pandas FutureWarning
1236                with warnings.catch_warnings():
1237                    warnings.simplefilter(action="ignore", category=FutureWarning)
1238                    pyreadr.write_rds(f"{interesting_stub}.rds", interesting_df)
1239            else:
1240                data_df = pd.concat([self.features, metadata], axis=1)
1241        else:
1242            # Get all data and reset_index (will copy it)
1243            data_df = pd.concat([self.features, metadata], axis=1)
1244
1245        # Split based on cluster number to conform to *-final[1-4].rds
1246        n_clusters = max(data_df["clust"]) + 1
1247        split_idx = [round(i * n_clusters / 4) for i in range(5)]
1248        for i in range(4):
1249            subset = (split_idx[i] <= data_df["clust"]) & (
1250                data_df["clust"] < split_idx[i + 1]
1251            )
1252            data_df.loc[subset, "hcpc"] = i + 1
1253            subset = data_df[subset].reset_index(drop=True)
1254            # Suppress pandas FutureWarning
1255            with warnings.catch_warnings():
1256                warnings.simplefilter(action="ignore", category=FutureWarning)
1257                pyreadr.write_rds(
1258                    os.path.join(output_path, f"{file_stub}{i+1}.rds"), subset
1259                )
1260
1261        # Create new example cell strings
1262        data_df["example_cell_id"] = (
1263            data_df["slide_id"]
1264            + " "
1265            + data_df["frame_id"].astype(str)
1266            + " "
1267            + data_df["cell_id"].astype(str)
1268            + " "
1269            + data_df["cellx"].astype(int).astype(str)
1270            + " "
1271            + data_df["celly"].astype(int).astype(str)
1272        )
1273        # Find averagable data columns
1274        if "cellcluster_id" in data_df.columns:
1275            end_idx = data_df.columns.get_loc("cellcluster_id")
1276        else:
1277            end_idx = data_df.columns.get_loc("slide_id")
1278        avg_cols = data_df.columns[:end_idx].tolist()
1279        # Group by cluster and average
1280        data_df = data_df.groupby("clust").agg(
1281            **{col: (col, "mean") for col in avg_cols},
1282            count=("clust", "size"),  # count rows in each cluster
1283            example_cells=("example_cell_id", lambda x: ",".join(x)),
1284            hcpc=("hcpc", lambda x: x.iloc[0]),
1285        )
1286        data_df = data_df.reset_index()  # Do NOT drop, index is "clust"
1287        # Create new columns
1288        metadata = pd.DataFrame(
1289            {
1290                "count": data_df["count"],
1291                "example_cells": data_df["example_cells"],
1292                "clust": data_df["clust"].astype(int),
1293                "hcpc": data_df["hcpc"].astype(int),
1294                "id": data_df["clust"].astype(int).astype(str),
1295                "cccluster": "0",  # Dummy value
1296                "ccdistance": 0.0,  # Dummy value
1297                "rownum": list(range(len(data_df))),
1298                "framegroup": 0,  # Dummy value
1299            }
1300        )
1301        # Need to pad the features to 761 columns, as per OCULAR report needs
1302        additional_columns = range(len(avg_cols), 761)
1303        if len(additional_columns) > 0:
1304            padding = pd.DataFrame(
1305                np.zeros((len(data_df), len(additional_columns))),
1306                columns=[f"pad{i}" for i in additional_columns],
1307            )
1308            data_df = pd.concat([data_df[avg_cols], padding, metadata], axis=1)
1309        else:
1310            data_df = pd.concat([data_df[avg_cols], metadata], axis=1)
1311
1312        # Save the cluster data
1313        data_df.to_csv(os.path.join(output_path, f"{file_stub}.csv"))
1314        # Suppress pandas FutureWarning
1315        with warnings.catch_warnings():
1316            warnings.simplefilter(action="ignore", category=FutureWarning)
1317            pyreadr.write_rds(os.path.join(output_path, f"{file_stub}.rds"), data_df)

Save the events to an OCULAR file. Relies on the dataframe originating from an OCULAR file (same columns; duplicate metadata/info).

Parameters
  • output_path: the directory to write the OCULAR .csv and .rds files to.
  • event_type: "cells" or "others"; determines the output file stub.
Returns
@classmethod
def load_ocular( cls, input_path: str, event_type='cells', cell_data_files=('rc-final1.rds', 'rc-final2.rds', 'rc-final3.rds', 'rc-final4.rds', 'ocular_interesting.rds'), others_data_files=('others-final1.rds', 'others-final2.rds', 'others-final3.rds', 'others-final4.rds'), atlas_data_files=('ocular_interesting.rds', 'ocular_not_interesting.rds'), drop_common_events=True) -> Self:
1319    @classmethod
1320    def load_ocular(
1321        cls,
1322        input_path: str,
1323        event_type="cells",
1324        cell_data_files=(
1325            "rc-final1.rds",
1326            "rc-final2.rds",
1327            "rc-final3.rds",
1328            "rc-final4.rds",
1329            "ocular_interesting.rds",
1330        ),
1331        others_data_files=(
1332            "others-final1.rds",
1333            "others-final2.rds",
1334            "others-final3.rds",
1335            "others-final4.rds",
1336        ),
1337        atlas_data_files=(
1338            "ocular_interesting.rds",
1339            "ocular_not_interesting.rds",
1340        ),
1341        drop_common_events=True,
1342    ) -> Self:
1343        """
1344        Load events from OCULAR .rds output files, including metadata and features.
1345        :param input_path: the OCULAR output directory, or a specific .rds file in it.
1346        :param event_type: "cells" or "others"; selects which set of files to load.
1347        :param cell_data_files: the .rds files to load when event_type is "cells".
1348        :param others_data_files: the .rds files to load when event_type is "others".
1349        :param atlas_data_files: atlas files whose "common_cell" events may be dropped.
1350        :param drop_common_events: whether to drop events catalogued as "common_cell".
1351        :return: an EventArray containing the loaded OCULAR events.
1352        """
1353        if pyreadr is None:
1354            raise ModuleNotFoundError(
1355                "pyreadr not installed! Install pyreadr directly "
1356                "or run `pip install csi-images[rds]` to resolve."
1357            )
1358        # Check if the input path is a directory or a file
1359        if os.path.isfile(input_path):
1360            data_files = [os.path.basename(input_path)]
1361            input_path = os.path.dirname(input_path)
1362        elif event_type == "cells":
1363            data_files = cell_data_files
1364        elif event_type == "others":
1365            data_files = others_data_files
1366        else:
1367            raise ValueError("Invalid event type.")
1368
1369        # Load the data from the OCULAR files
1370        file_data = {}
1371        for file in data_files:
1372            file_path = os.path.join(input_path, file)
1373            if not os.path.isfile(file_path):
1374                warnings.warn(f"{file} not found in {input_path}")
1375                continue
1376            file_data[file] = pyreadr.read_r(file_path)
1377            # Get the DataFrame associated with None (pyreadr dict quirk)
1378            file_data[file] = file_data[file][None]
1379            if len(file_data[file]) == 0:
1380                # File gets dropped from the dict
1381                file_data.pop(file)
1382                warnings.warn(f"{file} has no cells")
1383                continue
1384
1385            # Drop common cells if requested and in this file
1386            if (
1387                file in atlas_data_files
1388                and drop_common_events
1389                and "catalogue_classification" in file_data[file]
1390            ):
1391                common_cell_indices = (
1392                    file_data[file]["catalogue_classification"] == "common_cell"
1393                )
1394                file_data[file] = file_data[file][~common_cell_indices]
1395
1396            if len(file_data[file]) == 0:
1397                # File gets dropped from the dict
1398                file_data.pop(file)
1399                warnings.warn(f"{file} has no cells after dropping common cells")
1400                continue
1401
1402            # Extract frame_id and cell_id
1403            # DAPI- events already have frame_id cell_id outside rowname
1404            if event_type == "cells" and "frame_id" not in file_data[file].columns:
1405                file_data[file]["rowname"] = file_data[file]["rowname"].astype("str")
1406                # get frame_id cell_id from rownames column and split into two columns
1407                split_res = file_data[file]["rowname"].str.split(" ", n=1, expand=True)
1408                if len(split_res.columns) != 2:
1409                    warnings.warn(
1410                        f'Expected "frame_id cell_id" but got {file_data[file]["rowname"]}'
1411                    )
1412                # then assign it back to the dataframe
1413                file_data[file][["frame_id", "cell_id"]] = split_res.astype("int")
1414            # Ensure frame_id and cell_id are integers
1415            file_data[file]["frame_id"] = file_data[file]["frame_id"].astype("int")
1416            file_data[file]["cell_id"] = file_data[file]["cell_id"].astype("int")
1417            # reset indexes since they can cause NaN values in concat
1418            file_data[file] = file_data[file].reset_index(drop=True)
1419
1420        # Merge the data from all files
1421        if len(file_data) == 0:
1422            return EventArray()
1423        elif len(file_data) == 1:
1424            data = [file_data[file] for file in file_data.keys()][0]
1425        else:
1426            data = pd.concat(file_data.values())
1427
1428        # Others is missing the "slide_id". Insert it right before "frame_id" column
1429        if event_type == "others" and "slide_id" not in data.columns:
1430            if os.path.basename(input_path) == "ocular":
1431                slide_id = os.path.basename(os.path.dirname(input_path))
1432            else:
1433                slide_id = "UNKNOWN"
1434            data.insert(data.columns.get_loc("frame_id"), "slide_id", slide_id)
1435
1436        # Sort according to ascending cell_id to keep the original, which is in manual_df
1437        data = data.sort_values(by=["cell_id"], ascending=True)
1438        # Filter out duplicates by x & y
1439        data = data.assign(
1440            unique_id=data["slide_id"]
1441            + "_"
1442            + data["frame_id"].astype(str)
1443            + "_"
1444            + data["cellx"].astype(int).astype(str)
1445            + "_"
1446            + data["celly"].astype(int).astype(str)
1447        )
1448        data = data.drop_duplicates(subset=["unique_id"], keep="first")
1449        # Normal unique_id is with cell_id
1450        data = data.assign(
1451            unique_id=data["slide_id"]
1452            + "_"
1453            + data["frame_id"].astype(str)
1454            + "_"
1455            + data["cell_id"].astype(str)
1456        )
1457        data = data.reset_index(drop=True)
1458        # All columns up to "slide_id" are features; drop the "slide_id"
1459        features = data.loc[:, :"slide_id"].iloc[:, :-1]
1460        data = data.loc[:, "slide_id":]
1461        # Grab the info columns
1462        info = data[["slide_id", "frame_id", "cellx", "celly"]]
1463        info.columns = ["slide_id", "tile", "x", "y"]
1464        info = info.assign(roi=0)  # OCULAR only works on 1 ROI, as far as known
1465        info = info[["slide_id", "tile", "roi", "x", "y"]]
1466        # Metadata has duplicate columns for later convenience
1467        metadata = data
1468        # Certain columns tend to be problematic with mixed data formats...
1469        for col in ["TRITC", "CY5", "FITC"]:
1470            if col in metadata:
1471                labels = {
1472                    "False": False,
1473                    "True": True,
1474                    "FALSE": False,
1475                    "TRUE": True,
1476                    False: False,
1477                    True: True,
1478                }
1479                metadata[col] = metadata[col].map(labels).astype(bool)
1480        for col in ["catalogue_id", "catalogue_distance", "clust", "hcpc"]:
1481            if col in metadata:
1482                metadata[col] = metadata[col].fillna(-1).astype(int)
1483        info["tile"] = info["tile"] - 1  # Convert to 0-based indexing
1484        return EventArray(info, metadata, features)

Load events from OCULAR .rds output files, including metadata and features.

Parameters
  • input_path: the OCULAR output directory, or a specific .rds file in it.
  • event_type: "cells" or "others"; selects which set of files to load.
  • cell_data_files: the .rds files to load when event_type is "cells".
  • others_data_files: the .rds files to load when event_type is "others".
  • atlas_data_files: atlas files whose "common_cell" events may be dropped.
  • drop_common_events: whether to drop events catalogued as "common_cell".
Returns

an EventArray containing the loaded OCULAR events.
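
A usage sketch, assuming an OCULAR output directory containing the rc-final[1-4].rds (and, for "others", others-final[1-4].rds) files; the path is illustrative:

    cells = EventArray.load_ocular("/path/to/slide/ocular", event_type="cells")
    others = EventArray.load_ocular("/path/to/slide/ocular", event_type="others")
    print(len(cells.info), "cells and", len(others.info), "other events")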