csi_images.csi_scans

Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.

  1"""
  2Contains the Scan class, which holds important metadata from a scan. This metadata
  3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
  4object can also be loaded from a .czi file or a .txt file.
  5"""
  6
  7import os
  8import enum
  9import datetime
 10import zoneinfo
 11import typing
 12
 13import yaml
 14import json
 15
 16try:
 17    import aicspylibczi
 18except ImportError:
 19    aicspylibczi = None
 20
 21
 22class Scan(yaml.YAMLObject):
 23    """
 24    Class that composes a whole scan's metadata. Contains some universal data,
 25    plus lists for channels and ROIs.
 26
 27    .. include:: ../docs/csi_images/coordinate_systems.md
 28    """
 29
    # Tag used by yaml.YAMLObject to identify serialized Scan objects.
    # NOTE(review): the prefix here ("csi_utils.scans") differs from the one used
    # by the nested classes ("csi_utils.csi_scans"); presumably kept so existing
    # files keep loading - confirm before changing either.
    yaml_tag = "csi_utils.scans.Scan"

    class Type(enum.Enum):
        """Scanner models; the values are used as prefixes of scanner IDs."""

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name per scanner type; appended when a folder is
    # passed to the save/load methods instead of a file path
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # strptime/strftime formats for each scanner's timestamps; load_czi and
    # load_txt both write start_date/end_date using the AXIOSCAN7 format
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    # Keys are the names found in slideinfo.txt; values are the names stored in
    # Scan.Channel.name
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 56
 57    class Channel(yaml.YAMLObject):
 58        """
 59        Class that comprises a channel; we usually have multiple (2-5) per scan.
 60        Contains three fields:
 61        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 62        - exposure_ms: the exposure time to capture a frame in milliseconds
 63        - intensity: the light intensity used OR the gain applied to the channel
 64        """
 65
 66        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 67
 68        def __init__(
 69            self,
 70            name: str = "",
 71            exposure_ms: float = -1.0,
 72            intensity: float = -1.0,
 73        ):
 74            self.name = name
 75            self.exposure_ms = exposure_ms
 76            self.intensity = intensity
 77
 78        def __repr__(self):
 79            return yaml.dump(self, sort_keys=False)
 80
 81        def __eq__(self, other):
 82            return self.__repr__() == other.__repr__()
 83
 84    class ROI(yaml.YAMLObject):
 85        """
 86        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 87        """
 88
 89        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 90
 91        def __init__(
 92            self,
 93            origin_x_um: int = -1,
 94            origin_y_um: int = -1,
 95            width_um: int = -1,
 96            height_um: int = -1,
 97            tile_rows: int = -1,
 98            tile_cols: int = -1,
 99            focus_points=None,
100        ):
101            if focus_points is None:
102                focus_points = []
103            self.origin_x_um = origin_x_um
104            self.origin_y_um = origin_y_um
105            self.width_um = width_um
106            self.height_um = height_um
107            self.tile_rows = tile_rows
108            self.tile_cols = tile_cols
109            self.focus_points = focus_points
110
111        def __repr__(self):
112            return yaml.dump(self, sort_keys=False)
113
114        def __eq__(self, other):
115            return self.__repr__() == other.__repr__()
116
117        def similar(self, other):
118            return (
119                self.origin_y_um == other.origin_y_um
120                and self.origin_x_um == other.origin_x_um
121                and self.width_um == other.width_um
122                and self.height_um == other.height_um
123                and self.tile_rows == other.tile_rows
124                and self.tile_cols == other.tile_cols
125            )
126
    def __init__(
        self,
        slide_id: str = "",
        path: str = "",
        start_date: str = "",
        end_date: str = "",
        scan_time_s: int = -1,
        scanner_id: str = "",
        tray: int = -1,
        slot: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_overlap_proportion: float = -1,
        channels: "list[Scan.Channel] | None" = None,
        roi: "list[Scan.ROI] | None" = None,
    ):
        """
        Create a Scan; every field has a default, so a bare Scan() can be
        created and filled in afterwards (as the load_* methods do).
        :param slide_id: the slide's ID
        :param path: path associated with the scan
        :param start_date: when the scan started
        :param end_date: when the scan ended
        :param scan_time_s: scan duration in seconds
        :param scanner_id: our ID for the scanner
        :param tray: tray number
        :param slot: slot number
        :param camera: camera name
        :param objective: objective description
        :param pixel_size_um: pixel size in microns
        :param tile_width_px: tile width in pixels
        :param tile_height_px: tile height in pixels
        :param tile_overlap_proportion: overlap between neighboring tiles
        :param channels: list of Scan.Channel objects; None means an empty list
        :param roi: list of Scan.ROI objects; None means an empty list
        """
        # None defaults avoid sharing one mutable list between instances
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        # NOTE(review): the order of these assignments presumably determines the
        # key order of the YAML dump (sort_keys=False), which __eq__ relies on
        self.slide_id = slide_id
        self.path = path
        self.start_date = start_date
        self.end_date = end_date
        self.scan_time_s = scan_time_s
        self.scanner_id = scanner_id
        self.tray = tray
        self.slot = slot
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi
166
167    def __repr__(self):
168        return yaml.dump(self, sort_keys=False)
169
170    def __eq__(self, other):
171        return self.__repr__() == other.__repr__()
172
173    def has_same_profile(self, other):
174        return (
175            self.camera == other.camera
176            and self.objective == other.objective
177            and self.pixel_size_um == other.pixel_size_um
178            and self.tile_width_px == other.tile_width_px
179            and self.tile_height_px == other.tile_height_px
180            and self.tile_overlap_proportion == other.tile_overlap_proportion
181            and self.channels == other.channels
182            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
183        )
184
185    def get_channel_names(self) -> list[str]:
186        """
187        Get the channel names in the scan's channel order.
188        :return: a list of channel names.
189        """
190        return [channel.name for channel in self.channels]
191
192    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
193        """
194        Given a list of channel names, return the corresponding indices in the scan's
195        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
196        actual AlexaFluor names (AF555, AF647, AF488).
197        If a list entry is None, it will return -1 for that entry.
198        :param channel_names: a list of channel names.
199        :return: a list of channel indices.
200        """
201        # Get the scan's channel name list
202        scan_channel_names = self.get_channel_names()
203
204        channel_indices = []
205        for name in channel_names:
206            # Convert any BZScanner channel names to the actual channel names
207            if name in self.BZSCANNER_CHANNEL_MAP:
208                name = self.BZSCANNER_CHANNEL_MAP[name]
209
210            # Append the corresponding index if possible
211            if name is None:
212                channel_indices.append(-1)
213            elif name in scan_channel_names:
214                channel_indices.append(scan_channel_names.index(name))
215            else:
216                raise ValueError(
217                    f"Channel name {name} not found in scan channels {scan_channel_names}"
218                )
219        return channel_indices
220
221    def save_yaml(self, output_path: str):
222        """
223        Write the Scan object to a .yaml file.
224        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
225        :return: nothing; will raise an error on failure
226        """
227        # Create necessary folders
228        output_path = os.path.abspath(output_path)
229        if os.path.splitext(output_path)[1] == ".yaml":
230            os.makedirs(os.path.dirname(output_path), exist_ok=True)
231        else:
232            os.makedirs(output_path, exist_ok=True)
233            # Add the standard metadata file name to the path if needed
234            output_path = os.path.join(
235                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
236            )
237
238        # Populate the file
239        with open(output_path, "w") as file:
240            yaml.dump(self, stream=file, sort_keys=False)
241
242    @classmethod
243    def load_yaml(cls, input_path: str) -> typing.Self:
244        """
245        Load a Scan object from a .yaml file.
246        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
247        :return: a Scan object
248        """
249        input_path = os.path.abspath(input_path)
250        if os.path.isdir(input_path):
251            input_path = os.path.join(
252                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
253            )
254        with open(input_path, "r") as file:
255            metadata_obj = yaml.load(file, Loader=yaml.Loader)
256        return metadata_obj
257
258    def to_dict(self) -> dict:
259        # Dump to json; then add indents and a top-level key
260        channels_json = json.dumps(
261            self.channels, default=lambda x: x.__dict__, indent=2
262        )
263        channels_json = "  ".join(channels_json.splitlines(True))
264        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
265
266        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
267        roi_json = "  ".join(roi_json.splitlines(True))
268        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
269
270        return {
271            "slide_id": self.slide_id,
272            "path": self.path,
273            "start_date": self.start_date,
274            "end_date": self.end_date,
275            "scan_time_s": self.scan_time_s,
276            "scanner_id": self.scanner_id,
277            "tray": self.tray,
278            "slot": self.slot,
279            "camera": self.camera,
280            "objective": self.objective,
281            "pixel_size_um": self.pixel_size_um,
282            "tile_width_px": self.tile_width_px,
283            "tile_height_px": self.tile_height_px,
284            "tile_overlap_proportion": self.tile_overlap_proportion,
285            "channels": channels_json,
286            "roi": roi_json,
287        }
288
289    @classmethod
290    def from_dict(cls, scan_dict) -> typing.Self:
291        local_timezone = zoneinfo.ZoneInfo("localtime")
292        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
293        result = cls(
294            slide_id=scan_dict["slide_id"],
295            path=scan_dict["path"],
296            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
297            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
298            scan_time_s=int(dt),
299            scanner_id=scan_dict["scanner_id"],
300            tray=scan_dict["tray"],
301            slot=scan_dict["slot"],
302            camera=scan_dict["camera"],
303            objective=scan_dict["objective"],
304            pixel_size_um=scan_dict["pixel_size"],
305            tile_width_px=scan_dict["tile_width"],
306            tile_height_px=scan_dict["tile_height"],
307            tile_overlap_proportion=scan_dict["tile_overlap"],
308        )
309        for channel_json in scan_dict["channels"]["data"]:
310            result.channels.append(
311                cls.Channel(
312                    name=channel_json["name"],
313                    exposure_ms=channel_json["exposure_ms"],
314                    intensity=channel_json["intensity"],
315                )
316            )
317        for roi_json in scan_dict["roi"]["data"]:
318            result.roi.append(
319                cls.ROI(
320                    origin_x_um=roi_json["origin_x_um"],
321                    origin_y_um=roi_json["origin_y_um"],
322                    width_um=roi_json["width_um"],
323                    height_um=roi_json["height_um"],
324                    tile_rows=roi_json["tile_rows"],
325                    tile_cols=roi_json["tile_cols"],
326                    focus_points=roi_json["focus_points"],
327                )
328            )
329        return result
330
331    @classmethod
332    def load_czi(cls, input_path: str) -> typing.Self:
333        """
334        :param input_path: the path to the .czi file
335        :return: a Scan object
336        """
337        if aicspylibczi is None:
338            raise ModuleNotFoundError(
339                "aicspylibczi library not installed. "
340                "Install csi-images with [imageio] option to resolve."
341            )
342
343        # Normalize paths
344        input_path = os.path.abspath(input_path)
345
346        # Read in metadata as XML elements
347        metadata_xml = aicspylibczi.CziFile(input_path).meta
348        # Read in shape metadata from binary
349        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
350
351        # Populate metadata
352        scan = cls()
353
354        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
355        if scan.slide_id is not None:
356            scan.slide_id = scan.slide_id.strip().upper()
357        # Map the raw scanner ID (service ID) to our IDs
358        scan.scanner_id = cls.SCANNER_IDS[
359            metadata_xml.find(".//Microscope/UserDefinedName").text
360        ]
361
362        # Extract start and finish datetimes
363        date = metadata_xml.find(".//Document/CreationDate").text
364        # Strip out sub-second precision
365        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
366        date_as_datetime = datetime.datetime.strptime(
367            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
368        )
369        scan.start_date = date_as_datetime.strftime(
370            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
371        )
372        scan.scan_time_s = round(
373            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
374        )
375        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
376        scan.end_date = date_as_datetime.strftime(
377            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
378        )
379
380        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
381        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
382
383        # Get camera and magnifying info
384        scan.camera = (
385            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
386        )["Name"]
387        magnification = metadata_xml.find(
388            ".//Objectives/Objective/NominalMagnification"
389        )
390        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
391        scan.objective = f"{magnification.text}x-{aperture.text}"
392        scan.pixel_size_um = (
393            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
394        )
395        # Round off the pixel size to nanometers; might not be optimal, but this
396        # gets rounded when we send it to the database anyways (to 7 places)
397        scan.pixel_size_um = round(scan.pixel_size_um, 3)
398
399        # Get tile information
400        # Note: X Y is untested, could be flipped. I always forget. Just don't use
401        # non-square frames and we're all good.
402        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
403        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
404
405        scan.tile_width_px = rois_shape[0]["X"][1]
406        scan.tile_height_px = rois_shape[0]["Y"][1]
407        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
408
409        # Extract channels and create Channel objects from them
410        channel_indices = []
411        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
412            channel_indices.append(int(channel.attrib["Id"][-1]))
413            intensity_xml = channel.find(".//Intensity")
414            if intensity_xml is None:
415                intensity = 0
416            else:
417                intensity = float(intensity_xml.text[:-2]) * 1e-2
418            scan.channels.append(
419                cls.Channel(
420                    name=channel.attrib["Name"].upper(),
421                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
422                    intensity=intensity,
423                )
424            )
425        # Make sure the channels are sorted
426        scan.channels = [
427            channel for _, channel in sorted(zip(channel_indices, scan.channels))
428        ]
429        # Verify that the shape corresponds to the channels
430        for roi in rois_shape:
431            if roi["C"][1] != len(scan.channels):
432                raise ValueError(
433                    f"Number of channels {len(scan.channels)} "
434                    f"is not the same as the number of channels in an ROI: "
435                    f"{roi['C'][1]}"
436                )
437
438        # Get the real ROI limits; the metadata is not always correct
439        limits_xml = metadata_xml.findall(".//AllowedScanArea")
440        limits = [
441            round(float(limits_xml[0].find("Center").text.split(",")[0])),
442            round(float(limits_xml[0].find("Center").text.split(",")[1])),
443            round(float(limits_xml[0].find("Size").text.split(",")[0])),
444            round(float(limits_xml[0].find("Size").text.split(",")[1])),
445        ]
446        # Convert to top-left and bottom-right
447        limits = [
448            round(limits[0] - limits[2] / 2),
449            round(limits[1] - limits[3] / 2),
450            round(limits[0] + limits[2] / 2),
451            round(limits[1] + limits[3] / 2),
452        ]
453
454        # Extract ROIs and create ROI objects from them
455        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
456        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
457        if len(rois_xml_metadata) != len(rois_shape):
458            raise ValueError(
459                f"Metadata and binary data from {input_path} "
460                f"do not match in number of ROIs"
461            )
462        # We need both to determine the number of rows/columns because the XML lies
463        roi_indices = []
464        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
465            name = roi_xml.attrib["Name"]
466            # Determine the index of this scene
467            scene_index = -1
468            for scene in scenes_xml_metadata:
469                if scene.attrib["Name"] == name:
470                    scene_index = int(scene.attrib["Index"])
471                    break
472            if scene_index == -1:
473                raise ValueError(f"ROI {name} does not correspond to any scenes")
474            else:
475                roi_indices.append(scene_index)
476            # Extract other metadata
477            roi_limits = [
478                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
479                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
480                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
481                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
482            ]
483            # Convert to top-left and bottom-right
484            roi_limits = [
485                round(roi_limits[0] - roi_limits[2] / 2),
486                round(roi_limits[1] - roi_limits[3] / 2),
487                round(roi_limits[0] + roi_limits[2] / 2),
488                round(roi_limits[1] + roi_limits[3] / 2),
489            ]
490            # Bound the ROI to the actual scan limits
491            roi_limits = [
492                max(roi_limits[0], limits[0]),
493                max(roi_limits[1], limits[1]),
494                min(roi_limits[2], limits[2]),
495                min(roi_limits[3], limits[3]),
496            ]
497
498            tile_rows = int(roi_xml.find("Rows").text)
499            # Current best way of reliably extracting; <Columns> entry can be wrong
500            if (roi_shape["M"][1] % tile_rows) != 0:
501                raise ValueError(
502                    f"The number of tiles {roi_shape['M'][1]} is not "
503                    f"divisible by the tile rows {tile_rows}; metadata "
504                    f"must be messed up. Thanks Zeiss"
505                )
506            else:
507                tile_cols = int(roi_shape["M"][1] / tile_rows)
508            # Support points are actually the relevant focus points for this ROI
509            focus_points = []
510            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
511                focus_points.append(
512                    [
513                        int(float(focus_point.find("X").text)),
514                        int(float(focus_point.find("Y").text)),
515                        int(float(focus_point.find("Z").text)),
516                    ]
517                )
518            # Strip all sub-micron precision, it does not matter
519            scan.roi.append(
520                cls.ROI(
521                    origin_x_um=roi_limits[0],
522                    origin_y_um=roi_limits[1],
523                    width_um=roi_limits[2] - roi_limits[0],
524                    height_um=roi_limits[3] - roi_limits[1],
525                    tile_rows=tile_rows,
526                    tile_cols=tile_cols,
527                    focus_points=focus_points,
528                )
529            )
530        # Sort based on the scene indices
531        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
532
533        return scan
534
535    @classmethod
536    def load_txt(cls, input_path: str) -> typing.Self:
537        """
538        Loads a Scan object from a .txt file, which originates from the BZScanner.
539        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
540        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
541        :return: a Scan object
542        """
543        # Set paths
544        input_path = os.path.abspath(input_path)
545        if os.path.isdir(input_path):
546            input_path = os.path.join(
547                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
548            )
549
550        # Read in metadata as a dict
551        with open(input_path, "r") as file:
552            metadata_contents = file.read()
553            # Read each line, splitting on the = sign
554            metadata_dict = {}
555            for line in metadata_contents.splitlines():
556                key, value = line.split("=")
557                metadata_dict[key] = value
558
559        # Populate metadata
560        scan = cls()
561
562        scan.slide_id = metadata_dict["SLIDEID"]
563        scan.slide_id = scan.slide_id.strip().upper()
564
565        scan.path = metadata_dict["SLIDEDIR"]
566
567        # Extract start and finish datetimes
568        date = metadata_dict["DATE"]
569        date_as_datetime = datetime.datetime.strptime(
570            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
571        )
572        date_as_datetime = date_as_datetime.astimezone(
573            zoneinfo.ZoneInfo("America/Los_Angeles")
574        )  # Hardcoded because BZScanners are here
575        scan.start_date = date_as_datetime.strftime(
576            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
577        )
578        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
579        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
580        scan.end_date = date_as_datetime.strftime(
581            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
582        )
583
584        # Map the raw scanner ID (service ID) to our IDs
585        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
586        scan.tray = 0  # only one tray in a BZScanner
587        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
588
589        # Get camera and magnifying info
590        scan.camera = ""
591        magnification = 10
592        aperture = 0  # TODO: find the actual aperture
593        scan.objective = f"{magnification}x-{aperture}"
594        scan.pixel_size_um = 0.591  # Estimated from image metadata
595
596        # Get tile information
597        scan.tile_width_px = 1362  # Known from image metadata
598        scan.tile_height_px = 1004  # Known from image metadata
599        scan.tile_overlap_proportion = 0
600
601        # Extract channels and create Channel objects from them
602        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
603            channel_settings = metadata_dict[channel].split(",")
604            if channel_settings[0] == "0":
605                continue
606            scan.channels.append(
607                cls.Channel(
608                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
609                    exposure_ms=float(channel_settings[1]),
610                    intensity=float(channel_settings[2]),
611                )
612            )
613
614        # Get focus points
615        focus_points = []
616        for i in range(33):
617            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
618            if focus_point[0] == "0":
619                break
620            focus_points.append(
621                [
622                    int(float(focus_point[1])),
623                    int(float(focus_point[2])),
624                    int(float(focus_point[3])),
625                ]
626            )
627
628        # In the BZScanner, the slide is vertical instead of horizontal
629        # We put in nominal values for the ROI, which is oriented vertically as well
630        tile_rows = 96
631        tile_cols = 24
632        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
633        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
634        origin_x_um = 2500 + round((20000 - roi_width) / 2)
635        origin_y_um = 2500 + round((58000 - roi_height) / 2)
636        scan.roi.append(
637            cls.ROI(
638                origin_x_um=origin_x_um,
639                origin_y_um=origin_y_um,
640                width_um=roi_width,
641                height_um=roi_height,
642                tile_rows=tile_rows,
643                tile_cols=tile_cols,
644                focus_points=focus_points,
645            )
646        )
647        return scan
648
649    @classmethod
650    def load_from_folder(cls, input_path: str) -> typing.Self:
651        """
652        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
653        Prefers scan.yaml if both exist.
654        :param input_path: /path/to/folder
655        :return: a Scan object
656        """
657        input_path = os.path.abspath(input_path)
658        if os.path.isfile(
659            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
660        ):
661            return cls.load_yaml(input_path)
662        elif os.path.isfile(
663            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
664        ):
665            return cls.load_txt(input_path)
666        else:
667            raise ValueError(
668                f"No scan metadata files "
669                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
670                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
671                f"{input_path}"
672            )
673        pass
674
675    @classmethod
676    def make_placeholder(
677        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
678    ) -> typing.Self:
679        """
680        Make a placeholder Scan object with only basic required information filled in.
681        :param slide_id: the slide ID
682        :param n_tile: the number of this tile, which will become the number of
683                       tiles in the scan
684        :param n_roi: the number of ROIs in the scan
685        :return: a Scan object
686        """
687        # Sanitize inputs here
688        slide_id = str(slide_id).strip().upper()
689        n_tile = int(n_tile)
690        n_roi = int(n_roi)
691        # Generate the object
692        scan = cls()
693        scan.slide_id = slide_id
694        scan.roi = [cls.ROI() for _ in range(n_roi)]
695        scan.roi[0].tile_cols = n_tile
696        return scan
class Scan(yaml.YAMLObject):
class Scan(yaml.YAMLObject):
    """
    Class that composes a whole scan's metadata. Contains some universal data,
    plus lists for channels and ROIs.

    .. include:: ../docs/csi_images/coordinate_systems.md
    """

    # Tag that identifies serialized Scan objects in YAML documents
    yaml_tag = "csi_utils.scans.Scan"

    class Type(enum.Enum):
        """The scanner types this class knows how to read metadata from."""

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Name of the metadata file looked up in a scan folder, per scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # strptime/strftime format for timestamps, per scanner type; only the
    # AXIOSCAN7 format carries a UTC offset (%z)
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    # Keys are the names used by the BZScanner; values are the names stored
    # in Channel objects
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 57
 58    class Channel(yaml.YAMLObject):
 59        """
 60        Class that comprises a channel; we usually have multiple (2-5) per scan.
 61        Contains three fields:
 62        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 63        - exposure_ms: the exposure time to capture a frame in milliseconds
 64        - intensity: the light intensity used OR the gain applied to the channel
 65        """
 66
 67        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 68
 69        def __init__(
 70            self,
 71            name: str = "",
 72            exposure_ms: float = -1.0,
 73            intensity: float = -1.0,
 74        ):
 75            self.name = name
 76            self.exposure_ms = exposure_ms
 77            self.intensity = intensity
 78
 79        def __repr__(self):
 80            return yaml.dump(self, sort_keys=False)
 81
 82        def __eq__(self, other):
 83            return self.__repr__() == other.__repr__()
 84
 85    class ROI(yaml.YAMLObject):
 86        """
 87        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 88        """
 89
 90        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 91
 92        def __init__(
 93            self,
 94            origin_x_um: int = -1,
 95            origin_y_um: int = -1,
 96            width_um: int = -1,
 97            height_um: int = -1,
 98            tile_rows: int = -1,
 99            tile_cols: int = -1,
100            focus_points=None,
101        ):
102            if focus_points is None:
103                focus_points = []
104            self.origin_x_um = origin_x_um
105            self.origin_y_um = origin_y_um
106            self.width_um = width_um
107            self.height_um = height_um
108            self.tile_rows = tile_rows
109            self.tile_cols = tile_cols
110            self.focus_points = focus_points
111
112        def __repr__(self):
113            return yaml.dump(self, sort_keys=False)
114
115        def __eq__(self, other):
116            return self.__repr__() == other.__repr__()
117
118        def similar(self, other):
119            return (
120                self.origin_y_um == other.origin_y_um
121                and self.origin_x_um == other.origin_x_um
122                and self.width_um == other.width_um
123                and self.height_um == other.height_um
124                and self.tile_rows == other.tile_rows
125                and self.tile_cols == other.tile_cols
126            )
127
128    def __init__(
129        self,
130        slide_id: str = "",
131        path: str = "",
132        start_date: str = "",
133        end_date: str = "",
134        scan_time_s: int = -1,
135        scanner_id: str = "",
136        tray: int = -1,
137        slot: int = -1,
138        camera: str = "",
139        objective: str = "",
140        pixel_size_um: float = -1.0,
141        tile_width_px: int = -1,
142        tile_height_px: int = -1,
143        tile_overlap_proportion: int = -1,
144        channels=None,
145        roi=None,
146    ):
147        if roi is None:
148            roi = []
149        if channels is None:
150            channels = []
151        self.slide_id = slide_id
152        self.path = path
153        self.start_date = start_date
154        self.end_date = end_date
155        self.scan_time_s = scan_time_s
156        self.scanner_id = scanner_id
157        self.tray = tray
158        self.slot = slot
159        self.camera = camera
160        self.objective = objective
161        self.pixel_size_um = pixel_size_um
162        self.tile_width_px = tile_width_px
163        self.tile_height_px = tile_height_px
164        self.tile_overlap_proportion = tile_overlap_proportion
165        self.channels = channels
166        self.roi = roi
167
168    def __repr__(self):
169        return yaml.dump(self, sort_keys=False)
170
171    def __eq__(self, other):
172        return self.__repr__() == other.__repr__()
173
174    def has_same_profile(self, other):
175        return (
176            self.camera == other.camera
177            and self.objective == other.objective
178            and self.pixel_size_um == other.pixel_size_um
179            and self.tile_width_px == other.tile_width_px
180            and self.tile_height_px == other.tile_height_px
181            and self.tile_overlap_proportion == other.tile_overlap_proportion
182            and self.channels == other.channels
183            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
184        )
185
186    def get_channel_names(self) -> list[str]:
187        """
188        Get the channel names in the scan's channel order.
189        :return: a list of channel names.
190        """
191        return [channel.name for channel in self.channels]
192
193    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
194        """
195        Given a list of channel names, return the corresponding indices in the scan's
196        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
197        actual AlexaFluor names (AF555, AF647, AF488).
198        If a list entry is None, it will return -1 for that entry.
199        :param channel_names: a list of channel names.
200        :return: a list of channel indices.
201        """
202        # Get the scan's channel name list
203        scan_channel_names = self.get_channel_names()
204
205        channel_indices = []
206        for name in channel_names:
207            # Convert any BZScanner channel names to the actual channel names
208            if name in self.BZSCANNER_CHANNEL_MAP:
209                name = self.BZSCANNER_CHANNEL_MAP[name]
210
211            # Append the corresponding index if possible
212            if name is None:
213                channel_indices.append(-1)
214            elif name in scan_channel_names:
215                channel_indices.append(scan_channel_names.index(name))
216            else:
217                raise ValueError(
218                    f"Channel name {name} not found in scan channels {scan_channel_names}"
219                )
220        return channel_indices
221
222    def save_yaml(self, output_path: str):
223        """
224        Write the Scan object to a .yaml file.
225        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
226        :return: nothing; will raise an error on failure
227        """
228        # Create necessary folders
229        output_path = os.path.abspath(output_path)
230        if os.path.splitext(output_path)[1] == ".yaml":
231            os.makedirs(os.path.dirname(output_path), exist_ok=True)
232        else:
233            os.makedirs(output_path, exist_ok=True)
234            # Add the standard metadata file name to the path if needed
235            output_path = os.path.join(
236                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
237            )
238
239        # Populate the file
240        with open(output_path, "w") as file:
241            yaml.dump(self, stream=file, sort_keys=False)
242
243    @classmethod
244    def load_yaml(cls, input_path: str) -> typing.Self:
245        """
246        Load a Scan object from a .yaml file.
247        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
248        :return: a Scan object
249        """
250        input_path = os.path.abspath(input_path)
251        if os.path.isdir(input_path):
252            input_path = os.path.join(
253                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
254            )
255        with open(input_path, "r") as file:
256            metadata_obj = yaml.load(file, Loader=yaml.Loader)
257        return metadata_obj
258
259    def to_dict(self) -> dict:
260        # Dump to json; then add indents and a top-level key
261        channels_json = json.dumps(
262            self.channels, default=lambda x: x.__dict__, indent=2
263        )
264        channels_json = "  ".join(channels_json.splitlines(True))
265        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
266
267        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
268        roi_json = "  ".join(roi_json.splitlines(True))
269        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
270
271        return {
272            "slide_id": self.slide_id,
273            "path": self.path,
274            "start_date": self.start_date,
275            "end_date": self.end_date,
276            "scan_time_s": self.scan_time_s,
277            "scanner_id": self.scanner_id,
278            "tray": self.tray,
279            "slot": self.slot,
280            "camera": self.camera,
281            "objective": self.objective,
282            "pixel_size_um": self.pixel_size_um,
283            "tile_width_px": self.tile_width_px,
284            "tile_height_px": self.tile_height_px,
285            "tile_overlap_proportion": self.tile_overlap_proportion,
286            "channels": channels_json,
287            "roi": roi_json,
288        }
289
290    @classmethod
291    def from_dict(cls, scan_dict) -> typing.Self:
292        local_timezone = zoneinfo.ZoneInfo("localtime")
293        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
294        result = cls(
295            slide_id=scan_dict["slide_id"],
296            path=scan_dict["path"],
297            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
298            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
299            scan_time_s=int(dt),
300            scanner_id=scan_dict["scanner_id"],
301            tray=scan_dict["tray"],
302            slot=scan_dict["slot"],
303            camera=scan_dict["camera"],
304            objective=scan_dict["objective"],
305            pixel_size_um=scan_dict["pixel_size"],
306            tile_width_px=scan_dict["tile_width"],
307            tile_height_px=scan_dict["tile_height"],
308            tile_overlap_proportion=scan_dict["tile_overlap"],
309        )
310        for channel_json in scan_dict["channels"]["data"]:
311            result.channels.append(
312                cls.Channel(
313                    name=channel_json["name"],
314                    exposure_ms=channel_json["exposure_ms"],
315                    intensity=channel_json["intensity"],
316                )
317            )
318        for roi_json in scan_dict["roi"]["data"]:
319            result.roi.append(
320                cls.ROI(
321                    origin_x_um=roi_json["origin_x_um"],
322                    origin_y_um=roi_json["origin_y_um"],
323                    width_um=roi_json["width_um"],
324                    height_um=roi_json["height_um"],
325                    tile_rows=roi_json["tile_rows"],
326                    tile_cols=roi_json["tile_cols"],
327                    focus_points=roi_json["focus_points"],
328                )
329            )
330        return result
331
    @classmethod
    def load_czi(cls, input_path: str) -> typing.Self:
        """
        Load a Scan object from the metadata of a .czi file, combining the file's
        XML metadata with the dimension shapes reported by aicspylibczi.
        The path attribute is not set here and keeps its default.
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises KeyError: if the scanner's name is not a key of SCANNER_IDS
        :raises ValueError: if the channel or ROI counts in the XML disagree with
                            the shapes, an ROI has no matching scene, or an ROI's
                            tile count is not divisible by its row count
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        # NOTE(review): the file is wrapped in a CziFile twice, once per read
        # Read in metadata as XML elements
        metadata_xml = aicspylibczi.CziFile(input_path).meta
        # Read in shape metadata from binary
        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision
        # Keeps the text before the first "." and the text from the last "-" or
        # "+" onwards; presumably the latter is the UTC offset - TODO confirm
        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_date = date_as_datetime.strftime(
            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        # presumably AcquisitionDuration is in milliseconds - TODO confirm
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        # The end date is derived: start date plus the scan duration
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_date = date_as_datetime.strftime(
            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )

        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # Only the last character of the position text is used as the slot number
        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        scan.objective = f"{magnification.text}x-{aperture.text}"
        # presumably the XML value is in meters, hence the 1e6 factor - TODO confirm
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        # NOTE(review): tile_info is parsed here but not used again in this method
        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        # Tile size comes from the first ROI's shape rather than from the XML
        scan.tile_width_px = rois_shape[0]["X"][1]
        scan.tile_height_px = rois_shape[0]["Y"][1]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # Only the last character of the Id is used as the channel's index
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drops the last two characters, then scales by 1e-2; presumably
                # the text is a percentage with a unit suffix - TODO confirm
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    # presumably ExposureTime is in nanoseconds - TODO confirm
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                )
            )
        # Make sure the channels are sorted
        scan.channels = [
            channel for _, channel in sorted(zip(channel_indices, scan.channels))
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        # Only the first AllowedScanArea element is used
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        limits = [
            round(float(limits_xml[0].find("Center").text.split(",")[0])),
            round(float(limits_xml[0].find("Center").text.split(",")[1])),
            round(float(limits_xml[0].find("Size").text.split(",")[0])),
            round(float(limits_xml[0].find("Size").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        limits = [
            round(limits[0] - limits[2] / 2),
            round(limits[1] - limits[3] / 2),
            round(limits[0] + limits[2] / 2),
            round(limits[1] + limits[3] / 2),
        ]

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            else:
                roi_indices.append(scene_index)
            # Extract other metadata
            roi_limits = [
                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
            ]
            # Convert to top-left and bottom-right
            roi_limits = [
                round(roi_limits[0] - roi_limits[2] / 2),
                round(roi_limits[1] - roi_limits[3] / 2),
                round(roi_limits[0] + roi_limits[2] / 2),
                round(roi_limits[1] + roi_limits[3] / 2),
            ]
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            # The column count is the shape's "M" size divided by the row count
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            else:
                tile_cols = int(roi_shape["M"][1] / tile_rows)
            # Support points are actually the relevant focus points for this ROI
            focus_points = []
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
                focus_points.append(
                    [
                        int(float(focus_point.find("X").text)),
                        int(float(focus_point.find("Y").text)),
                        int(float(focus_point.find("Z").text)),
                    ]
                )
            # Strip all sub-micron precision, it does not matter
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices
        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

        return scan
535
536    @classmethod
537    def load_txt(cls, input_path: str) -> typing.Self:
538        """
539        Loads a Scan object from a .txt file, which originates from the BZScanner.
540        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
541        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
542        :return: a Scan object
543        """
544        # Set paths
545        input_path = os.path.abspath(input_path)
546        if os.path.isdir(input_path):
547            input_path = os.path.join(
548                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
549            )
550
551        # Read in metadata as a dict
552        with open(input_path, "r") as file:
553            metadata_contents = file.read()
554            # Read each line, splitting on the = sign
555            metadata_dict = {}
556            for line in metadata_contents.splitlines():
557                key, value = line.split("=")
558                metadata_dict[key] = value
559
560        # Populate metadata
561        scan = cls()
562
563        scan.slide_id = metadata_dict["SLIDEID"]
564        scan.slide_id = scan.slide_id.strip().upper()
565
566        scan.path = metadata_dict["SLIDEDIR"]
567
568        # Extract start and finish datetimes
569        date = metadata_dict["DATE"]
570        date_as_datetime = datetime.datetime.strptime(
571            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
572        )
573        date_as_datetime = date_as_datetime.astimezone(
574            zoneinfo.ZoneInfo("America/Los_Angeles")
575        )  # Hardcoded because BZScanners are here
576        scan.start_date = date_as_datetime.strftime(
577            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
578        )
579        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
580        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
581        scan.end_date = date_as_datetime.strftime(
582            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
583        )
584
585        # Map the raw scanner ID (service ID) to our IDs
586        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
587        scan.tray = 0  # only one tray in a BZScanner
588        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
589
590        # Get camera and magnifying info
591        scan.camera = ""
592        magnification = 10
593        aperture = 0  # TODO: find the actual aperture
594        scan.objective = f"{magnification}x-{aperture}"
595        scan.pixel_size_um = 0.591  # Estimated from image metadata
596
597        # Get tile information
598        scan.tile_width_px = 1362  # Known from image metadata
599        scan.tile_height_px = 1004  # Known from image metadata
600        scan.tile_overlap_proportion = 0
601
602        # Extract channels and create Channel objects from them
603        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
604            channel_settings = metadata_dict[channel].split(",")
605            if channel_settings[0] == "0":
606                continue
607            scan.channels.append(
608                cls.Channel(
609                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
610                    exposure_ms=float(channel_settings[1]),
611                    intensity=float(channel_settings[2]),
612                )
613            )
614
615        # Get focus points
616        focus_points = []
617        for i in range(33):
618            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
619            if focus_point[0] == "0":
620                break
621            focus_points.append(
622                [
623                    int(float(focus_point[1])),
624                    int(float(focus_point[2])),
625                    int(float(focus_point[3])),
626                ]
627            )
628
629        # In the BZScanner, the slide is vertical instead of horizontal
630        # We put in nominal values for the ROI, which is oriented vertically as well
631        tile_rows = 96
632        tile_cols = 24
633        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
634        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
635        origin_x_um = 2500 + round((20000 - roi_width) / 2)
636        origin_y_um = 2500 + round((58000 - roi_height) / 2)
637        scan.roi.append(
638            cls.ROI(
639                origin_x_um=origin_x_um,
640                origin_y_um=origin_y_um,
641                width_um=roi_width,
642                height_um=roi_height,
643                tile_rows=tile_rows,
644                tile_cols=tile_cols,
645                focus_points=focus_points,
646            )
647        )
648        return scan
649
650    @classmethod
651    def load_from_folder(cls, input_path: str) -> typing.Self:
652        """
653        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
654        Prefers scan.yaml if both exist.
655        :param input_path: /path/to/folder
656        :return: a Scan object
657        """
658        input_path = os.path.abspath(input_path)
659        if os.path.isfile(
660            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
661        ):
662            return cls.load_yaml(input_path)
663        elif os.path.isfile(
664            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
665        ):
666            return cls.load_txt(input_path)
667        else:
668            raise ValueError(
669                f"No scan metadata files "
670                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
671                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
672                f"{input_path}"
673            )
674        pass
675
676    @classmethod
677    def make_placeholder(
678        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
679    ) -> typing.Self:
680        """
681        Make a placeholder Scan object with only basic required information filled in.
682        :param slide_id: the slide ID
683        :param n_tile: the number of this tile, which will become the number of
684                       tiles in the scan
685        :param n_roi: the number of ROIs in the scan
686        :return: a Scan object
687        """
688        # Sanitize inputs here
689        slide_id = str(slide_id).strip().upper()
690        n_tile = int(n_tile)
691        n_roi = int(n_roi)
692        # Generate the object
693        scan = cls()
694        scan.slide_id = slide_id
695        scan.roi = [cls.ROI() for _ in range(n_roi)]
696        scan.roi[0].tile_cols = n_tile
697        return scan

Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.

Scans -> scan-level coordinate frames. Each scan maintains its own scan-level coordinate frames based on the scanner it was scanned with.

Slide-level coordinate frame (common, agreed-upon frame of reference for a slide).

Picture of the slide coordinate system, which assumes a slide placed horizontally
with the label on the left. The x-axis points to the right, and the y-axis points
down. The origin is at the top left corner. Key positions, such as the origin of the
slide's active area at (14500, 2500) micrometers and the bottom-right corner at
(72500, 22500) micrometers are displayed.

Scan( slide_id: str = '', path: str = '', start_date: str = '', end_date: str = '', scan_time_s: int = -1, scanner_id: str = '', tray: int = -1, slot: int = -1, camera: str = '', objective: str = '', pixel_size_um: float = -1.0, tile_width_px: int = -1, tile_height_px: int = -1, tile_overlap_proportion: int = -1, channels=None, roi=None)
128    def __init__(
129        self,
130        slide_id: str = "",
131        path: str = "",
132        start_date: str = "",
133        end_date: str = "",
134        scan_time_s: int = -1,
135        scanner_id: str = "",
136        tray: int = -1,
137        slot: int = -1,
138        camera: str = "",
139        objective: str = "",
140        pixel_size_um: float = -1.0,
141        tile_width_px: int = -1,
142        tile_height_px: int = -1,
143        tile_overlap_proportion: int = -1,
144        channels=None,
145        roi=None,
146    ):
147        if roi is None:
148            roi = []
149        if channels is None:
150            channels = []
151        self.slide_id = slide_id
152        self.path = path
153        self.start_date = start_date
154        self.end_date = end_date
155        self.scan_time_s = scan_time_s
156        self.scanner_id = scanner_id
157        self.tray = tray
158        self.slot = slot
159        self.camera = camera
160        self.objective = objective
161        self.pixel_size_um = pixel_size_um
162        self.tile_width_px = tile_width_px
163        self.tile_height_px = tile_height_px
164        self.tile_overlap_proportion = tile_overlap_proportion
165        self.channels = channels
166        self.roi = roi
yaml_tag = 'csi_utils.scans.Scan'
SCANNER_IDS = {'4661000426': 'axioscan7_0'}

Axioscan 7 scanner IDs (service number), mapped to our scanner IDs

METADATA_FILE_NAME = {<Type.AXIOSCAN7: 'axioscan7'>: 'scan.yaml', <Type.BZSCANNER: 'bzscanner'>: 'slideinfo.txt'}
DATETIME_FORMAT = {<Type.AXIOSCAN7: 'axioscan7'>: '%Y-%m-%dT%H:%M:%S%z', <Type.BZSCANNER: 'bzscanner'>: '%a %b %d %H:%M:%S %Y'}
BZSCANNER_CHANNEL_MAP = {'DAPI': 'DAPI', 'TRITC': 'AF555', 'CY5': 'AF647', 'BF': 'BRIGHT', 'FITC': 'AF488'}
slide_id
path
start_date
end_date
scan_time_s
scanner_id
tray
slot
camera
objective
pixel_size_um
tile_width_px
tile_height_px
tile_overlap_proportion
channels
roi
def has_same_profile(self, other):
174    def has_same_profile(self, other):
175        return (
176            self.camera == other.camera
177            and self.objective == other.objective
178            and self.pixel_size_um == other.pixel_size_um
179            and self.tile_width_px == other.tile_width_px
180            and self.tile_height_px == other.tile_height_px
181            and self.tile_overlap_proportion == other.tile_overlap_proportion
182            and self.channels == other.channels
183            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
184        )
def get_channel_names(self) -> list[str]:
186    def get_channel_names(self) -> list[str]:
187        """
188        Get the channel names in the scan's channel order.
189        :return: a list of channel names.
190        """
191        return [channel.name for channel in self.channels]

Get the channel names in the scan's channel order.

Returns

a list of channel names.

def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
193    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
194        """
195        Given a list of channel names, return the corresponding indices in the scan's
196        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
197        actual AlexaFluor names (AF555, AF647, AF488).
198        If a list entry is None, it will return -1 for that entry.
199        :param channel_names: a list of channel names.
200        :return: a list of channel indices.
201        """
202        # Get the scan's channel name list
203        scan_channel_names = self.get_channel_names()
204
205        channel_indices = []
206        for name in channel_names:
207            # Convert any BZScanner channel names to the actual channel names
208            if name in self.BZSCANNER_CHANNEL_MAP:
209                name = self.BZSCANNER_CHANNEL_MAP[name]
210
211            # Append the corresponding index if possible
212            if name is None:
213                channel_indices.append(-1)
214            elif name in scan_channel_names:
215                channel_indices.append(scan_channel_names.index(name))
216            else:
217                raise ValueError(
218                    f"Channel name {name} not found in scan channels {scan_channel_names}"
219                )
220        return channel_indices

Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is None, it will return -1 for that entry.

Parameters
  • channel_names: a list of channel names.
Returns

a list of channel indices.

def save_yaml(self, output_path: str):
222    def save_yaml(self, output_path: str):
223        """
224        Write the Scan object to a .yaml file.
225        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
226        :return: nothing; will raise an error on failure
227        """
228        # Create necessary folders
229        output_path = os.path.abspath(output_path)
230        if os.path.splitext(output_path)[1] == ".yaml":
231            os.makedirs(os.path.dirname(output_path), exist_ok=True)
232        else:
233            os.makedirs(output_path, exist_ok=True)
234            # Add the standard metadata file name to the path if needed
235            output_path = os.path.join(
236                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
237            )
238
239        # Populate the file
240        with open(output_path, "w") as file:
241            yaml.dump(self, stream=file, sort_keys=False)

Write the Scan object to a .yaml file.

Parameters
  • output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns

nothing; will raise an error on failure

@classmethod
def load_yaml(cls, input_path: str) -> Self:
243    @classmethod
244    def load_yaml(cls, input_path: str) -> typing.Self:
245        """
246        Load a Scan object from a .yaml file.
247        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
248        :return: a Scan object
249        """
250        input_path = os.path.abspath(input_path)
251        if os.path.isdir(input_path):
252            input_path = os.path.join(
253                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
254            )
255        with open(input_path, "r") as file:
256            metadata_obj = yaml.load(file, Loader=yaml.Loader)
257        return metadata_obj

Load a Scan object from a .yaml file.

Parameters
  • input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns

a Scan object

def to_dict(self) -> dict:
259    def to_dict(self) -> dict:
260        # Dump to json; then add indents and a top-level key
261        channels_json = json.dumps(
262            self.channels, default=lambda x: x.__dict__, indent=2
263        )
264        channels_json = "  ".join(channels_json.splitlines(True))
265        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
266
267        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
268        roi_json = "  ".join(roi_json.splitlines(True))
269        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
270
271        return {
272            "slide_id": self.slide_id,
273            "path": self.path,
274            "start_date": self.start_date,
275            "end_date": self.end_date,
276            "scan_time_s": self.scan_time_s,
277            "scanner_id": self.scanner_id,
278            "tray": self.tray,
279            "slot": self.slot,
280            "camera": self.camera,
281            "objective": self.objective,
282            "pixel_size_um": self.pixel_size_um,
283            "tile_width_px": self.tile_width_px,
284            "tile_height_px": self.tile_height_px,
285            "tile_overlap_proportion": self.tile_overlap_proportion,
286            "channels": channels_json,
287            "roi": roi_json,
288        }
@classmethod
def from_dict(cls, scan_dict) -> Self:
290    @classmethod
291    def from_dict(cls, scan_dict) -> typing.Self:
292        local_timezone = zoneinfo.ZoneInfo("localtime")
293        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
294        result = cls(
295            slide_id=scan_dict["slide_id"],
296            path=scan_dict["path"],
297            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
298            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
299            scan_time_s=int(dt),
300            scanner_id=scan_dict["scanner_id"],
301            tray=scan_dict["tray"],
302            slot=scan_dict["slot"],
303            camera=scan_dict["camera"],
304            objective=scan_dict["objective"],
305            pixel_size_um=scan_dict["pixel_size"],
306            tile_width_px=scan_dict["tile_width"],
307            tile_height_px=scan_dict["tile_height"],
308            tile_overlap_proportion=scan_dict["tile_overlap"],
309        )
310        for channel_json in scan_dict["channels"]["data"]:
311            result.channels.append(
312                cls.Channel(
313                    name=channel_json["name"],
314                    exposure_ms=channel_json["exposure_ms"],
315                    intensity=channel_json["intensity"],
316                )
317            )
318        for roi_json in scan_dict["roi"]["data"]:
319            result.roi.append(
320                cls.ROI(
321                    origin_x_um=roi_json["origin_x_um"],
322                    origin_y_um=roi_json["origin_y_um"],
323                    width_um=roi_json["width_um"],
324                    height_um=roi_json["height_um"],
325                    tile_rows=roi_json["tile_rows"],
326                    tile_cols=roi_json["tile_cols"],
327                    focus_points=roi_json["focus_points"],
328                )
329            )
330        return result
@classmethod
def load_czi(cls, input_path: str) -> Self:
332    @classmethod
333    def load_czi(cls, input_path: str) -> typing.Self:
334        """
335        :param input_path: the path to the .czi file
336        :return: a Scan object
337        """
338        if aicspylibczi is None:
339            raise ModuleNotFoundError(
340                "aicspylibczi library not installed. "
341                "Install csi-images with [imageio] option to resolve."
342            )
343
344        # Normalize paths
345        input_path = os.path.abspath(input_path)
346
347        # Read in metadata as XML elements
348        metadata_xml = aicspylibczi.CziFile(input_path).meta
349        # Read in shape metadata from binary
350        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
351
352        # Populate metadata
353        scan = cls()
354
355        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
356        if scan.slide_id is not None:
357            scan.slide_id = scan.slide_id.strip().upper()
358        # Map the raw scanner ID (service ID) to our IDs
359        scan.scanner_id = cls.SCANNER_IDS[
360            metadata_xml.find(".//Microscope/UserDefinedName").text
361        ]
362
363        # Extract start and finish datetimes
364        date = metadata_xml.find(".//Document/CreationDate").text
365        # Strip out sub-second precision
366        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
367        date_as_datetime = datetime.datetime.strptime(
368            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
369        )
370        scan.start_date = date_as_datetime.strftime(
371            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
372        )
373        scan.scan_time_s = round(
374            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
375        )
376        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
377        scan.end_date = date_as_datetime.strftime(
378            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
379        )
380
381        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
382        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
383
384        # Get camera and magnifying info
385        scan.camera = (
386            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
387        )["Name"]
388        magnification = metadata_xml.find(
389            ".//Objectives/Objective/NominalMagnification"
390        )
391        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
392        scan.objective = f"{magnification.text}x-{aperture.text}"
393        scan.pixel_size_um = (
394            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
395        )
396        # Round off the pixel size to nanometers; might not be optimal, but this
397        # gets rounded when we send it to the database anyways (to 7 places)
398        scan.pixel_size_um = round(scan.pixel_size_um, 3)
399
400        # Get tile information
401        # Note: X Y is untested, could be flipped. I always forget. Just don't use
402        # non-square frames and we're all good.
403        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
404        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
405
406        scan.tile_width_px = rois_shape[0]["X"][1]
407        scan.tile_height_px = rois_shape[0]["Y"][1]
408        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
409
410        # Extract channels and create Channel objects from them
411        channel_indices = []
412        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
413            channel_indices.append(int(channel.attrib["Id"][-1]))
414            intensity_xml = channel.find(".//Intensity")
415            if intensity_xml is None:
416                intensity = 0
417            else:
418                intensity = float(intensity_xml.text[:-2]) * 1e-2
419            scan.channels.append(
420                cls.Channel(
421                    name=channel.attrib["Name"].upper(),
422                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
423                    intensity=intensity,
424                )
425            )
426        # Make sure the channels are sorted
427        scan.channels = [
428            channel for _, channel in sorted(zip(channel_indices, scan.channels))
429        ]
430        # Verify that the shape corresponds to the channels
431        for roi in rois_shape:
432            if roi["C"][1] != len(scan.channels):
433                raise ValueError(
434                    f"Number of channels {len(scan.channels)} "
435                    f"is not the same as the number of channels in an ROI: "
436                    f"{roi['C'][1]}"
437                )
438
439        # Get the real ROI limits; the metadata is not always correct
440        limits_xml = metadata_xml.findall(".//AllowedScanArea")
441        limits = [
442            round(float(limits_xml[0].find("Center").text.split(",")[0])),
443            round(float(limits_xml[0].find("Center").text.split(",")[1])),
444            round(float(limits_xml[0].find("Size").text.split(",")[0])),
445            round(float(limits_xml[0].find("Size").text.split(",")[1])),
446        ]
447        # Convert to top-left and bottom-right
448        limits = [
449            round(limits[0] - limits[2] / 2),
450            round(limits[1] - limits[3] / 2),
451            round(limits[0] + limits[2] / 2),
452            round(limits[1] + limits[3] / 2),
453        ]
454
455        # Extract ROIs and create ROI objects from them
456        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
457        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
458        if len(rois_xml_metadata) != len(rois_shape):
459            raise ValueError(
460                f"Metadata and binary data from {input_path} "
461                f"do not match in number of ROIs"
462            )
463        # We need both to determine the number of rows/columns because the XML lies
464        roi_indices = []
465        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
466            name = roi_xml.attrib["Name"]
467            # Determine the index of this scene
468            scene_index = -1
469            for scene in scenes_xml_metadata:
470                if scene.attrib["Name"] == name:
471                    scene_index = int(scene.attrib["Index"])
472                    break
473            if scene_index == -1:
474                raise ValueError(f"ROI {name} does not correspond to any scenes")
475            else:
476                roi_indices.append(scene_index)
477            # Extract other metadata
478            roi_limits = [
479                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
480                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
481                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
482                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
483            ]
484            # Convert to top-left and bottom-right
485            roi_limits = [
486                round(roi_limits[0] - roi_limits[2] / 2),
487                round(roi_limits[1] - roi_limits[3] / 2),
488                round(roi_limits[0] + roi_limits[2] / 2),
489                round(roi_limits[1] + roi_limits[3] / 2),
490            ]
491            # Bound the ROI to the actual scan limits
492            roi_limits = [
493                max(roi_limits[0], limits[0]),
494                max(roi_limits[1], limits[1]),
495                min(roi_limits[2], limits[2]),
496                min(roi_limits[3], limits[3]),
497            ]
498
499            tile_rows = int(roi_xml.find("Rows").text)
500            # Current best way of reliably extracting; <Columns> entry can be wrong
501            if (roi_shape["M"][1] % tile_rows) != 0:
502                raise ValueError(
503                    f"The number of tiles {roi_shape['M'][1]} is not "
504                    f"divisible by the tile rows {tile_rows}; metadata "
505                    f"must be messed up. Thanks Zeiss"
506                )
507            else:
508                tile_cols = int(roi_shape["M"][1] / tile_rows)
509            # Support points are actually the relevant focus points for this ROI
510            focus_points = []
511            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
512                focus_points.append(
513                    [
514                        int(float(focus_point.find("X").text)),
515                        int(float(focus_point.find("Y").text)),
516                        int(float(focus_point.find("Z").text)),
517                    ]
518                )
519            # Strip all sub-micron precision, it does not matter
520            scan.roi.append(
521                cls.ROI(
522                    origin_x_um=roi_limits[0],
523                    origin_y_um=roi_limits[1],
524                    width_um=roi_limits[2] - roi_limits[0],
525                    height_um=roi_limits[3] - roi_limits[1],
526                    tile_rows=tile_rows,
527                    tile_cols=tile_cols,
528                    focus_points=focus_points,
529                )
530            )
531        # Sort based on the scene indices
532        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
533
534        return scan
Load a Scan object from the metadata of a .czi file.

Parameters
  • input_path: the path to the .czi file
Returns

a Scan object

@classmethod
def load_txt(cls, input_path: str) -> Self:
536    @classmethod
537    def load_txt(cls, input_path: str) -> typing.Self:
538        """
539        Loads a Scan object from a .txt file, which originates from the BZScanner.
540        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
541        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
542        :return: a Scan object
543        """
544        # Set paths
545        input_path = os.path.abspath(input_path)
546        if os.path.isdir(input_path):
547            input_path = os.path.join(
548                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
549            )
550
551        # Read in metadata as a dict
552        with open(input_path, "r") as file:
553            metadata_contents = file.read()
554            # Read each line, splitting on the = sign
555            metadata_dict = {}
556            for line in metadata_contents.splitlines():
557                key, value = line.split("=")
558                metadata_dict[key] = value
559
560        # Populate metadata
561        scan = cls()
562
563        scan.slide_id = metadata_dict["SLIDEID"]
564        scan.slide_id = scan.slide_id.strip().upper()
565
566        scan.path = metadata_dict["SLIDEDIR"]
567
568        # Extract start and finish datetimes
569        date = metadata_dict["DATE"]
570        date_as_datetime = datetime.datetime.strptime(
571            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
572        )
573        date_as_datetime = date_as_datetime.astimezone(
574            zoneinfo.ZoneInfo("America/Los_Angeles")
575        )  # Hardcoded because BZScanners are here
576        scan.start_date = date_as_datetime.strftime(
577            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
578        )
579        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
580        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
581        scan.end_date = date_as_datetime.strftime(
582            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
583        )
584
585        # Map the raw scanner ID (service ID) to our IDs
586        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
587        scan.tray = 0  # only one tray in a BZScanner
588        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
589
590        # Get camera and magnifying info
591        scan.camera = ""
592        magnification = 10
593        aperture = 0  # TODO: find the actual aperture
594        scan.objective = f"{magnification}x-{aperture}"
595        scan.pixel_size_um = 0.591  # Estimated from image metadata
596
597        # Get tile information
598        scan.tile_width_px = 1362  # Known from image metadata
599        scan.tile_height_px = 1004  # Known from image metadata
600        scan.tile_overlap_proportion = 0
601
602        # Extract channels and create Channel objects from them
603        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
604            channel_settings = metadata_dict[channel].split(",")
605            if channel_settings[0] == "0":
606                continue
607            scan.channels.append(
608                cls.Channel(
609                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
610                    exposure_ms=float(channel_settings[1]),
611                    intensity=float(channel_settings[2]),
612                )
613            )
614
615        # Get focus points
616        focus_points = []
617        for i in range(33):
618            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
619            if focus_point[0] == "0":
620                break
621            focus_points.append(
622                [
623                    int(float(focus_point[1])),
624                    int(float(focus_point[2])),
625                    int(float(focus_point[3])),
626                ]
627            )
628
629        # In the BZScanner, the slide is vertical instead of horizontal
630        # We put in nominal values for the ROI, which is oriented vertically as well
631        tile_rows = 96
632        tile_cols = 24
633        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
634        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
635        origin_x_um = 2500 + round((20000 - roi_width) / 2)
636        origin_y_um = 2500 + round((58000 - roi_height) / 2)
637        scan.roi.append(
638            cls.ROI(
639                origin_x_um=origin_x_um,
640                origin_y_um=origin_y_um,
641                width_um=roi_width,
642                height_um=roi_height,
643                tile_rows=tile_rows,
644                tile_cols=tile_cols,
645                focus_points=focus_points,
646            )
647        )
648        return scan

Loads a Scan object from a .txt file, which originates from the BZScanner. Some metadata from the slideinfo.txt file is missing or adjusted to fit.

Parameters
  • input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
Returns

a Scan object

@classmethod
def load_from_folder(cls, input_path: str) -> Self:
650    @classmethod
651    def load_from_folder(cls, input_path: str) -> typing.Self:
652        """
653        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
654        Prefers scan.yaml if both exist.
655        :param input_path: /path/to/folder
656        :return: a Scan object
657        """
658        input_path = os.path.abspath(input_path)
659        if os.path.isfile(
660            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
661        ):
662            return cls.load_yaml(input_path)
663        elif os.path.isfile(
664            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
665        ):
666            return cls.load_txt(input_path)
667        else:
668            raise ValueError(
669                f"No scan metadata files "
670                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
671                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
672                f"{input_path}"
673            )
674        pass

Load a Scan object from a folder that contains scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.

Parameters
  • input_path: /path/to/folder
Returns

a Scan object

@classmethod
def make_placeholder(cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0) -> Self:
676    @classmethod
677    def make_placeholder(
678        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
679    ) -> typing.Self:
680        """
681        Make a placeholder Scan object with only basic required information filled in.
682        :param slide_id: the slide ID
683        :param n_tile: the number of this tile, which will become the number of
684                       tiles in the scan
685        :param n_roi: the number of ROIs in the scan
686        :return: a Scan object
687        """
688        # Sanitize inputs here
689        slide_id = str(slide_id).strip().upper()
690        n_tile = int(n_tile)
691        n_roi = int(n_roi)
692        # Generate the object
693        scan = cls()
694        scan.slide_id = slide_id
695        scan.roi = [cls.ROI() for _ in range(n_roi)]
696        scan.roi[0].tile_cols = n_tile
697        return scan

Make a placeholder Scan object with only basic required information filled in.

Parameters
  • slide_id: the slide ID
  • n_tile: the number of tiles in the scan; stored as the tile column count of the first ROI
  • n_roi: the number of ROIs in the scan
Returns

a Scan object

class Scan.Type(enum.Enum):
33    class Type(enum.Enum):
34        BZSCANNER = "bzscanner"
35        AXIOSCAN7 = "axioscan7"
BZSCANNER = <Type.BZSCANNER: 'bzscanner'>
AXIOSCAN7 = <Type.AXIOSCAN7: 'axioscan7'>
class Scan.Channel(yaml.YAMLObject):
58    class Channel(yaml.YAMLObject):
59        """
60        Class that comprises a channel; we usually have multiple (2-5) per scan.
61        Contains three fields:
62        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
63        - exposure_ms: the exposure time to capture a frame in milliseconds
64        - intensity: the light intensity used OR the gain applied to the channel
65        """
66
67        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
68
69        def __init__(
70            self,
71            name: str = "",
72            exposure_ms: float = -1.0,
73            intensity: float = -1.0,
74        ):
75            self.name = name
76            self.exposure_ms = exposure_ms
77            self.intensity = intensity
78
79        def __repr__(self):
80            return yaml.dump(self, sort_keys=False)
81
82        def __eq__(self, other):
83            return self.__repr__() == other.__repr__()

Class that comprises a channel; we usually have multiple (2-5) per scan. Contains three fields:

  • name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
  • exposure_ms: the exposure time to capture a frame in milliseconds
  • intensity: the light intensity used OR the gain applied to the channel
Scan.Channel(name: str = '', exposure_ms: float = -1.0, intensity: float = -1.0)
69        def __init__(
70            self,
71            name: str = "",
72            exposure_ms: float = -1.0,
73            intensity: float = -1.0,
74        ):
75            self.name = name
76            self.exposure_ms = exposure_ms
77            self.intensity = intensity
yaml_tag = 'csi_utils.csi_scans.Scan.Channel'
name
exposure_ms
intensity
class Scan.ROI(yaml.YAMLObject):
 85    class ROI(yaml.YAMLObject):
 86        """
 87        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 88        """
 89
 90        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 91
 92        def __init__(
 93            self,
 94            origin_x_um: int = -1,
 95            origin_y_um: int = -1,
 96            width_um: int = -1,
 97            height_um: int = -1,
 98            tile_rows: int = -1,
 99            tile_cols: int = -1,
100            focus_points=None,
101        ):
102            if focus_points is None:
103                focus_points = []
104            self.origin_x_um = origin_x_um
105            self.origin_y_um = origin_y_um
106            self.width_um = width_um
107            self.height_um = height_um
108            self.tile_rows = tile_rows
109            self.tile_cols = tile_cols
110            self.focus_points = focus_points
111
112        def __repr__(self):
113            return yaml.dump(self, sort_keys=False)
114
115        def __eq__(self, other):
116            return self.__repr__() == other.__repr__()
117
118        def similar(self, other):
119            return (
120                self.origin_y_um == other.origin_y_um
121                and self.origin_x_um == other.origin_x_um
122                and self.width_um == other.width_um
123                and self.height_um == other.height_um
124                and self.tile_rows == other.tile_rows
125                and self.tile_cols == other.tile_cols
126            )

Class that comprises an ROI; we usually have 1, but may have more in a scan.

Scan.ROI( origin_x_um: int = -1, origin_y_um: int = -1, width_um: int = -1, height_um: int = -1, tile_rows: int = -1, tile_cols: int = -1, focus_points=None)
 92        def __init__(
 93            self,
 94            origin_x_um: int = -1,
 95            origin_y_um: int = -1,
 96            width_um: int = -1,
 97            height_um: int = -1,
 98            tile_rows: int = -1,
 99            tile_cols: int = -1,
100            focus_points=None,
101        ):
102            if focus_points is None:
103                focus_points = []
104            self.origin_x_um = origin_x_um
105            self.origin_y_um = origin_y_um
106            self.width_um = width_um
107            self.height_um = height_um
108            self.tile_rows = tile_rows
109            self.tile_cols = tile_cols
110            self.focus_points = focus_points
yaml_tag = 'csi_utils.csi_scans.Scan.ROI'
origin_x_um
origin_y_um
width_um
height_um
tile_rows
tile_cols
focus_points
def similar(self, other):
118        def similar(self, other):
119            return (
120                self.origin_y_um == other.origin_y_um
121                and self.origin_x_um == other.origin_x_um
122                and self.width_um == other.width_um
123                and self.height_um == other.height_um
124                and self.tile_rows == other.tile_rows
125                and self.tile_cols == other.tile_cols
126            )