csi_images.csi_scans

Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.

  1"""
  2Contains the Scan class, which holds important metadata from a scan. This metadata
  3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
  4object can also be loaded from a .czi file or a .txt file.
  5"""
  6
  7import os
  8import enum
  9import datetime
 10import zoneinfo
 11from typing import Self, Iterable, Any
 12
 13import yaml
 14import json
 15
 16try:
 17    import aicspylibczi
 18except ImportError:
 19    aicspylibczi = None
 20
 21
 22class Scan(yaml.YAMLObject):
 23    """
 24    Class that composes a whole scan's metadata. Contains some universal data,
 25    plus lists for channels and ROIs.
 26
 27    .. include:: ../docs/coordinate_systems.md
 28    """
 29
 30    yaml_tag = "csi_utils.scans.Scan"
 31
 32    class Type(enum.Enum):
 33        BZSCANNER = "bzscanner"
 34        AXIOSCAN7 = "axioscan7"
 35
 36    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
 37    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""
 38
 39    METADATA_FILE_NAME = {
 40        Type.AXIOSCAN7: "scan.yaml",
 41        Type.BZSCANNER: "slideinfo.txt",
 42    }
 43    DATETIME_FORMAT = {
 44        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
 45        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
 46    }
 47
 48    # Actual channel names, from the BZScanner's default order
 49    BZSCANNER_CHANNEL_MAP = {
 50        "DAPI": "DAPI",
 51        "TRITC": "AF555",
 52        "CY5": "AF647",
 53        "BF": "BRIGHT",
 54        "FITC": "AF488",
 55    }
 56
 57    class Channel(yaml.YAMLObject):
 58        """
 59        Class that comprises a channel; we usually have multiple (2-5) per scan.
 60        Contains three fields:
 61        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 62        - exposure_ms: the exposure time to capture a frame in milliseconds
 63        - intensity: the light intensity used OR the gain applied to the channel
 64        """
 65
 66        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 67
 68        def __init__(
 69            self,
 70            name: str = "",
 71            exposure_ms: float = -1.0,
 72            intensity: float = -1.0,
 73            gain_applied: bool = False,
 74        ):
 75            self.name = name
 76            self.exposure_ms = exposure_ms
 77            self.intensity = intensity
 78            self.gain_applied = gain_applied
 79
 80        def __repr__(self):
 81            return yaml.dump(self, sort_keys=False)
 82
 83        def __eq__(self, other):
 84            return self.__repr__() == other.__repr__()
 85
 86    class ROI(yaml.YAMLObject):
 87        """
 88        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 89        """
 90
 91        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 92
 93        def __init__(
 94            self,
 95            origin_x_um: int = -1,
 96            origin_y_um: int = -1,
 97            width_um: int = -1,
 98            height_um: int = -1,
 99            tile_rows: int = -1,
100            tile_cols: int = -1,
101            focus_points=None,
102        ):
103            if focus_points is None:
104                focus_points = []
105            self.origin_x_um = origin_x_um
106            self.origin_y_um = origin_y_um
107            self.width_um = width_um
108            self.height_um = height_um
109            self.tile_rows = tile_rows
110            self.tile_cols = tile_cols
111            self.focus_points = focus_points
112
113        def __repr__(self):
114            return yaml.dump(self, sort_keys=False)
115
116        def __eq__(self, other):
117            return self.__repr__() == other.__repr__()
118
119        def similar(self, other):
120            return (
121                self.origin_y_um == other.origin_y_um
122                and self.origin_x_um == other.origin_x_um
123                and self.width_um == other.width_um
124                and self.height_um == other.height_um
125                and self.tile_rows == other.tile_rows
126                and self.tile_cols == other.tile_cols
127            )
128
129    def __init__(
130        self,
131        slide_id: str = "",
132        exists: bool = True,
133        path: str = "",
134        start_date: str = "",
135        end_date: str = "",
136        scan_time_s: int = -1,
137        scanner_id: str = "",
138        tray_pos: int = -1,
139        slide_pos: int = -1,
140        camera: str = "",
141        objective: str = "",
142        pixel_size_um: float = -1.0,
143        tile_width_px: int = -1,
144        tile_height_px: int = -1,
145        tile_overlap_proportion: int = -1,
146        channels: list[Channel] = None,
147        roi: list[ROI] = None,
148    ):
149        if roi is None:
150            roi = []
151        if channels is None:
152            channels = []
153        self.slide_id = slide_id
154        self.exists = exists
155        self.path = path
156        self.start_date = start_date
157        self.end_date = end_date
158        self.scan_time_s = scan_time_s
159        self.scanner_id = scanner_id
160        self.tray_pos = tray_pos
161        self.slide_pos = slide_pos
162        self.camera = camera
163        self.objective = objective
164        self.pixel_size_um = pixel_size_um
165        self.tile_width_px = tile_width_px
166        self.tile_height_px = tile_height_px
167        self.tile_overlap_proportion = tile_overlap_proportion
168        self.channels = channels
169        self.roi = roi
170
171    def __repr__(self):
172        return yaml.dump(self, sort_keys=False)
173
174    def __eq__(self, other):
175        return self.__repr__() == other.__repr__()
176
177    def has_same_profile(self, other):
178        return (
179            self.camera == other.camera
180            and self.objective == other.objective
181            and self.pixel_size_um == other.pixel_size_um
182            and self.tile_width_px == other.tile_width_px
183            and self.tile_height_px == other.tile_height_px
184            and self.tile_overlap_proportion == other.tile_overlap_proportion
185            and self.channels == other.channels
186            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
187        )
188
189    def get_channel_names(self) -> list[str]:
190        """
191        Get the channel names in the scan's channel order.
192        :return: a list of channel names.
193        """
194        return [channel.name for channel in self.channels]
195
196    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
197        """
198        Given a list of channel names, return the corresponding indices in the scan's
199        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
200        actual AlexaFluor names (AF555, AF647, AF488).
201        If a list entry is None, it will return -1 for that entry.
202        :param channel_names: a list of channel names.
203        :return: a list of channel indices.
204        """
205        # Get the scan's channel name list
206        scan_channel_names = self.get_channel_names()
207
208        channel_indices = []
209        for name in channel_names:
210            # Convert any BZScanner channel names to the actual channel names
211            if name in self.BZSCANNER_CHANNEL_MAP:
212                name = self.BZSCANNER_CHANNEL_MAP[name]
213
214            # Append the corresponding index if possible
215            if name is None:
216                channel_indices.append(-1)
217            elif name in scan_channel_names:
218                channel_indices.append(scan_channel_names.index(name))
219            else:
220                raise ValueError(
221                    f"Channel name {name} not found in scan channels {scan_channel_names}"
222                )
223        return channel_indices
224
225    def save_yaml(self, output_path: str):
226        """
227        Write the Scan object to a .yaml file.
228        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
229        :return: nothing; will raise an error on failure
230        """
231        # Create necessary folders
232        output_path = os.path.abspath(output_path)
233        if os.path.splitext(output_path)[1] == ".yaml":
234            os.makedirs(os.path.dirname(output_path), exist_ok=True)
235        else:
236            os.makedirs(output_path, exist_ok=True)
237            # Add the standard metadata file name to the path if needed
238            output_path = os.path.join(
239                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
240            )
241
242        # Populate the file
243        with open(output_path, "w") as file:
244            yaml.dump(self, stream=file, sort_keys=False)
245
246    @classmethod
247    def load_yaml(cls, input_path: str) -> Self:
248        """
249        Load a Scan object from a .yaml file.
250        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
251        :return: a Scan object
252        """
253        input_path = os.path.abspath(input_path)
254        if os.path.isdir(input_path):
255            input_path = os.path.join(
256                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
257            )
258        with open(input_path, "r") as file:
259            metadata_obj = yaml.load(file, Loader=yaml.Loader)
260        return metadata_obj
261
262    def to_dict(self) -> dict:
263        # Dump to json; then add indents and a top-level key
264        channels_json = json.dumps(
265            self.channels, default=lambda x: x.__dict__, indent=2
266        )
267        channels_json = "  ".join(channels_json.splitlines(True))
268        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
269
270        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
271        roi_json = "  ".join(roi_json.splitlines(True))
272        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
273
274        return {
275            "slide_id": self.slide_id,
276            "exists": self.exists,
277            "path": self.path,
278            "start_date": self.start_date,
279            "end_date": self.end_date,
280            "scan_time_s": self.scan_time_s,
281            "scanner_id": self.scanner_id,
282            "tray_pos": self.tray_pos,
283            "slide_pos": self.slide_pos,
284            "camera": self.camera,
285            "objective": self.objective,
286            "pixel_size_um": self.pixel_size_um,
287            "tile_width_px": self.tile_width_px,
288            "tile_height_px": self.tile_height_px,
289            "tile_overlap_proportion": self.tile_overlap_proportion,
290            "channels": channels_json,
291            "roi": roi_json,
292        }
293
294    @classmethod
295    def from_dict(cls, scan_dict) -> Self:
296        local_timezone = zoneinfo.ZoneInfo("localtime")
297        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
298        result = cls(
299            slide_id=scan_dict["slide_id"],
300            exists=scan_dict["exists"],
301            path=scan_dict["path"],
302            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
303            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
304            scan_time_s=int(dt),
305            scanner_id=scan_dict["scanner_id"],
306            tray_pos=scan_dict["tray_pos"],
307            slide_pos=scan_dict["slide_pos"],
308            camera=scan_dict["camera"],
309            objective=scan_dict["objective"],
310            pixel_size_um=scan_dict["pixel_size"],
311            tile_width_px=scan_dict["tile_width"],
312            tile_height_px=scan_dict["tile_height"],
313            tile_overlap_proportion=scan_dict["tile_overlap"],
314        )
315        for channel_json in scan_dict["channels"]["data"]:
316            result.channels.append(
317                cls.Channel(
318                    name=channel_json["name"],
319                    exposure_ms=channel_json["exposure_ms"],
320                    intensity=channel_json["intensity"],
321                    gain_applied=channel_json["gain_applied"],
322                )
323            )
324        for roi_json in scan_dict["roi"]["data"]:
325            result.roi.append(
326                cls.ROI(
327                    origin_x_um=roi_json["origin_x_um"],
328                    origin_y_um=roi_json["origin_y_um"],
329                    width_um=roi_json["width_um"],
330                    height_um=roi_json["height_um"],
331                    tile_rows=roi_json["tile_rows"],
332                    tile_cols=roi_json["tile_cols"],
333                    focus_points=roi_json["focus_points"],
334                )
335            )
336        return result
337
338    @classmethod
339    def load_czi(cls, input_path: str) -> Self:
340        """
341        :param input_path: the path to the .czi file
342        :return: a Scan object
343        """
344        if aicspylibczi is None:
345            raise ModuleNotFoundError(
346                "aicspylibczi library not installed. "
347                "Install csi-images with [imageio] option to resolve."
348            )
349
350        # Normalize paths
351        input_path = os.path.abspath(input_path)
352
353        # Read in metadata as XML elements
354        metadata_xml = aicspylibczi.CziFile(input_path).meta
355        # Read in shape metadata from binary
356        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
357
358        # Populate metadata
359        scan = cls()
360
361        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
362        if scan.slide_id is not None:
363            scan.slide_id = scan.slide_id.strip().upper()
364        # Map the raw scanner ID (service ID) to our IDs
365        scan.scanner_id = cls.SCANNER_IDS[
366            metadata_xml.find(".//Microscope/UserDefinedName").text
367        ]
368
369        # Extract start and finish datetimes
370        date = metadata_xml.find(".//Document/CreationDate").text
371        # Strip out sub-second precision
372        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
373        date_as_datetime = datetime.datetime.strptime(
374            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
375        )
376        scan.start_date = date_as_datetime.strftime(
377            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
378        )
379        scan.scan_time_s = round(
380            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
381        )
382        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
383        scan.end_date = date_as_datetime.strftime(
384            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
385        )
386
387        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
388        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
389
390        # Get camera and magnifying info
391        scan.camera = (
392            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
393        )["Name"]
394        magnification = metadata_xml.find(
395            ".//Objectives/Objective/NominalMagnification"
396        )
397        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
398        scan.objective = f"{magnification.text}x-{aperture.text}"
399        scan.pixel_size_um = (
400            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
401        )
402        # Round off the pixel size to nanometers; might not be optimal, but this
403        # gets rounded when we send it to the database anyways (to 7 places)
404        scan.pixel_size_um = round(scan.pixel_size_um, 3)
405
406        # Get tile information
407        # Note: X Y is untested, could be flipped. I always forget. Just don't use
408        # non-square frames and we're all good.
409        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
410        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
411
412        scan.tile_width_px = rois_shape[0]["X"][1]
413        scan.tile_height_px = rois_shape[0]["Y"][1]
414        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
415
416        # Extract channels and create Channel objects from them
417        channel_indices = []
418        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
419            channel_indices.append(int(channel.attrib["Id"][-1]))
420            intensity_xml = channel.find(".//Intensity")
421            if intensity_xml is None:
422                intensity = 0
423            else:
424                intensity = float(intensity_xml.text[:-2]) * 1e-2
425            scan.channels.append(
426                cls.Channel(
427                    name=channel.attrib["Name"].upper(),
428                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
429                    intensity=intensity,
430                    gain_applied=True,  # In Axioscan, we will always use gain = 1
431                )
432            )
433        # Make sure the channels are sorted
434        scan.channels = [
435            channel for _, channel in sorted(zip(channel_indices, scan.channels))
436        ]
437        # Verify that the shape corresponds to the channels
438        for roi in rois_shape:
439            if roi["C"][1] != len(scan.channels):
440                raise ValueError(
441                    f"Number of channels {len(scan.channels)} "
442                    f"is not the same as the number of channels in an ROI: "
443                    f"{roi['C'][1]}"
444                )
445
446        # Get the real ROI limits; the metadata is not always correct
447        limits_xml = metadata_xml.findall(".//AllowedScanArea")
448        limits = [
449            round(float(limits_xml[0].find("Center").text.split(",")[0])),
450            round(float(limits_xml[0].find("Center").text.split(",")[1])),
451            round(float(limits_xml[0].find("Size").text.split(",")[0])),
452            round(float(limits_xml[0].find("Size").text.split(",")[1])),
453        ]
454        # Convert to top-left and bottom-right
455        limits = [
456            round(limits[0] - limits[2] / 2),
457            round(limits[1] - limits[3] / 2),
458            round(limits[0] + limits[2] / 2),
459            round(limits[1] + limits[3] / 2),
460        ]
461
462        # Extract ROIs and create ROI objects from them
463        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
464        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
465        if len(rois_xml_metadata) != len(rois_shape):
466            raise ValueError(
467                f"Metadata and binary data from {input_path} "
468                f"do not match in number of ROIs"
469            )
470        # We need both to determine the number of rows/columns because the XML lies
471        roi_indices = []
472        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
473            name = roi_xml.attrib["Name"]
474            # Determine the index of this scene
475            scene_index = -1
476            for scene in scenes_xml_metadata:
477                if scene.attrib["Name"] == name:
478                    scene_index = int(scene.attrib["Index"])
479                    break
480            if scene_index == -1:
481                raise ValueError(f"ROI {name} does not correspond to any scenes")
482            else:
483                roi_indices.append(scene_index)
484            # Extract other metadata
485            roi_limits = [
486                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
487                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
488                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
489                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
490            ]
491            # Convert to top-left and bottom-right
492            roi_limits = [
493                round(roi_limits[0] - roi_limits[2] / 2),
494                round(roi_limits[1] - roi_limits[3] / 2),
495                round(roi_limits[0] + roi_limits[2] / 2),
496                round(roi_limits[1] + roi_limits[3] / 2),
497            ]
498            # Bound the ROI to the actual scan limits
499            roi_limits = [
500                max(roi_limits[0], limits[0]),
501                max(roi_limits[1], limits[1]),
502                min(roi_limits[2], limits[2]),
503                min(roi_limits[3], limits[3]),
504            ]
505
506            tile_rows = int(roi_xml.find("Rows").text)
507            # Current best way of reliably extracting; <Columns> entry can be wrong
508            if (roi_shape["M"][1] % tile_rows) != 0:
509                raise ValueError(
510                    f"The number of tiles {roi_shape['M'][1]} is not "
511                    f"divisible by the tile rows {tile_rows}; metadata "
512                    f"must be messed up. Thanks Zeiss"
513                )
514            else:
515                tile_cols = int(roi_shape["M"][1] / tile_rows)
516            # Support points are actually the relevant focus points for this ROI
517            focus_points = []
518            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
519                focus_points.append(
520                    [
521                        int(float(focus_point.find("X").text)),
522                        int(float(focus_point.find("Y").text)),
523                        int(float(focus_point.find("Z").text)),
524                    ]
525                )
526            # Strip all sub-micron precision, it does not matter
527            scan.roi.append(
528                cls.ROI(
529                    origin_x_um=roi_limits[0],
530                    origin_y_um=roi_limits[1],
531                    width_um=roi_limits[2] - roi_limits[0],
532                    height_um=roi_limits[3] - roi_limits[1],
533                    tile_rows=tile_rows,
534                    tile_cols=tile_cols,
535                    focus_points=focus_points,
536                )
537            )
538        # Sort based on the scene indices
539        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
540
541        return scan
542
543    @classmethod
544    def load_txt(cls, input_path: str) -> Self:
545        """
546        Loads a Scan object from a .txt file, which originates from the BZScanner.
547        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
548        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
549        :return: a Scan object
550        """
551        # Set paths
552        input_path = os.path.abspath(input_path)
553        if os.path.isdir(input_path):
554            input_path = os.path.join(
555                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
556            )
557
558        # Read in metadata as a dict
559        with open(input_path, "r") as file:
560            metadata_contents = file.read()
561            # Read each line, splitting on the = sign
562            metadata_dict = {}
563            for line in metadata_contents.splitlines():
564                key, value = line.split("=")
565                metadata_dict[key] = value
566
567        # Populate metadata
568        scan = cls()
569
570        scan.slide_id = metadata_dict["SLIDEID"]
571        scan.slide_id = scan.slide_id.strip().upper()
572
573        scan.path = metadata_dict["SLIDEDIR"]
574
575        # Extract start and finish datetimes
576        date = metadata_dict["DATE"]
577        date_as_datetime = datetime.datetime.strptime(
578            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
579        )
580        date_as_datetime = date_as_datetime.astimezone(
581            zoneinfo.ZoneInfo("America/Los_Angeles")
582        )  # Hardcoded because BZScanners are here
583        scan.start_date = date_as_datetime.strftime(
584            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
585        )
586        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
587        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
588        scan.end_date = date_as_datetime.strftime(
589            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
590        )
591
592        # Map the raw scanner ID (service ID) to our IDs
593        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
594        scan.tray_pos = 0  # only one tray_pos in a BZScanner
595        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
596
597        # Get camera and magnifying info
598        scan.camera = ""
599        magnification = 10
600        aperture = 0  # TODO: find the actual aperture
601        scan.objective = f"{magnification}x-{aperture}"
602        scan.pixel_size_um = 0.591  # Estimated from image metadata
603
604        # Get tile information
605        scan.tile_width_px = 1362  # Known from image metadata
606        scan.tile_height_px = 1004  # Known from image metadata
607        scan.tile_overlap_proportion = 0
608
609        # Extract channels and create Channel objects from them
610        if "gain_applied" in metadata_dict:
611            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
612        else:
613            gain_applied = True  # Previous policy was always to apply gains
614        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
615            channel_settings = metadata_dict[channel].split(",")
616            if channel_settings[0] == "0":
617                continue
618            scan.channels.append(
619                cls.Channel(
620                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
621                    exposure_ms=float(channel_settings[1]),
622                    intensity=float(channel_settings[2]),
623                    gain_applied=gain_applied,
624                )
625            )
626
627        # Get focus points
628        focus_points = []
629        for i in range(33):
630            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
631            if focus_point[0] == "0":
632                break
633            focus_points.append(
634                [
635                    int(float(focus_point[1])),
636                    int(float(focus_point[2])),
637                    int(float(focus_point[3])),
638                ]
639            )
640
641        # In the BZScanner, the slide is vertical instead of horizontal
642        # We put in nominal values for the ROI, which is oriented vertically as well
643        tile_rows = 96
644        tile_cols = 24
645        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
646        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
647        origin_x_um = 2500 + round((20000 - roi_width) / 2)
648        origin_y_um = 2500 + round((58000 - roi_height) / 2)
649        scan.roi.append(
650            cls.ROI(
651                origin_x_um=origin_x_um,
652                origin_y_um=origin_y_um,
653                width_um=roi_width,
654                height_um=roi_height,
655                tile_rows=tile_rows,
656                tile_cols=tile_cols,
657                focus_points=focus_points,
658            )
659        )
660        return scan
661
662    @classmethod
663    def load_from_folder(cls, input_path: str) -> Self:
664        """
665        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
666        Prefers scan.yaml if both exist.
667        :param input_path: /path/to/folder
668        :return: a Scan object
669        """
670        input_path = os.path.abspath(input_path)
671        if os.path.isfile(
672            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
673        ):
674            return cls.load_yaml(input_path)
675        elif os.path.isfile(
676            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
677        ):
678            return cls.load_txt(input_path)
679        else:
680            raise ValueError(
681                f"No scan metadata files "
682                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
683                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
684                f"{input_path}"
685            )
686        pass
687
688    @classmethod
689    def make_placeholder(
690        cls,
691        slide_id: str,
692        n_tile: int = 2303,
693        n_roi: int = 0,
694        scanner_type: Type = Type.BZSCANNER,
695    ) -> Self:
696        """
697        Make a placeholder Scan object with only basic required information filled in.
698        :param slide_id: the slide ID
699        :param n_tile: the number of this tile, which will become the number of
700                       tiles in the scan
701        :param n_roi: the number of ROIs in the scan
702        :return: a Scan object
703        """
704        # Sanitize inputs here
705        slide_id = str(slide_id).strip().upper()
706        n_tile = int(n_tile)
707        n_roi = int(n_roi)
708        # Generate the object
709        scan = cls()
710        scan.slide_id = slide_id
711        if scanner_type == cls.Type.AXIOSCAN7:
712            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
713        elif scanner_type == cls.Type.BZSCANNER:
714            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
715        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
716        scan.roi[0].tile_rows = 1
717        scan.roi[0].tile_cols = n_tile + 1
718        return scan
class Scan(yaml.YAMLObject):
 23class Scan(yaml.YAMLObject):
 24    """
 25    Class that composes a whole scan's metadata. Contains some universal data,
 26    plus lists for channels and ROIs.
 27
 28    .. include:: ../docs/coordinate_systems.md
 29    """
 30
 31    yaml_tag = "csi_utils.scans.Scan"
 32
    class Type(enum.Enum):
        """Scanner types that Scan keeps per-type settings for (see the dicts below)."""

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"
 36
    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # File name of the metadata file that is looked for in a scan folder,
    # keyed by scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # strptime()/strftime() patterns for the date strings of each scanner type
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 57
 58    class Channel(yaml.YAMLObject):
 59        """
 60        Class that comprises a channel; we usually have multiple (2-5) per scan.
 61        Contains three fields:
 62        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 63        - exposure_ms: the exposure time to capture a frame in milliseconds
 64        - intensity: the light intensity used OR the gain applied to the channel
 65        """
 66
 67        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 68
 69        def __init__(
 70            self,
 71            name: str = "",
 72            exposure_ms: float = -1.0,
 73            intensity: float = -1.0,
 74            gain_applied: bool = False,
 75        ):
 76            self.name = name
 77            self.exposure_ms = exposure_ms
 78            self.intensity = intensity
 79            self.gain_applied = gain_applied
 80
 81        def __repr__(self):
 82            return yaml.dump(self, sort_keys=False)
 83
 84        def __eq__(self, other):
 85            return self.__repr__() == other.__repr__()
 86
 87    class ROI(yaml.YAMLObject):
 88        """
 89        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 90        """
 91
 92        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 93
 94        def __init__(
 95            self,
 96            origin_x_um: int = -1,
 97            origin_y_um: int = -1,
 98            width_um: int = -1,
 99            height_um: int = -1,
100            tile_rows: int = -1,
101            tile_cols: int = -1,
102            focus_points=None,
103        ):
104            if focus_points is None:
105                focus_points = []
106            self.origin_x_um = origin_x_um
107            self.origin_y_um = origin_y_um
108            self.width_um = width_um
109            self.height_um = height_um
110            self.tile_rows = tile_rows
111            self.tile_cols = tile_cols
112            self.focus_points = focus_points
113
114        def __repr__(self):
115            return yaml.dump(self, sort_keys=False)
116
117        def __eq__(self, other):
118            return self.__repr__() == other.__repr__()
119
120        def similar(self, other):
121            return (
122                self.origin_y_um == other.origin_y_um
123                and self.origin_x_um == other.origin_x_um
124                and self.width_um == other.width_um
125                and self.height_um == other.height_um
126                and self.tile_rows == other.tile_rows
127                and self.tile_cols == other.tile_cols
128            )
129
    def __init__(
        self,
        slide_id: str = "",
        exists: bool = True,
        path: str = "",
        start_date: str = "",
        end_date: str = "",
        scan_time_s: int = -1,
        scanner_id: str = "",
        tray_pos: int = -1,
        slide_pos: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_overlap_proportion: int = -1,
        channels: list[Channel] | None = None,
        roi: list[ROI] | None = None,
    ):
        """
        Store every argument on the instance under the same name. The defaults
        ("" / -1 / -1.0) act as "not set" markers.
        :param channels: list of Channel objects; None becomes a new empty list
        :param roi: list of ROI objects; None becomes a new empty list
        """
        # None defaults avoid sharing one mutable list between instances
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        # NOTE(review): the order of these assignments appears to determine the
        # key order of the unsorted YAML dump; keep it stable
        self.slide_id = slide_id
        self.exists = exists
        self.path = path
        self.start_date = start_date
        self.end_date = end_date
        self.scan_time_s = scan_time_s
        self.scanner_id = scanner_id
        self.tray_pos = tray_pos
        self.slide_pos = slide_pos
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi
171
172    def __repr__(self):
173        return yaml.dump(self, sort_keys=False)
174
175    def __eq__(self, other):
176        return self.__repr__() == other.__repr__()
177
178    def has_same_profile(self, other):
179        return (
180            self.camera == other.camera
181            and self.objective == other.objective
182            and self.pixel_size_um == other.pixel_size_um
183            and self.tile_width_px == other.tile_width_px
184            and self.tile_height_px == other.tile_height_px
185            and self.tile_overlap_proportion == other.tile_overlap_proportion
186            and self.channels == other.channels
187            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
188        )
189
190    def get_channel_names(self) -> list[str]:
191        """
192        Get the channel names in the scan's channel order.
193        :return: a list of channel names.
194        """
195        return [channel.name for channel in self.channels]
196
197    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
198        """
199        Given a list of channel names, return the corresponding indices in the scan's
200        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
201        actual AlexaFluor names (AF555, AF647, AF488).
202        If a list entry is None, it will return -1 for that entry.
203        :param channel_names: a list of channel names.
204        :return: a list of channel indices.
205        """
206        # Get the scan's channel name list
207        scan_channel_names = self.get_channel_names()
208
209        channel_indices = []
210        for name in channel_names:
211            # Convert any BZScanner channel names to the actual channel names
212            if name in self.BZSCANNER_CHANNEL_MAP:
213                name = self.BZSCANNER_CHANNEL_MAP[name]
214
215            # Append the corresponding index if possible
216            if name is None:
217                channel_indices.append(-1)
218            elif name in scan_channel_names:
219                channel_indices.append(scan_channel_names.index(name))
220            else:
221                raise ValueError(
222                    f"Channel name {name} not found in scan channels {scan_channel_names}"
223                )
224        return channel_indices
225
226    def save_yaml(self, output_path: str):
227        """
228        Write the Scan object to a .yaml file.
229        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
230        :return: nothing; will raise an error on failure
231        """
232        # Create necessary folders
233        output_path = os.path.abspath(output_path)
234        if os.path.splitext(output_path)[1] == ".yaml":
235            os.makedirs(os.path.dirname(output_path), exist_ok=True)
236        else:
237            os.makedirs(output_path, exist_ok=True)
238            # Add the standard metadata file name to the path if needed
239            output_path = os.path.join(
240                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
241            )
242
243        # Populate the file
244        with open(output_path, "w") as file:
245            yaml.dump(self, stream=file, sort_keys=False)
246
247    @classmethod
248    def load_yaml(cls, input_path: str) -> Self:
249        """
250        Load a Scan object from a .yaml file.
251        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
252        :return: a Scan object
253        """
254        input_path = os.path.abspath(input_path)
255        if os.path.isdir(input_path):
256            input_path = os.path.join(
257                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
258            )
259        with open(input_path, "r") as file:
260            metadata_obj = yaml.load(file, Loader=yaml.Loader)
261        return metadata_obj
262
263    def to_dict(self) -> dict:
264        # Dump to json; then add indents and a top-level key
265        channels_json = json.dumps(
266            self.channels, default=lambda x: x.__dict__, indent=2
267        )
268        channels_json = "  ".join(channels_json.splitlines(True))
269        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
270
271        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
272        roi_json = "  ".join(roi_json.splitlines(True))
273        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
274
275        return {
276            "slide_id": self.slide_id,
277            "exists": self.exists,
278            "path": self.path,
279            "start_date": self.start_date,
280            "end_date": self.end_date,
281            "scan_time_s": self.scan_time_s,
282            "scanner_id": self.scanner_id,
283            "tray_pos": self.tray_pos,
284            "slide_pos": self.slide_pos,
285            "camera": self.camera,
286            "objective": self.objective,
287            "pixel_size_um": self.pixel_size_um,
288            "tile_width_px": self.tile_width_px,
289            "tile_height_px": self.tile_height_px,
290            "tile_overlap_proportion": self.tile_overlap_proportion,
291            "channels": channels_json,
292            "roi": roi_json,
293        }
294
295    @classmethod
296    def from_dict(cls, scan_dict) -> Self:
297        local_timezone = zoneinfo.ZoneInfo("localtime")
298        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
299        result = cls(
300            slide_id=scan_dict["slide_id"],
301            exists=scan_dict["exists"],
302            path=scan_dict["path"],
303            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
304            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
305            scan_time_s=int(dt),
306            scanner_id=scan_dict["scanner_id"],
307            tray_pos=scan_dict["tray_pos"],
308            slide_pos=scan_dict["slide_pos"],
309            camera=scan_dict["camera"],
310            objective=scan_dict["objective"],
311            pixel_size_um=scan_dict["pixel_size"],
312            tile_width_px=scan_dict["tile_width"],
313            tile_height_px=scan_dict["tile_height"],
314            tile_overlap_proportion=scan_dict["tile_overlap"],
315        )
316        for channel_json in scan_dict["channels"]["data"]:
317            result.channels.append(
318                cls.Channel(
319                    name=channel_json["name"],
320                    exposure_ms=channel_json["exposure_ms"],
321                    intensity=channel_json["intensity"],
322                    gain_applied=channel_json["gain_applied"],
323                )
324            )
325        for roi_json in scan_dict["roi"]["data"]:
326            result.roi.append(
327                cls.ROI(
328                    origin_x_um=roi_json["origin_x_um"],
329                    origin_y_um=roi_json["origin_y_um"],
330                    width_um=roi_json["width_um"],
331                    height_um=roi_json["height_um"],
332                    tile_rows=roi_json["tile_rows"],
333                    tile_cols=roi_json["tile_cols"],
334                    focus_points=roi_json["focus_points"],
335                )
336            )
337        return result
338
    @classmethod
    def load_czi(cls, input_path: str) -> Self:
        """
        Build a Scan from the metadata of a .czi file, read through
        aicspylibczi. XML metadata supplies most fields; the per-ROI shapes
        returned by get_dims_shape() supply the tile size and the tile counts.
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises ValueError: if the channel or ROI counts of the XML metadata and
                            the shape data disagree, if an ROI has no matching
                            scene, or if an ROI's tile count is not divisible by
                            its number of rows
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        # NOTE(review): CziFile is constructed twice for the same path; a single
        # instance could presumably serve both reads
        # Read in metadata as XML elements
        metadata_xml = aicspylibczi.CziFile(input_path).meta
        # Read in shape metadata from binary
        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        # An ID that is not a key of SCANNER_IDS raises KeyError here
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision
        # NOTE(review): assumes the string has a "." and ends in a "+HH:MM" or
        # "-HH:MM" offset; without a "." find() returns -1 and the slice drops
        # the last character instead
        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_date = date_as_datetime.strftime(
            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        # The raw duration is divided by 1000 and rounded to whole seconds
        # (presumably milliseconds in the file; confirm)
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_date = date_as_datetime.strftime(
            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )

        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # Only the last character of the position text is used
        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        scan.objective = f"{magnification.text}x-{aperture.text}"
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        # NOTE(review): tile_info is parsed but not read again in this method;
        # the tile size below comes from the shape data instead
        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        scan.tile_width_px = rois_shape[0]["X"][1]
        scan.tile_height_px = rois_shape[0]["Y"][1]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # Only the last character of the Id attribute is used as the index
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drops the last two characters of the text and scales by 1e-2
                # (presumably a unit suffix and a percentage; confirm)
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    # Raw value scaled by 1e-6 (presumably ns to ms; confirm)
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                    gain_applied=True,  # In Axioscan, we will always use gain = 1
                )
            )
        # Make sure the channels are sorted
        scan.channels = [
            channel for _, channel in sorted(zip(channel_indices, scan.channels))
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        # [center x, center y, size x, size y] of the first AllowedScanArea
        limits = [
            round(float(limits_xml[0].find("Center").text.split(",")[0])),
            round(float(limits_xml[0].find("Center").text.split(",")[1])),
            round(float(limits_xml[0].find("Size").text.split(",")[0])),
            round(float(limits_xml[0].find("Size").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        limits = [
            round(limits[0] - limits[2] / 2),
            round(limits[1] - limits[3] / 2),
            round(limits[0] + limits[2] / 2),
            round(limits[1] + limits[3] / 2),
        ]

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            else:
                roi_indices.append(scene_index)
            # Extract other metadata
            roi_limits = [
                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
            ]
            # Convert to top-left and bottom-right
            roi_limits = [
                round(roi_limits[0] - roi_limits[2] / 2),
                round(roi_limits[1] - roi_limits[3] / 2),
                round(roi_limits[0] + roi_limits[2] / 2),
                round(roi_limits[1] + roi_limits[3] / 2),
            ]
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            else:
                tile_cols = int(roi_shape["M"][1] / tile_rows)
            # Support points are actually the relevant focus points for this ROI
            focus_points = []
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
                focus_points.append(
                    [
                        int(float(focus_point.find("X").text)),
                        int(float(focus_point.find("Y").text)),
                        int(float(focus_point.find("Z").text)),
                    ]
                )
            # Strip all sub-micron precision, it does not matter
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices
        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

        return scan
543
544    @classmethod
545    def load_txt(cls, input_path: str) -> Self:
546        """
547        Loads a Scan object from a .txt file, which originates from the BZScanner.
548        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
549        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
550        :return: a Scan object
551        """
552        # Set paths
553        input_path = os.path.abspath(input_path)
554        if os.path.isdir(input_path):
555            input_path = os.path.join(
556                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
557            )
558
559        # Read in metadata as a dict
560        with open(input_path, "r") as file:
561            metadata_contents = file.read()
562            # Read each line, splitting on the = sign
563            metadata_dict = {}
564            for line in metadata_contents.splitlines():
565                key, value = line.split("=")
566                metadata_dict[key] = value
567
568        # Populate metadata
569        scan = cls()
570
571        scan.slide_id = metadata_dict["SLIDEID"]
572        scan.slide_id = scan.slide_id.strip().upper()
573
574        scan.path = metadata_dict["SLIDEDIR"]
575
576        # Extract start and finish datetimes
577        date = metadata_dict["DATE"]
578        date_as_datetime = datetime.datetime.strptime(
579            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
580        )
581        date_as_datetime = date_as_datetime.astimezone(
582            zoneinfo.ZoneInfo("America/Los_Angeles")
583        )  # Hardcoded because BZScanners are here
584        scan.start_date = date_as_datetime.strftime(
585            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
586        )
587        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
588        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
589        scan.end_date = date_as_datetime.strftime(
590            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
591        )
592
593        # Map the raw scanner ID (service ID) to our IDs
594        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
595        scan.tray_pos = 0  # only one tray_pos in a BZScanner
596        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
597
598        # Get camera and magnifying info
599        scan.camera = ""
600        magnification = 10
601        aperture = 0  # TODO: find the actual aperture
602        scan.objective = f"{magnification}x-{aperture}"
603        scan.pixel_size_um = 0.591  # Estimated from image metadata
604
605        # Get tile information
606        scan.tile_width_px = 1362  # Known from image metadata
607        scan.tile_height_px = 1004  # Known from image metadata
608        scan.tile_overlap_proportion = 0
609
610        # Extract channels and create Channel objects from them
611        if "gain_applied" in metadata_dict:
612            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
613        else:
614            gain_applied = True  # Previous policy was always to apply gains
615        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
616            channel_settings = metadata_dict[channel].split(",")
617            if channel_settings[0] == "0":
618                continue
619            scan.channels.append(
620                cls.Channel(
621                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
622                    exposure_ms=float(channel_settings[1]),
623                    intensity=float(channel_settings[2]),
624                    gain_applied=gain_applied,
625                )
626            )
627
628        # Get focus points
629        focus_points = []
630        for i in range(33):
631            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
632            if focus_point[0] == "0":
633                break
634            focus_points.append(
635                [
636                    int(float(focus_point[1])),
637                    int(float(focus_point[2])),
638                    int(float(focus_point[3])),
639                ]
640            )
641
642        # In the BZScanner, the slide is vertical instead of horizontal
643        # We put in nominal values for the ROI, which is oriented vertically as well
644        tile_rows = 96
645        tile_cols = 24
646        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
647        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
648        origin_x_um = 2500 + round((20000 - roi_width) / 2)
649        origin_y_um = 2500 + round((58000 - roi_height) / 2)
650        scan.roi.append(
651            cls.ROI(
652                origin_x_um=origin_x_um,
653                origin_y_um=origin_y_um,
654                width_um=roi_width,
655                height_um=roi_height,
656                tile_rows=tile_rows,
657                tile_cols=tile_cols,
658                focus_points=focus_points,
659            )
660        )
661        return scan
662
663    @classmethod
664    def load_from_folder(cls, input_path: str) -> Self:
665        """
666        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
667        Prefers scan.yaml if both exist.
668        :param input_path: /path/to/folder
669        :return: a Scan object
670        """
671        input_path = os.path.abspath(input_path)
672        if os.path.isfile(
673            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
674        ):
675            return cls.load_yaml(input_path)
676        elif os.path.isfile(
677            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
678        ):
679            return cls.load_txt(input_path)
680        else:
681            raise ValueError(
682                f"No scan metadata files "
683                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
684                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
685                f"{input_path}"
686            )
687        pass
688
689    @classmethod
690    def make_placeholder(
691        cls,
692        slide_id: str,
693        n_tile: int = 2303,
694        n_roi: int = 0,
695        scanner_type: Type = Type.BZSCANNER,
696    ) -> Self:
697        """
698        Make a placeholder Scan object with only basic required information filled in.
699        :param slide_id: the slide ID
700        :param n_tile: the number of this tile, which will become the number of
701                       tiles in the scan
702        :param n_roi: the number of ROIs in the scan
703        :return: a Scan object
704        """
705        # Sanitize inputs here
706        slide_id = str(slide_id).strip().upper()
707        n_tile = int(n_tile)
708        n_roi = int(n_roi)
709        # Generate the object
710        scan = cls()
711        scan.slide_id = slide_id
712        if scanner_type == cls.Type.AXIOSCAN7:
713            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
714        elif scanner_type == cls.Type.BZSCANNER:
715            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
716        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
717        scan.roi[0].tile_rows = 1
718        scan.roi[0].tile_cols = n_tile + 1
719        return scan

Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.

Scans -> scan-level coordinate frames. Each scan maintains its own scan-level coordinate frames based on the scanner it was scanned with.

Slide-level coordinate frame (common, agreed-upon frame of reference for a slide).

Picture of the slide coordinate system, which assumes a slide placed horizontally
with the label on the left. The x-axis points to the right, and the y-axis points
down. The origin is at the top-left corner. Key positions, such as the origin of the
slide's active area at (14500, 2500) micrometers and the bottom-right corner at
(72500, 22500) micrometers, are displayed.

Scan( slide_id: str = '', exists: bool = True, path: str = '', start_date: str = '', end_date: str = '', scan_time_s: int = -1, scanner_id: str = '', tray_pos: int = -1, slide_pos: int = -1, camera: str = '', objective: str = '', pixel_size_um: float = -1.0, tile_width_px: int = -1, tile_height_px: int = -1, tile_overlap_proportion: int = -1, channels: list[Scan.Channel] = None, roi: list[Scan.ROI] = None)
    def __init__(
        self,
        slide_id: str = "",
        exists: bool = True,
        path: str = "",
        start_date: str = "",
        end_date: str = "",
        scan_time_s: int = -1,
        scanner_id: str = "",
        tray_pos: int = -1,
        slide_pos: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_overlap_proportion: int = -1,
        channels: list[Channel] | None = None,
        roi: list[ROI] | None = None,
    ):
        """
        Store every argument on the instance under the same name. The defaults
        ("" / -1 / -1.0) act as "not set" markers.
        :param channels: list of Channel objects; None becomes a new empty list
        :param roi: list of ROI objects; None becomes a new empty list
        """
        # None defaults avoid sharing one mutable list between instances
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        # NOTE(review): the order of these assignments appears to determine the
        # key order of the unsorted YAML dump; keep it stable
        self.slide_id = slide_id
        self.exists = exists
        self.path = path
        self.start_date = start_date
        self.end_date = end_date
        self.scan_time_s = scan_time_s
        self.scanner_id = scanner_id
        self.tray_pos = tray_pos
        self.slide_pos = slide_pos
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi
yaml_tag = 'csi_utils.scans.Scan'
SCANNER_IDS = {'4661000426': 'axioscan7_0'}

Axioscan 7 scanner IDs (service number), mapped to our scanner IDs

METADATA_FILE_NAME = {<Type.AXIOSCAN7: 'axioscan7'>: 'scan.yaml', <Type.BZSCANNER: 'bzscanner'>: 'slideinfo.txt'}
DATETIME_FORMAT = {<Type.AXIOSCAN7: 'axioscan7'>: '%Y-%m-%dT%H:%M:%S%z', <Type.BZSCANNER: 'bzscanner'>: '%a %b %d %H:%M:%S %Y'}
BZSCANNER_CHANNEL_MAP = {'DAPI': 'DAPI', 'TRITC': 'AF555', 'CY5': 'AF647', 'BF': 'BRIGHT', 'FITC': 'AF488'}
slide_id
exists
path
start_date
end_date
scan_time_s
scanner_id
tray_pos
slide_pos
camera
objective
pixel_size_um
tile_width_px
tile_height_px
tile_overlap_proportion
channels
roi
def has_same_profile(self, other):
178    def has_same_profile(self, other):
179        return (
180            self.camera == other.camera
181            and self.objective == other.objective
182            and self.pixel_size_um == other.pixel_size_um
183            and self.tile_width_px == other.tile_width_px
184            and self.tile_height_px == other.tile_height_px
185            and self.tile_overlap_proportion == other.tile_overlap_proportion
186            and self.channels == other.channels
187            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
188        )
def get_channel_names(self) -> list[str]:
190    def get_channel_names(self) -> list[str]:
191        """
192        Get the channel names in the scan's channel order.
193        :return: a list of channel names.
194        """
195        return [channel.name for channel in self.channels]

Get the channel names in the scan's channel order.

Returns

a list of channel names.

def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
197    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
198        """
199        Given a list of channel names, return the corresponding indices in the scan's
200        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
201        actual AlexaFluor names (AF555, AF647, AF488).
202        If a list entry is None, it will return -1 for that entry.
203        :param channel_names: a list of channel names.
204        :return: a list of channel indices.
205        """
206        # Get the scan's channel name list
207        scan_channel_names = self.get_channel_names()
208
209        channel_indices = []
210        for name in channel_names:
211            # Convert any BZScanner channel names to the actual channel names
212            if name in self.BZSCANNER_CHANNEL_MAP:
213                name = self.BZSCANNER_CHANNEL_MAP[name]
214
215            # Append the corresponding index if possible
216            if name is None:
217                channel_indices.append(-1)
218            elif name in scan_channel_names:
219                channel_indices.append(scan_channel_names.index(name))
220            else:
221                raise ValueError(
222                    f"Channel name {name} not found in scan channels {scan_channel_names}"
223                )
224        return channel_indices

Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is None, it will return -1 for that entry.

Parameters
  • channel_names: a list of channel names.
Returns

a list of channel indices.

def save_yaml(self, output_path: str):
226    def save_yaml(self, output_path: str):
227        """
228        Write the Scan object to a .yaml file.
229        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
230        :return: nothing; will raise an error on failure
231        """
232        # Create necessary folders
233        output_path = os.path.abspath(output_path)
234        if os.path.splitext(output_path)[1] == ".yaml":
235            os.makedirs(os.path.dirname(output_path), exist_ok=True)
236        else:
237            os.makedirs(output_path, exist_ok=True)
238            # Add the standard metadata file name to the path if needed
239            output_path = os.path.join(
240                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
241            )
242
243        # Populate the file
244        with open(output_path, "w") as file:
245            yaml.dump(self, stream=file, sort_keys=False)

Write the Scan object to a .yaml file.

Parameters
  • output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns

nothing; will raise an error on failure

@classmethod
def load_yaml(cls, input_path: str) -> Self:
247    @classmethod
248    def load_yaml(cls, input_path: str) -> Self:
249        """
250        Load a Scan object from a .yaml file.
251        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
252        :return: a Scan object
253        """
254        input_path = os.path.abspath(input_path)
255        if os.path.isdir(input_path):
256            input_path = os.path.join(
257                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
258            )
259        with open(input_path, "r") as file:
260            metadata_obj = yaml.load(file, Loader=yaml.Loader)
261        return metadata_obj

Load a Scan object from a .yaml file.

Parameters
  • input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns

a Scan object

def to_dict(self) -> dict:
263    def to_dict(self) -> dict:
264        # Dump to json; then add indents and a top-level key
265        channels_json = json.dumps(
266            self.channels, default=lambda x: x.__dict__, indent=2
267        )
268        channels_json = "  ".join(channels_json.splitlines(True))
269        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
270
271        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
272        roi_json = "  ".join(roi_json.splitlines(True))
273        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
274
275        return {
276            "slide_id": self.slide_id,
277            "exists": self.exists,
278            "path": self.path,
279            "start_date": self.start_date,
280            "end_date": self.end_date,
281            "scan_time_s": self.scan_time_s,
282            "scanner_id": self.scanner_id,
283            "tray_pos": self.tray_pos,
284            "slide_pos": self.slide_pos,
285            "camera": self.camera,
286            "objective": self.objective,
287            "pixel_size_um": self.pixel_size_um,
288            "tile_width_px": self.tile_width_px,
289            "tile_height_px": self.tile_height_px,
290            "tile_overlap_proportion": self.tile_overlap_proportion,
291            "channels": channels_json,
292            "roi": roi_json,
293        }
@classmethod
def from_dict(cls, scan_dict) -> Self:
295    @classmethod
296    def from_dict(cls, scan_dict) -> Self:
297        local_timezone = zoneinfo.ZoneInfo("localtime")
298        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
299        result = cls(
300            slide_id=scan_dict["slide_id"],
301            exists=scan_dict["exists"],
302            path=scan_dict["path"],
303            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
304            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
305            scan_time_s=int(dt),
306            scanner_id=scan_dict["scanner_id"],
307            tray_pos=scan_dict["tray_pos"],
308            slide_pos=scan_dict["slide_pos"],
309            camera=scan_dict["camera"],
310            objective=scan_dict["objective"],
311            pixel_size_um=scan_dict["pixel_size"],
312            tile_width_px=scan_dict["tile_width"],
313            tile_height_px=scan_dict["tile_height"],
314            tile_overlap_proportion=scan_dict["tile_overlap"],
315        )
316        for channel_json in scan_dict["channels"]["data"]:
317            result.channels.append(
318                cls.Channel(
319                    name=channel_json["name"],
320                    exposure_ms=channel_json["exposure_ms"],
321                    intensity=channel_json["intensity"],
322                    gain_applied=channel_json["gain_applied"],
323                )
324            )
325        for roi_json in scan_dict["roi"]["data"]:
326            result.roi.append(
327                cls.ROI(
328                    origin_x_um=roi_json["origin_x_um"],
329                    origin_y_um=roi_json["origin_y_um"],
330                    width_um=roi_json["width_um"],
331                    height_um=roi_json["height_um"],
332                    tile_rows=roi_json["tile_rows"],
333                    tile_cols=roi_json["tile_cols"],
334                    focus_points=roi_json["focus_points"],
335                )
336            )
337        return result
@classmethod
def load_czi(cls, input_path: str) -> Self:
339    @classmethod
340    def load_czi(cls, input_path: str) -> Self:
341        """
342        :param input_path: the path to the .czi file
343        :return: a Scan object
344        """
345        if aicspylibczi is None:
346            raise ModuleNotFoundError(
347                "aicspylibczi library not installed. "
348                "Install csi-images with [imageio] option to resolve."
349            )
350
351        # Normalize paths
352        input_path = os.path.abspath(input_path)
353
354        # Read in metadata as XML elements
355        metadata_xml = aicspylibczi.CziFile(input_path).meta
356        # Read in shape metadata from binary
357        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
358
359        # Populate metadata
360        scan = cls()
361
362        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
363        if scan.slide_id is not None:
364            scan.slide_id = scan.slide_id.strip().upper()
365        # Map the raw scanner ID (service ID) to our IDs
366        scan.scanner_id = cls.SCANNER_IDS[
367            metadata_xml.find(".//Microscope/UserDefinedName").text
368        ]
369
370        # Extract start and finish datetimes
371        date = metadata_xml.find(".//Document/CreationDate").text
372        # Strip out sub-second precision
373        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
374        date_as_datetime = datetime.datetime.strptime(
375            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
376        )
377        scan.start_date = date_as_datetime.strftime(
378            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
379        )
380        scan.scan_time_s = round(
381            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
382        )
383        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
384        scan.end_date = date_as_datetime.strftime(
385            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
386        )
387
388        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
389        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
390
391        # Get camera and magnifying info
392        scan.camera = (
393            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
394        )["Name"]
395        magnification = metadata_xml.find(
396            ".//Objectives/Objective/NominalMagnification"
397        )
398        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
399        scan.objective = f"{magnification.text}x-{aperture.text}"
400        scan.pixel_size_um = (
401            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
402        )
403        # Round off the pixel size to nanometers; might not be optimal, but this
404        # gets rounded when we send it to the database anyways (to 7 places)
405        scan.pixel_size_um = round(scan.pixel_size_um, 3)
406
407        # Get tile information
408        # Note: X Y is untested, could be flipped. I always forget. Just don't use
409        # non-square frames and we're all good.
410        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
411        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
412
413        scan.tile_width_px = rois_shape[0]["X"][1]
414        scan.tile_height_px = rois_shape[0]["Y"][1]
415        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
416
417        # Extract channels and create Channel objects from them
418        channel_indices = []
419        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
420            channel_indices.append(int(channel.attrib["Id"][-1]))
421            intensity_xml = channel.find(".//Intensity")
422            if intensity_xml is None:
423                intensity = 0
424            else:
425                intensity = float(intensity_xml.text[:-2]) * 1e-2
426            scan.channels.append(
427                cls.Channel(
428                    name=channel.attrib["Name"].upper(),
429                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
430                    intensity=intensity,
431                    gain_applied=True,  # In Axioscan, we will always use gain = 1
432                )
433            )
434        # Make sure the channels are sorted
435        scan.channels = [
436            channel for _, channel in sorted(zip(channel_indices, scan.channels))
437        ]
438        # Verify that the shape corresponds to the channels
439        for roi in rois_shape:
440            if roi["C"][1] != len(scan.channels):
441                raise ValueError(
442                    f"Number of channels {len(scan.channels)} "
443                    f"is not the same as the number of channels in an ROI: "
444                    f"{roi['C'][1]}"
445                )
446
447        # Get the real ROI limits; the metadata is not always correct
448        limits_xml = metadata_xml.findall(".//AllowedScanArea")
449        limits = [
450            round(float(limits_xml[0].find("Center").text.split(",")[0])),
451            round(float(limits_xml[0].find("Center").text.split(",")[1])),
452            round(float(limits_xml[0].find("Size").text.split(",")[0])),
453            round(float(limits_xml[0].find("Size").text.split(",")[1])),
454        ]
455        # Convert to top-left and bottom-right
456        limits = [
457            round(limits[0] - limits[2] / 2),
458            round(limits[1] - limits[3] / 2),
459            round(limits[0] + limits[2] / 2),
460            round(limits[1] + limits[3] / 2),
461        ]
462
463        # Extract ROIs and create ROI objects from them
464        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
465        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
466        if len(rois_xml_metadata) != len(rois_shape):
467            raise ValueError(
468                f"Metadata and binary data from {input_path} "
469                f"do not match in number of ROIs"
470            )
471        # We need both to determine the number of rows/columns because the XML lies
472        roi_indices = []
473        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
474            name = roi_xml.attrib["Name"]
475            # Determine the index of this scene
476            scene_index = -1
477            for scene in scenes_xml_metadata:
478                if scene.attrib["Name"] == name:
479                    scene_index = int(scene.attrib["Index"])
480                    break
481            if scene_index == -1:
482                raise ValueError(f"ROI {name} does not correspond to any scenes")
483            else:
484                roi_indices.append(scene_index)
485            # Extract other metadata
486            roi_limits = [
487                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
488                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
489                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
490                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
491            ]
492            # Convert to top-left and bottom-right
493            roi_limits = [
494                round(roi_limits[0] - roi_limits[2] / 2),
495                round(roi_limits[1] - roi_limits[3] / 2),
496                round(roi_limits[0] + roi_limits[2] / 2),
497                round(roi_limits[1] + roi_limits[3] / 2),
498            ]
499            # Bound the ROI to the actual scan limits
500            roi_limits = [
501                max(roi_limits[0], limits[0]),
502                max(roi_limits[1], limits[1]),
503                min(roi_limits[2], limits[2]),
504                min(roi_limits[3], limits[3]),
505            ]
506
507            tile_rows = int(roi_xml.find("Rows").text)
508            # Current best way of reliably extracting; <Columns> entry can be wrong
509            if (roi_shape["M"][1] % tile_rows) != 0:
510                raise ValueError(
511                    f"The number of tiles {roi_shape['M'][1]} is not "
512                    f"divisible by the tile rows {tile_rows}; metadata "
513                    f"must be messed up. Thanks Zeiss"
514                )
515            else:
516                tile_cols = int(roi_shape["M"][1] / tile_rows)
517            # Support points are actually the relevant focus points for this ROI
518            focus_points = []
519            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
520                focus_points.append(
521                    [
522                        int(float(focus_point.find("X").text)),
523                        int(float(focus_point.find("Y").text)),
524                        int(float(focus_point.find("Z").text)),
525                    ]
526                )
527            # Strip all sub-micron precision, it does not matter
528            scan.roi.append(
529                cls.ROI(
530                    origin_x_um=roi_limits[0],
531                    origin_y_um=roi_limits[1],
532                    width_um=roi_limits[2] - roi_limits[0],
533                    height_um=roi_limits[3] - roi_limits[1],
534                    tile_rows=tile_rows,
535                    tile_cols=tile_cols,
536                    focus_points=focus_points,
537                )
538            )
539        # Sort based on the scene indices
540        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
541
542        return scan
Parameters
  • input_path: the path to the .czi file
Returns

a Scan object

@classmethod
def load_txt(cls, input_path: str) -> Self:
544    @classmethod
545    def load_txt(cls, input_path: str) -> Self:
546        """
547        Loads a Scan object from a .txt file, which originates from the BZScanner.
548        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
549        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
550        :return: a Scan object
551        """
552        # Set paths
553        input_path = os.path.abspath(input_path)
554        if os.path.isdir(input_path):
555            input_path = os.path.join(
556                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
557            )
558
559        # Read in metadata as a dict
560        with open(input_path, "r") as file:
561            metadata_contents = file.read()
562            # Read each line, splitting on the = sign
563            metadata_dict = {}
564            for line in metadata_contents.splitlines():
565                key, value = line.split("=")
566                metadata_dict[key] = value
567
568        # Populate metadata
569        scan = cls()
570
571        scan.slide_id = metadata_dict["SLIDEID"]
572        scan.slide_id = scan.slide_id.strip().upper()
573
574        scan.path = metadata_dict["SLIDEDIR"]
575
576        # Extract start and finish datetimes
577        date = metadata_dict["DATE"]
578        date_as_datetime = datetime.datetime.strptime(
579            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
580        )
581        date_as_datetime = date_as_datetime.astimezone(
582            zoneinfo.ZoneInfo("America/Los_Angeles")
583        )  # Hardcoded because BZScanners are here
584        scan.start_date = date_as_datetime.strftime(
585            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
586        )
587        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
588        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
589        scan.end_date = date_as_datetime.strftime(
590            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
591        )
592
593        # Map the raw scanner ID (service ID) to our IDs
594        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
595        scan.tray_pos = 0  # only one tray_pos in a BZScanner
596        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
597
598        # Get camera and magnifying info
599        scan.camera = ""
600        magnification = 10
601        aperture = 0  # TODO: find the actual aperture
602        scan.objective = f"{magnification}x-{aperture}"
603        scan.pixel_size_um = 0.591  # Estimated from image metadata
604
605        # Get tile information
606        scan.tile_width_px = 1362  # Known from image metadata
607        scan.tile_height_px = 1004  # Known from image metadata
608        scan.tile_overlap_proportion = 0
609
610        # Extract channels and create Channel objects from them
611        if "gain_applied" in metadata_dict:
612            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
613        else:
614            gain_applied = True  # Previous policy was always to apply gains
615        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
616            channel_settings = metadata_dict[channel].split(",")
617            if channel_settings[0] == "0":
618                continue
619            scan.channels.append(
620                cls.Channel(
621                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
622                    exposure_ms=float(channel_settings[1]),
623                    intensity=float(channel_settings[2]),
624                    gain_applied=gain_applied,
625                )
626            )
627
628        # Get focus points
629        focus_points = []
630        for i in range(33):
631            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
632            if focus_point[0] == "0":
633                break
634            focus_points.append(
635                [
636                    int(float(focus_point[1])),
637                    int(float(focus_point[2])),
638                    int(float(focus_point[3])),
639                ]
640            )
641
642        # In the BZScanner, the slide is vertical instead of horizontal
643        # We put in nominal values for the ROI, which is oriented vertically as well
644        tile_rows = 96
645        tile_cols = 24
646        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
647        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
648        origin_x_um = 2500 + round((20000 - roi_width) / 2)
649        origin_y_um = 2500 + round((58000 - roi_height) / 2)
650        scan.roi.append(
651            cls.ROI(
652                origin_x_um=origin_x_um,
653                origin_y_um=origin_y_um,
654                width_um=roi_width,
655                height_um=roi_height,
656                tile_rows=tile_rows,
657                tile_cols=tile_cols,
658                focus_points=focus_points,
659            )
660        )
661        return scan

Loads a Scan object from a .txt file, which originates from the BZScanner. Some metadata from the slideinfo.txt file is missing or adjusted to fit.

Parameters
  • input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
Returns

a Scan object

@classmethod
def load_from_folder(cls, input_path: str) -> Self:
663    @classmethod
664    def load_from_folder(cls, input_path: str) -> Self:
665        """
666        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
667        Prefers scan.yaml if both exist.
668        :param input_path: /path/to/folder
669        :return: a Scan object
670        """
671        input_path = os.path.abspath(input_path)
672        if os.path.isfile(
673            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
674        ):
675            return cls.load_yaml(input_path)
676        elif os.path.isfile(
677            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
678        ):
679            return cls.load_txt(input_path)
680        else:
681            raise ValueError(
682                f"No scan metadata files "
683                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
684                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
685                f"{input_path}"
686            )
687        pass

Load a Scan object from a folder that contains scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.

Parameters
  • input_path: /path/to/folder
Returns

a Scan object

@classmethod
def make_placeholder( cls, slide_id: str, n_tile: int = 2303, n_roi: int = 0, scanner_type: Scan.Type = <Type.BZSCANNER: 'bzscanner'>) -> Self:
689    @classmethod
690    def make_placeholder(
691        cls,
692        slide_id: str,
693        n_tile: int = 2303,
694        n_roi: int = 0,
695        scanner_type: Type = Type.BZSCANNER,
696    ) -> Self:
697        """
698        Make a placeholder Scan object with only basic required information filled in.
699        :param slide_id: the slide ID
700        :param n_tile: the number of this tile, which will become the number of
701                       tiles in the scan
702        :param n_roi: the number of ROIs in the scan
703        :return: a Scan object
704        """
705        # Sanitize inputs here
706        slide_id = str(slide_id).strip().upper()
707        n_tile = int(n_tile)
708        n_roi = int(n_roi)
709        # Generate the object
710        scan = cls()
711        scan.slide_id = slide_id
712        if scanner_type == cls.Type.AXIOSCAN7:
713            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
714        elif scanner_type == cls.Type.BZSCANNER:
715            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
716        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
717        scan.roi[0].tile_rows = 1
718        scan.roi[0].tile_cols = n_tile + 1
719        return scan

Make a placeholder Scan object with only basic required information filled in.

Parameters
  • slide_id: the slide ID
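  • n_tile: the index of a tile; the placeholder's first ROI is given a single row of n_tile + 1 tiles
  • n_roi: the index of an ROI; the placeholder scan is given n_roi + 1 ROIs
  • scanner_type: the scanner type (Scan.Type) used to build the placeholder scanner ID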
Returns

a Scan object

class Scan.Type(enum.Enum):
    class Type(enum.Enum):
        """Scanner types; used as keys for the per-scanner metadata tables."""

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"
BZSCANNER = <Type.BZSCANNER: 'bzscanner'>
AXIOSCAN7 = <Type.AXIOSCAN7: 'axioscan7'>
class Scan.Channel(yaml.YAMLObject):
    class Channel(yaml.YAMLObject):
        """
        Class that comprises a channel; we usually have multiple (2-5) per scan.
        Contains four fields:
        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
        - exposure_ms: the exposure time to capture a frame in milliseconds
        - intensity: the light intensity used OR the gain applied to the channel
        - gain_applied: boolean flag; presumably whether gain was applied to the
          channel (TODO confirm)
        """

        yaml_tag = "csi_utils.csi_scans.Scan.Channel"

        def __init__(
            self,
            name: str = "",
            exposure_ms: float = -1.0,
            intensity: float = -1.0,
            gain_applied: bool = False,
        ):
            # Defaults ("" / -1.0 / False) are placeholders for unset values
            self.name = name
            self.exposure_ms = exposure_ms
            self.intensity = intensity
            self.gain_applied = gain_applied

        def __repr__(self):
            # The representation is the YAML dump, with attributes in set order
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            # Two channels are equal when their representations are identical
            return self.__repr__() == other.__repr__()

Class that comprises a channel; we usually have multiple (2-5) per scan. Contains four fields:

  • name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
  • exposure_ms: the exposure time to capture a frame in milliseconds
  • intensity: the light intensity used OR the gain applied to the channel
  • gain_applied: a boolean flag stored alongside the other fields (defaults to False)
Scan.Channel( name: str = '', exposure_ms: float = -1.0, intensity: float = -1.0, gain_applied: bool = False)
        def __init__(
            self,
            name: str = "",
            exposure_ms: float = -1.0,
            intensity: float = -1.0,
            gain_applied: bool = False,
        ):
            """
            Store the channel's fields as given.
            :param name: the name of the channel.
            :param exposure_ms: the exposure time in milliseconds.
            :param intensity: the light intensity used OR the gain applied.
            :param gain_applied: boolean flag stored as-is.
            """
            self.name = name
            self.exposure_ms = exposure_ms
            self.intensity = intensity
            self.gain_applied = gain_applied
yaml_tag = 'csi_utils.csi_scans.Scan.Channel'
name
exposure_ms
intensity
gain_applied
class Scan.ROI(yaml.YAMLObject):
    class ROI(yaml.YAMLObject):
        """
        Class that comprises an ROI; we usually have 1, but may have more in a scan.
        Holds the ROI's origin and size (the *_um fields), its tile grid
        (tile_rows x tile_cols), and a list of focus points.
        """

        yaml_tag = "csi_utils.csi_scans.Scan.ROI"

        def __init__(
            self,
            origin_x_um: int = -1,
            origin_y_um: int = -1,
            width_um: int = -1,
            height_um: int = -1,
            tile_rows: int = -1,
            tile_cols: int = -1,
            focus_points=None,
        ):
            # Use a fresh list per instance rather than a shared mutable default
            if focus_points is None:
                focus_points = []
            self.origin_x_um = origin_x_um
            self.origin_y_um = origin_y_um
            self.width_um = width_um
            self.height_um = height_um
            self.tile_rows = tile_rows
            self.tile_cols = tile_cols
            self.focus_points = focus_points

        def __repr__(self):
            # The representation is the YAML dump, with attributes in set order
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            # Two ROIs are equal when their representations are identical
            return self.__repr__() == other.__repr__()

        def similar(self, other):
            """
            Compare origin, size, and tile grid with another ROI.
            Unlike ==, the focus points are not part of the comparison.
            :param other: the ROI to compare against.
            :return: True if all six compared fields match.
            """
            return (
                self.origin_y_um == other.origin_y_um
                and self.origin_x_um == other.origin_x_um
                and self.width_um == other.width_um
                and self.height_um == other.height_um
                and self.tile_rows == other.tile_rows
                and self.tile_cols == other.tile_cols
            )

Class that comprises an ROI; we usually have 1, but may have more in a scan.

Scan.ROI( origin_x_um: int = -1, origin_y_um: int = -1, width_um: int = -1, height_um: int = -1, tile_rows: int = -1, tile_cols: int = -1, focus_points=None)
        def __init__(
            self,
            origin_x_um: int = -1,
            origin_y_um: int = -1,
            width_um: int = -1,
            height_um: int = -1,
            tile_rows: int = -1,
            tile_cols: int = -1,
            focus_points=None,
        ):
            """
            Store the ROI's fields as given.
            :param origin_x_um: x coordinate of the ROI origin.
            :param origin_y_um: y coordinate of the ROI origin.
            :param width_um: width of the ROI.
            :param height_um: height of the ROI.
            :param tile_rows: number of tile rows in the ROI.
            :param tile_cols: number of tile columns in the ROI.
            :param focus_points: list of focus points; None becomes a new empty list.
            """
            # Use a fresh list per instance rather than a shared mutable default
            if focus_points is None:
                focus_points = []
            self.origin_x_um = origin_x_um
            self.origin_y_um = origin_y_um
            self.width_um = width_um
            self.height_um = height_um
            self.tile_rows = tile_rows
            self.tile_cols = tile_cols
            self.focus_points = focus_points
yaml_tag = 'csi_utils.csi_scans.Scan.ROI'
origin_x_um
origin_y_um
width_um
height_um
tile_rows
tile_cols
focus_points
def similar(self, other):
120        def similar(self, other):
121            return (
122                self.origin_y_um == other.origin_y_um
123                and self.origin_x_um == other.origin_x_um
124                and self.width_um == other.width_um
125                and self.height_um == other.height_um
126                and self.tile_rows == other.tile_rows
127                and self.tile_cols == other.tile_cols
128            )