csi_images.csi_scans

Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.

  1"""
  2Contains the Scan class, which holds important metadata from a scan. This metadata
  3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
  4object can also be loaded from a .czi file or a .txt file.
  5"""
  6
  7import os
  8import math
  9import enum
 10import datetime
 11import zoneinfo
 12from typing import Self, Iterable
 13
 14import yaml
 15import json
 16
 17try:
 18    import aicspylibczi
 19except ImportError:
 20    aicspylibczi = None
 21
 22
 23class Scan(yaml.YAMLObject):
 24    """
 25    Class that composes a whole scan's metadata. Contains some universal data,
 26    plus lists for channels and ROIs.
 27
 28    .. include:: ../docs/coordinate_systems.md
 29    """
 30
 31    yaml_tag = "csi_utils.scans.Scan"
 32
    class Type(enum.Enum):
        """
        Scanner families handled by this module. The string values are used as
        prefixes when building scanner IDs.
        """

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"
 36
    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name for each scanner type; used when a folder is
    # given instead of a file path
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # Format used for the start/end datetime strings stored on a Scan
    STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
    # strptime formats for the raw datetime strings found in each scanner's metadata
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT,
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 58
 59    class Channel(yaml.YAMLObject):
 60        """
 61        Class that comprises a channel; we usually have multiple (2-5) per scan.
 62        Contains three fields:
 63        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 64        - exposure_ms: the exposure time to capture a frame in milliseconds
 65        - intensity: the light intensity used OR the gain applied to the channel
 66        """
 67
 68        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 69
 70        def __init__(
 71            self,
 72            name: str = "",
 73            exposure_ms: float = -1.0,
 74            intensity: float = -1.0,
 75            gain_applied: bool = False,
 76        ):
 77            self.name = name
 78            self.exposure_ms = exposure_ms
 79            self.intensity = intensity
 80            self.gain_applied = gain_applied
 81
 82        def __repr__(self):
 83            return yaml.dump(self, sort_keys=False)
 84
 85        def __eq__(self, other):
 86            return self.__repr__() == other.__repr__()
 87
 88    class ROI(yaml.YAMLObject):
 89        """
 90        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 91        """
 92
 93        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 94
 95        def __init__(
 96            self,
 97            origin_x_um: int = -1,
 98            origin_y_um: int = -1,
 99            width_um: int = -1,
100            height_um: int = -1,
101            tile_rows: int = -1,
102            tile_cols: int = -1,
103            focus_points=None,
104        ):
105            if focus_points is None:
106                focus_points = []
107            self.origin_x_um = origin_x_um
108            self.origin_y_um = origin_y_um
109            self.width_um = width_um
110            self.height_um = height_um
111            self.tile_rows = tile_rows
112            self.tile_cols = tile_cols
113            self.focus_points = focus_points
114
115        def __repr__(self):
116            return yaml.dump(self, sort_keys=False)
117
118        def __eq__(self, other):
119            return self.__repr__() == other.__repr__()
120
121        def similar(self, other):
122            return (
123                self.origin_y_um == other.origin_y_um
124                and self.origin_x_um == other.origin_x_um
125                and self.width_um == other.width_um
126                and self.height_um == other.height_um
127                and self.tile_rows == other.tile_rows
128                and self.tile_cols == other.tile_cols
129            )
130
131    def __init__(
132        self,
133        slide_id: str = "",
134        exists: bool = True,
135        path: str = "",
136        start_datetime: str = "",
137        end_datetime: str = "",
138        scan_time_s: int = -1,
139        scanner_id: str = "",
140        tray_pos: int = -1,
141        slide_pos: int = -1,
142        camera: str = "",
143        objective: str = "",
144        pixel_size_um: float = -1.0,
145        tile_width_px: int = -1,
146        tile_height_px: int = -1,
147        tile_x_offset_px: int = -1,
148        tile_y_offset_px: int = -1,
149        tile_overlap_proportion: int = -1,
150        channels: list[Channel] = None,
151        roi: list[ROI] = None,
152    ):
153        if roi is None:
154            roi = []
155        if channels is None:
156            channels = []
157        self.slide_id = slide_id
158        self.exists = exists
159        self.path = path
160        self.start_datetime = start_datetime
161        self.end_datetime = end_datetime
162        self.scan_time_s = scan_time_s
163        self.scanner_id = scanner_id
164        self.tray_pos = tray_pos
165        self.slide_pos = slide_pos
166        self.camera = camera
167        self.objective = objective
168        self.pixel_size_um = pixel_size_um
169        self.tile_width_px = tile_width_px
170        self.tile_height_px = tile_height_px
171        self.tile_x_offset_px = tile_x_offset_px
172        self.tile_y_offset_px = tile_y_offset_px
173        self.tile_overlap_proportion = tile_overlap_proportion
174        self.channels = channels
175        self.roi = roi
176
177    def __key(self):
178        return (
179            self.slide_id,
180            self.exists,
181            self.path,
182            self.start_datetime,
183            self.end_datetime,
184            self.scan_time_s,
185            self.scanner_id,
186            self.tray_pos,
187            self.slide_pos,
188            self.camera,
189            self.objective,
190            self.pixel_size_um,
191            self.tile_width_px,
192            self.tile_height_px,
193            self.tile_overlap_proportion,
194            tuple(self.channels),
195            tuple(self.roi),
196        )
197
198    def __hash__(self):
199        return hash(self.__key())
200
201    def __repr__(self):
202        return yaml.dump(self, sort_keys=False)
203
204    def __eq__(self, other):
205        return self.__repr__() == other.__repr__()
206
207    def has_same_profile(self, other):
208        return (
209            self.camera == other.camera
210            and self.objective == other.objective
211            and self.pixel_size_um == other.pixel_size_um
212            and self.tile_width_px == other.tile_width_px
213            and self.tile_height_px == other.tile_height_px
214            and self.tile_x_offset_px == other.tile_x_offset_px
215            and self.tile_y_offset_px == other.tile_y_offset_px
216            and self.tile_overlap_proportion == other.tile_overlap_proportion
217            and self.channels == other.channels
218            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
219        )
220
221    def get_channel_names(self) -> list[str]:
222        """
223        Get the channel names in the scan's channel order.
224        :return: a list of channel names.
225        """
226        return [channel.name for channel in self.channels]
227
228    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
229        """
230        Given a list of channel names, return the corresponding indices in the scan's
231        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
232        actual AlexaFluor names (AF555, AF647, AF488).
233        If a list entry is None, it will return -1 for that entry.
234        :param channel_names: a list of channel names.
235        :return: a list of channel indices.
236        """
237        # Get the scan's channel name list
238        scan_channel_names = self.get_channel_names()
239
240        channel_indices = []
241        for name in channel_names:
242            # Convert any BZScanner channel names to the actual channel names
243            if name in self.BZSCANNER_CHANNEL_MAP:
244                name = self.BZSCANNER_CHANNEL_MAP[name]
245
246            # Append the corresponding index if possible
247            if name is None:
248                channel_indices.append(-1)
249            elif name in scan_channel_names:
250                channel_indices.append(scan_channel_names.index(name))
251            else:
252                raise ValueError(
253                    f"Channel name {name} not found in scan channels {scan_channel_names}"
254                )
255        return channel_indices
256
257    def get_image_size(self) -> tuple[int, int]:
258        """
259        Get the real size of the image in pixels after subtracting overlap.
260        :return: a tuple of (real_height, real_width) for easy comparison to arrays
261        """
262        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
263        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
264        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap
265
266    def save_yaml(self, output_path: str):
267        """
268        Write the Scan object to a .yaml file.
269        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
270        :return: nothing; will raise an error on failure
271        """
272        # Create necessary folders
273        output_path = os.path.abspath(output_path)
274        if os.path.splitext(output_path)[1] == ".yaml":
275            os.makedirs(os.path.dirname(output_path), exist_ok=True)
276        else:
277            os.makedirs(output_path, exist_ok=True)
278            # Add the standard metadata file name to the path if needed
279            output_path = os.path.join(
280                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
281            )
282
283        # Populate the file
284        with open(output_path, "w") as file:
285            yaml.dump(self, stream=file, sort_keys=False)
286
287    @classmethod
288    def load_yaml(cls, input_path: str) -> Self:
289        """
290        Load a Scan object from a .yaml file.
291        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
292        :return: a Scan object
293        """
294        input_path = os.path.abspath(input_path)
295        if os.path.isdir(input_path):
296            input_path = os.path.join(
297                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
298            )
299        with open(input_path, "r") as file:
300            metadata_obj = yaml.load(file, Loader=yaml.Loader)
301        return metadata_obj
302
303    def to_dict(self) -> dict:
304        """
305        Convert the Scan object to a dictionary with keys matching database columns
306        and values matching database entries
307        :return: a dictionary
308        """
309        # Dump to json; then add indents and a top-level key
310        channels_json = json.dumps(
311            self.channels, default=lambda x: x.__dict__, indent=2
312        )
313        channels_json = "  ".join(channels_json.splitlines(True))
314        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
315
316        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
317        roi_json = "  ".join(roi_json.splitlines(True))
318        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
319
320        # Keys are named the same as database columns
321        return {
322            "scanner_id": self.scanner_id,
323            "slide_id": self.slide_id,
324            "exists": self.exists,
325            "path": self.path,
326            "start_datetime": self.start_datetime,
327            "end_datetime": self.end_datetime,
328            "tray_pos": self.tray_pos,
329            "slide_pos": self.slide_pos,
330            "tile_width": self.tile_width_px,
331            "tile_height": self.tile_height_px,
332            "tile_x_offset": self.tile_x_offset_px,
333            "tile_y_offset": self.tile_y_offset_px,
334            "tile_overlap": self.tile_overlap_proportion,
335            "camera": self.camera,
336            "objective": self.objective,
337            "pixel_size": self.pixel_size_um,
338            "channels": channels_json,
339            "roi": roi_json,
340        }
341
342    @classmethod
343    def from_dict(cls, scan_dict) -> Self:
344        """
345        Convert a dictionary from to_dict() or the database to a Scan object
346        :param scan_dict: a dictionary
347        :return: a Scan object
348        """
349        local_timezone = zoneinfo.ZoneInfo("localtime")
350        if isinstance(scan_dict["start_datetime"], str):
351            start_datetime = datetime.datetime.strptime(
352                scan_dict["start_datetime"], cls.STANDARD_DATETIME_FORMAT
353            ).astimezone(local_timezone)
354        else:
355            start_datetime = scan_dict["start_datetime"].astimezone(local_timezone)
356        if isinstance(scan_dict["end_datetime"], str):
357            end_datetime = datetime.datetime.strptime(
358                scan_dict["end_datetime"], cls.STANDARD_DATETIME_FORMAT
359            ).astimezone(local_timezone)
360        else:
361            end_datetime = scan_dict["end_datetime"].astimezone(local_timezone)
362        dt = (end_datetime - start_datetime).total_seconds()
363        result = cls(
364            scanner_id=scan_dict["scanner_id"],
365            slide_id=scan_dict["slide_id"],
366            exists=scan_dict["exists"],
367            path=scan_dict["path"],
368            start_datetime=start_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
369            end_datetime=end_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
370            scan_time_s=int(dt),
371            tray_pos=scan_dict["tray_pos"],
372            slide_pos=scan_dict["slide_pos"],
373            tile_width_px=scan_dict["tile_width"],
374            tile_height_px=scan_dict["tile_height"],
375            tile_x_offset_px=scan_dict["tile_x_offset"],
376            tile_y_offset_px=scan_dict["tile_y_offset"],
377            tile_overlap_proportion=scan_dict["tile_overlap"],
378            camera=scan_dict["camera"],
379            objective=scan_dict["objective"],
380            pixel_size_um=scan_dict["pixel_size"],
381        )
382        for channel_json in json.loads(scan_dict["channels"])["data"]:
383            result.channels.append(
384                cls.Channel(
385                    name=channel_json["name"],
386                    exposure_ms=channel_json["exposure_ms"],
387                    intensity=channel_json["intensity"],
388                    gain_applied=channel_json["gain_applied"],
389                )
390            )
391        for roi_json in json.loads(scan_dict["roi"])["data"]:
392            result.roi.append(
393                cls.ROI(
394                    origin_x_um=roi_json["origin_x_um"],
395                    origin_y_um=roi_json["origin_y_um"],
396                    width_um=roi_json["width_um"],
397                    height_um=roi_json["height_um"],
398                    tile_rows=roi_json["tile_rows"],
399                    tile_cols=roi_json["tile_cols"],
400                    focus_points=roi_json["focus_points"],
401                )
402            )
403        return result
404
405    @classmethod
406    def load_czi(cls, input_path: str) -> Self:
407        """
408        Extracts metadata from a .czi file, which is the output of the Axioscan
409        :param input_path: the path to the .czi file
410        :return: a Scan object
411        """
412        if aicspylibczi is None:
413            raise ModuleNotFoundError(
414                "aicspylibczi library not installed. "
415                "Install csi-images with [imageio] option to resolve."
416            )
417
418        # Normalize paths
419        input_path = os.path.abspath(input_path)
420
421        with open(input_path, "rb") as file:
422            # Read in metadata as XML elements
423            metadata_xml = aicspylibczi.CziFile(file).meta
424            # Read in shape metadata from binary
425            rois_shape = aicspylibczi.CziFile(file).get_dims_shape()
426
427        # Populate metadata
428        scan = cls()
429
430        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
431        if scan.slide_id is not None:
432            scan.slide_id = scan.slide_id.strip().upper()
433        # Map the raw scanner ID (service ID) to our IDs
434        scan.scanner_id = cls.SCANNER_IDS[
435            metadata_xml.find(".//Microscope/UserDefinedName").text
436        ]
437
438        # Extract start and finish datetimes
439        date = metadata_xml.find(".//Document/CreationDate").text
440        # Strip out sub-second precision
441        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
442        date_as_datetime = datetime.datetime.strptime(
443            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
444        )
445        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
446        scan.scan_time_s = round(
447            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
448        )
449        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
450        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
451
452        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
453        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
454
455        # Get camera and magnifying info
456        scan.camera = (
457            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
458        )["Name"]
459        magnification = metadata_xml.find(
460            ".//Objectives/Objective/NominalMagnification"
461        )
462        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
463        scan.objective = f"{magnification.text}x-{aperture.text}"
464        scan.pixel_size_um = (
465            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
466        )
467        # Round off the pixel size to nanometers; might not be optimal, but this
468        # gets rounded when we send it to the database anyways (to 7 places)
469        scan.pixel_size_um = round(scan.pixel_size_um, 3)
470
471        # Get tile information
472        # Note: X Y is untested, could be flipped. I always forget. Just don't use
473        # non-square frames and we're all good.
474        selected_detector = metadata_xml.find(".//SelectedDetector").text
475        detectors = metadata_xml.findall(".//Detectors/Detector")
476        for detector in detectors:
477            if detector.attrib["Id"] == selected_detector:
478                tile_info = detector.find(".//Frame")
479                break
480        # Convert to integers
481        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
482
483        scan.tile_x_offset_px = tile_info[0]
484        scan.tile_y_offset_px = tile_info[1]
485        scan.tile_width_px = tile_info[2]
486        scan.tile_height_px = tile_info[3]
487        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
488
489        # Extract channels and create Channel objects from them
490        channel_indices = []
491        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
492            channel_indices.append(int(channel.attrib["Id"][-1]))
493            intensity_xml = channel.find(".//Intensity")
494            if intensity_xml is None:
495                intensity = 0
496            else:
497                intensity = float(intensity_xml.text[:-2]) * 1e-2
498            scan.channels.append(
499                cls.Channel(
500                    name=channel.attrib["Name"].upper(),
501                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
502                    intensity=intensity,
503                    gain_applied=True,  # In Axioscan, we will always use gain = 1
504                )
505            )
506        # Make sure the channels are sorted
507        scan.channels = [
508            channel for _, channel in sorted(zip(channel_indices, scan.channels))
509        ]
510        # Verify that the shape corresponds to the channels
511        for roi in rois_shape:
512            if roi["C"][1] != len(scan.channels):
513                raise ValueError(
514                    f"Number of channels {len(scan.channels)} "
515                    f"is not the same as the number of channels in an ROI: "
516                    f"{roi['C'][1]}"
517                )
518
519        # Get the real ROI limits; the metadata is not always correct
520        limits_xml = metadata_xml.findall(".//AllowedScanArea")
521        limits = [
522            round(float(limits_xml[0].find("Center").text.split(",")[0])),
523            round(float(limits_xml[0].find("Center").text.split(",")[1])),
524            round(float(limits_xml[0].find("Size").text.split(",")[0])),
525            round(float(limits_xml[0].find("Size").text.split(",")[1])),
526        ]
527        # Convert to top-left and bottom-right
528        limits = [
529            round(limits[0] - limits[2] / 2),
530            round(limits[1] - limits[3] / 2),
531            round(limits[0] + limits[2] / 2),
532            round(limits[1] + limits[3] / 2),
533        ]
534
535        # Extract ROIs and create ROI objects from them
536        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
537        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
538        if len(rois_xml_metadata) != len(rois_shape):
539            raise ValueError(
540                f"Metadata and binary data from {input_path} "
541                f"do not match in number of ROIs"
542            )
543        # We need both to determine the number of rows/columns because the XML lies
544        roi_indices = []
545        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
546            name = roi_xml.attrib["Name"]
547            # Determine the index of this scene
548            scene_index = -1
549            for scene in scenes_xml_metadata:
550                if scene.attrib["Name"] == name:
551                    scene_index = int(scene.attrib["Index"])
552                    break
553            if scene_index == -1:
554                raise ValueError(f"ROI {name} does not correspond to any scenes")
555            else:
556                roi_indices.append(scene_index)
557            # Extract other metadata
558            roi_limits = [
559                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
560                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
561                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
562                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
563            ]
564            # Convert to top-left and bottom-right
565            roi_limits = [
566                round(roi_limits[0] - roi_limits[2] / 2),
567                round(roi_limits[1] - roi_limits[3] / 2),
568                round(roi_limits[0] + roi_limits[2] / 2),
569                round(roi_limits[1] + roi_limits[3] / 2),
570            ]
571            # Bound the ROI to the actual scan limits
572            roi_limits = [
573                max(roi_limits[0], limits[0]),
574                max(roi_limits[1], limits[1]),
575                min(roi_limits[2], limits[2]),
576                min(roi_limits[3], limits[3]),
577            ]
578
579            tile_rows = int(roi_xml.find("Rows").text)
580            # Current best way of reliably extracting; <Columns> entry can be wrong
581            if (roi_shape["M"][1] % tile_rows) != 0:
582                raise ValueError(
583                    f"The number of tiles {roi_shape['M'][1]} is not "
584                    f"divisible by the tile rows {tile_rows}; metadata "
585                    f"must be messed up. Thanks Zeiss"
586                )
587            else:
588                tile_cols = int(roi_shape["M"][1] / tile_rows)
589            # Support points are actually the relevant focus points for this ROI
590            focus_points = []
591            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
592                focus_points.append(
593                    [
594                        int(float(focus_point.find("X").text)),
595                        int(float(focus_point.find("Y").text)),
596                        int(float(focus_point.find("Z").text)),
597                    ]
598                )
599            # Strip all sub-micron precision, it does not matter
600            scan.roi.append(
601                cls.ROI(
602                    origin_x_um=roi_limits[0],
603                    origin_y_um=roi_limits[1],
604                    width_um=roi_limits[2] - roi_limits[0],
605                    height_um=roi_limits[3] - roi_limits[1],
606                    tile_rows=tile_rows,
607                    tile_cols=tile_cols,
608                    focus_points=focus_points,
609                )
610            )
611        # Sort based on the scene indices
612        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
613
614        return scan
615
616    @classmethod
617    def load_txt(cls, input_path: str) -> Self:
618        """
619        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
620        from the BZScanner. Some metadata is filled in or adjusted to fit
621        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
622        :return: a Scan object
623        """
624        # Set paths
625        input_path = os.path.abspath(input_path)
626        if os.path.isdir(input_path):
627            input_path = os.path.join(
628                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
629            )
630
631        # Read in metadata as a dict
632        with open(input_path, "r") as file:
633            metadata_contents = file.read()
634            # Read each line, splitting on the = sign
635            metadata_dict = {}
636            for line in metadata_contents.splitlines():
637                key, value = line.split("=")
638                metadata_dict[key] = value
639
640        # Populate metadata
641        scan = cls()
642
643        scan.slide_id = metadata_dict["SLIDEID"]
644        scan.slide_id = scan.slide_id.strip().upper()
645
646        scan.path = metadata_dict["SLIDEDIR"]
647
648        # Extract start and finish datetimes
649        date = metadata_dict["DATE"]
650        date_as_datetime = datetime.datetime.strptime(
651            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
652        )
653        date_as_datetime = date_as_datetime.astimezone(
654            zoneinfo.ZoneInfo("America/Los_Angeles")
655        )  # Hardcoded because BZScanners are here
656        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
657        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
658        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
659        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
660
661        # Map the raw scanner ID (service ID) to our IDs
662        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
663        scan.tray_pos = 0  # only one tray_pos in a BZScanner
664        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
665
666        # Get camera and magnifying info
667        scan.camera = ""
668        magnification = 10
669        aperture = 0  # TODO: find the actual aperture
670        scan.objective = f"{magnification}x-{aperture}"
671        scan.pixel_size_um = 0.591  # Estimated from image metadata
672
673        # Get tile information
674        scan.tile_width_px = 1362  # Known from image metadata
675        scan.tile_height_px = 1004  # Known from image metadata
676        scan.tile_x_offset_px = 0  # Already removed
677        scan.tile_y_offset_px = 0  # Already removed
678        scan.tile_overlap_proportion = 0  # Already removed
679
680        # Extract channels and create Channel objects from them
681        if "gain_applied" in metadata_dict:
682            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
683        else:
684            gain_applied = True  # Previous policy was always to apply gains
685        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
686            channel_settings = metadata_dict[channel].split(",")
687            if channel_settings[0] == "0":
688                continue
689            scan.channels.append(
690                cls.Channel(
691                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
692                    exposure_ms=float(channel_settings[1]),
693                    intensity=float(channel_settings[2]),
694                    gain_applied=gain_applied,
695                )
696            )
697
698        # Get focus points
699        focus_points = []
700        for i in range(33):
701            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
702            if focus_point[0] == "0":
703                break
704            focus_points.append(
705                [
706                    int(float(focus_point[1])),
707                    int(float(focus_point[2])),
708                    int(float(focus_point[3])),
709                ]
710            )
711
712        # In the BZScanner, the slide is vertical instead of horizontal
713        # We put in nominal values for the ROI, which is oriented vertically as well
714        tile_rows = 96
715        tile_cols = 24
716        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
717        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
718        origin_x_um = 2500 + round((20000 - roi_width) / 2)
719        origin_y_um = 2500 + round((58000 - roi_height) / 2)
720        scan.roi.append(
721            cls.ROI(
722                origin_x_um=origin_x_um,
723                origin_y_um=origin_y_um,
724                width_um=roi_width,
725                height_um=roi_height,
726                tile_rows=tile_rows,
727                tile_cols=tile_cols,
728                focus_points=focus_points,
729            )
730        )
731        return scan
732
733    @classmethod
734    def load_from_folder(cls, input_path: str) -> Self:
735        """
736        Load a Scan object from a folder that contains defaultly-named metadata files,
737        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
738        :param input_path: /path/to/folder
739        :return: a Scan object
740        """
741        input_path = os.path.abspath(input_path)
742        if os.path.isfile(
743            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
744        ):
745            return cls.load_yaml(input_path)
746        elif os.path.isfile(
747            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
748        ):
749            return cls.load_txt(input_path)
750        else:
751            raise ValueError(
752                f"No scan metadata files "
753                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
754                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
755                f"{input_path}"
756            )
757        pass
758
759    @classmethod
760    def make_placeholder(
761        cls,
762        slide_id: str,
763        n_tile: int = 2303,
764        n_roi: int = 0,
765        scanner_type: Type = Type.BZSCANNER,
766    ) -> Self:
767        """
768        Make a placeholder Scan object with only basic required information filled in.
769        :param slide_id: the slide ID
770        :param n_tile: the number of this tile, which will become the number of
771                       tiles in the scan
772        :param n_roi: the number of ROIs in the scan
773        :return: a Scan object
774        """
775        # Sanitize inputs here
776        slide_id = str(slide_id).strip().upper()
777        n_tile = int(n_tile)
778        n_roi = int(n_roi)
779        # Generate the object
780        scan = cls()
781        scan.slide_id = slide_id
782        if scanner_type == cls.Type.AXIOSCAN7:
783            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
784        elif scanner_type == cls.Type.BZSCANNER:
785            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
786        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
787        scan.roi[0].tile_rows = 1
788        scan.roi[0].tile_cols = n_tile + 1
789        return scan
class Scan(yaml.YAMLObject):
 24class Scan(yaml.YAMLObject):
 25    """
 26    Class that composes a whole scan's metadata. Contains some universal data,
 27    plus lists for channels and ROIs.
 28
 29    .. include:: ../docs/coordinate_systems.md
 30    """
 31
    # Tag used by PyYAML to identify serialized Scan objects
    yaml_tag = "csi_utils.scans.Scan"

    class Type(enum.Enum):
        # Supported scanner families; the values are used as scanner ID prefixes
        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name looked for in a scan folder, per scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # Format in which start/end datetimes are stored on the Scan object
    STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
    # Format of the datetime strings found in each scanner's raw metadata
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT,
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 59
 60    class Channel(yaml.YAMLObject):
 61        """
 62        Class that comprises a channel; we usually have multiple (2-5) per scan.
 63        Contains three fields:
 64        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 65        - exposure_ms: the exposure time to capture a frame in milliseconds
 66        - intensity: the light intensity used OR the gain applied to the channel
 67        """
 68
 69        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 70
 71        def __init__(
 72            self,
 73            name: str = "",
 74            exposure_ms: float = -1.0,
 75            intensity: float = -1.0,
 76            gain_applied: bool = False,
 77        ):
 78            self.name = name
 79            self.exposure_ms = exposure_ms
 80            self.intensity = intensity
 81            self.gain_applied = gain_applied
 82
 83        def __repr__(self):
 84            return yaml.dump(self, sort_keys=False)
 85
 86        def __eq__(self, other):
 87            return self.__repr__() == other.__repr__()
 88
 89    class ROI(yaml.YAMLObject):
 90        """
 91        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 92        """
 93
 94        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 95
 96        def __init__(
 97            self,
 98            origin_x_um: int = -1,
 99            origin_y_um: int = -1,
100            width_um: int = -1,
101            height_um: int = -1,
102            tile_rows: int = -1,
103            tile_cols: int = -1,
104            focus_points=None,
105        ):
106            if focus_points is None:
107                focus_points = []
108            self.origin_x_um = origin_x_um
109            self.origin_y_um = origin_y_um
110            self.width_um = width_um
111            self.height_um = height_um
112            self.tile_rows = tile_rows
113            self.tile_cols = tile_cols
114            self.focus_points = focus_points
115
116        def __repr__(self):
117            return yaml.dump(self, sort_keys=False)
118
119        def __eq__(self, other):
120            return self.__repr__() == other.__repr__()
121
122        def similar(self, other):
123            return (
124                self.origin_y_um == other.origin_y_um
125                and self.origin_x_um == other.origin_x_um
126                and self.width_um == other.width_um
127                and self.height_um == other.height_um
128                and self.tile_rows == other.tile_rows
129                and self.tile_cols == other.tile_cols
130            )
131
132    def __init__(
133        self,
134        slide_id: str = "",
135        exists: bool = True,
136        path: str = "",
137        start_datetime: str = "",
138        end_datetime: str = "",
139        scan_time_s: int = -1,
140        scanner_id: str = "",
141        tray_pos: int = -1,
142        slide_pos: int = -1,
143        camera: str = "",
144        objective: str = "",
145        pixel_size_um: float = -1.0,
146        tile_width_px: int = -1,
147        tile_height_px: int = -1,
148        tile_x_offset_px: int = -1,
149        tile_y_offset_px: int = -1,
150        tile_overlap_proportion: int = -1,
151        channels: list[Channel] = None,
152        roi: list[ROI] = None,
153    ):
154        if roi is None:
155            roi = []
156        if channels is None:
157            channels = []
158        self.slide_id = slide_id
159        self.exists = exists
160        self.path = path
161        self.start_datetime = start_datetime
162        self.end_datetime = end_datetime
163        self.scan_time_s = scan_time_s
164        self.scanner_id = scanner_id
165        self.tray_pos = tray_pos
166        self.slide_pos = slide_pos
167        self.camera = camera
168        self.objective = objective
169        self.pixel_size_um = pixel_size_um
170        self.tile_width_px = tile_width_px
171        self.tile_height_px = tile_height_px
172        self.tile_x_offset_px = tile_x_offset_px
173        self.tile_y_offset_px = tile_y_offset_px
174        self.tile_overlap_proportion = tile_overlap_proportion
175        self.channels = channels
176        self.roi = roi
177
178    def __key(self):
179        return (
180            self.slide_id,
181            self.exists,
182            self.path,
183            self.start_datetime,
184            self.end_datetime,
185            self.scan_time_s,
186            self.scanner_id,
187            self.tray_pos,
188            self.slide_pos,
189            self.camera,
190            self.objective,
191            self.pixel_size_um,
192            self.tile_width_px,
193            self.tile_height_px,
194            self.tile_overlap_proportion,
195            tuple(self.channels),
196            tuple(self.roi),
197        )
198
199    def __hash__(self):
200        return hash(self.__key())
201
202    def __repr__(self):
203        return yaml.dump(self, sort_keys=False)
204
205    def __eq__(self, other):
206        return self.__repr__() == other.__repr__()
207
208    def has_same_profile(self, other):
209        return (
210            self.camera == other.camera
211            and self.objective == other.objective
212            and self.pixel_size_um == other.pixel_size_um
213            and self.tile_width_px == other.tile_width_px
214            and self.tile_height_px == other.tile_height_px
215            and self.tile_x_offset_px == other.tile_x_offset_px
216            and self.tile_y_offset_px == other.tile_y_offset_px
217            and self.tile_overlap_proportion == other.tile_overlap_proportion
218            and self.channels == other.channels
219            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
220        )
221
222    def get_channel_names(self) -> list[str]:
223        """
224        Get the channel names in the scan's channel order.
225        :return: a list of channel names.
226        """
227        return [channel.name for channel in self.channels]
228
229    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
230        """
231        Given a list of channel names, return the corresponding indices in the scan's
232        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
233        actual AlexaFluor names (AF555, AF647, AF488).
234        If a list entry is None, it will return -1 for that entry.
235        :param channel_names: a list of channel names.
236        :return: a list of channel indices.
237        """
238        # Get the scan's channel name list
239        scan_channel_names = self.get_channel_names()
240
241        channel_indices = []
242        for name in channel_names:
243            # Convert any BZScanner channel names to the actual channel names
244            if name in self.BZSCANNER_CHANNEL_MAP:
245                name = self.BZSCANNER_CHANNEL_MAP[name]
246
247            # Append the corresponding index if possible
248            if name is None:
249                channel_indices.append(-1)
250            elif name in scan_channel_names:
251                channel_indices.append(scan_channel_names.index(name))
252            else:
253                raise ValueError(
254                    f"Channel name {name} not found in scan channels {scan_channel_names}"
255                )
256        return channel_indices
257
258    def get_image_size(self) -> tuple[int, int]:
259        """
260        Get the real size of the image in pixels after subtracting overlap.
261        :return: a tuple of (real_height, real_width) for easy comparison to arrays
262        """
263        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
264        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
265        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap
266
267    def save_yaml(self, output_path: str):
268        """
269        Write the Scan object to a .yaml file.
270        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
271        :return: nothing; will raise an error on failure
272        """
273        # Create necessary folders
274        output_path = os.path.abspath(output_path)
275        if os.path.splitext(output_path)[1] == ".yaml":
276            os.makedirs(os.path.dirname(output_path), exist_ok=True)
277        else:
278            os.makedirs(output_path, exist_ok=True)
279            # Add the standard metadata file name to the path if needed
280            output_path = os.path.join(
281                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
282            )
283
284        # Populate the file
285        with open(output_path, "w") as file:
286            yaml.dump(self, stream=file, sort_keys=False)
287
288    @classmethod
289    def load_yaml(cls, input_path: str) -> Self:
290        """
291        Load a Scan object from a .yaml file.
292        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
293        :return: a Scan object
294        """
295        input_path = os.path.abspath(input_path)
296        if os.path.isdir(input_path):
297            input_path = os.path.join(
298                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
299            )
300        with open(input_path, "r") as file:
301            metadata_obj = yaml.load(file, Loader=yaml.Loader)
302        return metadata_obj
303
304    def to_dict(self) -> dict:
305        """
306        Convert the Scan object to a dictionary with keys matching database columns
307        and values matching database entries
308        :return: a dictionary
309        """
310        # Dump to json; then add indents and a top-level key
311        channels_json = json.dumps(
312            self.channels, default=lambda x: x.__dict__, indent=2
313        )
314        channels_json = "  ".join(channels_json.splitlines(True))
315        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
316
317        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
318        roi_json = "  ".join(roi_json.splitlines(True))
319        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
320
321        # Keys are named the same as database columns
322        return {
323            "scanner_id": self.scanner_id,
324            "slide_id": self.slide_id,
325            "exists": self.exists,
326            "path": self.path,
327            "start_datetime": self.start_datetime,
328            "end_datetime": self.end_datetime,
329            "tray_pos": self.tray_pos,
330            "slide_pos": self.slide_pos,
331            "tile_width": self.tile_width_px,
332            "tile_height": self.tile_height_px,
333            "tile_x_offset": self.tile_x_offset_px,
334            "tile_y_offset": self.tile_y_offset_px,
335            "tile_overlap": self.tile_overlap_proportion,
336            "camera": self.camera,
337            "objective": self.objective,
338            "pixel_size": self.pixel_size_um,
339            "channels": channels_json,
340            "roi": roi_json,
341        }
342
343    @classmethod
344    def from_dict(cls, scan_dict) -> Self:
345        """
346        Convert a dictionary from to_dict() or the database to a Scan object
347        :param scan_dict: a dictionary
348        :return: a Scan object
349        """
350        local_timezone = zoneinfo.ZoneInfo("localtime")
351        if isinstance(scan_dict["start_datetime"], str):
352            start_datetime = datetime.datetime.strptime(
353                scan_dict["start_datetime"], cls.STANDARD_DATETIME_FORMAT
354            ).astimezone(local_timezone)
355        else:
356            start_datetime = scan_dict["start_datetime"].astimezone(local_timezone)
357        if isinstance(scan_dict["end_datetime"], str):
358            end_datetime = datetime.datetime.strptime(
359                scan_dict["end_datetime"], cls.STANDARD_DATETIME_FORMAT
360            ).astimezone(local_timezone)
361        else:
362            end_datetime = scan_dict["end_datetime"].astimezone(local_timezone)
363        dt = (end_datetime - start_datetime).total_seconds()
364        result = cls(
365            scanner_id=scan_dict["scanner_id"],
366            slide_id=scan_dict["slide_id"],
367            exists=scan_dict["exists"],
368            path=scan_dict["path"],
369            start_datetime=start_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
370            end_datetime=end_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
371            scan_time_s=int(dt),
372            tray_pos=scan_dict["tray_pos"],
373            slide_pos=scan_dict["slide_pos"],
374            tile_width_px=scan_dict["tile_width"],
375            tile_height_px=scan_dict["tile_height"],
376            tile_x_offset_px=scan_dict["tile_x_offset"],
377            tile_y_offset_px=scan_dict["tile_y_offset"],
378            tile_overlap_proportion=scan_dict["tile_overlap"],
379            camera=scan_dict["camera"],
380            objective=scan_dict["objective"],
381            pixel_size_um=scan_dict["pixel_size"],
382        )
383        for channel_json in json.loads(scan_dict["channels"])["data"]:
384            result.channels.append(
385                cls.Channel(
386                    name=channel_json["name"],
387                    exposure_ms=channel_json["exposure_ms"],
388                    intensity=channel_json["intensity"],
389                    gain_applied=channel_json["gain_applied"],
390                )
391            )
392        for roi_json in json.loads(scan_dict["roi"])["data"]:
393            result.roi.append(
394                cls.ROI(
395                    origin_x_um=roi_json["origin_x_um"],
396                    origin_y_um=roi_json["origin_y_um"],
397                    width_um=roi_json["width_um"],
398                    height_um=roi_json["height_um"],
399                    tile_rows=roi_json["tile_rows"],
400                    tile_cols=roi_json["tile_cols"],
401                    focus_points=roi_json["focus_points"],
402                )
403            )
404        return result
405
    @classmethod
    def load_czi(cls, input_path: str) -> Self:
        """
        Extracts metadata from a .czi file, which is the output of the Axioscan
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises KeyError: if the file's scanner is not listed in SCANNER_IDS
        :raises ValueError: if the channel, ROI, or tile metadata is inconsistent
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        with open(input_path, "rb") as file:
            # Read in metadata as XML elements
            metadata_xml = aicspylibczi.CziFile(file).meta
            # Read in shape metadata from binary
            rois_shape = aicspylibczi.CziFile(file).get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision
        # Keeps everything before the first "." plus the trailing UTC offset
        # NOTE(review): assumes the date always has fractional seconds and a
        # +/- offset; without a "." the slice below is wrong - confirm
        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        # The duration is divided by 1000, i.e. treated as milliseconds
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        # The end datetime is the start datetime plus the scan duration
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # NOTE(review): only the last character is used, so this presumably
        # assumes single-digit slide positions - confirm
        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        scan.objective = f"{magnification.text}x-{aperture.text}"
        # The XML value is scaled by 1e6 (presumably meters to micrometers)
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        selected_detector = metadata_xml.find(".//SelectedDetector").text
        detectors = metadata_xml.findall(".//Detectors/Detector")
        # NOTE(review): tile_info is never assigned if no detector matches the
        # selected one, which would fail on the conversion below
        for detector in detectors:
            if detector.attrib["Id"] == selected_detector:
                tile_info = detector.find(".//Frame")
                break
        # Convert to integers
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        # The frame is read as x offset, y offset, width, height
        scan.tile_x_offset_px = tile_info[0]
        scan.tile_y_offset_px = tile_info[1]
        scan.tile_width_px = tile_info[2]
        scan.tile_height_px = tile_info[3]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # NOTE(review): only the last character of the Id is used as the
            # index, so this presumably assumes fewer than 10 channels - confirm
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drops the last two characters (presumably a unit suffix) and
                # scales by 1e-2 - TODO confirm the source format
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    # Scaled by 1e-6 (presumably nanoseconds to milliseconds)
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                    gain_applied=True,  # In Axioscan, we will always use gain = 1
                )
            )
        # Make sure the channels are sorted
        scan.channels = [
            channel for _, channel in sorted(zip(channel_indices, scan.channels))
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        # Only the first AllowedScanArea entry is used: center x, y, size x, y
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        limits = [
            round(float(limits_xml[0].find("Center").text.split(",")[0])),
            round(float(limits_xml[0].find("Center").text.split(",")[1])),
            round(float(limits_xml[0].find("Size").text.split(",")[0])),
            round(float(limits_xml[0].find("Size").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        limits = [
            round(limits[0] - limits[2] / 2),
            round(limits[1] - limits[3] / 2),
            round(limits[0] + limits[2] / 2),
            round(limits[1] + limits[3] / 2),
        ]

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            else:
                roi_indices.append(scene_index)
            # Extract other metadata
            roi_limits = [
                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
            ]
            # Convert to top-left and bottom-right
            roi_limits = [
                round(roi_limits[0] - roi_limits[2] / 2),
                round(roi_limits[1] - roi_limits[3] / 2),
                round(roi_limits[0] + roi_limits[2] / 2),
                round(roi_limits[1] + roi_limits[3] / 2),
            ]
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            # The tile count from the binary shape is divided by the XML row count
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            else:
                tile_cols = int(roi_shape["M"][1] / tile_rows)
            # Support points are actually the relevant focus points for this ROI
            focus_points = []
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
                focus_points.append(
                    [
                        int(float(focus_point.find("X").text)),
                        int(float(focus_point.find("Y").text)),
                        int(float(focus_point.find("Z").text)),
                    ]
                )
            # Strip all sub-micron precision, it does not matter
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices
        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

        return scan
616
617    @classmethod
618    def load_txt(cls, input_path: str) -> Self:
619        """
620        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
621        from the BZScanner. Some metadata is filled in or adjusted to fit
622        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
623        :return: a Scan object
624        """
625        # Set paths
626        input_path = os.path.abspath(input_path)
627        if os.path.isdir(input_path):
628            input_path = os.path.join(
629                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
630            )
631
632        # Read in metadata as a dict
633        with open(input_path, "r") as file:
634            metadata_contents = file.read()
635            # Read each line, splitting on the = sign
636            metadata_dict = {}
637            for line in metadata_contents.splitlines():
638                key, value = line.split("=")
639                metadata_dict[key] = value
640
641        # Populate metadata
642        scan = cls()
643
644        scan.slide_id = metadata_dict["SLIDEID"]
645        scan.slide_id = scan.slide_id.strip().upper()
646
647        scan.path = metadata_dict["SLIDEDIR"]
648
649        # Extract start and finish datetimes
650        date = metadata_dict["DATE"]
651        date_as_datetime = datetime.datetime.strptime(
652            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
653        )
654        date_as_datetime = date_as_datetime.astimezone(
655            zoneinfo.ZoneInfo("America/Los_Angeles")
656        )  # Hardcoded because BZScanners are here
657        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
658        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
659        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
660        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
661
662        # Map the raw scanner ID (service ID) to our IDs
663        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
664        scan.tray_pos = 0  # only one tray_pos in a BZScanner
665        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
666
667        # Get camera and magnifying info
668        scan.camera = ""
669        magnification = 10
670        aperture = 0  # TODO: find the actual aperture
671        scan.objective = f"{magnification}x-{aperture}"
672        scan.pixel_size_um = 0.591  # Estimated from image metadata
673
674        # Get tile information
675        scan.tile_width_px = 1362  # Known from image metadata
676        scan.tile_height_px = 1004  # Known from image metadata
677        scan.tile_x_offset_px = 0  # Already removed
678        scan.tile_y_offset_px = 0  # Already removed
679        scan.tile_overlap_proportion = 0  # Already removed
680
681        # Extract channels and create Channel objects from them
682        if "gain_applied" in metadata_dict:
683            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
684        else:
685            gain_applied = True  # Previous policy was always to apply gains
686        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
687            channel_settings = metadata_dict[channel].split(",")
688            if channel_settings[0] == "0":
689                continue
690            scan.channels.append(
691                cls.Channel(
692                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
693                    exposure_ms=float(channel_settings[1]),
694                    intensity=float(channel_settings[2]),
695                    gain_applied=gain_applied,
696                )
697            )
698
699        # Get focus points
700        focus_points = []
701        for i in range(33):
702            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
703            if focus_point[0] == "0":
704                break
705            focus_points.append(
706                [
707                    int(float(focus_point[1])),
708                    int(float(focus_point[2])),
709                    int(float(focus_point[3])),
710                ]
711            )
712
713        # In the BZScanner, the slide is vertical instead of horizontal
714        # We put in nominal values for the ROI, which is oriented vertically as well
715        tile_rows = 96
716        tile_cols = 24
717        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
718        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
719        origin_x_um = 2500 + round((20000 - roi_width) / 2)
720        origin_y_um = 2500 + round((58000 - roi_height) / 2)
721        scan.roi.append(
722            cls.ROI(
723                origin_x_um=origin_x_um,
724                origin_y_um=origin_y_um,
725                width_um=roi_width,
726                height_um=roi_height,
727                tile_rows=tile_rows,
728                tile_cols=tile_cols,
729                focus_points=focus_points,
730            )
731        )
732        return scan
733
734    @classmethod
735    def load_from_folder(cls, input_path: str) -> Self:
736        """
737        Load a Scan object from a folder that contains defaultly-named metadata files,
738        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
739        :param input_path: /path/to/folder
740        :return: a Scan object
741        """
742        input_path = os.path.abspath(input_path)
743        if os.path.isfile(
744            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
745        ):
746            return cls.load_yaml(input_path)
747        elif os.path.isfile(
748            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
749        ):
750            return cls.load_txt(input_path)
751        else:
752            raise ValueError(
753                f"No scan metadata files "
754                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
755                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
756                f"{input_path}"
757            )
758        pass
759
760    @classmethod
761    def make_placeholder(
762        cls,
763        slide_id: str,
764        n_tile: int = 2303,
765        n_roi: int = 0,
766        scanner_type: Type = Type.BZSCANNER,
767    ) -> Self:
768        """
769        Make a placeholder Scan object with only basic required information filled in.
770        :param slide_id: the slide ID
771        :param n_tile: the number of this tile, which will become the number of
772                       tiles in the scan
773        :param n_roi: the number of ROIs in the scan
774        :return: a Scan object
775        """
776        # Sanitize inputs here
777        slide_id = str(slide_id).strip().upper()
778        n_tile = int(n_tile)
779        n_roi = int(n_roi)
780        # Generate the object
781        scan = cls()
782        scan.slide_id = slide_id
783        if scanner_type == cls.Type.AXIOSCAN7:
784            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
785        elif scanner_type == cls.Type.BZSCANNER:
786            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
787        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
788        scan.roi[0].tile_rows = 1
789        scan.roi[0].tile_cols = n_tile + 1
790        return scan

Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.

Scans -> scan-level coordinate frames. Each scan maintains its own scan-level coordinate frames based on the scanner it was scanned with.

Slide-level coordinate frame (common, agreed-upon frame of reference for a slide).

Picture of the slide coordinate system, which assumes a slide placed horizontally
with the label on the left. The x-axis points to the right, and the y-axis points
down. The origin is at the top left corner. Key positions, such as the origin of the
slide's active area at (14500, 2500) micrometers and the bottom-right corner at
(72500, 22500) micrometers are displayed.

Scan( slide_id: str = '', exists: bool = True, path: str = '', start_datetime: str = '', end_datetime: str = '', scan_time_s: int = -1, scanner_id: str = '', tray_pos: int = -1, slide_pos: int = -1, camera: str = '', objective: str = '', pixel_size_um: float = -1.0, tile_width_px: int = -1, tile_height_px: int = -1, tile_x_offset_px: int = -1, tile_y_offset_px: int = -1, tile_overlap_proportion: int = -1, channels: list[Scan.Channel] = None, roi: list[Scan.ROI] = None)
132    def __init__(
133        self,
134        slide_id: str = "",
135        exists: bool = True,
136        path: str = "",
137        start_datetime: str = "",
138        end_datetime: str = "",
139        scan_time_s: int = -1,
140        scanner_id: str = "",
141        tray_pos: int = -1,
142        slide_pos: int = -1,
143        camera: str = "",
144        objective: str = "",
145        pixel_size_um: float = -1.0,
146        tile_width_px: int = -1,
147        tile_height_px: int = -1,
148        tile_x_offset_px: int = -1,
149        tile_y_offset_px: int = -1,
150        tile_overlap_proportion: int = -1,
151        channels: list[Channel] = None,
152        roi: list[ROI] = None,
153    ):
154        if roi is None:
155            roi = []
156        if channels is None:
157            channels = []
158        self.slide_id = slide_id
159        self.exists = exists
160        self.path = path
161        self.start_datetime = start_datetime
162        self.end_datetime = end_datetime
163        self.scan_time_s = scan_time_s
164        self.scanner_id = scanner_id
165        self.tray_pos = tray_pos
166        self.slide_pos = slide_pos
167        self.camera = camera
168        self.objective = objective
169        self.pixel_size_um = pixel_size_um
170        self.tile_width_px = tile_width_px
171        self.tile_height_px = tile_height_px
172        self.tile_x_offset_px = tile_x_offset_px
173        self.tile_y_offset_px = tile_y_offset_px
174        self.tile_overlap_proportion = tile_overlap_proportion
175        self.channels = channels
176        self.roi = roi
yaml_tag = 'csi_utils.scans.Scan'
SCANNER_IDS = {'4661000426': 'axioscan7_0'}

Axioscan 7 scanner IDs (service number), mapped to our scanner IDs

METADATA_FILE_NAME = {<Type.AXIOSCAN7: 'axioscan7'>: 'scan.yaml', <Type.BZSCANNER: 'bzscanner'>: 'slideinfo.txt'}
STANDARD_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S%z'
DATETIME_FORMAT = {<Type.AXIOSCAN7: 'axioscan7'>: '%Y-%m-%dT%H:%M:%S%z', <Type.BZSCANNER: 'bzscanner'>: '%a %b %d %H:%M:%S %Y'}
BZSCANNER_CHANNEL_MAP = {'DAPI': 'DAPI', 'TRITC': 'AF555', 'CY5': 'AF647', 'BF': 'BRIGHT', 'FITC': 'AF488'}
slide_id
exists
path
start_datetime
end_datetime
scan_time_s
scanner_id
tray_pos
slide_pos
camera
objective
pixel_size_um
tile_width_px
tile_height_px
tile_x_offset_px
tile_y_offset_px
tile_overlap_proportion
channels
roi
def has_same_profile(self, other):
208    def has_same_profile(self, other):
209        return (
210            self.camera == other.camera
211            and self.objective == other.objective
212            and self.pixel_size_um == other.pixel_size_um
213            and self.tile_width_px == other.tile_width_px
214            and self.tile_height_px == other.tile_height_px
215            and self.tile_x_offset_px == other.tile_x_offset_px
216            and self.tile_y_offset_px == other.tile_y_offset_px
217            and self.tile_overlap_proportion == other.tile_overlap_proportion
218            and self.channels == other.channels
219            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
220        )
def get_channel_names(self) -> list[str]:
222    def get_channel_names(self) -> list[str]:
223        """
224        Get the channel names in the scan's channel order.
225        :return: a list of channel names.
226        """
227        return [channel.name for channel in self.channels]

Get the channel names in the scan's channel order.

Returns

a list of channel names.

def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
229    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
230        """
231        Given a list of channel names, return the corresponding indices in the scan's
232        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
233        actual AlexaFluor names (AF555, AF647, AF488).
234        If a list entry is None, it will return -1 for that entry.
235        :param channel_names: a list of channel names.
236        :return: a list of channel indices.
237        """
238        # Get the scan's channel name list
239        scan_channel_names = self.get_channel_names()
240
241        channel_indices = []
242        for name in channel_names:
243            # Convert any BZScanner channel names to the actual channel names
244            if name in self.BZSCANNER_CHANNEL_MAP:
245                name = self.BZSCANNER_CHANNEL_MAP[name]
246
247            # Append the corresponding index if possible
248            if name is None:
249                channel_indices.append(-1)
250            elif name in scan_channel_names:
251                channel_indices.append(scan_channel_names.index(name))
252            else:
253                raise ValueError(
254                    f"Channel name {name} not found in scan channels {scan_channel_names}"
255                )
256        return channel_indices

Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is None, it will return -1 for that entry.

Parameters
  • channel_names: a list of channel names.
Returns

a list of channel indices.

def get_image_size(self) -> tuple[int, int]:
258    def get_image_size(self) -> tuple[int, int]:
259        """
260        Get the real size of the image in pixels after subtracting overlap.
261        :return: a tuple of (real_height, real_width) for easy comparison to arrays
262        """
263        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
264        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
265        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap

Get the real size of the image in pixels after subtracting overlap.

Returns

a tuple of (real_height, real_width) for easy comparison to arrays

def save_yaml(self, output_path: str):
267    def save_yaml(self, output_path: str):
268        """
269        Write the Scan object to a .yaml file.
270        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
271        :return: nothing; will raise an error on failure
272        """
273        # Create necessary folders
274        output_path = os.path.abspath(output_path)
275        if os.path.splitext(output_path)[1] == ".yaml":
276            os.makedirs(os.path.dirname(output_path), exist_ok=True)
277        else:
278            os.makedirs(output_path, exist_ok=True)
279            # Add the standard metadata file name to the path if needed
280            output_path = os.path.join(
281                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
282            )
283
284        # Populate the file
285        with open(output_path, "w") as file:
286            yaml.dump(self, stream=file, sort_keys=False)

Write the Scan object to a .yaml file.

Parameters
  • output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns

nothing; will raise an error on failure

@classmethod
def load_yaml(cls, input_path: str) -> Self:
288    @classmethod
289    def load_yaml(cls, input_path: str) -> Self:
290        """
291        Load a Scan object from a .yaml file.
292        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
293        :return: a Scan object
294        """
295        input_path = os.path.abspath(input_path)
296        if os.path.isdir(input_path):
297            input_path = os.path.join(
298                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
299            )
300        with open(input_path, "r") as file:
301            metadata_obj = yaml.load(file, Loader=yaml.Loader)
302        return metadata_obj

Load a Scan object from a .yaml file.

Parameters
  • input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns

a Scan object

def to_dict(self) -> dict:
304    def to_dict(self) -> dict:
305        """
306        Convert the Scan object to a dictionary with keys matching database columns
307        and values matching database entries
308        :return: a dictionary
309        """
310        # Dump to json; then add indents and a top-level key
311        channels_json = json.dumps(
312            self.channels, default=lambda x: x.__dict__, indent=2
313        )
314        channels_json = "  ".join(channels_json.splitlines(True))
315        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
316
317        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
318        roi_json = "  ".join(roi_json.splitlines(True))
319        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
320
321        # Keys are named the same as database columns
322        return {
323            "scanner_id": self.scanner_id,
324            "slide_id": self.slide_id,
325            "exists": self.exists,
326            "path": self.path,
327            "start_datetime": self.start_datetime,
328            "end_datetime": self.end_datetime,
329            "tray_pos": self.tray_pos,
330            "slide_pos": self.slide_pos,
331            "tile_width": self.tile_width_px,
332            "tile_height": self.tile_height_px,
333            "tile_x_offset": self.tile_x_offset_px,
334            "tile_y_offset": self.tile_y_offset_px,
335            "tile_overlap": self.tile_overlap_proportion,
336            "camera": self.camera,
337            "objective": self.objective,
338            "pixel_size": self.pixel_size_um,
339            "channels": channels_json,
340            "roi": roi_json,
341        }

Convert the Scan object to a dictionary with keys matching database columns and values matching database entries

Returns

a dictionary

@classmethod
def from_dict(cls, scan_dict) -> Self:
343    @classmethod
344    def from_dict(cls, scan_dict) -> Self:
345        """
346        Convert a dictionary from to_dict() or the database to a Scan object
347        :param scan_dict: a dictionary
348        :return: a Scan object
349        """
350        local_timezone = zoneinfo.ZoneInfo("localtime")
351        if isinstance(scan_dict["start_datetime"], str):
352            start_datetime = datetime.datetime.strptime(
353                scan_dict["start_datetime"], cls.STANDARD_DATETIME_FORMAT
354            ).astimezone(local_timezone)
355        else:
356            start_datetime = scan_dict["start_datetime"].astimezone(local_timezone)
357        if isinstance(scan_dict["end_datetime"], str):
358            end_datetime = datetime.datetime.strptime(
359                scan_dict["end_datetime"], cls.STANDARD_DATETIME_FORMAT
360            ).astimezone(local_timezone)
361        else:
362            end_datetime = scan_dict["end_datetime"].astimezone(local_timezone)
363        dt = (end_datetime - start_datetime).total_seconds()
364        result = cls(
365            scanner_id=scan_dict["scanner_id"],
366            slide_id=scan_dict["slide_id"],
367            exists=scan_dict["exists"],
368            path=scan_dict["path"],
369            start_datetime=start_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
370            end_datetime=end_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
371            scan_time_s=int(dt),
372            tray_pos=scan_dict["tray_pos"],
373            slide_pos=scan_dict["slide_pos"],
374            tile_width_px=scan_dict["tile_width"],
375            tile_height_px=scan_dict["tile_height"],
376            tile_x_offset_px=scan_dict["tile_x_offset"],
377            tile_y_offset_px=scan_dict["tile_y_offset"],
378            tile_overlap_proportion=scan_dict["tile_overlap"],
379            camera=scan_dict["camera"],
380            objective=scan_dict["objective"],
381            pixel_size_um=scan_dict["pixel_size"],
382        )
383        for channel_json in json.loads(scan_dict["channels"])["data"]:
384            result.channels.append(
385                cls.Channel(
386                    name=channel_json["name"],
387                    exposure_ms=channel_json["exposure_ms"],
388                    intensity=channel_json["intensity"],
389                    gain_applied=channel_json["gain_applied"],
390                )
391            )
392        for roi_json in json.loads(scan_dict["roi"])["data"]:
393            result.roi.append(
394                cls.ROI(
395                    origin_x_um=roi_json["origin_x_um"],
396                    origin_y_um=roi_json["origin_y_um"],
397                    width_um=roi_json["width_um"],
398                    height_um=roi_json["height_um"],
399                    tile_rows=roi_json["tile_rows"],
400                    tile_cols=roi_json["tile_cols"],
401                    focus_points=roi_json["focus_points"],
402                )
403            )
404        return result

Convert a dictionary from to_dict() or the database to a Scan object

Parameters
  • scan_dict: a dictionary
Returns

a Scan object

@classmethod
def load_czi(cls, input_path: str) -> Self:
    @classmethod
    def load_czi(cls, input_path: str) -> Self:
        """
        Extracts metadata from a .czi file, which is the output of the Axioscan.
        Reads the embedded XML metadata plus the per-scene dimension shapes, then
        fills in scan-level fields, channels (sorted by channel ID) and ROIs
        (sorted by scene index).
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises KeyError: if the scanner's name is not in SCANNER_IDS
        :raises ValueError: if the channel or ROI counts in the XML disagree with
                            the binary shape data, if an ROI has no matching scene,
                            or if the tile count is not divisible by the tile rows
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        with open(input_path, "rb") as file:
            # Read in metadata as XML elements
            metadata_xml = aicspylibczi.CziFile(file).meta
            # Read in shape metadata from binary
            # NOTE(review): a second CziFile is built on the same file handle;
            # one instance could presumably serve both reads - confirm.
            rois_shape = aicspylibczi.CziFile(file).get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision: keep everything before the first "."
        # and the tail starting at the last "-" or "+" (the UTC offset)
        # NOTE(review): assumes the timestamp always has fractional seconds and a
        # numeric offset; with no "." the find() returns -1 and a character is lost.
        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        # AcquisitionDuration is divided by 1000 to get seconds
        # (presumably stored in milliseconds)
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        # The end time is derived: start time plus the scan duration
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # Only the last character of the position string is used
        # NOTE(review): this breaks for slide positions above 9 - confirm range.
        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        # Objective is recorded as "<magnification>x-<numerical aperture>"
        scan.objective = f"{magnification.text}x-{aperture.text}"
        # Scaled by 1e6 to get micrometers (presumably stored in meters)
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        selected_detector = metadata_xml.find(".//SelectedDetector").text
        detectors = metadata_xml.findall(".//Detectors/Detector")
        # NOTE(review): if no detector matches, tile_info is never assigned and
        # the conversion below raises a NameError.
        for detector in detectors:
            if detector.attrib["Id"] == selected_detector:
                tile_info = detector.find(".//Frame")
                break
        # Convert to integers
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        # The frame is read as x offset, y offset, width, height
        scan.tile_x_offset_px = tile_info[0]
        scan.tile_y_offset_px = tile_info[1]
        scan.tile_width_px = tile_info[2]
        scan.tile_height_px = tile_info[3]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # The channel's index is the last character of its Id attribute
            # NOTE(review): single digit only; breaks with more than 10 channels.
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drops the last two characters, then scales by 1e-2
                # (presumably a percentage with a trailing unit - confirm)
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    # Scaled by 1e-6 to get milliseconds
                    # (presumably stored in nanoseconds - confirm)
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                    gain_applied=True,  # In Axioscan, we will always use gain = 1
                )
            )
        # Make sure the channels are sorted
        # NOTE(review): sorting (index, Channel) tuples falls back to comparing
        # Channel objects if two indices are equal - confirm that cannot happen.
        scan.channels = [
            channel for _, channel in sorted(zip(channel_indices, scan.channels))
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        # Only the first AllowedScanArea is used: center x, center y, width, height
        limits = [
            round(float(limits_xml[0].find("Center").text.split(",")[0])),
            round(float(limits_xml[0].find("Center").text.split(",")[1])),
            round(float(limits_xml[0].find("Size").text.split(",")[0])),
            round(float(limits_xml[0].find("Size").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        limits = [
            round(limits[0] - limits[2] / 2),
            round(limits[1] - limits[3] / 2),
            round(limits[0] + limits[2] / 2),
            round(limits[1] + limits[3] / 2),
        ]

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        # NOTE(review): zip() pairs XML regions and binary shapes by position;
        # this assumes both lists are in the same order - confirm.
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            else:
                roi_indices.append(scene_index)
            # Extract other metadata: center x, center y, width, height
            roi_limits = [
                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
            ]
            # Convert to top-left and bottom-right
            roi_limits = [
                round(roi_limits[0] - roi_limits[2] / 2),
                round(roi_limits[1] - roi_limits[3] / 2),
                round(roi_limits[0] + roi_limits[2] / 2),
                round(roi_limits[1] + roi_limits[3] / 2),
            ]
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            # The columns are derived from the binary tile count ("M") instead
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            else:
                tile_cols = int(roi_shape["M"][1] / tile_rows)
            # Support points are actually the relevant focus points for this ROI
            focus_points = []
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
                # Each point is stored as [x, y, z], truncated to integers
                focus_points.append(
                    [
                        int(float(focus_point.find("X").text)),
                        int(float(focus_point.find("Y").text)),
                        int(float(focus_point.find("Z").text)),
                    ]
                )
            # Strip all sub-micron precision, it does not matter
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices
        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

        return scan

Extracts metadata from a .czi file, which is the output of the Axioscan

Parameters
  • input_path: the path to the .czi file
Returns

a Scan object

@classmethod
def load_txt(cls, input_path: str) -> Self:
617    @classmethod
618    def load_txt(cls, input_path: str) -> Self:
619        """
620        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
621        from the BZScanner. Some metadata is filled in or adjusted to fit
622        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
623        :return: a Scan object
624        """
625        # Set paths
626        input_path = os.path.abspath(input_path)
627        if os.path.isdir(input_path):
628            input_path = os.path.join(
629                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
630            )
631
632        # Read in metadata as a dict
633        with open(input_path, "r") as file:
634            metadata_contents = file.read()
635            # Read each line, splitting on the = sign
636            metadata_dict = {}
637            for line in metadata_contents.splitlines():
638                key, value = line.split("=")
639                metadata_dict[key] = value
640
641        # Populate metadata
642        scan = cls()
643
644        scan.slide_id = metadata_dict["SLIDEID"]
645        scan.slide_id = scan.slide_id.strip().upper()
646
647        scan.path = metadata_dict["SLIDEDIR"]
648
649        # Extract start and finish datetimes
650        date = metadata_dict["DATE"]
651        date_as_datetime = datetime.datetime.strptime(
652            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
653        )
654        date_as_datetime = date_as_datetime.astimezone(
655            zoneinfo.ZoneInfo("America/Los_Angeles")
656        )  # Hardcoded because BZScanners are here
657        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
658        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
659        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
660        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
661
662        # Map the raw scanner ID (service ID) to our IDs
663        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
664        scan.tray_pos = 0  # only one tray_pos in a BZScanner
665        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
666
667        # Get camera and magnifying info
668        scan.camera = ""
669        magnification = 10
670        aperture = 0  # TODO: find the actual aperture
671        scan.objective = f"{magnification}x-{aperture}"
672        scan.pixel_size_um = 0.591  # Estimated from image metadata
673
674        # Get tile information
675        scan.tile_width_px = 1362  # Known from image metadata
676        scan.tile_height_px = 1004  # Known from image metadata
677        scan.tile_x_offset_px = 0  # Already removed
678        scan.tile_y_offset_px = 0  # Already removed
679        scan.tile_overlap_proportion = 0  # Already removed
680
681        # Extract channels and create Channel objects from them
682        if "gain_applied" in metadata_dict:
683            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
684        else:
685            gain_applied = True  # Previous policy was always to apply gains
686        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
687            channel_settings = metadata_dict[channel].split(",")
688            if channel_settings[0] == "0":
689                continue
690            scan.channels.append(
691                cls.Channel(
692                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
693                    exposure_ms=float(channel_settings[1]),
694                    intensity=float(channel_settings[2]),
695                    gain_applied=gain_applied,
696                )
697            )
698
699        # Get focus points
700        focus_points = []
701        for i in range(33):
702            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
703            if focus_point[0] == "0":
704                break
705            focus_points.append(
706                [
707                    int(float(focus_point[1])),
708                    int(float(focus_point[2])),
709                    int(float(focus_point[3])),
710                ]
711            )
712
713        # In the BZScanner, the slide is vertical instead of horizontal
714        # We put in nominal values for the ROI, which is oriented vertically as well
715        tile_rows = 96
716        tile_cols = 24
717        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
718        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
719        origin_x_um = 2500 + round((20000 - roi_width) / 2)
720        origin_y_um = 2500 + round((58000 - roi_height) / 2)
721        scan.roi.append(
722            cls.ROI(
723                origin_x_um=origin_x_um,
724                origin_y_um=origin_y_um,
725                width_um=roi_width,
726                height_um=roi_height,
727                tile_rows=tile_rows,
728                tile_cols=tile_cols,
729                focus_points=focus_points,
730            )
731        )
732        return scan

Loads a Scan object from a .txt file, usually slideinfo.txt, which originates from the BZScanner. Some metadata is filled in or adjusted to fit

Parameters
  • input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
Returns

a Scan object

@classmethod
def load_from_folder(cls, input_path: str) -> Self:
734    @classmethod
735    def load_from_folder(cls, input_path: str) -> Self:
736        """
737        Load a Scan object from a folder that contains defaultly-named metadata files,
738        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
739        :param input_path: /path/to/folder
740        :return: a Scan object
741        """
742        input_path = os.path.abspath(input_path)
743        if os.path.isfile(
744            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
745        ):
746            return cls.load_yaml(input_path)
747        elif os.path.isfile(
748            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
749        ):
750            return cls.load_txt(input_path)
751        else:
752            raise ValueError(
753                f"No scan metadata files "
754                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
755                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
756                f"{input_path}"
757            )
758        pass

Load a Scan object from a folder that contains a metadata file with one of the default names, scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.

Parameters
  • input_path: /path/to/folder
Returns

a Scan object

@classmethod
def make_placeholder( cls, slide_id: str, n_tile: int = 2303, n_roi: int = 0, scanner_type: Scan.Type = <Type.BZSCANNER: 'bzscanner'>) -> Self:
760    @classmethod
761    def make_placeholder(
762        cls,
763        slide_id: str,
764        n_tile: int = 2303,
765        n_roi: int = 0,
766        scanner_type: Type = Type.BZSCANNER,
767    ) -> Self:
768        """
769        Make a placeholder Scan object with only basic required information filled in.
770        :param slide_id: the slide ID
771        :param n_tile: the number of this tile, which will become the number of
772                       tiles in the scan
773        :param n_roi: the number of ROIs in the scan
774        :return: a Scan object
775        """
776        # Sanitize inputs here
777        slide_id = str(slide_id).strip().upper()
778        n_tile = int(n_tile)
779        n_roi = int(n_roi)
780        # Generate the object
781        scan = cls()
782        scan.slide_id = slide_id
783        if scanner_type == cls.Type.AXIOSCAN7:
784            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
785        elif scanner_type == cls.Type.BZSCANNER:
786            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
787        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
788        scan.roi[0].tile_rows = 1
789        scan.roi[0].tile_cols = n_tile + 1
790        return scan

Make a placeholder Scan object with only basic required information filled in.

Parameters
  • slide_id: the slide ID
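  • n_tile: the index of the tile of interest; the first ROI of the placeholder is sized as a single row of n_tile + 1 tiles so that this index exists
  • n_roi: the index of the ROI of interest; the placeholder is created with n_roi + 1 ROIs
  • scanner_type: the scanner type (Scan.Type) used to build the placeholder scanner ID; defaults to BZSCANNER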
Returns

a Scan object

class Scan.Type(enum.Enum):
    class Type(enum.Enum):
        """
        Scanner types known to Scan. The string values are used to build scanner
        IDs (e.g. "axioscan7_0"), and the members are the keys of the per-scanner
        METADATA_FILE_NAME and DATETIME_FORMAT tables.
        """

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"
BZSCANNER = <Type.BZSCANNER: 'bzscanner'>
AXIOSCAN7 = <Type.AXIOSCAN7: 'axioscan7'>
class Scan.Channel(yaml.YAMLObject):
60    class Channel(yaml.YAMLObject):
61        """
62        Class that comprises a channel; we usually have multiple (2-5) per scan.
63        Contains three fields:
64        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
65        - exposure_ms: the exposure time to capture a frame in milliseconds
66        - intensity: the light intensity used OR the gain applied to the channel
67        """
68
69        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
70
71        def __init__(
72            self,
73            name: str = "",
74            exposure_ms: float = -1.0,
75            intensity: float = -1.0,
76            gain_applied: bool = False,
77        ):
78            self.name = name
79            self.exposure_ms = exposure_ms
80            self.intensity = intensity
81            self.gain_applied = gain_applied
82
83        def __repr__(self):
84            return yaml.dump(self, sort_keys=False)
85
86        def __eq__(self, other):
87            return self.__repr__() == other.__repr__()

Class that comprises a channel; we usually have multiple (2-5) per scan. Contains four fields:

  • name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
  • exposure_ms: the exposure time to capture a frame in milliseconds
  • intensity: the light intensity used OR the gain applied to the channel
  • gain_applied: boolean flag, False by default (presumably marks whether a gain has been applied to the channel)
Scan.Channel( name: str = '', exposure_ms: float = -1.0, intensity: float = -1.0, gain_applied: bool = False)
71        def __init__(
72            self,
73            name: str = "",
74            exposure_ms: float = -1.0,
75            intensity: float = -1.0,
76            gain_applied: bool = False,
77        ):
78            self.name = name
79            self.exposure_ms = exposure_ms
80            self.intensity = intensity
81            self.gain_applied = gain_applied
yaml_tag = 'csi_utils.csi_scans.Scan.Channel'
name
exposure_ms
intensity
gain_applied
class Scan.ROI(yaml.YAMLObject):
 89    class ROI(yaml.YAMLObject):
 90        """
 91        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 92        """
 93
 94        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 95
 96        def __init__(
 97            self,
 98            origin_x_um: int = -1,
 99            origin_y_um: int = -1,
100            width_um: int = -1,
101            height_um: int = -1,
102            tile_rows: int = -1,
103            tile_cols: int = -1,
104            focus_points=None,
105        ):
106            if focus_points is None:
107                focus_points = []
108            self.origin_x_um = origin_x_um
109            self.origin_y_um = origin_y_um
110            self.width_um = width_um
111            self.height_um = height_um
112            self.tile_rows = tile_rows
113            self.tile_cols = tile_cols
114            self.focus_points = focus_points
115
116        def __repr__(self):
117            return yaml.dump(self, sort_keys=False)
118
119        def __eq__(self, other):
120            return self.__repr__() == other.__repr__()
121
122        def similar(self, other):
123            return (
124                self.origin_y_um == other.origin_y_um
125                and self.origin_x_um == other.origin_x_um
126                and self.width_um == other.width_um
127                and self.height_um == other.height_um
128                and self.tile_rows == other.tile_rows
129                and self.tile_cols == other.tile_cols
130            )

Class that comprises an ROI; we usually have 1, but may have more in a scan.

Scan.ROI( origin_x_um: int = -1, origin_y_um: int = -1, width_um: int = -1, height_um: int = -1, tile_rows: int = -1, tile_cols: int = -1, focus_points=None)
 96        def __init__(
 97            self,
 98            origin_x_um: int = -1,
 99            origin_y_um: int = -1,
100            width_um: int = -1,
101            height_um: int = -1,
102            tile_rows: int = -1,
103            tile_cols: int = -1,
104            focus_points=None,
105        ):
106            if focus_points is None:
107                focus_points = []
108            self.origin_x_um = origin_x_um
109            self.origin_y_um = origin_y_um
110            self.width_um = width_um
111            self.height_um = height_um
112            self.tile_rows = tile_rows
113            self.tile_cols = tile_cols
114            self.focus_points = focus_points
yaml_tag = 'csi_utils.csi_scans.Scan.ROI'
origin_x_um
origin_y_um
width_um
height_um
tile_rows
tile_cols
focus_points
def similar(self, other):
122        def similar(self, other):
123            return (
124                self.origin_y_um == other.origin_y_um
125                and self.origin_x_um == other.origin_x_um
126                and self.width_um == other.width_um
127                and self.height_um == other.height_um
128                and self.tile_rows == other.tile_rows
129                and self.tile_cols == other.tile_cols
130            )