csi_images.csi_scans

Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.

  1"""
  2Contains the Scan class, which holds important metadata from a scan. This metadata
  3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
  4object can also be loaded from a .czi file or a .txt file.
  5"""
  6
  7import os
  8import math
  9import enum
 10import datetime
 11import zoneinfo
 12from typing import Self, Iterable
 13
 14import yaml
 15import json
 16
 17try:
 18    import aicspylibczi
 19except ImportError:
 20    aicspylibczi = None
 21
 22
 23class Scan(yaml.YAMLObject):
 24    """
 25    Class that composes a whole scan's metadata. Contains some universal data,
 26    plus lists for channels and ROIs.
 27
 28    .. include:: ../docs/coordinate_systems.md
 29    """
 30
 31    yaml_tag = "csi_images.csi_scans.Scan"
 32
    class Type(enum.Enum):
        """Scanner types whose metadata files this class can read or write."""

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name looked up in a scan folder, per scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # Format used for the start/end datetime strings written by the loaders
    STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
    # strptime format of the raw date string, per scanner type
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT,
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 58
 59    class Channel(yaml.YAMLObject):
 60        """
 61        Class that comprises a channel; we usually have multiple (2-5) per scan.
 62        Contains three fields:
 63        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 64        - exposure_ms: the exposure time to capture a frame in milliseconds
 65        - intensity: the light intensity used OR the gain applied to the channel
 66        """
 67
 68        yaml_tag = "csi_images.csi_scans.Scan.Channel"
 69
 70        def __init__(
 71            self,
 72            name: str = "",
 73            exposure_ms: float = -1.0,
 74            intensity: float = -1.0,
 75            gain_applied: bool = False,
 76        ):
 77            self.name = name
 78            self.exposure_ms = exposure_ms
 79            self.intensity = intensity
 80            self.gain_applied = gain_applied
 81
 82        def __repr__(self):
 83            return yaml.dump(self, sort_keys=False)
 84
 85        def __eq__(self, other):
 86            return self.__repr__() == other.__repr__()
 87
 88    class ROI(yaml.YAMLObject):
 89        """
 90        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 91        """
 92
 93        yaml_tag = "csi_images.csi_scans.Scan.ROI"
 94
 95        def __init__(
 96            self,
 97            origin_x_um: int = -1,
 98            origin_y_um: int = -1,
 99            width_um: int = -1,
100            height_um: int = -1,
101            tile_rows: int = -1,
102            tile_cols: int = -1,
103            focus_points=None,
104        ):
105            if focus_points is None:
106                focus_points = []
107            self.origin_x_um = origin_x_um
108            self.origin_y_um = origin_y_um
109            self.width_um = width_um
110            self.height_um = height_um
111            self.tile_rows = tile_rows
112            self.tile_cols = tile_cols
113            self.focus_points = focus_points
114
115        def __repr__(self):
116            return yaml.dump(self, sort_keys=False)
117
118        def __eq__(self, other):
119            return self.__repr__() == other.__repr__()
120
121        def similar(self, other):
122            return (
123                self.origin_y_um == other.origin_y_um
124                and self.origin_x_um == other.origin_x_um
125                and self.width_um == other.width_um
126                and self.height_um == other.height_um
127                and self.tile_rows == other.tile_rows
128                and self.tile_cols == other.tile_cols
129            )
130
    def __init__(
        self,
        slide_id: str = "",
        scanner_id: str = "",
        path: str = "",
        exists: bool = True,
        start_datetime: str = "",
        end_datetime: str = "",
        scan_time_s: int = -1,
        tray_pos: int = -1,
        slide_pos: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_x_offset_px: int = -1,
        tile_y_offset_px: int = -1,
        tile_overlap_proportion: float = -1,
        channels: list[Channel] | None = None,
        roi: list[ROI] | None = None,
    ):
        """
        Store the scan metadata; every field has a placeholder default ("" or -1).
        :param slide_id: the slide identifier.
        :param scanner_id: the scanner identifier, e.g. "axioscan7_0".
        :param path: path associated with the scan (presumably the scan folder;
                     TODO confirm).
        :param exists: stored as-is; presumably whether the scan data still
                       exists at path (TODO confirm).
        :param start_datetime: scan start, as a string; the loaders in this class
                               write it in STANDARD_DATETIME_FORMAT.
        :param end_datetime: scan end, in the same string format.
        :param scan_time_s: scan duration in seconds.
        :param tray_pos: tray position of the slide.
        :param slide_pos: slide position within the tray.
        :param camera: camera name.
        :param objective: objective description; the loaders write
                          "<magnification>x-<aperture>".
        :param pixel_size_um: pixel size in micrometers.
        :param tile_width_px: tile width in pixels.
        :param tile_height_px: tile height in pixels.
        :param tile_x_offset_px: tile x offset in pixels.
        :param tile_y_offset_px: tile y offset in pixels.
        :param tile_overlap_proportion: proportion of the tile size that overlaps;
                                        get_image_size() subtracts it.
        :param channels: list of Channel objects; None becomes a new empty list.
        :param roi: list of ROI objects; None becomes a new empty list.
        """
        # Fresh lists per instance; never share a mutable default
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        # NOTE(review): the YAML dumps use sort_keys=False, so this assignment
        # order presumably determines the field order in scan.yaml; keep it stable.
        self.slide_id = slide_id
        self.scanner_id = scanner_id
        self.path = path
        self.exists = exists
        self.start_datetime = start_datetime
        self.end_datetime = end_datetime
        self.scan_time_s = scan_time_s
        self.tray_pos = tray_pos
        self.slide_pos = slide_pos
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_x_offset_px = tile_x_offset_px
        self.tile_y_offset_px = tile_y_offset_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi
176
177    def __key(self):
178        return (
179            self.slide_id,
180            self.scanner_id,
181            self.path,
182            self.exists,
183            self.start_datetime,
184            self.end_datetime,
185            self.scan_time_s,
186            self.tray_pos,
187            self.slide_pos,
188            self.camera,
189            self.objective,
190            self.pixel_size_um,
191            self.tile_width_px,
192            self.tile_height_px,
193            self.tile_overlap_proportion,
194            tuple(self.channels),
195            tuple(self.roi),
196        )
197
198    def __hash__(self):
199        return hash(self.__key())
200
201    def __repr__(self):
202        return yaml.dump(self, sort_keys=False)
203
204    def __eq__(self, other):
205        return self.__repr__() == other.__repr__()
206
207    def has_same_profile(self, other):
208        return (
209            self.camera == other.camera
210            and self.objective == other.objective
211            and self.pixel_size_um == other.pixel_size_um
212            and self.tile_width_px == other.tile_width_px
213            and self.tile_height_px == other.tile_height_px
214            and self.tile_x_offset_px == other.tile_x_offset_px
215            and self.tile_y_offset_px == other.tile_y_offset_px
216            and self.tile_overlap_proportion == other.tile_overlap_proportion
217            and self.channels == other.channels
218            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
219        )
220
221    def get_channel_names(self) -> list[str]:
222        """
223        Get the channel names in the scan's channel order.
224        :return: a list of channel names.
225        """
226        return [channel.name for channel in self.channels]
227
228    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
229        """
230        Given a list of channel names, return the corresponding indices in the scan's
231        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
232        actual AlexaFluor names (AF555, AF647, AF488).
233        If a list entry is not found or None, it will return -1 for that entry.
234        :param channel_names: a list of channel names.
235        :return: a list of channel indices.
236        """
237        # Get the scan's channel name list
238        scan_channel_names = self.get_channel_names()
239
240        channel_indices = []
241        for name in channel_names:
242            # Convert any BZScanner channel names to the actual channel names
243            if name in self.BZSCANNER_CHANNEL_MAP:
244                name = self.BZSCANNER_CHANNEL_MAP[name]
245
246            # Append the corresponding index if possible
247            if name in scan_channel_names:
248                channel_indices.append(scan_channel_names.index(name))
249            else:
250                channel_indices.append(-1)
251        return channel_indices
252
253    def get_image_size(self) -> tuple[int, int]:
254        """
255        Get the real size of the image in pixels after subtracting overlap.
256        :return: a tuple of (real_height, real_width) for easy comparison to arrays
257        """
258        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
259        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
260        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap
261
262    def save_yaml(self, output_path: str):
263        """
264        Write the Scan object to a .yaml file.
265        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
266        :return: nothing; will raise an error on failure
267        """
268        # Create necessary folders
269        output_path = os.path.abspath(output_path)
270        if os.path.splitext(output_path)[1] == ".yaml":
271            os.makedirs(os.path.dirname(output_path), exist_ok=True)
272        else:
273            os.makedirs(output_path, exist_ok=True)
274            # Add the standard metadata file name to the path if needed
275            output_path = os.path.join(
276                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
277            )
278
279        # Populate the file
280        with open(output_path, "w") as file:
281            yaml.dump(self, stream=file, sort_keys=False)
282
283    @classmethod
284    def load_yaml(cls, input_path: str) -> Self:
285        """
286        Load a Scan object from a .yaml file.
287        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
288        :return: a Scan object
289        """
290        input_path = os.path.abspath(input_path)
291        if os.path.isdir(input_path):
292            input_path = os.path.join(
293                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
294            )
295        with open(input_path, "r") as file:
296            metadata_obj = yaml.load(file, Loader=yaml.Loader)
297        return metadata_obj
298
299    def to_dict(self) -> dict:
300        """
301        Convert the Scan object to a dictionary with keys matching database columns
302        and values matching database entries
303        :return: a dictionary
304        """
305        # Dump to json; then add indents and a top-level key
306        channels_json = json.dumps(
307            self.channels, default=lambda x: x.__dict__, indent=2
308        )
309        channels_json = "  ".join(channels_json.splitlines(True))
310        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
311
312        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
313        roi_json = "  ".join(roi_json.splitlines(True))
314        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
315
316        # Keys are named the same as database columns
317        return {
318            "scanner_id": self.scanner_id,
319            "slide_id": self.slide_id,
320            "exists": self.exists,
321            "path": self.path,
322            "start_datetime": self.start_datetime,
323            "end_datetime": self.end_datetime,
324            "scan_time_s": self.scan_time_s,
325            "tray_pos": self.tray_pos,
326            "slide_pos": self.slide_pos,
327            "tile_width": self.tile_width_px,
328            "tile_height": self.tile_height_px,
329            "tile_x_offset": self.tile_x_offset_px,
330            "tile_y_offset": self.tile_y_offset_px,
331            "tile_overlap": self.tile_overlap_proportion,
332            "camera": self.camera,
333            "objective": self.objective,
334            "pixel_size": self.pixel_size_um,
335            "channels": channels_json,
336            "roi": roi_json,
337        }
338
339    @classmethod
340    def from_dict(cls, scan_dict) -> Self:
341        """
342        Convert a dictionary from to_dict() or the database to a Scan object
343        :param scan_dict: a dictionary
344        :return: a Scan object
345        """
346        result = cls(
347            scanner_id=scan_dict["scanner_id"],
348            slide_id=scan_dict["slide_id"],
349            path=scan_dict["path"],
350            exists=scan_dict["exists"],
351            start_datetime=scan_dict["start_datetime"],
352            end_datetime=scan_dict["end_datetime"],
353            scan_time_s=scan_dict["scan_time_s"],
354            tray_pos=scan_dict["tray_pos"],
355            slide_pos=scan_dict["slide_pos"],
356            camera=scan_dict["camera"],
357            objective=scan_dict["objective"],
358            pixel_size_um=scan_dict["pixel_size"],
359            tile_width_px=scan_dict["tile_width"],
360            tile_height_px=scan_dict["tile_height"],
361            tile_x_offset_px=scan_dict["tile_x_offset"],
362            tile_y_offset_px=scan_dict["tile_y_offset"],
363            tile_overlap_proportion=scan_dict["tile_overlap"],
364        )
365        # Handle JSON and dictionaries
366        if isinstance(scan_dict["channels"], str):
367            channels_dict = json.loads(scan_dict["channels"])["data"]
368        else:
369            channels_dict = scan_dict["channels"]["data"]
370        for channel in channels_dict:
371            result.channels.append(
372                cls.Channel(
373                    name=channel["name"],
374                    exposure_ms=channel["exposure_ms"],
375                    intensity=channel["intensity"],
376                    gain_applied=channel["gain_applied"],
377                )
378            )
379        # Handle JSON and dictionaries
380        if isinstance(scan_dict["channels"], str):
381            roi_dict = json.loads(scan_dict["roi"])["data"]
382        else:
383            roi_dict = scan_dict["roi"]["data"]
384        for roi in roi_dict:
385            result.roi.append(
386                cls.ROI(
387                    origin_x_um=roi["origin_x_um"],
388                    origin_y_um=roi["origin_y_um"],
389                    width_um=roi["width_um"],
390                    height_um=roi["height_um"],
391                    tile_rows=roi["tile_rows"],
392                    tile_cols=roi["tile_cols"],
393                    focus_points=roi["focus_points"],
394                )
395            )
396        return result
397
398    @classmethod
399    def load_czi(cls, input_path: str) -> Self:
400        """
401        Extracts metadata from a .czi file, which is the output of the Axioscan
402        :param input_path: the path to the .czi file
403        :return: a Scan object
404        """
405        if aicspylibczi is None:
406            raise ModuleNotFoundError(
407                "aicspylibczi library not installed. "
408                "Install csi-images with [imageio] option to resolve."
409            )
410
411        # Normalize paths
412        input_path = os.path.abspath(input_path)
413
414        with open(input_path, "rb") as file:
415            # Read in metadata as XML elements
416            metadata_xml = aicspylibczi.CziFile(file).meta
417            # Read in shape metadata from binary
418            rois_shape = aicspylibczi.CziFile(file).get_dims_shape()
419
420        # Populate metadata
421        scan = cls()
422
423        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
424        if scan.slide_id is not None:
425            scan.slide_id = scan.slide_id.strip().upper()
426        # Map the raw scanner ID (service ID) to our IDs
427        scan.scanner_id = cls.SCANNER_IDS[
428            metadata_xml.find(".//Microscope/UserDefinedName").text
429        ]
430
431        # Extract start and finish datetimes
432        date = metadata_xml.find(".//Document/CreationDate").text
433        # Strip out sub-second precision
434        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
435        date_as_datetime = datetime.datetime.strptime(
436            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
437        )
438        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
439        scan.scan_time_s = round(
440            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
441        )
442        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
443        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
444
445        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
446        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
447
448        # Get camera and magnifying info
449        scan.camera = (
450            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
451        )["Name"]
452        magnification = metadata_xml.find(
453            ".//Objectives/Objective/NominalMagnification"
454        )
455        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
456        scan.objective = f"{magnification.text}x-{aperture.text}"
457        scan.pixel_size_um = (
458            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
459        )
460        # Round off the pixel size to nanometers; might not be optimal, but this
461        # gets rounded when we send it to the database anyways (to 7 places)
462        scan.pixel_size_um = round(scan.pixel_size_um, 3)
463
464        # Get tile information
465        # Note: X Y is untested, could be flipped. I always forget. Just don't use
466        # non-square frames and we're all good.
467        selected_detector = metadata_xml.find(".//SelectedDetector").text
468        detectors = metadata_xml.findall(".//Detectors/Detector")
469        for detector in detectors:
470            if detector.attrib["Id"] == selected_detector:
471                tile_info = detector.find(".//Frame")
472                break
473        # Convert to integers
474        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
475
476        scan.tile_x_offset_px = tile_info[0]
477        scan.tile_y_offset_px = tile_info[1]
478        scan.tile_width_px = tile_info[2]
479        scan.tile_height_px = tile_info[3]
480        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
481
482        # Extract channels and create Channel objects from them
483        channel_indices = []
484        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
485            channel_indices.append(int(channel.attrib["Id"][-1]))
486            intensity_xml = channel.find(".//Intensity")
487            if intensity_xml is None:
488                intensity = 0
489            else:
490                intensity = float(intensity_xml.text[:-2]) * 1e-2
491            scan.channels.append(
492                cls.Channel(
493                    name=channel.attrib["Name"].upper(),
494                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
495                    intensity=intensity,
496                    gain_applied=True,  # In Axioscan, we will always use gain = 1
497                )
498            )
499        # Make sure the channels are sorted
500        scan.channels = [
501            channel for _, channel in sorted(zip(channel_indices, scan.channels))
502        ]
503        # Verify that the shape corresponds to the channels
504        for roi in rois_shape:
505            if roi["C"][1] != len(scan.channels):
506                raise ValueError(
507                    f"Number of channels {len(scan.channels)} "
508                    f"is not the same as the number of channels in an ROI: "
509                    f"{roi['C'][1]}"
510                )
511
512        # Get the real ROI limits; the metadata is not always correct
513        limits_xml = metadata_xml.findall(".//AllowedScanArea")
514        limits = [
515            round(float(limits_xml[0].find("Center").text.split(",")[0])),
516            round(float(limits_xml[0].find("Center").text.split(",")[1])),
517            round(float(limits_xml[0].find("Size").text.split(",")[0])),
518            round(float(limits_xml[0].find("Size").text.split(",")[1])),
519        ]
520        # Convert to top-left and bottom-right
521        limits = [
522            round(limits[0] - limits[2] / 2),
523            round(limits[1] - limits[3] / 2),
524            round(limits[0] + limits[2] / 2),
525            round(limits[1] + limits[3] / 2),
526        ]
527
528        # Extract ROIs and create ROI objects from them
529        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
530        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
531        if len(rois_xml_metadata) != len(rois_shape):
532            raise ValueError(
533                f"Metadata and binary data from {input_path} "
534                f"do not match in number of ROIs"
535            )
536        # We need both to determine the number of rows/columns because the XML lies
537        roi_indices = []
538        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
539            name = roi_xml.attrib["Name"]
540            # Determine the index of this scene
541            scene_index = -1
542            for scene in scenes_xml_metadata:
543                if scene.attrib["Name"] == name:
544                    scene_index = int(scene.attrib["Index"])
545                    break
546            if scene_index == -1:
547                raise ValueError(f"ROI {name} does not correspond to any scenes")
548            else:
549                roi_indices.append(scene_index)
550            # Extract other metadata
551            roi_limits = [
552                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
553                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
554                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
555                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
556            ]
557            # Convert to top-left and bottom-right
558            roi_limits = [
559                round(roi_limits[0] - roi_limits[2] / 2),
560                round(roi_limits[1] - roi_limits[3] / 2),
561                round(roi_limits[0] + roi_limits[2] / 2),
562                round(roi_limits[1] + roi_limits[3] / 2),
563            ]
564            # Bound the ROI to the actual scan limits
565            roi_limits = [
566                max(roi_limits[0], limits[0]),
567                max(roi_limits[1], limits[1]),
568                min(roi_limits[2], limits[2]),
569                min(roi_limits[3], limits[3]),
570            ]
571
572            tile_rows = int(roi_xml.find("Rows").text)
573            # Current best way of reliably extracting; <Columns> entry can be wrong
574            if (roi_shape["M"][1] % tile_rows) != 0:
575                raise ValueError(
576                    f"The number of tiles {roi_shape['M'][1]} is not "
577                    f"divisible by the tile rows {tile_rows}; metadata "
578                    f"must be messed up. Thanks Zeiss"
579                )
580            else:
581                tile_cols = int(roi_shape["M"][1] / tile_rows)
582            # Support points are actually the relevant focus points for this ROI
583            focus_points = []
584            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
585                focus_points.append(
586                    [
587                        int(float(focus_point.find("X").text)),
588                        int(float(focus_point.find("Y").text)),
589                        int(float(focus_point.find("Z").text)),
590                    ]
591                )
592            # Strip all sub-micron precision, it does not matter
593            scan.roi.append(
594                cls.ROI(
595                    origin_x_um=roi_limits[0],
596                    origin_y_um=roi_limits[1],
597                    width_um=roi_limits[2] - roi_limits[0],
598                    height_um=roi_limits[3] - roi_limits[1],
599                    tile_rows=tile_rows,
600                    tile_cols=tile_cols,
601                    focus_points=focus_points,
602                )
603            )
604        # Sort based on the scene indices
605        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
606
607        return scan
608
609    @classmethod
610    def load_txt(cls, input_path: str) -> Self:
611        """
612        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
613        from the BZScanner. Some metadata is filled in or adjusted to fit
614        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
615        :return: a Scan object
616        """
617        # Set paths
618        input_path = os.path.abspath(input_path)
619        if os.path.isdir(input_path):
620            input_path = os.path.join(
621                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
622            )
623
624        # Read in metadata as a dict
625        with open(input_path, "r") as file:
626            metadata_contents = file.read()
627            # Read each line, splitting on the = sign
628            metadata_dict = {}
629            for line in metadata_contents.splitlines():
630                key, value = line.split("=")
631                metadata_dict[key] = value
632
633        # Populate metadata
634        scan = cls()
635
636        scan.slide_id = metadata_dict["SLIDEID"]
637        scan.slide_id = scan.slide_id.strip().upper()
638
639        scan.path = metadata_dict["SLIDEDIR"]
640
641        # Extract start and finish datetimes
642        date = metadata_dict["DATE"]
643        date_as_datetime = datetime.datetime.strptime(
644            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
645        )
646        date_as_datetime = date_as_datetime.astimezone(
647            zoneinfo.ZoneInfo("America/Los_Angeles")
648        )  # Hardcoded because BZScanners are here
649        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
650        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
651        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
652        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
653
654        # Map the raw scanner ID (service ID) to our IDs
655        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
656        scan.tray_pos = 0  # only one tray_pos in a BZScanner
657        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
658
659        # Get camera and magnifying info
660        scan.camera = ""
661        magnification = 10
662        aperture = 0  # TODO: find the actual aperture
663        scan.objective = f"{magnification}x-{aperture}"
664        scan.pixel_size_um = 0.591  # Estimated from image metadata
665
666        # Get tile information
667        scan.tile_width_px = 1362  # Known from image metadata
668        scan.tile_height_px = 1004  # Known from image metadata
669        scan.tile_x_offset_px = 0  # Already removed
670        scan.tile_y_offset_px = 0  # Already removed
671        scan.tile_overlap_proportion = 0  # Already removed
672
673        # Extract channels and create Channel objects from them
674        if "gain_applied" in metadata_dict:
675            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
676        else:
677            gain_applied = True  # Previous policy was always to apply gains
678        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
679            channel_settings = metadata_dict[channel].split(",")
680            if channel_settings[0] == "0":
681                continue
682            scan.channels.append(
683                cls.Channel(
684                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
685                    exposure_ms=float(channel_settings[1]),
686                    intensity=float(channel_settings[2]),
687                    gain_applied=gain_applied,
688                )
689            )
690
691        # Get focus points
692        focus_points = []
693        for i in range(33):
694            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
695            if focus_point[0] == "0":
696                break
697            focus_points.append(
698                [
699                    int(float(focus_point[1])),
700                    int(float(focus_point[2])),
701                    int(float(focus_point[3])),
702                ]
703            )
704
705        # In the BZScanner, the slide is vertical instead of horizontal
706        # We put in nominal values for the ROI, which is oriented vertically as well
707        tile_rows = 96
708        tile_cols = 24
709        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
710        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
711        origin_x_um = 2500 + round((20000 - roi_width) / 2)
712        origin_y_um = 2500 + round((58000 - roi_height) / 2)
713        scan.roi.append(
714            cls.ROI(
715                origin_x_um=origin_x_um,
716                origin_y_um=origin_y_um,
717                width_um=roi_width,
718                height_um=roi_height,
719                tile_rows=tile_rows,
720                tile_cols=tile_cols,
721                focus_points=focus_points,
722            )
723        )
724        return scan
725
726    @classmethod
727    def load_from_folder(cls, input_path: str) -> Self:
728        """
729        Load a Scan object from a folder that contains defaultly-named metadata files,
730        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
731        :param input_path: /path/to/folder
732        :return: a Scan object
733        """
734        input_path = os.path.abspath(input_path)
735        if os.path.isfile(
736            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
737        ):
738            return cls.load_yaml(input_path)
739        elif os.path.isfile(
740            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
741        ):
742            return cls.load_txt(input_path)
743        else:
744            raise ValueError(
745                f"No scan metadata files "
746                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
747                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
748                f"{input_path}"
749            )
750        pass
751
752    @classmethod
753    def make_placeholder(
754        cls,
755        slide_id: str,
756        n_tile: int = 2303,
757        n_roi: int = 0,
758        scanner_type: Type = Type.BZSCANNER,
759    ) -> Self:
760        """
761        Make a placeholder Scan object with only basic required information filled in.
762        :param slide_id: the slide ID
763        :param n_tile: the number of this tile, which will become the number of
764                       tiles in the scan
765        :param n_roi: the number of ROIs in the scan
766        :return: a Scan object
767        """
768        # Sanitize inputs here
769        slide_id = str(slide_id).strip().upper()
770        n_tile = int(n_tile)
771        n_roi = int(n_roi)
772        # Generate the object
773        scan = cls()
774        scan.slide_id = slide_id
775        if scanner_type == cls.Type.AXIOSCAN7:
776            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
777        elif scanner_type == cls.Type.BZSCANNER:
778            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
779        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
780        scan.roi[0].tile_rows = 1
781        scan.roi[0].tile_cols = n_tile + 1
782        return scan
class Scan(yaml.YAMLObject):
    """
    Class that composes a whole scan's metadata. Contains some universal data,
    plus lists for channels and ROIs.

    .. include:: ../docs/coordinate_systems.md
    """

    yaml_tag = "csi_images.csi_scans.Scan"

    class Type(enum.Enum):
        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT,
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }

    class Channel(yaml.YAMLObject):
        """
        Class that comprises a channel; we usually have multiple (2-5) per scan.
        Contains four fields:
        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
        - exposure_ms: the exposure time to capture a frame in milliseconds
        - intensity: the light intensity used OR the gain applied to the channel
        - gain_applied: boolean flag stored alongside the intensity
        """

        yaml_tag = "csi_images.csi_scans.Scan.Channel"

        def __init__(
            self,
            name: str = "",
            exposure_ms: float = -1.0,
            intensity: float = -1.0,
            gain_applied: bool = False,
        ):
            self.name = name
            self.exposure_ms = exposure_ms
            self.intensity = intensity
            self.gain_applied = gain_applied

        def __repr__(self):
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            # Equality is defined as having the same YAML representation
            return self.__repr__() == other.__repr__()

    class ROI(yaml.YAMLObject):
        """
        Class that comprises an ROI; we usually have 1, but may have more in a scan.
        """

        yaml_tag = "csi_images.csi_scans.Scan.ROI"

        def __init__(
            self,
            origin_x_um: int = -1,
            origin_y_um: int = -1,
            width_um: int = -1,
            height_um: int = -1,
            tile_rows: int = -1,
            tile_cols: int = -1,
            focus_points=None,
        ):
            # Avoid sharing one mutable default list between instances
            if focus_points is None:
                focus_points = []
            self.origin_x_um = origin_x_um
            self.origin_y_um = origin_y_um
            self.width_um = width_um
            self.height_um = height_um
            self.tile_rows = tile_rows
            self.tile_cols = tile_cols
            self.focus_points = focus_points

        def __repr__(self):
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            # Equality is defined as having the same YAML representation
            return self.__repr__() == other.__repr__()

        def similar(self, other):
            """
            Compare the geometry of two ROIs, ignoring their focus points.
            :param other: the ROI to compare against
            :return: True if origin, size, and tile layout all match
            """
            return (
                self.origin_y_um == other.origin_y_um
                and self.origin_x_um == other.origin_x_um
                and self.width_um == other.width_um
                and self.height_um == other.height_um
                and self.tile_rows == other.tile_rows
                and self.tile_cols == other.tile_cols
            )

    def __init__(
        self,
        slide_id: str = "",
        scanner_id: str = "",
        path: str = "",
        exists: bool = True,
        start_datetime: str = "",
        end_datetime: str = "",
        scan_time_s: int = -1,
        tray_pos: int = -1,
        slide_pos: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_x_offset_px: int = -1,
        tile_y_offset_px: int = -1,
        tile_overlap_proportion: float = -1,
        channels: list[Channel] | None = None,
        roi: list[ROI] | None = None,
    ):
        # Avoid sharing mutable default lists between instances
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        self.slide_id = slide_id
        self.scanner_id = scanner_id
        self.path = path
        self.exists = exists
        self.start_datetime = start_datetime
        self.end_datetime = end_datetime
        self.scan_time_s = scan_time_s
        self.tray_pos = tray_pos
        self.slide_pos = slide_pos
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_x_offset_px = tile_x_offset_px
        self.tile_y_offset_px = tile_y_offset_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi

    def __key(self):
        # Channel and ROI define __eq__ without __hash__, which makes their
        # instances unhashable. Their YAML representations are used instead;
        # this is also exactly what their __eq__ compares.
        return (
            self.slide_id,
            self.scanner_id,
            self.path,
            self.exists,
            self.start_datetime,
            self.end_datetime,
            self.scan_time_s,
            self.tray_pos,
            self.slide_pos,
            self.camera,
            self.objective,
            self.pixel_size_um,
            self.tile_width_px,
            self.tile_height_px,
            self.tile_x_offset_px,
            self.tile_y_offset_px,
            self.tile_overlap_proportion,
            tuple(repr(channel) for channel in self.channels),
            tuple(repr(roi) for roi in self.roi),
        )

    def __hash__(self):
        return hash(self.__key())

    def __repr__(self):
        return yaml.dump(self, sort_keys=False)

    def __eq__(self, other):
        # Equality is defined as having the same YAML representation
        return self.__repr__() == other.__repr__()

    def has_same_profile(self, other):
        """
        Check whether two scans share the same acquisition profile: camera,
        objective, pixel size, tile geometry, channels, and ROI geometry.
        :param other: the Scan to compare against
        :return: True if the profiles match
        """
        return (
            self.camera == other.camera
            and self.objective == other.objective
            and self.pixel_size_um == other.pixel_size_um
            and self.tile_width_px == other.tile_width_px
            and self.tile_height_px == other.tile_height_px
            and self.tile_x_offset_px == other.tile_x_offset_px
            and self.tile_y_offset_px == other.tile_y_offset_px
            and self.tile_overlap_proportion == other.tile_overlap_proportion
            and self.channels == other.channels
            # zip() stops at the shorter list, so check the lengths explicitly
            and len(self.roi) == len(other.roi)
            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
        )

    def get_channel_names(self) -> list[str]:
        """
        Get the channel names in the scan's channel order.
        :return: a list of channel names.
        """
        return [channel.name for channel in self.channels]

    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
        """
        Given a list of channel names, return the corresponding indices in the scan's
        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
        actual AlexaFluor names (AF555, AF647, AF488).
        If a list entry is not found or None, it will return -1 for that entry.
        :param channel_names: a list of channel names.
        :return: a list of channel indices.
        """
        # Get the scan's channel name list
        scan_channel_names = self.get_channel_names()

        channel_indices = []
        for name in channel_names:
            # Convert any BZScanner channel names to the actual channel names
            if name in self.BZSCANNER_CHANNEL_MAP:
                name = self.BZSCANNER_CHANNEL_MAP[name]

            # Append the corresponding index if possible
            if name in scan_channel_names:
                channel_indices.append(scan_channel_names.index(name))
            else:
                channel_indices.append(-1)
        return channel_indices

    def get_image_size(self) -> tuple[int, int]:
        """
        Get the real size of the image in pixels after subtracting overlap.
        :return: a tuple of (real_height, real_width) for easy comparison to arrays
        """
        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap

    def save_yaml(self, output_path: str):
        """
        Write the Scan object to a .yaml file.
        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
        :return: nothing; will raise an error on failure
        """
        # Create necessary folders
        output_path = os.path.abspath(output_path)
        if os.path.splitext(output_path)[1] == ".yaml":
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
        else:
            os.makedirs(output_path, exist_ok=True)
            # Add the standard metadata file name to the path if needed
            output_path = os.path.join(
                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
            )

        # Populate the file
        with open(output_path, "w") as file:
            yaml.dump(self, stream=file, sort_keys=False)

    @classmethod
    def load_yaml(cls, input_path: str) -> Self:
        """
        Load a Scan object from a .yaml file.
        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
        :return: a Scan object
        """
        input_path = os.path.abspath(input_path)
        if os.path.isdir(input_path):
            input_path = os.path.join(
                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
            )
        with open(input_path, "r") as file:
            # NOTE(security): yaml.Loader can construct arbitrary Python objects;
            # only load metadata files from trusted sources
            metadata_obj = yaml.load(file, Loader=yaml.Loader)
        return metadata_obj

    def to_dict(self) -> dict:
        """
        Convert the Scan object to a dictionary with keys matching database columns
        and values matching database entries
        :return: a dictionary
        """
        # Dump to json; then add indents and a top-level key
        channels_json = json.dumps(
            self.channels, default=lambda x: x.__dict__, indent=2
        )
        channels_json = "  ".join(channels_json.splitlines(True))
        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"

        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
        roi_json = "  ".join(roi_json.splitlines(True))
        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"

        # Keys are named the same as database columns
        return {
            "scanner_id": self.scanner_id,
            "slide_id": self.slide_id,
            "exists": self.exists,
            "path": self.path,
            "start_datetime": self.start_datetime,
            "end_datetime": self.end_datetime,
            "scan_time_s": self.scan_time_s,
            "tray_pos": self.tray_pos,
            "slide_pos": self.slide_pos,
            "tile_width": self.tile_width_px,
            "tile_height": self.tile_height_px,
            "tile_x_offset": self.tile_x_offset_px,
            "tile_y_offset": self.tile_y_offset_px,
            "tile_overlap": self.tile_overlap_proportion,
            "camera": self.camera,
            "objective": self.objective,
            "pixel_size": self.pixel_size_um,
            "channels": channels_json,
            "roi": roi_json,
        }

    @classmethod
    def from_dict(cls, scan_dict) -> Self:
        """
        Convert a dictionary from to_dict() or the database to a Scan object
        :param scan_dict: a dictionary; the "channels" and "roi" entries may each
                          be either a JSON string or an already-decoded dictionary,
                          with the list of items stored under the "data" key
        :return: a Scan object
        """
        result = cls(
            scanner_id=scan_dict["scanner_id"],
            slide_id=scan_dict["slide_id"],
            path=scan_dict["path"],
            exists=scan_dict["exists"],
            start_datetime=scan_dict["start_datetime"],
            end_datetime=scan_dict["end_datetime"],
            scan_time_s=scan_dict["scan_time_s"],
            tray_pos=scan_dict["tray_pos"],
            slide_pos=scan_dict["slide_pos"],
            camera=scan_dict["camera"],
            objective=scan_dict["objective"],
            pixel_size_um=scan_dict["pixel_size"],
            tile_width_px=scan_dict["tile_width"],
            tile_height_px=scan_dict["tile_height"],
            tile_x_offset_px=scan_dict["tile_x_offset"],
            tile_y_offset_px=scan_dict["tile_y_offset"],
            tile_overlap_proportion=scan_dict["tile_overlap"],
        )
        # Handle JSON and dictionaries
        channels_data = scan_dict["channels"]
        if isinstance(channels_data, str):
            channels_data = json.loads(channels_data)
        for channel in channels_data["data"]:
            result.channels.append(
                cls.Channel(
                    name=channel["name"],
                    exposure_ms=channel["exposure_ms"],
                    intensity=channel["intensity"],
                    gain_applied=channel["gain_applied"],
                )
            )
        # Handle JSON and dictionaries; the ROI entry is checked on its own
        # because it is not guaranteed to have the same type as the channels entry
        roi_data = scan_dict["roi"]
        if isinstance(roi_data, str):
            roi_data = json.loads(roi_data)
        for roi in roi_data["data"]:
            result.roi.append(
                cls.ROI(
                    origin_x_um=roi["origin_x_um"],
                    origin_y_um=roi["origin_y_um"],
                    width_um=roi["width_um"],
                    height_um=roi["height_um"],
                    tile_rows=roi["tile_rows"],
                    tile_cols=roi["tile_cols"],
                    focus_points=roi["focus_points"],
                )
            )
        return result

    @classmethod
    def load_czi(cls, input_path: str) -> Self:
        """
        Extracts metadata from a .czi file, which is the output of the Axioscan
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises ValueError: if the XML metadata and the binary shape information
                            are inconsistent with each other
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        with open(input_path, "rb") as file:
            # One reader serves both queries
            czi_file = aicspylibczi.CziFile(file)
            # Read in metadata as XML elements
            metadata_xml = czi_file.meta
            # Read in shape metadata from binary
            rois_shape = czi_file.get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision, keeping the trailing UTC offset. Only do
        # this when a fractional part exists: str.find() returns -1 otherwise,
        # which would chop the last character off the timestamp
        fraction_start = date.find(".")
        if fraction_start != -1:
            offset_start = max(date.rfind("-"), date.rfind("+"))
            date = date[:fraction_start] + date[offset_start:]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # Only the last character of the position string is used
        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        scan.objective = f"{magnification.text}x-{aperture.text}"
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        selected_detector = metadata_xml.find(".//SelectedDetector").text
        detectors = metadata_xml.findall(".//Detectors/Detector")
        for detector in detectors:
            if detector.attrib["Id"] == selected_detector:
                tile_info = detector.find(".//Frame")
                break
        else:
            # Without this, tile_info would be unbound below
            raise ValueError(
                f"Selected detector {selected_detector} was not found "
                f"in the metadata of {input_path}"
            )
        # Convert to integers
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        scan.tile_x_offset_px = tile_info[0]
        scan.tile_y_offset_px = tile_info[1]
        scan.tile_width_px = tile_info[2]
        scan.tile_height_px = tile_info[3]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # The channel order is taken from the last character of the Id
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drops the last two characters and scales by 1e-2 (presumably a
                # percentage with a trailing unit suffix; TODO confirm)
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                    gain_applied=True,  # In Axioscan, we will always use gain = 1
                )
            )
        # Make sure the channels are sorted
        scan.channels = [
            channel for _, channel in sorted(zip(channel_indices, scan.channels))
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        limits = [
            round(float(limits_xml[0].find("Center").text.split(",")[0])),
            round(float(limits_xml[0].find("Center").text.split(",")[1])),
            round(float(limits_xml[0].find("Size").text.split(",")[0])),
            round(float(limits_xml[0].find("Size").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        limits = [
            round(limits[0] - limits[2] / 2),
            round(limits[1] - limits[3] / 2),
            round(limits[0] + limits[2] / 2),
            round(limits[1] + limits[3] / 2),
        ]

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            else:
                roi_indices.append(scene_index)
            # Extract other metadata
            roi_limits = [
                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
            ]
            # Convert to top-left and bottom-right
            roi_limits = [
                round(roi_limits[0] - roi_limits[2] / 2),
                round(roi_limits[1] - roi_limits[3] / 2),
                round(roi_limits[0] + roi_limits[2] / 2),
                round(roi_limits[1] + roi_limits[3] / 2),
            ]
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            else:
                # Divisibility was just checked, so this division is exact
                tile_cols = roi_shape["M"][1] // tile_rows
            # Support points are actually the relevant focus points for this ROI
            focus_points = []
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
                focus_points.append(
                    [
                        int(float(focus_point.find("X").text)),
                        int(float(focus_point.find("Y").text)),
                        int(float(focus_point.find("Z").text)),
                    ]
                )
            # Strip all sub-micron precision, it does not matter
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices
        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

        return scan

    @classmethod
    def load_txt(cls, input_path: str) -> Self:
        """
        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
        from the BZScanner. Some metadata is filled in or adjusted to fit
        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
        :return: a Scan object
        """
        # Set paths
        input_path = os.path.abspath(input_path)
        if os.path.isdir(input_path):
            input_path = os.path.join(
                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
            )

        # Read in metadata as a dict
        with open(input_path, "r") as file:
            metadata_contents = file.read()
        # Read each line, splitting on the first = sign only so that values may
        # themselves contain one; lines without an = sign (e.g. blank) are skipped
        metadata_dict = {}
        for line in metadata_contents.splitlines():
            key, separator, value = line.partition("=")
            if not separator:
                continue
            metadata_dict[key] = value

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_dict["SLIDEID"]
        scan.slide_id = scan.slide_id.strip().upper()

        scan.path = metadata_dict["SLIDEDIR"]

        # Extract start and finish datetimes
        date = metadata_dict["DATE"]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
        )
        # Hardcoded because BZScanners are here. The parsed time is naive, so the
        # zone is attached with replace(); astimezone() would first interpret it in
        # the local zone of whatever machine runs this code and then shift it
        date_as_datetime = date_as_datetime.replace(
            tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")
        )
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
        scan.tray_pos = 0  # only one tray_pos in a BZScanner
        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed

        # Get camera and magnifying info
        scan.camera = ""
        magnification = 10
        aperture = 0  # TODO: find the actual aperture
        scan.objective = f"{magnification}x-{aperture}"
        scan.pixel_size_um = 0.591  # Estimated from image metadata

        # Get tile information
        scan.tile_width_px = 1362  # Known from image metadata
        scan.tile_height_px = 1004  # Known from image metadata
        scan.tile_x_offset_px = 0  # Already removed
        scan.tile_y_offset_px = 0  # Already removed
        scan.tile_overlap_proportion = 0  # Already removed

        # Extract channels and create Channel objects from them
        if "gain_applied" in metadata_dict:
            gain_applied = metadata_dict["gain_applied"] == "1"
        else:
            gain_applied = True  # Previous policy was always to apply gains
        for channel, channel_name in cls.BZSCANNER_CHANNEL_MAP.items():
            channel_settings = metadata_dict[channel].split(",")
            # A leading "0" marks a channel that was not used
            if channel_settings[0] == "0":
                continue
            scan.channels.append(
                cls.Channel(
                    name=channel_name,
                    exposure_ms=float(channel_settings[1]),
                    intensity=float(channel_settings[2]),
                    gain_applied=gain_applied,
                )
            )

        # Get focus points; a leading "0" ends the list
        focus_points = []
        for i in range(33):
            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
            if focus_point[0] == "0":
                break
            focus_points.append(
                [
                    int(float(focus_point[1])),
                    int(float(focus_point[2])),
                    int(float(focus_point[3])),
                ]
            )

        # In the BZScanner, the slide is vertical instead of horizontal
        # We put in nominal values for the ROI, which is oriented vertically as well
        tile_rows = 96
        tile_cols = 24
        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
        origin_x_um = 2500 + round((20000 - roi_width) / 2)
        origin_y_um = 2500 + round((58000 - roi_height) / 2)
        scan.roi.append(
            cls.ROI(
                origin_x_um=origin_x_um,
                origin_y_um=origin_y_um,
                width_um=roi_width,
                height_um=roi_height,
                tile_rows=tile_rows,
                tile_cols=tile_cols,
                focus_points=focus_points,
            )
        )
        return scan

    @classmethod
    def load_from_folder(cls, input_path: str) -> Self:
        """
        Load a Scan object from a folder that contains default-named metadata files,
        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
        :param input_path: /path/to/folder
        :return: a Scan object
        :raises ValueError: if neither metadata file exists in the folder
        """
        input_path = os.path.abspath(input_path)
        if os.path.isfile(
            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
        ):
            return cls.load_yaml(input_path)
        elif os.path.isfile(
            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
        ):
            return cls.load_txt(input_path)
        else:
            raise ValueError(
                f"No scan metadata files "
                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
                f"{input_path}"
            )

    @classmethod
    def make_placeholder(
        cls,
        slide_id: str,
        n_tile: int = 2303,
        n_roi: int = 0,
        scanner_type: Type = Type.BZSCANNER,
    ) -> Self:
        """
        Make a placeholder Scan object with only basic required information filled in.
        :param slide_id: the slide ID; converted to a stripped, upper-case string
        :param n_tile: a tile number; the first ROI is laid out as a single row
                       with n_tile + 1 columns
        :param n_roi: an ROI number; n_roi + 1 ROI objects are created
        :param scanner_type: the scanner type, used to pick the placeholder scanner
                             ID; any other value leaves the scanner ID empty
        :return: a Scan object
        """
        # Sanitize inputs here
        slide_id = str(slide_id).strip().upper()
        n_tile = int(n_tile)
        n_roi = int(n_roi)
        # Generate the object
        scan = cls()
        scan.slide_id = slide_id
        if scanner_type in (cls.Type.AXIOSCAN7, cls.Type.BZSCANNER):
            scan.scanner_id = f"{scanner_type.value}_placeholder"
        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
        # Only the first ROI receives tile dimensions; the rest stay at defaults
        scan.roi[0].tile_rows = 1
        scan.roi[0].tile_cols = n_tile + 1
        return scan

Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.

Scans & Scanners

Scans are the highest-level data structure, indicating the key parameters of a scan such as the area scanned, the dimensions of output images, and the channels used. Scans also include metadata such as the scanner ID, slide ID, where the images should be, etc.

Coordinate Frames

There are three levels of coordinate frames in a scan. From inside-out, we have:

  • Tile coordinate frame. Events are provided with simple integer (x, y) pixel coordinates, which makes it easy to crop and manipulate images. The origin is at the top-left corner as with normal image axes.
  • Scan coordinate frame. Each scanner has its own coordinate frame, which is determined by the scanner's hardware. The scanner coordinate frame is used to convert between in-frame pixel coordinates and micrometers. The origin varies by the scanner, but generally resides in the top-left of the scanner's movable stage. In cases where there are multiple slide slots, the origin is assumed to be at the top left of the current slide. The slide may be oriented horizontally, vertically, or upside-down; this all depends on the scanner.
  • Slide coordinate frame. This is a set coordinate frame where:
    • Slide is active area up.
    • Slide is oriented horizontally.
    • Slide label area is on the left.
    • Origin is at the top-left corner.

Generally speaking, we should always compare scanners by converting them to the slide coordinate frame. Events in the scan and slide coordinate frames are expressed in micrometers ($\mu$m).

Diagram of the BZScanner coordinate system, which uses a vertical slide alignment with
the label at the bottom. The slide is active area down and tiles zigzag from the
top-left corner across and back. The x-axis points right, the y-axis points
down, and the origin is in the top-left corner.

Diagram of the Axioscan coordinate system, which uses a horizontal slide alignment with
the label on the left. The slide is active area up and tiles go across in row-major
order from the top-left corner. The x-axis points right, and the y-axis points
down, but the origin is in the top-right corner.

Diagram of the slide coordinate system, which uses a horizontal slide alignment with
the label on the left. The slide is active area up. The x-axis points right, the y-axis
points down, and the origin is in the top-left corner.

Scan( slide_id: str = '', scanner_id: str = '', path: str = '', exists: bool = True, start_datetime: str = '', end_datetime: str = '', scan_time_s: int = -1, tray_pos: int = -1, slide_pos: int = -1, camera: str = '', objective: str = '', pixel_size_um: float = -1.0, tile_width_px: int = -1, tile_height_px: int = -1, tile_x_offset_px: int = -1, tile_y_offset_px: int = -1, tile_overlap_proportion: int = -1, channels: list[Scan.Channel] = None, roi: list[Scan.ROI] = None)
132    def __init__(
133        self,
134        slide_id: str = "",
135        scanner_id: str = "",
136        path: str = "",
137        exists: bool = True,
138        start_datetime: str = "",
139        end_datetime: str = "",
140        scan_time_s: int = -1,
141        tray_pos: int = -1,
142        slide_pos: int = -1,
143        camera: str = "",
144        objective: str = "",
145        pixel_size_um: float = -1.0,
146        tile_width_px: int = -1,
147        tile_height_px: int = -1,
148        tile_x_offset_px: int = -1,
149        tile_y_offset_px: int = -1,
150        tile_overlap_proportion: int = -1,
151        channels: list[Channel] = None,
152        roi: list[ROI] = None,
153    ):
154        if roi is None:
155            roi = []
156        if channels is None:
157            channels = []
158        self.slide_id = slide_id
159        self.scanner_id = scanner_id
160        self.path = path
161        self.exists = exists
162        self.start_datetime = start_datetime
163        self.end_datetime = end_datetime
164        self.scan_time_s = scan_time_s
165        self.tray_pos = tray_pos
166        self.slide_pos = slide_pos
167        self.camera = camera
168        self.objective = objective
169        self.pixel_size_um = pixel_size_um
170        self.tile_width_px = tile_width_px
171        self.tile_height_px = tile_height_px
172        self.tile_x_offset_px = tile_x_offset_px
173        self.tile_y_offset_px = tile_y_offset_px
174        self.tile_overlap_proportion = tile_overlap_proportion
175        self.channels = channels
176        self.roi = roi
yaml_tag = 'Scan'
SCANNER_IDS = {'4661000426': 'axioscan7_0'}

Axioscan 7 scanner IDs (service number), mapped to our scanner IDs

METADATA_FILE_NAME = {<Type.AXIOSCAN7: 'axioscan7'>: 'scan.yaml', <Type.BZSCANNER: 'bzscanner'>: 'slideinfo.txt'}
STANDARD_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S%z'
DATETIME_FORMAT = {<Type.AXIOSCAN7: 'axioscan7'>: '%Y-%m-%dT%H:%M:%S%z', <Type.BZSCANNER: 'bzscanner'>: '%a %b %d %H:%M:%S %Y'}
BZSCANNER_CHANNEL_MAP = {'DAPI': 'DAPI', 'TRITC': 'AF555', 'CY5': 'AF647', 'BF': 'BRIGHT', 'FITC': 'AF488'}
slide_id
scanner_id
path
exists
start_datetime
end_datetime
scan_time_s
tray_pos
slide_pos
camera
objective
pixel_size_um
tile_width_px
tile_height_px
tile_x_offset_px
tile_y_offset_px
tile_overlap_proportion
channels
roi
def has_same_profile(self, other):
208    def has_same_profile(self, other):
209        return (
210            self.camera == other.camera
211            and self.objective == other.objective
212            and self.pixel_size_um == other.pixel_size_um
213            and self.tile_width_px == other.tile_width_px
214            and self.tile_height_px == other.tile_height_px
215            and self.tile_x_offset_px == other.tile_x_offset_px
216            and self.tile_y_offset_px == other.tile_y_offset_px
217            and self.tile_overlap_proportion == other.tile_overlap_proportion
218            and self.channels == other.channels
219            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
220        )
def get_channel_names(self) -> list[str]:
222    def get_channel_names(self) -> list[str]:
223        """
224        Get the channel names in the scan's channel order.
225        :return: a list of channel names.
226        """
227        return [channel.name for channel in self.channels]

Get the channel names in the scan's channel order.

Returns

a list of channel names.

def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
229    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
230        """
231        Given a list of channel names, return the corresponding indices in the scan's
232        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
233        actual AlexaFluor names (AF555, AF647, AF488).
234        If a list entry is not found or None, it will return -1 for that entry.
235        :param channel_names: a list of channel names.
236        :return: a list of channel indices.
237        """
238        # Get the scan's channel name list
239        scan_channel_names = self.get_channel_names()
240
241        channel_indices = []
242        for name in channel_names:
243            # Convert any BZScanner channel names to the actual channel names
244            if name in self.BZSCANNER_CHANNEL_MAP:
245                name = self.BZSCANNER_CHANNEL_MAP[name]
246
247            # Append the corresponding index if possible
248            if name in scan_channel_names:
249                channel_indices.append(scan_channel_names.index(name))
250            else:
251                channel_indices.append(-1)
252        return channel_indices

Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is not found or None, it will return -1 for that entry.

Parameters
  • channel_names: a list of channel names.
Returns

a list of channel indices.

def get_image_size(self) -> tuple[int, int]:
254    def get_image_size(self) -> tuple[int, int]:
255        """
256        Get the real size of the image in pixels after subtracting overlap.
257        :return: a tuple of (real_height, real_width) for easy comparison to arrays
258        """
259        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
260        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
261        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap

Get the real size of the image in pixels after subtracting overlap.

Returns

a tuple of (real_height, real_width) for easy comparison to arrays

def save_yaml(self, output_path: str):
263    def save_yaml(self, output_path: str):
264        """
265        Write the Scan object to a .yaml file.
266        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
267        :return: nothing; will raise an error on failure
268        """
269        # Create necessary folders
270        output_path = os.path.abspath(output_path)
271        if os.path.splitext(output_path)[1] == ".yaml":
272            os.makedirs(os.path.dirname(output_path), exist_ok=True)
273        else:
274            os.makedirs(output_path, exist_ok=True)
275            # Add the standard metadata file name to the path if needed
276            output_path = os.path.join(
277                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
278            )
279
280        # Populate the file
281        with open(output_path, "w") as file:
282            yaml.dump(self, stream=file, sort_keys=False)

Write the Scan object to a .yaml file.

Parameters
  • output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns

nothing; will raise an error on failure

@classmethod
def load_yaml(cls, input_path: str) -> Self:
284    @classmethod
285    def load_yaml(cls, input_path: str) -> Self:
286        """
287        Load a Scan object from a .yaml file.
288        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
289        :return: a Scan object
290        """
291        input_path = os.path.abspath(input_path)
292        if os.path.isdir(input_path):
293            input_path = os.path.join(
294                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
295            )
296        with open(input_path, "r") as file:
297            metadata_obj = yaml.load(file, Loader=yaml.Loader)
298        return metadata_obj

Load a Scan object from a .yaml file.

Parameters
  • input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns

a Scan object

def to_dict(self) -> dict:
300    def to_dict(self) -> dict:
301        """
302        Convert the Scan object to a dictionary with keys matching database columns
303        and values matching database entries
304        :return: a dictionary
305        """
306        # Dump to json; then add indents and a top-level key
307        channels_json = json.dumps(
308            self.channels, default=lambda x: x.__dict__, indent=2
309        )
310        channels_json = "  ".join(channels_json.splitlines(True))
311        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
312
313        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
314        roi_json = "  ".join(roi_json.splitlines(True))
315        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
316
317        # Keys are named the same as database columns
318        return {
319            "scanner_id": self.scanner_id,
320            "slide_id": self.slide_id,
321            "exists": self.exists,
322            "path": self.path,
323            "start_datetime": self.start_datetime,
324            "end_datetime": self.end_datetime,
325            "scan_time_s": self.scan_time_s,
326            "tray_pos": self.tray_pos,
327            "slide_pos": self.slide_pos,
328            "tile_width": self.tile_width_px,
329            "tile_height": self.tile_height_px,
330            "tile_x_offset": self.tile_x_offset_px,
331            "tile_y_offset": self.tile_y_offset_px,
332            "tile_overlap": self.tile_overlap_proportion,
333            "camera": self.camera,
334            "objective": self.objective,
335            "pixel_size": self.pixel_size_um,
336            "channels": channels_json,
337            "roi": roi_json,
338        }

Convert the Scan object to a dictionary with keys matching database columns and values matching database entries

Returns

a dictionary

@classmethod
def from_dict(cls, scan_dict) -> Self:
340    @classmethod
341    def from_dict(cls, scan_dict) -> Self:
342        """
343        Convert a dictionary from to_dict() or the database to a Scan object
344        :param scan_dict: a dictionary
345        :return: a Scan object
346        """
347        result = cls(
348            scanner_id=scan_dict["scanner_id"],
349            slide_id=scan_dict["slide_id"],
350            path=scan_dict["path"],
351            exists=scan_dict["exists"],
352            start_datetime=scan_dict["start_datetime"],
353            end_datetime=scan_dict["end_datetime"],
354            scan_time_s=scan_dict["scan_time_s"],
355            tray_pos=scan_dict["tray_pos"],
356            slide_pos=scan_dict["slide_pos"],
357            camera=scan_dict["camera"],
358            objective=scan_dict["objective"],
359            pixel_size_um=scan_dict["pixel_size"],
360            tile_width_px=scan_dict["tile_width"],
361            tile_height_px=scan_dict["tile_height"],
362            tile_x_offset_px=scan_dict["tile_x_offset"],
363            tile_y_offset_px=scan_dict["tile_y_offset"],
364            tile_overlap_proportion=scan_dict["tile_overlap"],
365        )
366        # Handle JSON and dictionaries
367        if isinstance(scan_dict["channels"], str):
368            channels_dict = json.loads(scan_dict["channels"])["data"]
369        else:
370            channels_dict = scan_dict["channels"]["data"]
371        for channel in channels_dict:
372            result.channels.append(
373                cls.Channel(
374                    name=channel["name"],
375                    exposure_ms=channel["exposure_ms"],
376                    intensity=channel["intensity"],
377                    gain_applied=channel["gain_applied"],
378                )
379            )
380        # Handle JSON and dictionaries
381        if isinstance(scan_dict["channels"], str):
382            roi_dict = json.loads(scan_dict["roi"])["data"]
383        else:
384            roi_dict = scan_dict["roi"]["data"]
385        for roi in roi_dict:
386            result.roi.append(
387                cls.ROI(
388                    origin_x_um=roi["origin_x_um"],
389                    origin_y_um=roi["origin_y_um"],
390                    width_um=roi["width_um"],
391                    height_um=roi["height_um"],
392                    tile_rows=roi["tile_rows"],
393                    tile_cols=roi["tile_cols"],
394                    focus_points=roi["focus_points"],
395                )
396            )
397        return result

Convert a dictionary from to_dict() or the database to a Scan object

Parameters
  • scan_dict: a dictionary
Returns

a Scan object

@classmethod
def load_czi(cls, input_path: str) -> Self:
    @classmethod
    def load_czi(cls, input_path: str) -> Self:
        """
        Extracts metadata from a .czi file, which is the output of the Axioscan.
        Combines the file's XML metadata with the shape information read from
        the binary data to fill in scan, channel, and ROI fields.
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises KeyError: if the scanner's name is not a key of SCANNER_IDS
        :raises ValueError: if the channel count, ROI count, scene names, or
                            tile counts in the metadata are inconsistent
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        with open(input_path, "rb") as file:
            # Read in metadata as XML elements
            metadata_xml = aicspylibczi.CziFile(file).meta
            # Read in shape metadata from binary
            rois_shape = aicspylibczi.CziFile(file).get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision
        # NOTE(review): assumes the date contains a "." and ends with a signed
        # UTC offset; a date without fractional seconds would lose a character
        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        # Duration is divided by 1000 before being stored as whole seconds
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        # The end time is derived from the start time plus the duration
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # Only the last character of the position text is used as the number
        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        scan.objective = f"{magnification.text}x-{aperture.text}"
        # Scaled by 1e6; presumably the file stores meters - TODO confirm
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        selected_detector = metadata_xml.find(".//SelectedDetector").text
        detectors = metadata_xml.findall(".//Detectors/Detector")
        # NOTE(review): tile_info is only bound when a detector Id matches the
        # selected detector; otherwise the conversion below fails on the name
        for detector in detectors:
            if detector.attrib["Id"] == selected_detector:
                tile_info = detector.find(".//Frame")
                break
        # Convert to integers
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        # Frame entries are read in the order: x offset, y offset, width, height
        scan.tile_x_offset_px = tile_info[0]
        scan.tile_y_offset_px = tile_info[1]
        scan.tile_width_px = tile_info[2]
        scan.tile_height_px = tile_info[3]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # The channel index is the last character of the Id attribute, so
            # this only distinguishes single-digit indices
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drops the last two characters, then scales by 1e-2
                # (presumably a percentage with a unit suffix - TODO confirm)
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    # Scaled by 1e-6; presumably stored in ns - TODO confirm
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                    gain_applied=True,  # In Axioscan, we will always use gain = 1
                )
            )
        # Make sure the channels are sorted
        scan.channels = [
            channel for _, channel in sorted(zip(channel_indices, scan.channels))
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        # Only the first AllowedScanArea element is used
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        limits = [
            round(float(limits_xml[0].find("Center").text.split(",")[0])),
            round(float(limits_xml[0].find("Center").text.split(",")[1])),
            round(float(limits_xml[0].find("Size").text.split(",")[0])),
            round(float(limits_xml[0].find("Size").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        limits = [
            round(limits[0] - limits[2] / 2),
            round(limits[1] - limits[3] / 2),
            round(limits[0] + limits[2] / 2),
            round(limits[1] + limits[3] / 2),
        ]

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            else:
                roi_indices.append(scene_index)
            # Extract other metadata
            roi_limits = [
                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
            ]
            # Convert to top-left and bottom-right
            roi_limits = [
                round(roi_limits[0] - roi_limits[2] / 2),
                round(roi_limits[1] - roi_limits[3] / 2),
                round(roi_limits[0] + roi_limits[2] / 2),
                round(roi_limits[1] + roi_limits[3] / 2),
            ]
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            # The tile count from the binary shape is divided by the XML rows
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            else:
                tile_cols = int(roi_shape["M"][1] / tile_rows)
            # Support points are actually the relevant focus points for this ROI
            focus_points = []
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
                focus_points.append(
                    [
                        int(float(focus_point.find("X").text)),
                        int(float(focus_point.find("Y").text)),
                        int(float(focus_point.find("Z").text)),
                    ]
                )
            # Strip all sub-micron precision, it does not matter
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices
        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

        return scan

Extracts metadata from a .czi file, which is the output of the Axioscan

Parameters
  • input_path: the path to the .czi file
Returns

a Scan object

@classmethod
def load_txt(cls, input_path: str) -> Self:
610    @classmethod
611    def load_txt(cls, input_path: str) -> Self:
612        """
613        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
614        from the BZScanner. Some metadata is filled in or adjusted to fit
615        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
616        :return: a Scan object
617        """
618        # Set paths
619        input_path = os.path.abspath(input_path)
620        if os.path.isdir(input_path):
621            input_path = os.path.join(
622                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
623            )
624
625        # Read in metadata as a dict
626        with open(input_path, "r") as file:
627            metadata_contents = file.read()
628            # Read each line, splitting on the = sign
629            metadata_dict = {}
630            for line in metadata_contents.splitlines():
631                key, value = line.split("=")
632                metadata_dict[key] = value
633
634        # Populate metadata
635        scan = cls()
636
637        scan.slide_id = metadata_dict["SLIDEID"]
638        scan.slide_id = scan.slide_id.strip().upper()
639
640        scan.path = metadata_dict["SLIDEDIR"]
641
642        # Extract start and finish datetimes
643        date = metadata_dict["DATE"]
644        date_as_datetime = datetime.datetime.strptime(
645            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
646        )
647        date_as_datetime = date_as_datetime.astimezone(
648            zoneinfo.ZoneInfo("America/Los_Angeles")
649        )  # Hardcoded because BZScanners are here
650        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
651        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
652        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
653        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
654
655        # Map the raw scanner ID (service ID) to our IDs
656        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
657        scan.tray_pos = 0  # only one tray_pos in a BZScanner
658        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
659
660        # Get camera and magnifying info
661        scan.camera = ""
662        magnification = 10
663        aperture = 0  # TODO: find the actual aperture
664        scan.objective = f"{magnification}x-{aperture}"
665        scan.pixel_size_um = 0.591  # Estimated from image metadata
666
667        # Get tile information
668        scan.tile_width_px = 1362  # Known from image metadata
669        scan.tile_height_px = 1004  # Known from image metadata
670        scan.tile_x_offset_px = 0  # Already removed
671        scan.tile_y_offset_px = 0  # Already removed
672        scan.tile_overlap_proportion = 0  # Already removed
673
674        # Extract channels and create Channel objects from them
675        if "gain_applied" in metadata_dict:
676            gain_applied = True if metadata_dict["gain_applied"] == "1" else False
677        else:
678            gain_applied = True  # Previous policy was always to apply gains
679        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
680            channel_settings = metadata_dict[channel].split(",")
681            if channel_settings[0] == "0":
682                continue
683            scan.channels.append(
684                cls.Channel(
685                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
686                    exposure_ms=float(channel_settings[1]),
687                    intensity=float(channel_settings[2]),
688                    gain_applied=gain_applied,
689                )
690            )
691
692        # Get focus points
693        focus_points = []
694        for i in range(33):
695            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
696            if focus_point[0] == "0":
697                break
698            focus_points.append(
699                [
700                    int(float(focus_point[1])),
701                    int(float(focus_point[2])),
702                    int(float(focus_point[3])),
703                ]
704            )
705
706        # In the BZScanner, the slide is vertical instead of horizontal
707        # We put in nominal values for the ROI, which is oriented vertically as well
708        tile_rows = 96
709        tile_cols = 24
710        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
711        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
712        origin_x_um = 2500 + round((20000 - roi_width) / 2)
713        origin_y_um = 2500 + round((58000 - roi_height) / 2)
714        scan.roi.append(
715            cls.ROI(
716                origin_x_um=origin_x_um,
717                origin_y_um=origin_y_um,
718                width_um=roi_width,
719                height_um=roi_height,
720                tile_rows=tile_rows,
721                tile_cols=tile_cols,
722                focus_points=focus_points,
723            )
724        )
725        return scan

Loads a Scan object from a .txt file, usually slideinfo.txt, which originates from the BZScanner. Some metadata is filled in or adjusted to fit

Parameters
  • input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
Returns

a Scan object

@classmethod
def load_from_folder(cls, input_path: str) -> Self:
727    @classmethod
728    def load_from_folder(cls, input_path: str) -> Self:
729        """
730        Load a Scan object from a folder that contains defaultly-named metadata files,
731        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
732        :param input_path: /path/to/folder
733        :return: a Scan object
734        """
735        input_path = os.path.abspath(input_path)
736        if os.path.isfile(
737            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
738        ):
739            return cls.load_yaml(input_path)
740        elif os.path.isfile(
741            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
742        ):
743            return cls.load_txt(input_path)
744        else:
745            raise ValueError(
746                f"No scan metadata files "
747                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
748                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
749                f"{input_path}"
750            )
751        pass

Load a Scan object from a folder that contains metadata files with their default names, scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.

Parameters
  • input_path: /path/to/folder
Returns

a Scan object

@classmethod
def make_placeholder( cls, slide_id: str, n_tile: int = 2303, n_roi: int = 0, scanner_type: Scan.Type = <Type.BZSCANNER: 'bzscanner'>) -> Self:
753    @classmethod
754    def make_placeholder(
755        cls,
756        slide_id: str,
757        n_tile: int = 2303,
758        n_roi: int = 0,
759        scanner_type: Type = Type.BZSCANNER,
760    ) -> Self:
761        """
762        Make a placeholder Scan object with only basic required information filled in.
763        :param slide_id: the slide ID
764        :param n_tile: the number of this tile, which will become the number of
765                       tiles in the scan
766        :param n_roi: the number of ROIs in the scan
767        :return: a Scan object
768        """
769        # Sanitize inputs here
770        slide_id = str(slide_id).strip().upper()
771        n_tile = int(n_tile)
772        n_roi = int(n_roi)
773        # Generate the object
774        scan = cls()
775        scan.slide_id = slide_id
776        if scanner_type == cls.Type.AXIOSCAN7:
777            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
778        elif scanner_type == cls.Type.BZSCANNER:
779            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
780        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
781        scan.roi[0].tile_rows = 1
782        scan.roi[0].tile_cols = n_tile + 1
783        return scan

Make a placeholder Scan object with only basic required information filled in.

Parameters
  • slide_id: the slide ID
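  • n_tile: the number (index) of this tile; the placeholder's first ROI is given n_tile + 1 tiles so that this tile exists
  • n_roi: the number (index) of this ROI; the placeholder is given n_roi + 1 ROIs so that this ROI exists
  • scanner_type: the scanner type, used to build the placeholder scanner ID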
Returns

a Scan object

class Scan.Type(enum.Enum):
34    class Type(enum.Enum):
35        BZSCANNER = "bzscanner"
36        AXIOSCAN7 = "axioscan7"
BZSCANNER = <Type.BZSCANNER: 'bzscanner'>
AXIOSCAN7 = <Type.AXIOSCAN7: 'axioscan7'>
class Scan.Channel(yaml.YAMLObject):
60    class Channel(yaml.YAMLObject):
61        """
62        Class that comprises a channel; we usually have multiple (2-5) per scan.
63        Contains three fields:
64        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
65        - exposure_ms: the exposure time to capture a frame in milliseconds
66        - intensity: the light intensity used OR the gain applied to the channel
67        """
68
69        yaml_tag = "csi_images.csi_scans.Scan.Channel"
70
71        def __init__(
72            self,
73            name: str = "",
74            exposure_ms: float = -1.0,
75            intensity: float = -1.0,
76            gain_applied: bool = False,
77        ):
78            self.name = name
79            self.exposure_ms = exposure_ms
80            self.intensity = intensity
81            self.gain_applied = gain_applied
82
83        def __repr__(self):
84            return yaml.dump(self, sort_keys=False)
85
86        def __eq__(self, other):
87            return self.__repr__() == other.__repr__()

Class that comprises a channel; we usually have multiple (2-5) per scan. Contains four fields:

  • name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
  • exposure_ms: the exposure time to capture a frame in milliseconds
  • intensity: the light intensity used OR the gain applied to the channel
  • gain_applied: a boolean flag stored with the channel (False by default)
Scan.Channel( name: str = '', exposure_ms: float = -1.0, intensity: float = -1.0, gain_applied: bool = False)
71        def __init__(
72            self,
73            name: str = "",
74            exposure_ms: float = -1.0,
75            intensity: float = -1.0,
76            gain_applied: bool = False,
77        ):
78            self.name = name
79            self.exposure_ms = exposure_ms
80            self.intensity = intensity
81            self.gain_applied = gain_applied
yaml_tag = 'csi_images.csi_scans.Scan.Channel'
name
exposure_ms
intensity
gain_applied
class Scan.ROI(yaml.YAMLObject):
 89    class ROI(yaml.YAMLObject):
 90        """
 91        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 92        """
 93
 94        yaml_tag = "csi_images.csi_scans.Scan.ROI"
 95
 96        def __init__(
 97            self,
 98            origin_x_um: int = -1,
 99            origin_y_um: int = -1,
100            width_um: int = -1,
101            height_um: int = -1,
102            tile_rows: int = -1,
103            tile_cols: int = -1,
104            focus_points=None,
105        ):
106            if focus_points is None:
107                focus_points = []
108            self.origin_x_um = origin_x_um
109            self.origin_y_um = origin_y_um
110            self.width_um = width_um
111            self.height_um = height_um
112            self.tile_rows = tile_rows
113            self.tile_cols = tile_cols
114            self.focus_points = focus_points
115
116        def __repr__(self):
117            return yaml.dump(self, sort_keys=False)
118
119        def __eq__(self, other):
120            return self.__repr__() == other.__repr__()
121
122        def similar(self, other):
123            return (
124                self.origin_y_um == other.origin_y_um
125                and self.origin_x_um == other.origin_x_um
126                and self.width_um == other.width_um
127                and self.height_um == other.height_um
128                and self.tile_rows == other.tile_rows
129                and self.tile_cols == other.tile_cols
130            )

Class that comprises an ROI; we usually have 1, but may have more in a scan.

Scan.ROI( origin_x_um: int = -1, origin_y_um: int = -1, width_um: int = -1, height_um: int = -1, tile_rows: int = -1, tile_cols: int = -1, focus_points=None)
 96        def __init__(
 97            self,
 98            origin_x_um: int = -1,
 99            origin_y_um: int = -1,
100            width_um: int = -1,
101            height_um: int = -1,
102            tile_rows: int = -1,
103            tile_cols: int = -1,
104            focus_points=None,
105        ):
106            if focus_points is None:
107                focus_points = []
108            self.origin_x_um = origin_x_um
109            self.origin_y_um = origin_y_um
110            self.width_um = width_um
111            self.height_um = height_um
112            self.tile_rows = tile_rows
113            self.tile_cols = tile_cols
114            self.focus_points = focus_points
yaml_tag = 'csi_images.csi_scans.Scan.ROI'
origin_x_um
origin_y_um
width_um
height_um
tile_rows
tile_cols
focus_points
def similar(self, other):
122        def similar(self, other):
123            return (
124                self.origin_y_um == other.origin_y_um
125                and self.origin_x_um == other.origin_x_um
126                and self.width_um == other.width_um
127                and self.height_um == other.height_um
128                and self.tile_rows == other.tile_rows
129                and self.tile_cols == other.tile_cols
130            )