csi_images.csi_scans

Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.

  1"""
  2Contains the Scan class, which holds important metadata from a scan. This metadata
  3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
  4object can also be loaded from a .czi file or a .txt file.
  5"""
  6
  7import os
  8import enum
  9import datetime
 10import zoneinfo
 11import typing
 12
 13import aicspylibczi
 14
 15import yaml
 16import json
 17
 18
 19class Scan(yaml.YAMLObject):
 20    """
 21    Class that composes a whole scan's metadata. Contains some universal data,
 22    plus lists for channels and ROIs.
 23
 24    .. include:: ../docs/csi_images/coordinate_systems.md
 25    """
 26
 27    yaml_tag = "csi_utils.scans.Scan"
 28
    class Type(enum.Enum):
        """Scanner families whose metadata a Scan can be loaded from."""

        # The values are used as prefixes of scanner IDs; the members are the
        # keys of METADATA_FILE_NAME and DATETIME_FORMAT.
        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"
 32
    # Looked up in load_czi with the microscope's user-defined name
    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name expected inside a scan folder, per scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # strptime/strftime patterns for timestamps, per scanner type; the
    # AXIOSCAN7 pattern is also the one used for the stored start/end dates
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    # Keys are the names used in slideinfo.txt; values are the names stored
    # on the Scan's channels
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }
 53
 54    class Channel(yaml.YAMLObject):
 55        """
 56        Class that comprises a channel; we usually have multiple (2-5) per scan.
 57        Contains three fields:
 58        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 59        - exposure_ms: the exposure time to capture a frame in milliseconds
 60        - intensity: the light intensity used OR the gain applied to the channel
 61        """
 62
 63        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 64
 65        def __init__(
 66            self,
 67            name: str = "",
 68            exposure_ms: float = -1.0,
 69            intensity: float = -1.0,
 70        ):
 71            self.name = name
 72            self.exposure_ms = exposure_ms
 73            self.intensity = intensity
 74
 75        def __repr__(self):
 76            return yaml.dump(self, sort_keys=False)
 77
 78        def __eq__(self, other):
 79            return self.__repr__() == other.__repr__()
 80
 81    class ROI(yaml.YAMLObject):
 82        """
 83        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 84        """
 85
 86        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 87
 88        def __init__(
 89            self,
 90            origin_x_um: int = -1,
 91            origin_y_um: int = -1,
 92            width_um: int = -1,
 93            height_um: int = -1,
 94            tile_rows: int = -1,
 95            tile_cols: int = -1,
 96            focus_points=None,
 97        ):
 98            if focus_points is None:
 99                focus_points = []
100            self.origin_x_um = origin_x_um
101            self.origin_y_um = origin_y_um
102            self.width_um = width_um
103            self.height_um = height_um
104            self.tile_rows = tile_rows
105            self.tile_cols = tile_cols
106            self.focus_points = focus_points
107
108        def __repr__(self):
109            return yaml.dump(self, sort_keys=False)
110
111        def __eq__(self, other):
112            return self.__repr__() == other.__repr__()
113
114        def similar(self, other):
115            return (
116                self.origin_y_um == other.origin_y_um
117                and self.origin_x_um == other.origin_x_um
118                and self.width_um == other.width_um
119                and self.height_um == other.height_um
120                and self.tile_rows == other.tile_rows
121                and self.tile_cols == other.tile_cols
122            )
123
124    def __init__(
125        self,
126        slide_id: str = "",
127        path: str = "",
128        start_date: str = "",
129        end_date: str = "",
130        scan_time_s: int = -1,
131        scanner_id: str = "",
132        tray: int = -1,
133        slot: int = -1,
134        camera: str = "",
135        objective: str = "",
136        pixel_size_um: float = -1.0,
137        tile_width_px: int = -1,
138        tile_height_px: int = -1,
139        tile_overlap_proportion: int = -1,
140        channels=None,
141        roi=None,
142    ):
143        if roi is None:
144            roi = []
145        if channels is None:
146            channels = []
147        self.slide_id = slide_id
148        self.path = path
149        self.start_date = start_date
150        self.end_date = end_date
151        self.scan_time_s = scan_time_s
152        self.scanner_id = scanner_id
153        self.tray = tray
154        self.slot = slot
155        self.camera = camera
156        self.objective = objective
157        self.pixel_size_um = pixel_size_um
158        self.tile_width_px = tile_width_px
159        self.tile_height_px = tile_height_px
160        self.tile_overlap_proportion = tile_overlap_proportion
161        self.channels = channels
162        self.roi = roi
163
164    def __repr__(self):
165        return yaml.dump(self, sort_keys=False)
166
167    def __eq__(self, other):
168        return self.__repr__() == other.__repr__()
169
170    def has_same_profile(self, other):
171        return (
172            self.camera == other.camera
173            and self.objective == other.objective
174            and self.pixel_size_um == other.pixel_size_um
175            and self.tile_width_px == other.tile_width_px
176            and self.tile_height_px == other.tile_height_px
177            and self.tile_overlap_proportion == other.tile_overlap_proportion
178            and self.channels == other.channels
179            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
180        )
181
182    def get_channel_names(self) -> list[str]:
183        """
184        Get the channel names in the scan's channel order.
185        :return: a list of channel names.
186        """
187        return [channel.name for channel in self.channels]
188
189    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
190        """
191        Given a list of channel names, return the corresponding indices in the scan's
192        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
193        actual AlexaFluor names (AF555, AF647, AF488).
194        If a list entry is None, it will return -1 for that entry.
195        :param channel_names: a list of channel names.
196        :return: a list of channel indices.
197        """
198        # Get the scan's channel name list
199        scan_channel_names = self.get_channel_names()
200
201        channel_indices = []
202        for name in channel_names:
203            # Convert any BZScanner channel names to the actual channel names
204            if name in self.BZSCANNER_CHANNEL_MAP:
205                name = self.BZSCANNER_CHANNEL_MAP[name]
206
207            # Append the corresponding index if possible
208            if name is None:
209                channel_indices.append(-1)
210            elif name in scan_channel_names:
211                channel_indices.append(scan_channel_names.index(name))
212            else:
213                raise ValueError(
214                    f"Channel name {name} not found in scan channels {scan_channel_names}"
215                )
216        return channel_indices
217
218    def save_yaml(self, output_path: str):
219        """
220        Write the Scan object to a .yaml file.
221        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
222        :return: nothing; will raise an error on failure
223        """
224        # Create necessary folders
225        output_path = os.path.abspath(output_path)
226        if os.path.splitext(output_path)[1] == ".yaml":
227            os.makedirs(os.path.dirname(output_path), exist_ok=True)
228        else:
229            os.makedirs(output_path, exist_ok=True)
230            # Add the standard metadata file name to the path if needed
231            output_path = os.path.join(
232                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
233            )
234
235        # Populate the file
236        with open(output_path, "w") as file:
237            yaml.dump(self, stream=file, sort_keys=False)
238
239    @classmethod
240    def load_yaml(cls, input_path: str) -> typing.Self:
241        """
242        Load a Scan object from a .yaml file.
243        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
244        :return: a Scan object
245        """
246        input_path = os.path.abspath(input_path)
247        if os.path.isdir(input_path):
248            input_path = os.path.join(
249                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
250            )
251        with open(input_path, "r") as file:
252            metadata_obj = yaml.load(file, Loader=yaml.Loader)
253        return metadata_obj
254
255    def to_dict(self) -> dict:
256        # Dump to json; then add indents and a top-level key
257        channels_json = json.dumps(
258            self.channels, default=lambda x: x.__dict__, indent=2
259        )
260        channels_json = "  ".join(channels_json.splitlines(True))
261        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
262
263        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
264        roi_json = "  ".join(roi_json.splitlines(True))
265        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
266
267        return {
268            "slide_id": self.slide_id,
269            "path": self.path,
270            "start_date": self.start_date,
271            "end_date": self.end_date,
272            "scan_time_s": self.scan_time_s,
273            "scanner_id": self.scanner_id,
274            "tray": self.tray,
275            "slot": self.slot,
276            "camera": self.camera,
277            "objective": self.objective,
278            "pixel_size_um": self.pixel_size_um,
279            "tile_width_px": self.tile_width_px,
280            "tile_height_px": self.tile_height_px,
281            "tile_overlap_proportion": self.tile_overlap_proportion,
282            "channels": channels_json,
283            "roi": roi_json,
284        }
285
286    @classmethod
287    def from_dict(cls, scan_dict) -> typing.Self:
288        local_timezone = zoneinfo.ZoneInfo("localtime")
289        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
290        result = cls(
291            slide_id=scan_dict["slide_id"],
292            path=scan_dict["path"],
293            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
294            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
295            scan_time_s=int(dt),
296            scanner_id=scan_dict["scanner_id"],
297            tray=scan_dict["tray"],
298            slot=scan_dict["slot"],
299            camera=scan_dict["camera"],
300            objective=scan_dict["objective"],
301            pixel_size_um=scan_dict["pixel_size"],
302            tile_width_px=scan_dict["tile_width"],
303            tile_height_px=scan_dict["tile_height"],
304            tile_overlap_proportion=scan_dict["tile_overlap"],
305        )
306        for channel_json in scan_dict["channels"]["data"]:
307            result.channels.append(
308                cls.Channel(
309                    name=channel_json["name"],
310                    exposure_ms=channel_json["exposure_ms"],
311                    intensity=channel_json["intensity"],
312                )
313            )
314        for roi_json in scan_dict["roi"]["data"]:
315            result.roi.append(
316                cls.ROI(
317                    origin_x_um=roi_json["origin_x_um"],
318                    origin_y_um=roi_json["origin_y_um"],
319                    width_um=roi_json["width_um"],
320                    height_um=roi_json["height_um"],
321                    tile_rows=roi_json["tile_rows"],
322                    tile_cols=roi_json["tile_cols"],
323                    focus_points=roi_json["focus_points"],
324                )
325            )
326        return result
327
328    @classmethod
329    def load_czi(cls, input_path: str) -> typing.Self:
330        """
331        :param input_path: the path to the .czi file
332        :return: a Scan object
333        """
334        # Normalize paths
335        input_path = os.path.abspath(input_path)
336
337        # Read in metadata as XML elements
338        metadata_xml = aicspylibczi.CziFile(input_path).meta
339        # Read in shape metadata from binary
340        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
341
342        # Populate metadata
343        scan = cls()
344
345        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
346        if scan.slide_id is not None:
347            scan.slide_id = scan.slide_id.strip().upper()
348        # Map the raw scanner ID (service ID) to our IDs
349        scan.scanner_id = cls.SCANNER_IDS[
350            metadata_xml.find(".//Microscope/UserDefinedName").text
351        ]
352
353        # Extract start and finish datetimes
354        date = metadata_xml.find(".//Document/CreationDate").text
355        # Strip out sub-second precision
356        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
357        date_as_datetime = datetime.datetime.strptime(
358            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
359        )
360        scan.start_date = date_as_datetime.strftime(
361            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
362        )
363        scan.scan_time_s = round(
364            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
365        )
366        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
367        scan.end_date = date_as_datetime.strftime(
368            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
369        )
370
371        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
372        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
373
374        # Get camera and magnifying info
375        scan.camera = (
376            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
377        )["Name"]
378        magnification = metadata_xml.find(
379            ".//Objectives/Objective/NominalMagnification"
380        )
381        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
382        scan.objective = f"{magnification.text}x-{aperture.text}"
383        scan.pixel_size_um = (
384            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
385        )
386        # Round off the pixel size to nanometers; might not be optimal, but this
387        # gets rounded when we send it to the database anyways (to 7 places)
388        scan.pixel_size_um = round(scan.pixel_size_um, 3)
389
390        # Get tile information
391        # Note: X Y is untested, could be flipped. I always forget. Just don't use
392        # non-square frames and we're all good.
393        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
394        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
395
396        scan.tile_width_px = rois_shape[0]["X"][1]
397        scan.tile_height_px = rois_shape[0]["Y"][1]
398        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
399
400        # Extract channels and create Channel objects from them
401        channel_indices = []
402        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
403            channel_indices.append(int(channel.attrib["Id"][-1]))
404            intensity_xml = channel.find(".//Intensity")
405            if intensity_xml is None:
406                intensity = 0
407            else:
408                intensity = float(intensity_xml.text[:-2]) * 1e-2
409            scan.channels.append(
410                cls.Channel(
411                    name=channel.attrib["Name"].upper(),
412                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
413                    intensity=intensity,
414                )
415            )
416        # Make sure the channels are sorted
417        scan.channels = [
418            channel for _, channel in sorted(zip(channel_indices, scan.channels))
419        ]
420        # Verify that the shape corresponds to the channels
421        for roi in rois_shape:
422            if roi["C"][1] != len(scan.channels):
423                raise ValueError(
424                    f"Number of channels {len(scan.channels)} "
425                    f"is not the same as the number of channels in an ROI: "
426                    f"{roi['C'][1]}"
427                )
428
429        # Get the real ROI limits; the metadata is not always correct
430        limits_xml = metadata_xml.findall(".//AllowedScanArea")
431        limits = [
432            round(float(limits_xml[0].find("Center").text.split(",")[0])),
433            round(float(limits_xml[0].find("Center").text.split(",")[1])),
434            round(float(limits_xml[0].find("Size").text.split(",")[0])),
435            round(float(limits_xml[0].find("Size").text.split(",")[1])),
436        ]
437        # Convert to top-left and bottom-right
438        limits = [
439            round(limits[0] - limits[2] / 2),
440            round(limits[1] - limits[3] / 2),
441            round(limits[0] + limits[2] / 2),
442            round(limits[1] + limits[3] / 2),
443        ]
444
445        # Extract ROIs and create ROI objects from them
446        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
447        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
448        if len(rois_xml_metadata) != len(rois_shape):
449            raise ValueError(
450                f"Metadata and binary data from {input_path} "
451                f"do not match in number of ROIs"
452            )
453        # We need both to determine the number of rows/columns because the XML lies
454        roi_indices = []
455        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
456            name = roi_xml.attrib["Name"]
457            # Determine the index of this scene
458            scene_index = -1
459            for scene in scenes_xml_metadata:
460                if scene.attrib["Name"] == name:
461                    scene_index = int(scene.attrib["Index"])
462                    break
463            if scene_index == -1:
464                raise ValueError(f"ROI {name} does not correspond to any scenes")
465            else:
466                roi_indices.append(scene_index)
467            # Extract other metadata
468            roi_limits = [
469                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
470                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
471                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
472                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
473            ]
474            # Convert to top-left and bottom-right
475            roi_limits = [
476                round(roi_limits[0] - roi_limits[2] / 2),
477                round(roi_limits[1] - roi_limits[3] / 2),
478                round(roi_limits[0] + roi_limits[2] / 2),
479                round(roi_limits[1] + roi_limits[3] / 2),
480            ]
481            # Bound the ROI to the actual scan limits
482            roi_limits = [
483                max(roi_limits[0], limits[0]),
484                max(roi_limits[1], limits[1]),
485                min(roi_limits[2], limits[2]),
486                min(roi_limits[3], limits[3]),
487            ]
488
489            tile_rows = int(roi_xml.find("Rows").text)
490            # Current best way of reliably extracting; <Columns> entry can be wrong
491            if (roi_shape["M"][1] % tile_rows) != 0:
492                raise ValueError(
493                    f"The number of tiles {roi_shape['M'][1]} is not "
494                    f"divisible by the tile rows {tile_rows}; metadata "
495                    f"must be messed up. Thanks Zeiss"
496                )
497            else:
498                tile_cols = int(roi_shape["M"][1] / tile_rows)
499            # Support points are actually the relevant focus points for this ROI
500            focus_points = []
501            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
502                focus_points.append(
503                    [
504                        int(float(focus_point.find("X").text)),
505                        int(float(focus_point.find("Y").text)),
506                        int(float(focus_point.find("Z").text)),
507                    ]
508                )
509            # Strip all sub-micron precision, it does not matter
510            scan.roi.append(
511                cls.ROI(
512                    origin_x_um=roi_limits[0],
513                    origin_y_um=roi_limits[1],
514                    width_um=roi_limits[2] - roi_limits[0],
515                    height_um=roi_limits[3] - roi_limits[1],
516                    tile_rows=tile_rows,
517                    tile_cols=tile_cols,
518                    focus_points=focus_points,
519                )
520            )
521        # Sort based on the scene indices
522        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
523
524        return scan
525
526    @classmethod
527    def load_txt(cls, input_path: str) -> typing.Self:
528        """
529        Loads a Scan object from a .txt file, which originates from the BZScanner.
530        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
531        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
532        :return: a Scan object
533        """
534        # Set paths
535        input_path = os.path.abspath(input_path)
536        if os.path.isdir(input_path):
537            input_path = os.path.join(
538                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
539            )
540
541        # Read in metadata as a dict
542        with open(input_path, "r") as file:
543            metadata_contents = file.read()
544            # Read each line, splitting on the = sign
545            metadata_dict = {}
546            for line in metadata_contents.splitlines():
547                key, value = line.split("=")
548                metadata_dict[key] = value
549
550        # Populate metadata
551        scan = cls()
552
553        scan.slide_id = metadata_dict["SLIDEID"]
554        scan.slide_id = scan.slide_id.strip().upper()
555
556        scan.path = metadata_dict["SLIDEDIR"]
557
558        # Extract start and finish datetimes
559        date = metadata_dict["DATE"]
560        date_as_datetime = datetime.datetime.strptime(
561            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
562        )
563        date_as_datetime = date_as_datetime.astimezone(
564            zoneinfo.ZoneInfo("America/Los_Angeles")
565        )  # Hardcoded because BZScanners are here
566        scan.start_date = date_as_datetime.strftime(
567            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
568        )
569        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
570        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
571        scan.end_date = date_as_datetime.strftime(
572            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
573        )
574
575        # Map the raw scanner ID (service ID) to our IDs
576        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
577        scan.tray = 0  # only one tray in a BZScanner
578        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
579
580        # Get camera and magnifying info
581        scan.camera = ""
582        magnification = 10
583        aperture = 0  # TODO: find the actual aperture
584        scan.objective = f"{magnification}x-{aperture}"
585        scan.pixel_size_um = 0.591  # Estimated from image metadata
586
587        # Get tile information
588        scan.tile_width_px = 1362  # Known from image metadata
589        scan.tile_height_px = 1004  # Known from image metadata
590        scan.tile_overlap_proportion = 0
591
592        # Extract channels and create Channel objects from them
593        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
594            channel_settings = metadata_dict[channel].split(",")
595            if channel_settings[0] == "0":
596                continue
597            scan.channels.append(
598                cls.Channel(
599                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
600                    exposure_ms=float(channel_settings[1]),
601                    intensity=float(channel_settings[2]),
602                )
603            )
604
605        # Get focus points
606        focus_points = []
607        for i in range(33):
608            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
609            if focus_point[0] == "0":
610                break
611            focus_points.append(
612                [
613                    int(float(focus_point[1])),
614                    int(float(focus_point[2])),
615                    int(float(focus_point[3])),
616                ]
617            )
618
619        # In the BZScanner, the slide is vertical instead of horizontal
620        # We put in nominal values for the ROI, which is oriented vertically as well
621        tile_rows = 96
622        tile_cols = 24
623        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
624        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
625        origin_x_um = 2500 + round((20000 - roi_width) / 2)
626        origin_y_um = 2500 + round((58000 - roi_height) / 2)
627        scan.roi.append(
628            cls.ROI(
629                origin_x_um=origin_x_um,
630                origin_y_um=origin_y_um,
631                width_um=roi_width,
632                height_um=roi_height,
633                tile_rows=tile_rows,
634                tile_cols=tile_cols,
635                focus_points=focus_points,
636            )
637        )
638        return scan
639
640    @classmethod
641    def load_from_folder(cls, input_path: str) -> typing.Self:
642        """
643        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
644        Prefers scan.yaml if both exist.
645        :param input_path: /path/to/folder
646        :return: a Scan object
647        """
648        input_path = os.path.abspath(input_path)
649        if os.path.isfile(
650            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
651        ):
652            return cls.load_yaml(input_path)
653        elif os.path.isfile(
654            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
655        ):
656            return cls.load_txt(input_path)
657        else:
658            raise ValueError(
659                f"No scan metadata files "
660                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
661                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
662                f"{input_path}"
663            )
664        pass
665
666    @classmethod
667    def make_placeholder(
668        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
669    ) -> typing.Self:
670        """
671        Make a placeholder Scan object with only basic required information filled in.
672        :param slide_id: the slide ID
673        :param n_tile: the number of this tile, which will become the number of
674                       tiles in the scan
675        :param n_roi: the number of ROIs in the scan
676        :return: a Scan object
677        """
678        # Sanitize inputs here
679        slide_id = str(slide_id).strip().upper()
680        n_tile = int(n_tile)
681        n_roi = int(n_roi)
682        # Generate the object
683        scan = cls()
684        scan.slide_id = slide_id
685        scan.roi = [cls.ROI() for _ in range(n_roi)]
686        scan.roi[0].tile_cols = n_tile
687        return scan
class Scan(yaml.YAMLObject):
 20class Scan(yaml.YAMLObject):
 21    """
 22    Class that composes a whole scan's metadata. Contains some universal data,
 23    plus lists for channels and ROIs.
 24
 25    .. include:: ../docs/csi_images/coordinate_systems.md
 26    """
 27
 28    yaml_tag = "csi_utils.scans.Scan"
 29
 30    class Type(enum.Enum):
 31        BZSCANNER = "bzscanner"
 32        AXIOSCAN7 = "axioscan7"
 33
 34    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
 35    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""
 36
 37    METADATA_FILE_NAME = {
 38        Type.AXIOSCAN7: "scan.yaml",
 39        Type.BZSCANNER: "slideinfo.txt",
 40    }
 41    DATETIME_FORMAT = {
 42        Type.AXIOSCAN7: "%Y-%m-%dT%H:%M:%S%z",
 43        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
 44    }
 45
 46    # Actual channel names, from the BZScanner's default order
 47    BZSCANNER_CHANNEL_MAP = {
 48        "DAPI": "DAPI",
 49        "TRITC": "AF555",
 50        "CY5": "AF647",
 51        "BF": "BRIGHT",
 52        "FITC": "AF488",
 53    }
 54
 55    class Channel(yaml.YAMLObject):
 56        """
 57        Class that comprises a channel; we usually have multiple (2-5) per scan.
 58        Contains three fields:
 59        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
 60        - exposure_ms: the exposure time to capture a frame in milliseconds
 61        - intensity: the light intensity used OR the gain applied to the channel
 62        """
 63
 64        yaml_tag = "csi_utils.csi_scans.Scan.Channel"
 65
 66        def __init__(
 67            self,
 68            name: str = "",
 69            exposure_ms: float = -1.0,
 70            intensity: float = -1.0,
 71        ):
 72            self.name = name
 73            self.exposure_ms = exposure_ms
 74            self.intensity = intensity
 75
 76        def __repr__(self):
 77            return yaml.dump(self, sort_keys=False)
 78
 79        def __eq__(self, other):
 80            return self.__repr__() == other.__repr__()
 81
 82    class ROI(yaml.YAMLObject):
 83        """
 84        Class that comprises an ROI; we usually have 1, but may have more in a scan.
 85        """
 86
 87        yaml_tag = "csi_utils.csi_scans.Scan.ROI"
 88
 89        def __init__(
 90            self,
 91            origin_x_um: int = -1,
 92            origin_y_um: int = -1,
 93            width_um: int = -1,
 94            height_um: int = -1,
 95            tile_rows: int = -1,
 96            tile_cols: int = -1,
 97            focus_points=None,
 98        ):
 99            if focus_points is None:
100                focus_points = []
101            self.origin_x_um = origin_x_um
102            self.origin_y_um = origin_y_um
103            self.width_um = width_um
104            self.height_um = height_um
105            self.tile_rows = tile_rows
106            self.tile_cols = tile_cols
107            self.focus_points = focus_points
108
109        def __repr__(self):
110            return yaml.dump(self, sort_keys=False)
111
112        def __eq__(self, other):
113            return self.__repr__() == other.__repr__()
114
115        def similar(self, other):
116            return (
117                self.origin_y_um == other.origin_y_um
118                and self.origin_x_um == other.origin_x_um
119                and self.width_um == other.width_um
120                and self.height_um == other.height_um
121                and self.tile_rows == other.tile_rows
122                and self.tile_cols == other.tile_cols
123            )
124
125    def __init__(
126        self,
127        slide_id: str = "",
128        path: str = "",
129        start_date: str = "",
130        end_date: str = "",
131        scan_time_s: int = -1,
132        scanner_id: str = "",
133        tray: int = -1,
134        slot: int = -1,
135        camera: str = "",
136        objective: str = "",
137        pixel_size_um: float = -1.0,
138        tile_width_px: int = -1,
139        tile_height_px: int = -1,
140        tile_overlap_proportion: int = -1,
141        channels=None,
142        roi=None,
143    ):
144        if roi is None:
145            roi = []
146        if channels is None:
147            channels = []
148        self.slide_id = slide_id
149        self.path = path
150        self.start_date = start_date
151        self.end_date = end_date
152        self.scan_time_s = scan_time_s
153        self.scanner_id = scanner_id
154        self.tray = tray
155        self.slot = slot
156        self.camera = camera
157        self.objective = objective
158        self.pixel_size_um = pixel_size_um
159        self.tile_width_px = tile_width_px
160        self.tile_height_px = tile_height_px
161        self.tile_overlap_proportion = tile_overlap_proportion
162        self.channels = channels
163        self.roi = roi
164
165    def __repr__(self):
166        return yaml.dump(self, sort_keys=False)
167
168    def __eq__(self, other):
169        return self.__repr__() == other.__repr__()
170
171    def has_same_profile(self, other):
172        return (
173            self.camera == other.camera
174            and self.objective == other.objective
175            and self.pixel_size_um == other.pixel_size_um
176            and self.tile_width_px == other.tile_width_px
177            and self.tile_height_px == other.tile_height_px
178            and self.tile_overlap_proportion == other.tile_overlap_proportion
179            and self.channels == other.channels
180            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
181        )
182
183    def get_channel_names(self) -> list[str]:
184        """
185        Get the channel names in the scan's channel order.
186        :return: a list of channel names.
187        """
188        return [channel.name for channel in self.channels]
189
190    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
191        """
192        Given a list of channel names, return the corresponding indices in the scan's
193        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
194        actual AlexaFluor names (AF555, AF647, AF488).
195        If a list entry is None, it will return -1 for that entry.
196        :param channel_names: a list of channel names.
197        :return: a list of channel indices.
198        """
199        # Get the scan's channel name list
200        scan_channel_names = self.get_channel_names()
201
202        channel_indices = []
203        for name in channel_names:
204            # Convert any BZScanner channel names to the actual channel names
205            if name in self.BZSCANNER_CHANNEL_MAP:
206                name = self.BZSCANNER_CHANNEL_MAP[name]
207
208            # Append the corresponding index if possible
209            if name is None:
210                channel_indices.append(-1)
211            elif name in scan_channel_names:
212                channel_indices.append(scan_channel_names.index(name))
213            else:
214                raise ValueError(
215                    f"Channel name {name} not found in scan channels {scan_channel_names}"
216                )
217        return channel_indices
218
219    def save_yaml(self, output_path: str):
220        """
221        Write the Scan object to a .yaml file.
222        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
223        :return: nothing; will raise an error on failure
224        """
225        # Create necessary folders
226        output_path = os.path.abspath(output_path)
227        if os.path.splitext(output_path)[1] == ".yaml":
228            os.makedirs(os.path.dirname(output_path), exist_ok=True)
229        else:
230            os.makedirs(output_path, exist_ok=True)
231            # Add the standard metadata file name to the path if needed
232            output_path = os.path.join(
233                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
234            )
235
236        # Populate the file
237        with open(output_path, "w") as file:
238            yaml.dump(self, stream=file, sort_keys=False)
239
240    @classmethod
241    def load_yaml(cls, input_path: str) -> typing.Self:
242        """
243        Load a Scan object from a .yaml file.
244        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
245        :return: a Scan object
246        """
247        input_path = os.path.abspath(input_path)
248        if os.path.isdir(input_path):
249            input_path = os.path.join(
250                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
251            )
252        with open(input_path, "r") as file:
253            metadata_obj = yaml.load(file, Loader=yaml.Loader)
254        return metadata_obj
255
256    def to_dict(self) -> dict:
257        # Dump to json; then add indents and a top-level key
258        channels_json = json.dumps(
259            self.channels, default=lambda x: x.__dict__, indent=2
260        )
261        channels_json = "  ".join(channels_json.splitlines(True))
262        channels_json = "{\n  " + '"data": ' + channels_json + "\n}"
263
264        roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2)
265        roi_json = "  ".join(roi_json.splitlines(True))
266        roi_json = "{\n  " + '"data": ' + roi_json + "\n}"
267
268        return {
269            "slide_id": self.slide_id,
270            "path": self.path,
271            "start_date": self.start_date,
272            "end_date": self.end_date,
273            "scan_time_s": self.scan_time_s,
274            "scanner_id": self.scanner_id,
275            "tray": self.tray,
276            "slot": self.slot,
277            "camera": self.camera,
278            "objective": self.objective,
279            "pixel_size_um": self.pixel_size_um,
280            "tile_width_px": self.tile_width_px,
281            "tile_height_px": self.tile_height_px,
282            "tile_overlap_proportion": self.tile_overlap_proportion,
283            "channels": channels_json,
284            "roi": roi_json,
285        }
286
287    @classmethod
288    def from_dict(cls, scan_dict) -> typing.Self:
289        local_timezone = zoneinfo.ZoneInfo("localtime")
290        dt = (scan_dict["end_datetime"] - scan_dict["start_datetime"]).total_seconds()
291        result = cls(
292            slide_id=scan_dict["slide_id"],
293            path=scan_dict["path"],
294            start_date=scan_dict["start_datetime"].astimezone(local_timezone),
295            end_date=scan_dict["end_datetime"].astimezone(local_timezone),
296            scan_time_s=int(dt),
297            scanner_id=scan_dict["scanner_id"],
298            tray=scan_dict["tray"],
299            slot=scan_dict["slot"],
300            camera=scan_dict["camera"],
301            objective=scan_dict["objective"],
302            pixel_size_um=scan_dict["pixel_size"],
303            tile_width_px=scan_dict["tile_width"],
304            tile_height_px=scan_dict["tile_height"],
305            tile_overlap_proportion=scan_dict["tile_overlap"],
306        )
307        for channel_json in scan_dict["channels"]["data"]:
308            result.channels.append(
309                cls.Channel(
310                    name=channel_json["name"],
311                    exposure_ms=channel_json["exposure_ms"],
312                    intensity=channel_json["intensity"],
313                )
314            )
315        for roi_json in scan_dict["roi"]["data"]:
316            result.roi.append(
317                cls.ROI(
318                    origin_x_um=roi_json["origin_x_um"],
319                    origin_y_um=roi_json["origin_y_um"],
320                    width_um=roi_json["width_um"],
321                    height_um=roi_json["height_um"],
322                    tile_rows=roi_json["tile_rows"],
323                    tile_cols=roi_json["tile_cols"],
324                    focus_points=roi_json["focus_points"],
325                )
326            )
327        return result
328
329    @classmethod
330    def load_czi(cls, input_path: str) -> typing.Self:
331        """
332        :param input_path: the path to the .czi file
333        :return: a Scan object
334        """
335        # Normalize paths
336        input_path = os.path.abspath(input_path)
337
338        # Read in metadata as XML elements
339        metadata_xml = aicspylibczi.CziFile(input_path).meta
340        # Read in shape metadata from binary
341        rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()
342
343        # Populate metadata
344        scan = cls()
345
346        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
347        if scan.slide_id is not None:
348            scan.slide_id = scan.slide_id.strip().upper()
349        # Map the raw scanner ID (service ID) to our IDs
350        scan.scanner_id = cls.SCANNER_IDS[
351            metadata_xml.find(".//Microscope/UserDefinedName").text
352        ]
353
354        # Extract start and finish datetimes
355        date = metadata_xml.find(".//Document/CreationDate").text
356        # Strip out sub-second precision
357        date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
358        date_as_datetime = datetime.datetime.strptime(
359            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
360        )
361        scan.start_date = date_as_datetime.strftime(
362            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
363        )
364        scan.scan_time_s = round(
365            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
366        )
367        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
368        scan.end_date = date_as_datetime.strftime(
369            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
370        )
371
372        scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
373        scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])
374
375        # Get camera and magnifying info
376        scan.camera = (
377            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
378        )["Name"]
379        magnification = metadata_xml.find(
380            ".//Objectives/Objective/NominalMagnification"
381        )
382        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
383        scan.objective = f"{magnification.text}x-{aperture.text}"
384        scan.pixel_size_um = (
385            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
386        )
387        # Round off the pixel size to nanometers; might not be optimal, but this
388        # gets rounded when we send it to the database anyways (to 7 places)
389        scan.pixel_size_um = round(scan.pixel_size_um, 3)
390
391        # Get tile information
392        # Note: X Y is untested, could be flipped. I always forget. Just don't use
393        # non-square frames and we're all good.
394        tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
395        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]
396
397        scan.tile_width_px = rois_shape[0]["X"][1]
398        scan.tile_height_px = rois_shape[0]["Y"][1]
399        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)
400
401        # Extract channels and create Channel objects from them
402        channel_indices = []
403        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
404            channel_indices.append(int(channel.attrib["Id"][-1]))
405            intensity_xml = channel.find(".//Intensity")
406            if intensity_xml is None:
407                intensity = 0
408            else:
409                intensity = float(intensity_xml.text[:-2]) * 1e-2
410            scan.channels.append(
411                cls.Channel(
412                    name=channel.attrib["Name"].upper(),
413                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
414                    intensity=intensity,
415                )
416            )
417        # Make sure the channels are sorted
418        scan.channels = [
419            channel for _, channel in sorted(zip(channel_indices, scan.channels))
420        ]
421        # Verify that the shape corresponds to the channels
422        for roi in rois_shape:
423            if roi["C"][1] != len(scan.channels):
424                raise ValueError(
425                    f"Number of channels {len(scan.channels)} "
426                    f"is not the same as the number of channels in an ROI: "
427                    f"{roi['C'][1]}"
428                )
429
430        # Get the real ROI limits; the metadata is not always correct
431        limits_xml = metadata_xml.findall(".//AllowedScanArea")
432        limits = [
433            round(float(limits_xml[0].find("Center").text.split(",")[0])),
434            round(float(limits_xml[0].find("Center").text.split(",")[1])),
435            round(float(limits_xml[0].find("Size").text.split(",")[0])),
436            round(float(limits_xml[0].find("Size").text.split(",")[1])),
437        ]
438        # Convert to top-left and bottom-right
439        limits = [
440            round(limits[0] - limits[2] / 2),
441            round(limits[1] - limits[3] / 2),
442            round(limits[0] + limits[2] / 2),
443            round(limits[1] + limits[3] / 2),
444        ]
445
446        # Extract ROIs and create ROI objects from them
447        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
448        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
449        if len(rois_xml_metadata) != len(rois_shape):
450            raise ValueError(
451                f"Metadata and binary data from {input_path} "
452                f"do not match in number of ROIs"
453            )
454        # We need both to determine the number of rows/columns because the XML lies
455        roi_indices = []
456        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
457            name = roi_xml.attrib["Name"]
458            # Determine the index of this scene
459            scene_index = -1
460            for scene in scenes_xml_metadata:
461                if scene.attrib["Name"] == name:
462                    scene_index = int(scene.attrib["Index"])
463                    break
464            if scene_index == -1:
465                raise ValueError(f"ROI {name} does not correspond to any scenes")
466            else:
467                roi_indices.append(scene_index)
468            # Extract other metadata
469            roi_limits = [
470                round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
471                round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
472                round(float(roi_xml.find("ContourSize").text.split(",")[0])),
473                round(float(roi_xml.find("ContourSize").text.split(",")[1])),
474            ]
475            # Convert to top-left and bottom-right
476            roi_limits = [
477                round(roi_limits[0] - roi_limits[2] / 2),
478                round(roi_limits[1] - roi_limits[3] / 2),
479                round(roi_limits[0] + roi_limits[2] / 2),
480                round(roi_limits[1] + roi_limits[3] / 2),
481            ]
482            # Bound the ROI to the actual scan limits
483            roi_limits = [
484                max(roi_limits[0], limits[0]),
485                max(roi_limits[1], limits[1]),
486                min(roi_limits[2], limits[2]),
487                min(roi_limits[3], limits[3]),
488            ]
489
490            tile_rows = int(roi_xml.find("Rows").text)
491            # Current best way of reliably extracting; <Columns> entry can be wrong
492            if (roi_shape["M"][1] % tile_rows) != 0:
493                raise ValueError(
494                    f"The number of tiles {roi_shape['M'][1]} is not "
495                    f"divisible by the tile rows {tile_rows}; metadata "
496                    f"must be messed up. Thanks Zeiss"
497                )
498            else:
499                tile_cols = int(roi_shape["M"][1] / tile_rows)
500            # Support points are actually the relevant focus points for this ROI
501            focus_points = []
502            for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
503                focus_points.append(
504                    [
505                        int(float(focus_point.find("X").text)),
506                        int(float(focus_point.find("Y").text)),
507                        int(float(focus_point.find("Z").text)),
508                    ]
509                )
510            # Strip all sub-micron precision, it does not matter
511            scan.roi.append(
512                cls.ROI(
513                    origin_x_um=roi_limits[0],
514                    origin_y_um=roi_limits[1],
515                    width_um=roi_limits[2] - roi_limits[0],
516                    height_um=roi_limits[3] - roi_limits[1],
517                    tile_rows=tile_rows,
518                    tile_cols=tile_cols,
519                    focus_points=focus_points,
520                )
521            )
522        # Sort based on the scene indices
523        scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]
524
525        return scan
526
527    @classmethod
528    def load_txt(cls, input_path: str) -> typing.Self:
529        """
530        Loads a Scan object from a .txt file, which originates from the BZScanner.
531        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
532        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
533        :return: a Scan object
534        """
535        # Set paths
536        input_path = os.path.abspath(input_path)
537        if os.path.isdir(input_path):
538            input_path = os.path.join(
539                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
540            )
541
542        # Read in metadata as a dict
543        with open(input_path, "r") as file:
544            metadata_contents = file.read()
545            # Read each line, splitting on the = sign
546            metadata_dict = {}
547            for line in metadata_contents.splitlines():
548                key, value = line.split("=")
549                metadata_dict[key] = value
550
551        # Populate metadata
552        scan = cls()
553
554        scan.slide_id = metadata_dict["SLIDEID"]
555        scan.slide_id = scan.slide_id.strip().upper()
556
557        scan.path = metadata_dict["SLIDEDIR"]
558
559        # Extract start and finish datetimes
560        date = metadata_dict["DATE"]
561        date_as_datetime = datetime.datetime.strptime(
562            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
563        )
564        date_as_datetime = date_as_datetime.astimezone(
565            zoneinfo.ZoneInfo("America/Los_Angeles")
566        )  # Hardcoded because BZScanners are here
567        scan.start_date = date_as_datetime.strftime(
568            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
569        )
570        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
571        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
572        scan.end_date = date_as_datetime.strftime(
573            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
574        )
575
576        # Map the raw scanner ID (service ID) to our IDs
577        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
578        scan.tray = 0  # only one tray in a BZScanner
579        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
580
581        # Get camera and magnifying info
582        scan.camera = ""
583        magnification = 10
584        aperture = 0  # TODO: find the actual aperture
585        scan.objective = f"{magnification}x-{aperture}"
586        scan.pixel_size_um = 0.591  # Estimated from image metadata
587
588        # Get tile information
589        scan.tile_width_px = 1362  # Known from image metadata
590        scan.tile_height_px = 1004  # Known from image metadata
591        scan.tile_overlap_proportion = 0
592
593        # Extract channels and create Channel objects from them
594        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
595            channel_settings = metadata_dict[channel].split(",")
596            if channel_settings[0] == "0":
597                continue
598            scan.channels.append(
599                cls.Channel(
600                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
601                    exposure_ms=float(channel_settings[1]),
602                    intensity=float(channel_settings[2]),
603                )
604            )
605
606        # Get focus points
607        focus_points = []
608        for i in range(33):
609            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
610            if focus_point[0] == "0":
611                break
612            focus_points.append(
613                [
614                    int(float(focus_point[1])),
615                    int(float(focus_point[2])),
616                    int(float(focus_point[3])),
617                ]
618            )
619
620        # In the BZScanner, the slide is vertical instead of horizontal
621        # We put in nominal values for the ROI, which is oriented vertically as well
622        tile_rows = 96
623        tile_cols = 24
624        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
625        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
626        origin_x_um = 2500 + round((20000 - roi_width) / 2)
627        origin_y_um = 2500 + round((58000 - roi_height) / 2)
628        scan.roi.append(
629            cls.ROI(
630                origin_x_um=origin_x_um,
631                origin_y_um=origin_y_um,
632                width_um=roi_width,
633                height_um=roi_height,
634                tile_rows=tile_rows,
635                tile_cols=tile_cols,
636                focus_points=focus_points,
637            )
638        )
639        return scan
640
641    @classmethod
642    def load_from_folder(cls, input_path: str) -> typing.Self:
643        """
644        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
645        Prefers scan.yaml if both exist.
646        :param input_path: /path/to/folder
647        :return: a Scan object
648        """
649        input_path = os.path.abspath(input_path)
650        if os.path.isfile(
651            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
652        ):
653            return cls.load_yaml(input_path)
654        elif os.path.isfile(
655            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
656        ):
657            return cls.load_txt(input_path)
658        else:
659            raise ValueError(
660                f"No scan metadata files "
661                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
662                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
663                f"{input_path}"
664            )
665        pass
666
667    @classmethod
668    def make_placeholder(
669        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
670    ) -> typing.Self:
671        """
672        Make a placeholder Scan object with only basic required information filled in.
673        :param slide_id: the slide ID
674        :param n_tile: the number of this tile, which will become the number of
675                       tiles in the scan
676        :param n_roi: the number of ROIs in the scan
677        :return: a Scan object
678        """
679        # Sanitize inputs here
680        slide_id = str(slide_id).strip().upper()
681        n_tile = int(n_tile)
682        n_roi = int(n_roi)
683        # Generate the object
684        scan = cls()
685        scan.slide_id = slide_id
686        scan.roi = [cls.ROI() for _ in range(n_roi)]
687        scan.roi[0].tile_cols = n_tile
688        return scan

Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.

Scans -> scan-level coordinate frames. Each scan maintains its own scan-level coordinate frames based on the scanner it was scanned with.

Slide-level coordinate frame (common, agreed-upon frame of reference for a slide).

Picture of the slide coordinate system, which assumes a slide placed horizontally
with the label on the left. The x-axis points to the right, and the y-axis points
down. The origin is at the top left corner. Key positions, such as the origin of the
slide's active area at (14500, 2500) micrometers and the bottom-right corner at
(72500, 22500) micrometers are displayed.

Scan( slide_id: str = '', path: str = '', start_date: str = '', end_date: str = '', scan_time_s: int = -1, scanner_id: str = '', tray: int = -1, slot: int = -1, camera: str = '', objective: str = '', pixel_size_um: float = -1.0, tile_width_px: int = -1, tile_height_px: int = -1, tile_overlap_proportion: int = -1, channels=None, roi=None)
125    def __init__(
126        self,
127        slide_id: str = "",
128        path: str = "",
129        start_date: str = "",
130        end_date: str = "",
131        scan_time_s: int = -1,
132        scanner_id: str = "",
133        tray: int = -1,
134        slot: int = -1,
135        camera: str = "",
136        objective: str = "",
137        pixel_size_um: float = -1.0,
138        tile_width_px: int = -1,
139        tile_height_px: int = -1,
140        tile_overlap_proportion: int = -1,
141        channels=None,
142        roi=None,
143    ):
144        if roi is None:
145            roi = []
146        if channels is None:
147            channels = []
148        self.slide_id = slide_id
149        self.path = path
150        self.start_date = start_date
151        self.end_date = end_date
152        self.scan_time_s = scan_time_s
153        self.scanner_id = scanner_id
154        self.tray = tray
155        self.slot = slot
156        self.camera = camera
157        self.objective = objective
158        self.pixel_size_um = pixel_size_um
159        self.tile_width_px = tile_width_px
160        self.tile_height_px = tile_height_px
161        self.tile_overlap_proportion = tile_overlap_proportion
162        self.channels = channels
163        self.roi = roi
yaml_tag = 'csi_utils.scans.Scan'
SCANNER_IDS = {'4661000426': 'axioscan7_0'}

Axioscan 7 scanner IDs (service number), mapped to our scanner IDs

METADATA_FILE_NAME = {<Type.AXIOSCAN7: 'axioscan7'>: 'scan.yaml', <Type.BZSCANNER: 'bzscanner'>: 'slideinfo.txt'}
DATETIME_FORMAT = {<Type.AXIOSCAN7: 'axioscan7'>: '%Y-%m-%dT%H:%M:%S%z', <Type.BZSCANNER: 'bzscanner'>: '%a %b %d %H:%M:%S %Y'}
BZSCANNER_CHANNEL_MAP = {'DAPI': 'DAPI', 'TRITC': 'AF555', 'CY5': 'AF647', 'BF': 'BRIGHT', 'FITC': 'AF488'}
slide_id
path
start_date
end_date
scan_time_s
scanner_id
tray
slot
camera
objective
pixel_size_um
tile_width_px
tile_height_px
tile_overlap_proportion
channels
roi
def has_same_profile(self, other):
171    def has_same_profile(self, other):
172        return (
173            self.camera == other.camera
174            and self.objective == other.objective
175            and self.pixel_size_um == other.pixel_size_um
176            and self.tile_width_px == other.tile_width_px
177            and self.tile_height_px == other.tile_height_px
178            and self.tile_overlap_proportion == other.tile_overlap_proportion
179            and self.channels == other.channels
180            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
181        )
def get_channel_names(self) -> list[str]:
    """
    List the name of every channel, preserving the scan's channel order.
    :return: a list of channel names.
    """
    names = []
    for channel in self.channels:
        names.append(channel.name)
    return names

Get the channel names in the scan's channel order.

Returns

a list of channel names.

def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
190    def get_channel_indices(self, channel_names: list[str | None]) -> list[int]:
191        """
192        Given a list of channel names, return the corresponding indices in the scan's
193        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
194        actual AlexaFluor names (AF555, AF647, AF488).
195        If a list entry is None, it will return -1 for that entry.
196        :param channel_names: a list of channel names.
197        :return: a list of channel indices.
198        """
199        # Get the scan's channel name list
200        scan_channel_names = self.get_channel_names()
201
202        channel_indices = []
203        for name in channel_names:
204            # Convert any BZScanner channel names to the actual channel names
205            if name in self.BZSCANNER_CHANNEL_MAP:
206                name = self.BZSCANNER_CHANNEL_MAP[name]
207
208            # Append the corresponding index if possible
209            if name is None:
210                channel_indices.append(-1)
211            elif name in scan_channel_names:
212                channel_indices.append(scan_channel_names.index(name))
213            else:
214                raise ValueError(
215                    f"Channel name {name} not found in scan channels {scan_channel_names}"
216                )
217        return channel_indices

Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is None, it will return -1 for that entry.

Parameters
  • channel_names: a list of channel names.
Returns

a list of channel indices.

def save_yaml(self, output_path: str):
    """
    Serialize the Scan object to a .yaml file, creating folders as needed.
    :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
    :return: nothing; will raise an error on failure
    """
    target = os.path.abspath(output_path)
    _, extension = os.path.splitext(target)
    if extension == ".yaml":
        # A file path was given; only its parent folder has to exist
        os.makedirs(os.path.dirname(target), exist_ok=True)
    else:
        # A folder was given; write to the standard metadata file name inside it
        os.makedirs(target, exist_ok=True)
        target = os.path.join(target, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7])

    # Populate the file, keeping attributes in their assignment order
    with open(target, "w") as file:
        yaml.dump(self, stream=file, sort_keys=False)

Write the Scan object to a .yaml file.

Parameters
  • output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns

nothing; will raise an error on failure

@classmethod
def load_yaml(cls, input_path: str) -> typing.Self:
    """
    Read a Scan object back from a .yaml file.
    :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
    :return: a Scan object
    """
    yaml_path = os.path.abspath(input_path)
    if os.path.isdir(yaml_path):
        # A folder was given; look for the standard metadata file inside it
        yaml_path = os.path.join(
            yaml_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
        )
    # NOTE(review): yaml.Loader is not a safe loader and can construct arbitrary
    # Python objects; only load files that come from a trusted source. The result
    # is returned as-is, without checking that it is actually a Scan.
    with open(yaml_path, "r") as file:
        return yaml.load(file, Loader=yaml.Loader)

Load a Scan object from a .yaml file.

Parameters
  • input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns

a Scan object

def to_dict(self) -> dict:
    """
    Flatten the scan into a dictionary.

    Scalar attributes are copied as-is. The channel and ROI lists are each
    serialized to a JSON string that holds the list under a top-level "data" key.
    :return: a dictionary with one entry per Scan attribute.
    """

    def wrap_as_json(items) -> str:
        # Dump to json, then indent every line after the first by one level so
        # the list nests cleanly under the top-level key
        dumped = json.dumps(items, default=lambda x: x.__dict__, indent=2)
        indented = "  ".join(dumped.splitlines(True))
        return "{\n  " + '"data": ' + indented + "\n}"

    scalar_fields = (
        "slide_id",
        "path",
        "start_date",
        "end_date",
        "scan_time_s",
        "scanner_id",
        "tray",
        "slot",
        "camera",
        "objective",
        "pixel_size_um",
        "tile_width_px",
        "tile_height_px",
        "tile_overlap_proportion",
    )
    channels_json = wrap_as_json(self.channels)
    roi_json = wrap_as_json(self.roi)

    result = {field: getattr(self, field) for field in scalar_fields}
    result["channels"] = channels_json
    result["roi"] = roi_json
    return result
@classmethod
def from_dict(cls, scan_dict) -> typing.Self:
    """
    Build a Scan from a dictionary of scan fields.

    The start and end datetimes are converted to the local timezone and the
    scan time is derived from their difference. Channels and ROIs are read from
    the "data" list of the "channels" and "roi" entries.
    NOTE(review): the keys read here (e.g. "start_datetime", "pixel_size",
    "tile_width") are not the keys written by to_dict(), and the "channels" and
    "roi" entries must already be parsed into dictionaries - confirm the
    intended source of scan_dict.
    :param scan_dict: a dictionary holding the scan fields.
    :return: a Scan object
    """
    # NOTE(review): relies on a "localtime" entry being available in the
    # system's timezone database
    local_timezone = zoneinfo.ZoneInfo("localtime")
    end, start = scan_dict["end_datetime"], scan_dict["start_datetime"]
    elapsed_s = (end - start).total_seconds()

    scan = cls(
        slide_id=scan_dict["slide_id"],
        path=scan_dict["path"],
        start_date=scan_dict["start_datetime"].astimezone(local_timezone),
        end_date=scan_dict["end_datetime"].astimezone(local_timezone),
        scan_time_s=int(elapsed_s),
        scanner_id=scan_dict["scanner_id"],
        tray=scan_dict["tray"],
        slot=scan_dict["slot"],
        camera=scan_dict["camera"],
        objective=scan_dict["objective"],
        pixel_size_um=scan_dict["pixel_size"],
        tile_width_px=scan_dict["tile_width"],
        tile_height_px=scan_dict["tile_height"],
        tile_overlap_proportion=scan_dict["tile_overlap"],
    )

    channel_fields = ("name", "exposure_ms", "intensity")
    for entry in scan_dict["channels"]["data"]:
        scan.channels.append(
            cls.Channel(**{field: entry[field] for field in channel_fields})
        )

    roi_fields = (
        "origin_x_um",
        "origin_y_um",
        "width_um",
        "height_um",
        "tile_rows",
        "tile_cols",
        "focus_points",
    )
    for entry in scan_dict["roi"]["data"]:
        scan.roi.append(cls.ROI(**{field: entry[field] for field in roi_fields}))
    return scan
@classmethod
def load_czi(cls, input_path: str) -> typing.Self:
    """
    Load a Scan object from the metadata of a .czi file.

    Reads the XML metadata and the binary dimension shapes through aicspylibczi
    and fills in the slide ID, scanner ID, dates, tray/slot, camera, objective,
    pixel size, tile size, channels, and ROIs. The scan's path is not set here.
    :param input_path: the path to the .czi file
    :return: a Scan object
    :raises KeyError: if the scanner's name is not listed in SCANNER_IDS
    :raises ValueError: if the channel or ROI counts in the XML metadata do not
                        match the binary data, if an ROI has no matching scene,
                        or if the tile count is not divisible by the tile rows
    """
    # Normalize paths
    input_path = os.path.abspath(input_path)

    # NOTE(review): the file is opened twice, once per call below
    # Read in metadata as XML elements
    metadata_xml = aicspylibczi.CziFile(input_path).meta
    # Read in shape metadata from binary
    rois_shape = aicspylibczi.CziFile(input_path).get_dims_shape()

    # Populate metadata
    scan = cls()

    scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
    if scan.slide_id is not None:
        scan.slide_id = scan.slide_id.strip().upper()
    # Map the raw scanner ID (service ID) to our IDs
    scan.scanner_id = cls.SCANNER_IDS[
        metadata_xml.find(".//Microscope/UserDefinedName").text
    ]

    # Extract start and finish datetimes
    date = metadata_xml.find(".//Document/CreationDate").text
    # Strip out sub-second precision: keep everything before the first "." and
    # re-attach the text from the last "-" or "+" onwards (presumably the UTC
    # offset - verify against real files)
    # NOTE(review): if the date has no ".", find() returns -1 and the slice
    # drops the last character instead
    date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
    date_as_datetime = datetime.datetime.strptime(
        date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
    )
    scan.start_date = date_as_datetime.strftime(
        cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
    )
    # The duration is divided by 1000 to get seconds (presumably stored in ms)
    scan.scan_time_s = round(
        float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
    )
    # The end date is derived: start date plus the acquisition duration
    date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
    scan.end_date = date_as_datetime.strftime(
        cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
    )

    scan.tray = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
    # NOTE(review): only the last character is used, so this assumes a
    # single-digit slot number
    scan.slot = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

    # Get camera and magnifying info
    scan.camera = (
        metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
    )["Name"]
    magnification = metadata_xml.find(
        ".//Objectives/Objective/NominalMagnification"
    )
    aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
    scan.objective = f"{magnification.text}x-{aperture.text}"
    # Scaled by 1e6 (presumably meters to micrometers - TODO confirm)
    scan.pixel_size_um = (
        float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
    )
    # Round off the pixel size to nanometers; might not be optimal, but this
    # gets rounded when we send it to the database anyways (to 7 places)
    scan.pixel_size_um = round(scan.pixel_size_um, 3)

    # Get tile information
    # Note: X Y is untested, could be flipped. I always forget. Just don't use
    # non-square frames and we're all good.
    # NOTE(review): tile_info is parsed but never used below; the tile size is
    # taken from the binary shape of the first ROI instead
    tile_info = metadata_xml.find(".//HardwareSetting/ParameterCollection/Frame")
    tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

    scan.tile_width_px = rois_shape[0]["X"][1]
    scan.tile_height_px = rois_shape[0]["Y"][1]
    scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

    # Extract channels and create Channel objects from them
    channel_indices = []
    for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
        # NOTE(review): only the last character of the Id is used, so this
        # assumes fewer than 10 channels
        channel_indices.append(int(channel.attrib["Id"][-1]))
        intensity_xml = channel.find(".//Intensity")
        if intensity_xml is None:
            intensity = 0
        else:
            # Drop the last two characters (presumably a unit suffix) and scale
            # by 1e-2 (presumably percent to proportion - TODO confirm)
            intensity = float(intensity_xml.text[:-2]) * 1e-2
        scan.channels.append(
            cls.Channel(
                name=channel.attrib["Name"].upper(),
                # Scaled by 1e-6 (presumably nanoseconds to milliseconds)
                exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                intensity=intensity,
            )
        )
    # Make sure the channels are sorted
    # NOTE(review): sorting (index, Channel) tuples falls back to comparing
    # Channel objects if two indices are equal
    scan.channels = [
        channel for _, channel in sorted(zip(channel_indices, scan.channels))
    ]
    # Verify that the shape corresponds to the channels
    for roi in rois_shape:
        if roi["C"][1] != len(scan.channels):
            raise ValueError(
                f"Number of channels {len(scan.channels)} "
                f"is not the same as the number of channels in an ROI: "
                f"{roi['C'][1]}"
            )

    # Get the real ROI limits; the metadata is not always correct
    limits_xml = metadata_xml.findall(".//AllowedScanArea")
    # Only the first AllowedScanArea entry is used: [center x, center y, width,
    # height], each rounded to a whole number
    limits = [
        round(float(limits_xml[0].find("Center").text.split(",")[0])),
        round(float(limits_xml[0].find("Center").text.split(",")[1])),
        round(float(limits_xml[0].find("Size").text.split(",")[0])),
        round(float(limits_xml[0].find("Size").text.split(",")[1])),
    ]
    # Convert to top-left and bottom-right
    limits = [
        round(limits[0] - limits[2] / 2),
        round(limits[1] - limits[3] / 2),
        round(limits[0] + limits[2] / 2),
        round(limits[1] + limits[3] / 2),
    ]

    # Extract ROIs and create ROI objects from them
    rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
    scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
    if len(rois_xml_metadata) != len(rois_shape):
        raise ValueError(
            f"Metadata and binary data from {input_path} "
            f"do not match in number of ROIs"
        )
    # We need both to determine the number of rows/columns because the XML lies
    roi_indices = []
    # NOTE(review): the XML regions and binary shapes are paired by position
    # here, before the ROIs are reordered by scene index below
    for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
        name = roi_xml.attrib["Name"]
        # Determine the index of this scene
        scene_index = -1
        for scene in scenes_xml_metadata:
            if scene.attrib["Name"] == name:
                scene_index = int(scene.attrib["Index"])
                break
        if scene_index == -1:
            raise ValueError(f"ROI {name} does not correspond to any scenes")
        else:
            roi_indices.append(scene_index)
        # Extract other metadata
        roi_limits = [
            round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
            round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
            round(float(roi_xml.find("ContourSize").text.split(",")[0])),
            round(float(roi_xml.find("ContourSize").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        roi_limits = [
            round(roi_limits[0] - roi_limits[2] / 2),
            round(roi_limits[1] - roi_limits[3] / 2),
            round(roi_limits[0] + roi_limits[2] / 2),
            round(roi_limits[1] + roi_limits[3] / 2),
        ]
        # Bound the ROI to the actual scan limits
        roi_limits = [
            max(roi_limits[0], limits[0]),
            max(roi_limits[1], limits[1]),
            min(roi_limits[2], limits[2]),
            min(roi_limits[3], limits[3]),
        ]

        tile_rows = int(roi_xml.find("Rows").text)
        # Current best way of reliably extracting; <Columns> entry can be wrong
        # The column count is the binary tile count ("M") divided by the rows
        if (roi_shape["M"][1] % tile_rows) != 0:
            raise ValueError(
                f"The number of tiles {roi_shape['M'][1]} is not "
                f"divisible by the tile rows {tile_rows}; metadata "
                f"must be messed up. Thanks Zeiss"
            )
        else:
            tile_cols = int(roi_shape["M"][1] / tile_rows)
        # Support points are actually the relevant focus points for this ROI
        focus_points = []
        for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
            # int() truncates each coordinate towards zero
            focus_points.append(
                [
                    int(float(focus_point.find("X").text)),
                    int(float(focus_point.find("Y").text)),
                    int(float(focus_point.find("Z").text)),
                ]
            )
        # Strip all sub-micron precision, it does not matter
        scan.roi.append(
            cls.ROI(
                origin_x_um=roi_limits[0],
                origin_y_um=roi_limits[1],
                width_um=roi_limits[2] - roi_limits[0],
                height_um=roi_limits[3] - roi_limits[1],
                tile_rows=tile_rows,
                tile_cols=tile_cols,
                focus_points=focus_points,
            )
        )
    # Sort based on the scene indices
    scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))]

    return scan
Parameters
  • input_path: the path to the .czi file
Returns

a Scan object

@classmethod
def load_txt(cls, input_path: str) -> Self:
527    @classmethod
528    def load_txt(cls, input_path: str) -> typing.Self:
529        """
530        Loads a Scan object from a .txt file, which originates from the BZScanner.
531        Some metadata from the slideinfo.txt file is missing or adjusted to fit.
532        :param input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
533        :return: a Scan object
534        """
535        # Set paths
536        input_path = os.path.abspath(input_path)
537        if os.path.isdir(input_path):
538            input_path = os.path.join(
539                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
540            )
541
542        # Read in metadata as a dict
543        with open(input_path, "r") as file:
544            metadata_contents = file.read()
545            # Read each line, splitting on the = sign
546            metadata_dict = {}
547            for line in metadata_contents.splitlines():
548                key, value = line.split("=")
549                metadata_dict[key] = value
550
551        # Populate metadata
552        scan = cls()
553
554        scan.slide_id = metadata_dict["SLIDEID"]
555        scan.slide_id = scan.slide_id.strip().upper()
556
557        scan.path = metadata_dict["SLIDEDIR"]
558
559        # Extract start and finish datetimes
560        date = metadata_dict["DATE"]
561        date_as_datetime = datetime.datetime.strptime(
562            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
563        )
564        date_as_datetime = date_as_datetime.astimezone(
565            zoneinfo.ZoneInfo("America/Los_Angeles")
566        )  # Hardcoded because BZScanners are here
567        scan.start_date = date_as_datetime.strftime(
568            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
569        )
570        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
571        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
572        scan.end_date = date_as_datetime.strftime(
573            cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
574        )
575
576        # Map the raw scanner ID (service ID) to our IDs
577        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
578        scan.tray = 0  # only one tray in a BZScanner
579        scan.slot = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed
580
581        # Get camera and magnifying info
582        scan.camera = ""
583        magnification = 10
584        aperture = 0  # TODO: find the actual aperture
585        scan.objective = f"{magnification}x-{aperture}"
586        scan.pixel_size_um = 0.591  # Estimated from image metadata
587
588        # Get tile information
589        scan.tile_width_px = 1362  # Known from image metadata
590        scan.tile_height_px = 1004  # Known from image metadata
591        scan.tile_overlap_proportion = 0
592
593        # Extract channels and create Channel objects from them
594        for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()):
595            channel_settings = metadata_dict[channel].split(",")
596            if channel_settings[0] == "0":
597                continue
598            scan.channels.append(
599                cls.Channel(
600                    name=cls.BZSCANNER_CHANNEL_MAP[channel],
601                    exposure_ms=float(channel_settings[1]),
602                    intensity=float(channel_settings[2]),
603                )
604            )
605
606        # Get focus points
607        focus_points = []
608        for i in range(33):
609            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
610            if focus_point[0] == "0":
611                break
612            focus_points.append(
613                [
614                    int(float(focus_point[1])),
615                    int(float(focus_point[2])),
616                    int(float(focus_point[3])),
617                ]
618            )
619
620        # In the BZScanner, the slide is vertical instead of horizontal
621        # We put in nominal values for the ROI, which is oriented vertically as well
622        tile_rows = 96
623        tile_cols = 24
624        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
625        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
626        origin_x_um = 2500 + round((20000 - roi_width) / 2)
627        origin_y_um = 2500 + round((58000 - roi_height) / 2)
628        scan.roi.append(
629            cls.ROI(
630                origin_x_um=origin_x_um,
631                origin_y_um=origin_y_um,
632                width_um=roi_width,
633                height_um=roi_height,
634                tile_rows=tile_rows,
635                tile_cols=tile_cols,
636                focus_points=focus_points,
637            )
638        )
639        return scan

Loads a Scan object from a .txt file, which originates from the BZScanner. Some metadata from the slideinfo.txt file is missing or adjusted to fit.

Parameters
  • input_path: /path/to/file.txt or /path/to/folder that contains slideinfo.txt
Returns

a Scan object

@classmethod
def load_from_folder(cls, input_path: str) -> Self:
641    @classmethod
642    def load_from_folder(cls, input_path: str) -> typing.Self:
643        """
644        Load a Scan object from a folder that contains scan.yaml or slideinfo.txt.
645        Prefers scan.yaml if both exist.
646        :param input_path: /path/to/folder
647        :return: a Scan object
648        """
649        input_path = os.path.abspath(input_path)
650        if os.path.isfile(
651            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
652        ):
653            return cls.load_yaml(input_path)
654        elif os.path.isfile(
655            os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER])
656        ):
657            return cls.load_txt(input_path)
658        else:
659            raise ValueError(
660                f"No scan metadata files "
661                f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, "
662                f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder "
663                f"{input_path}"
664            )
665        pass

Load a Scan object from a folder that contains scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.

Parameters
  • input_path: /path/to/folder
Returns

a Scan object

@classmethod
def make_placeholder(cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0) -> Self:
667    @classmethod
668    def make_placeholder(
669        cls, slide_id: str, n_tile: int = 2304, n_roi: int = 0
670    ) -> typing.Self:
671        """
672        Make a placeholder Scan object with only basic required information filled in.
673        :param slide_id: the slide ID
674        :param n_tile: the number of this tile, which will become the number of
675                       tiles in the scan
676        :param n_roi: the number of ROIs in the scan
677        :return: a Scan object
678        """
679        # Sanitize inputs here
680        slide_id = str(slide_id).strip().upper()
681        n_tile = int(n_tile)
682        n_roi = int(n_roi)
683        # Generate the object
684        scan = cls()
685        scan.slide_id = slide_id
686        scan.roi = [cls.ROI() for _ in range(n_roi)]
687        scan.roi[0].tile_cols = n_tile
688        return scan

Make a placeholder Scan object with only basic required information filled in.

Parameters
  • slide_id: the slide ID
  • n_tile: the number of tiles in the scan; it is stored as the tile column count of the first ROI
  • n_roi: the number of ROIs in the scan
Returns

a Scan object

class Type(enum.Enum):
    """
    The scanner types a scan can originate from. The members are used as keys
    of the per-scanner lookup tables (METADATA_FILE_NAME, DATETIME_FORMAT), and
    their values are the prefix of our scanner IDs (e.g. "axioscan7_0").
    """

    BZSCANNER = "bzscanner"
    AXIOSCAN7 = "axioscan7"
BZSCANNER = <Type.BZSCANNER: 'bzscanner'>
AXIOSCAN7 = <Type.AXIOSCAN7: 'axioscan7'>
class Channel(yaml.YAMLObject):
    """
    Class that comprises a channel; we usually have multiple (2-5) per scan.
    Contains three fields:
    - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
    - exposure_ms: the exposure time to capture a frame in milliseconds
    - intensity: the light intensity used OR the gain applied to the channel
    """

    # Tag that identifies this class in YAML files
    yaml_tag = "csi_utils.csi_scans.Scan.Channel"

    def __init__(
        self,
        name: str = "",
        exposure_ms: float = -1.0,
        intensity: float = -1.0,
    ):
        """
        Create a channel; every field has a placeholder default ("" or -1.0).
        """
        self.name = name
        self.exposure_ms = exposure_ms
        self.intensity = intensity

    def __repr__(self):
        # The YAML dump, in attribute order, doubles as the representation
        return yaml.dump(self, sort_keys=False)

    def __eq__(self, other):
        # Channels are equal when their representations (YAML dumps) match
        return self.__repr__() == other.__repr__()

Class that comprises a channel; we usually have multiple (2-5) per scan. Contains three fields:

  • name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
  • exposure_ms: the exposure time to capture a frame in milliseconds
  • intensity: the light intensity used OR the gain applied to the channel
Scan.Channel(name: str = '', exposure_ms: float = -1.0, intensity: float = -1.0)
def __init__(
    self,
    name: str = "",
    exposure_ms: float = -1.0,
    intensity: float = -1.0,
):
    """
    Create a channel; every field has a placeholder default ("" or -1.0).
    :param name: the name of the channel
    :param exposure_ms: the exposure time to capture a frame in milliseconds
    :param intensity: the light intensity used OR the gain applied to the channel
    """
    self.name = name
    self.exposure_ms = exposure_ms
    self.intensity = intensity
yaml_tag = 'csi_utils.csi_scans.Scan.Channel'
name
exposure_ms
intensity
class ROI(yaml.YAMLObject):
    """
    Class that comprises an ROI; we usually have 1, but may have more in a scan.
    Holds the ROI's origin and size in micrometers, its tile grid (rows and
    columns), and its list of focus points.
    """

    # Tag that identifies this class in YAML files
    yaml_tag = "csi_utils.csi_scans.Scan.ROI"

    def __init__(
        self,
        origin_x_um: int = -1,
        origin_y_um: int = -1,
        width_um: int = -1,
        height_um: int = -1,
        tile_rows: int = -1,
        tile_cols: int = -1,
        focus_points=None,
    ):
        """
        Create an ROI; numeric fields default to the placeholder -1 and
        focus_points defaults to a new empty list.
        """
        # A None default avoids sharing one list between all ROI instances
        if focus_points is None:
            focus_points = []
        self.origin_x_um = origin_x_um
        self.origin_y_um = origin_y_um
        self.width_um = width_um
        self.height_um = height_um
        self.tile_rows = tile_rows
        self.tile_cols = tile_cols
        self.focus_points = focus_points

    def __repr__(self):
        # The YAML dump, in attribute order, doubles as the representation
        return yaml.dump(self, sort_keys=False)

    def __eq__(self, other):
        # ROIs are equal when their representations (YAML dumps) match, which
        # includes the focus points
        return self.__repr__() == other.__repr__()

    def similar(self, other):
        """
        Check whether two ROIs have the same origin, size, and tile grid.
        Unlike ==, the focus points are not compared.
        :param other: the ROI to compare against.
        :return: True if all compared fields are equal, False otherwise.
        """
        return (
            self.origin_y_um == other.origin_y_um
            and self.origin_x_um == other.origin_x_um
            and self.width_um == other.width_um
            and self.height_um == other.height_um
            and self.tile_rows == other.tile_rows
            and self.tile_cols == other.tile_cols
        )

Class that comprises an ROI; we usually have 1, but may have more in a scan.

Scan.ROI( origin_x_um: int = -1, origin_y_um: int = -1, width_um: int = -1, height_um: int = -1, tile_rows: int = -1, tile_cols: int = -1, focus_points=None)
def __init__(
    self,
    origin_x_um: int = -1,
    origin_y_um: int = -1,
    width_um: int = -1,
    height_um: int = -1,
    tile_rows: int = -1,
    tile_cols: int = -1,
    focus_points=None,
):
    """
    Create an ROI; numeric fields default to the placeholder -1 and
    focus_points defaults to a new empty list.
    """
    # A None default avoids sharing one list between all ROI instances
    if focus_points is None:
        focus_points = []
    self.origin_x_um = origin_x_um
    self.origin_y_um = origin_y_um
    self.width_um = width_um
    self.height_um = height_um
    self.tile_rows = tile_rows
    self.tile_cols = tile_cols
    self.focus_points = focus_points
yaml_tag = 'csi_utils.csi_scans.Scan.ROI'
origin_x_um
origin_y_um
width_um
height_um
tile_rows
tile_cols
focus_points
def similar(self, other):
    """
    Check whether two ROIs have the same origin, size, and tile grid.
    The focus points are not compared.
    :param other: the ROI to compare against.
    :return: True if all compared fields are equal, False otherwise.
    """
    compared_fields = (
        "origin_y_um",
        "origin_x_um",
        "width_um",
        "height_um",
        "tile_rows",
        "tile_cols",
    )
    for field in compared_fields:
        if not getattr(self, field) == getattr(other, field):
            return False
    return True