csi_images.csi_scans
Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.
1""" 2Contains the Scan class, which holds important metadata from a scan. This metadata 3can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan 4object can also be loaded from a .czi file or a .txt file. 5""" 6 7import os 8import math 9import enum 10import datetime 11import zoneinfo 12from typing import Self, Iterable 13 14import yaml 15import json 16 17try: 18 import aicspylibczi 19except ImportError: 20 aicspylibczi = None 21 22 23class Scan(yaml.YAMLObject): 24 """ 25 Class that composes a whole scan's metadata. Contains some universal data, 26 plus lists for channels and ROIs. 27 28 .. include:: ../docs/coordinate_systems.md 29 """ 30 31 yaml_tag = "csi_images.csi_scans.Scan" 32 33 class Type(enum.Enum): 34 BZSCANNER = "bzscanner" 35 AXIOSCAN7 = "axioscan7" 36 37 SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"} 38 """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs""" 39 40 METADATA_FILE_NAME = { 41 Type.AXIOSCAN7: "scan.yaml", 42 Type.BZSCANNER: "slideinfo.txt", 43 } 44 STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" 45 DATETIME_FORMAT = { 46 Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT, 47 Type.BZSCANNER: "%a %b %d %H:%M:%S %Y", 48 } 49 50 # Actual channel names, from the BZScanner's default order 51 BZSCANNER_CHANNEL_MAP = { 52 "DAPI": "DAPI", 53 "TRITC": "AF555", 54 "CY5": "AF647", 55 "BF": "BRIGHT", 56 "FITC": "AF488", 57 } 58 59 class Channel(yaml.YAMLObject): 60 """ 61 Class that comprises a channel; we usually have multiple (2-5) per scan. 62 Contains three fields: 63 - name: the name of the channel (e.g. 
DAPI, AF647, AF555, AF488, BRIGHTFIELD) 64 - exposure_ms: the exposure time to capture a frame in milliseconds 65 - intensity: the light intensity used OR the gain applied to the channel 66 """ 67 68 yaml_tag = "csi_images.csi_scans.Scan.Channel" 69 70 def __init__( 71 self, 72 name: str = "", 73 exposure_ms: float = -1.0, 74 intensity: float = -1.0, 75 gain_applied: bool = True, 76 ): 77 self.name = name 78 self.exposure_ms = exposure_ms 79 self.intensity = intensity 80 self.gain_applied = gain_applied 81 82 def __repr__(self): 83 return yaml.dump(self, sort_keys=False) 84 85 def __eq__(self, other): 86 return self.__repr__() == other.__repr__() 87 88 class ROI(yaml.YAMLObject): 89 """ 90 Class that comprises an ROI; we usually have 1, but may have more in a scan. 91 """ 92 93 yaml_tag = "csi_images.csi_scans.Scan.ROI" 94 95 def __init__( 96 self, 97 origin_x_um: int = -1, 98 origin_y_um: int = -1, 99 width_um: int = -1, 100 height_um: int = -1, 101 tile_rows: int = -1, 102 tile_cols: int = -1, 103 focus_points=None, 104 ): 105 if focus_points is None: 106 focus_points = [] 107 self.origin_x_um = origin_x_um 108 self.origin_y_um = origin_y_um 109 self.width_um = width_um 110 self.height_um = height_um 111 self.tile_rows = tile_rows 112 self.tile_cols = tile_cols 113 self.focus_points = focus_points 114 115 def __repr__(self): 116 return yaml.dump(self, sort_keys=False) 117 118 def __eq__(self, other): 119 return self.__repr__() == other.__repr__() 120 121 def similar(self, other): 122 return ( 123 self.origin_y_um == other.origin_y_um 124 and self.origin_x_um == other.origin_x_um 125 and self.width_um == other.width_um 126 and self.height_um == other.height_um 127 and self.tile_rows == other.tile_rows 128 and self.tile_cols == other.tile_cols 129 ) 130 131 def __init__( 132 self, 133 slide_id: str = "", 134 scanner_id: str = "", 135 path: str = "", 136 exists: bool = True, 137 start_datetime: str = "", 138 end_datetime: str = "", 139 scan_time_s: int = -1, 140 
tray_pos: int = -1, 141 slide_pos: int = -1, 142 camera: str = "", 143 objective: str = "", 144 pixel_size_um: float = -1.0, 145 tile_width_px: int = -1, 146 tile_height_px: int = -1, 147 tile_x_offset_px: int = -1, 148 tile_y_offset_px: int = -1, 149 tile_overlap_proportion: int = -1, 150 channels: list[Channel] = None, 151 roi: list[ROI] = None, 152 ): 153 if roi is None: 154 roi = [] 155 if channels is None: 156 channels = [] 157 self.slide_id = slide_id 158 self.scanner_id = scanner_id 159 self.path = path 160 self.exists = exists 161 self.start_datetime = start_datetime 162 self.end_datetime = end_datetime 163 self.scan_time_s = scan_time_s 164 self.tray_pos = tray_pos 165 self.slide_pos = slide_pos 166 self.camera = camera 167 self.objective = objective 168 self.pixel_size_um = pixel_size_um 169 self.tile_width_px = tile_width_px 170 self.tile_height_px = tile_height_px 171 self.tile_x_offset_px = tile_x_offset_px 172 self.tile_y_offset_px = tile_y_offset_px 173 self.tile_overlap_proportion = tile_overlap_proportion 174 self.channels = channels 175 self.roi = roi 176 177 def __key(self): 178 return ( 179 self.slide_id, 180 self.scanner_id, 181 self.path, 182 self.exists, 183 self.start_datetime, 184 self.end_datetime, 185 self.scan_time_s, 186 self.tray_pos, 187 self.slide_pos, 188 self.camera, 189 self.objective, 190 self.pixel_size_um, 191 self.tile_width_px, 192 self.tile_height_px, 193 self.tile_overlap_proportion, 194 tuple(self.channels), 195 tuple(self.roi), 196 ) 197 198 def __hash__(self): 199 return hash(self.__key()) 200 201 def __repr__(self): 202 return yaml.dump(self, sort_keys=False) 203 204 def __eq__(self, other): 205 return self.__repr__() == other.__repr__() 206 207 def has_same_profile(self, other): 208 return ( 209 self.camera == other.camera 210 and self.objective == other.objective 211 and self.pixel_size_um == other.pixel_size_um 212 and self.tile_width_px == other.tile_width_px 213 and self.tile_height_px == other.tile_height_px 214 
and self.tile_x_offset_px == other.tile_x_offset_px 215 and self.tile_y_offset_px == other.tile_y_offset_px 216 and self.tile_overlap_proportion == other.tile_overlap_proportion 217 and self.channels == other.channels 218 and all(a.similar(b) for a, b in zip(self.roi, other.roi)) 219 ) 220 221 def get_channel_names(self) -> list[str]: 222 """ 223 Get the channel names in the scan's channel order. 224 :return: a list of channel names. 225 """ 226 return [channel.name for channel in self.channels] 227 228 def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]: 229 """ 230 Given a list of channel names, return the corresponding indices in the scan's 231 channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the 232 actual AlexaFluor names (AF555, AF647, AF488). 233 If a list entry is not found or None, it will return -1 for that entry. 234 :param channel_names: a list of channel names. 235 :return: a list of channel indices. 236 """ 237 # Get the scan's channel name list 238 scan_channel_names = self.get_channel_names() 239 240 channel_indices = [] 241 for name in channel_names: 242 # Convert any BZScanner channel names to the actual channel names 243 if name in self.BZSCANNER_CHANNEL_MAP: 244 name = self.BZSCANNER_CHANNEL_MAP[name] 245 246 # Append the corresponding index if possible 247 if name in scan_channel_names: 248 channel_indices.append(scan_channel_names.index(name)) 249 else: 250 channel_indices.append(-1) 251 return channel_indices 252 253 def get_image_size(self) -> tuple[int, int]: 254 """ 255 Get the real size of the image in pixels after subtracting overlap. 
256 :return: a tuple of (real_height, real_width) for easy comparison to arrays 257 """ 258 width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion) 259 height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion) 260 return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap 261 262 def save_yaml(self, output_path: str): 263 """ 264 Write the Scan object to a .yaml file. 265 :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml 266 :return: nothing; will raise an error on failure 267 """ 268 # Create necessary folders 269 output_path = os.path.abspath(output_path) 270 if os.path.splitext(output_path)[1] == ".yaml": 271 os.makedirs(os.path.dirname(output_path), exist_ok=True) 272 else: 273 os.makedirs(output_path, exist_ok=True) 274 # Add the standard metadata file name to the path if needed 275 output_path = os.path.join( 276 output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7] 277 ) 278 279 # Populate the file 280 with open(output_path, "w") as file: 281 yaml.dump(self, stream=file, sort_keys=False) 282 283 @classmethod 284 def load_yaml(cls, input_path: str) -> Self: 285 """ 286 Load a Scan object from a .yaml file. 
287 :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml 288 :return: a Scan object 289 """ 290 input_path = os.path.abspath(input_path) 291 if os.path.isdir(input_path): 292 input_path = os.path.join( 293 input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7] 294 ) 295 with open(input_path, "r") as file: 296 metadata_obj = yaml.load(file, Loader=yaml.Loader) 297 return metadata_obj 298 299 def to_dict(self) -> dict: 300 """ 301 Convert the Scan object to a dictionary with keys matching database columns 302 and values matching database entries 303 :return: a dictionary 304 """ 305 # Dump to json; then add indents and a top-level key 306 channels_json = json.dumps( 307 self.channels, default=lambda x: x.__dict__, indent=2 308 ) 309 channels_json = " ".join(channels_json.splitlines(True)) 310 channels_json = "{\n " + '"data": ' + channels_json + "\n}" 311 312 roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2) 313 roi_json = " ".join(roi_json.splitlines(True)) 314 roi_json = "{\n " + '"data": ' + roi_json + "\n}" 315 316 # Keys are named the same as database columns 317 return { 318 "scanner_id": self.scanner_id, 319 "slide_id": self.slide_id, 320 "exists": self.exists, 321 "path": self.path, 322 "start_datetime": self.start_datetime, 323 "end_datetime": self.end_datetime, 324 "scan_time_s": self.scan_time_s, 325 "tray_pos": self.tray_pos, 326 "slide_pos": self.slide_pos, 327 "tile_width": self.tile_width_px, 328 "tile_height": self.tile_height_px, 329 "tile_x_offset": self.tile_x_offset_px, 330 "tile_y_offset": self.tile_y_offset_px, 331 "tile_overlap": self.tile_overlap_proportion, 332 "camera": self.camera, 333 "objective": self.objective, 334 "pixel_size": self.pixel_size_um, 335 "channels": channels_json, 336 "roi": roi_json, 337 } 338 339 @classmethod 340 def from_dict(cls, scan_dict) -> Self: 341 """ 342 Convert a dictionary from to_dict() or the database to a Scan object 343 :param scan_dict: a dictionary 344 :return: a 
Scan object 345 """ 346 result = cls( 347 scanner_id=scan_dict["scanner_id"], 348 slide_id=scan_dict["slide_id"], 349 path=scan_dict["path"], 350 exists=scan_dict["exists"], 351 start_datetime=scan_dict["start_datetime"], 352 end_datetime=scan_dict["end_datetime"], 353 scan_time_s=scan_dict["scan_time_s"], 354 tray_pos=scan_dict["tray_pos"], 355 slide_pos=scan_dict["slide_pos"], 356 camera=scan_dict["camera"], 357 objective=scan_dict["objective"], 358 pixel_size_um=scan_dict["pixel_size"], 359 tile_width_px=scan_dict["tile_width"], 360 tile_height_px=scan_dict["tile_height"], 361 tile_x_offset_px=scan_dict["tile_x_offset"], 362 tile_y_offset_px=scan_dict["tile_y_offset"], 363 tile_overlap_proportion=scan_dict["tile_overlap"], 364 ) 365 # Handle JSON and dictionaries 366 if isinstance(scan_dict["channels"], str): 367 channels_dict = json.loads(scan_dict["channels"])["data"] 368 else: 369 channels_dict = scan_dict["channels"]["data"] 370 for channel in channels_dict: 371 result.channels.append( 372 cls.Channel( 373 name=channel["name"], 374 exposure_ms=channel["exposure_ms"], 375 intensity=channel["intensity"], 376 gain_applied=channel["gain_applied"], 377 ) 378 ) 379 # Handle JSON and dictionaries 380 if isinstance(scan_dict["channels"], str): 381 roi_dict = json.loads(scan_dict["roi"])["data"] 382 else: 383 roi_dict = scan_dict["roi"]["data"] 384 for roi in roi_dict: 385 result.roi.append( 386 cls.ROI( 387 origin_x_um=roi["origin_x_um"], 388 origin_y_um=roi["origin_y_um"], 389 width_um=roi["width_um"], 390 height_um=roi["height_um"], 391 tile_rows=roi["tile_rows"], 392 tile_cols=roi["tile_cols"], 393 focus_points=roi["focus_points"], 394 ) 395 ) 396 return result 397 398 @classmethod 399 def load_czi(cls, input_path: str) -> Self: 400 """ 401 Extracts metadata from a .czi file, which is the output of the Axioscan 402 :param input_path: the path to the .czi file 403 :return: a Scan object 404 """ 405 if aicspylibczi is None: 406 raise ModuleNotFoundError( 407 
"aicspylibczi library not installed. " 408 "Install csi-images with [imageio] option to resolve." 409 ) 410 411 # Normalize paths 412 input_path = os.path.abspath(input_path) 413 414 with open(input_path, "rb") as file: 415 # Read in metadata as XML elements 416 metadata_xml = aicspylibczi.CziFile(file).meta 417 # Read in shape metadata from binary 418 rois_shape = aicspylibczi.CziFile(file).get_dims_shape() 419 420 # Populate metadata 421 scan = cls() 422 423 scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text 424 if scan.slide_id is not None: 425 scan.slide_id = scan.slide_id.strip().upper() 426 # Map the raw scanner ID (service ID) to our IDs 427 scan.scanner_id = cls.SCANNER_IDS[ 428 metadata_xml.find(".//Microscope/UserDefinedName").text 429 ] 430 431 # Extract start and finish datetimes 432 date = metadata_xml.find(".//Document/CreationDate").text 433 # Strip out sub-second precision 434 date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :] 435 date_as_datetime = datetime.datetime.strptime( 436 date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7] 437 ) 438 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 439 scan.scan_time_s = round( 440 float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000 441 ) 442 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 443 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 444 445 scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text) 446 scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1]) 447 448 # Get camera and magnifying info 449 scan.camera = ( 450 metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib 451 )["Name"] 452 magnification = metadata_xml.find( 453 ".//Objectives/Objective/NominalMagnification" 454 ) 455 aperture = metadata_xml.find(".//Objectives/Objective/LensNA") 456 scan.objective = f"{magnification.text}x-{aperture.text}" 457 
scan.pixel_size_um = ( 458 float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6 459 ) 460 # Round off the pixel size to nanometers; might not be optimal, but this 461 # gets rounded when we send it to the database anyways (to 7 places) 462 scan.pixel_size_um = round(scan.pixel_size_um, 3) 463 464 # Get tile information 465 # Note: X Y is untested, could be flipped. I always forget. Just don't use 466 # non-square frames and we're all good. 467 selected_detector = metadata_xml.find(".//SelectedDetector").text 468 detectors = metadata_xml.findall(".//Detectors/Detector") 469 for detector in detectors: 470 if detector.attrib["Id"] == selected_detector: 471 tile_info = detector.find(".//Frame") 472 break 473 # Convert to integers 474 tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")] 475 476 scan.tile_x_offset_px = tile_info[0] 477 scan.tile_y_offset_px = tile_info[1] 478 scan.tile_width_px = tile_info[2] 479 scan.tile_height_px = tile_info[3] 480 scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text) 481 482 # Extract channels and create Channel objects from them 483 channel_indices = [] 484 for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"): 485 channel_indices.append(int(channel.attrib["Id"][-1])) 486 intensity_xml = channel.find(".//Intensity") 487 if intensity_xml is None: 488 intensity = 0 489 else: 490 intensity = float(intensity_xml.text[:-2]) * 1e-2 491 scan.channels.append( 492 cls.Channel( 493 name=channel.attrib["Name"].upper(), 494 exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6, 495 intensity=intensity, 496 gain_applied=True, # In Axioscan, we will always use gain = 1 497 ) 498 ) 499 # Make sure the channels are sorted 500 scan.channels = [ 501 channel for _, channel in sorted(zip(channel_indices, scan.channels)) 502 ] 503 # Verify that the shape corresponds to the channels 504 for roi in rois_shape: 505 if roi["C"][1] != len(scan.channels): 506 raise 
ValueError( 507 f"Number of channels {len(scan.channels)} " 508 f"is not the same as the number of channels in an ROI: " 509 f"{roi['C'][1]}" 510 ) 511 512 # Get the real ROI limits; the metadata is not always correct 513 limits_xml = metadata_xml.findall(".//AllowedScanArea") 514 limits = [ 515 round(float(limits_xml[0].find("Center").text.split(",")[0])), 516 round(float(limits_xml[0].find("Center").text.split(",")[1])), 517 round(float(limits_xml[0].find("Size").text.split(",")[0])), 518 round(float(limits_xml[0].find("Size").text.split(",")[1])), 519 ] 520 # Convert to top-left and bottom-right 521 limits = [ 522 round(limits[0] - limits[2] / 2), 523 round(limits[1] - limits[3] / 2), 524 round(limits[0] + limits[2] / 2), 525 round(limits[1] + limits[3] / 2), 526 ] 527 528 # Extract ROIs and create ROI objects from them 529 rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion") 530 scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene") 531 if len(rois_xml_metadata) != len(rois_shape): 532 raise ValueError( 533 f"Metadata and binary data from {input_path} " 534 f"do not match in number of ROIs" 535 ) 536 # We need both to determine the number of rows/columns because the XML lies 537 roi_indices = [] 538 for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape): 539 name = roi_xml.attrib["Name"] 540 # Determine the index of this scene 541 scene_index = -1 542 for scene in scenes_xml_metadata: 543 if scene.attrib["Name"] == name: 544 scene_index = int(scene.attrib["Index"]) 545 break 546 if scene_index == -1: 547 raise ValueError(f"ROI {name} does not correspond to any scenes") 548 else: 549 roi_indices.append(scene_index) 550 # Extract other metadata 551 roi_limits = [ 552 round(float(roi_xml.find("CenterPosition").text.split(",")[0])), 553 round(float(roi_xml.find("CenterPosition").text.split(",")[1])), 554 round(float(roi_xml.find("ContourSize").text.split(",")[0])), 555 round(float(roi_xml.find("ContourSize").text.split(",")[1])), 556 
] 557 # Convert to top-left and bottom-right 558 roi_limits = [ 559 round(roi_limits[0] - roi_limits[2] / 2), 560 round(roi_limits[1] - roi_limits[3] / 2), 561 round(roi_limits[0] + roi_limits[2] / 2), 562 round(roi_limits[1] + roi_limits[3] / 2), 563 ] 564 # Bound the ROI to the actual scan limits 565 roi_limits = [ 566 max(roi_limits[0], limits[0]), 567 max(roi_limits[1], limits[1]), 568 min(roi_limits[2], limits[2]), 569 min(roi_limits[3], limits[3]), 570 ] 571 572 tile_rows = int(roi_xml.find("Rows").text) 573 # Current best way of reliably extracting; <Columns> entry can be wrong 574 if (roi_shape["M"][1] % tile_rows) != 0: 575 raise ValueError( 576 f"The number of tiles {roi_shape['M'][1]} is not " 577 f"divisible by the tile rows {tile_rows}; metadata " 578 f"must be messed up. Thanks Zeiss" 579 ) 580 else: 581 tile_cols = int(roi_shape["M"][1] / tile_rows) 582 # Support points are actually the relevant focus points for this ROI 583 focus_points = [] 584 for focus_point in roi_xml.findall("SupportPoints/SupportPoint"): 585 focus_points.append( 586 [ 587 int(float(focus_point.find("X").text)), 588 int(float(focus_point.find("Y").text)), 589 int(float(focus_point.find("Z").text)), 590 ] 591 ) 592 # Strip all sub-micron precision, it does not matter 593 scan.roi.append( 594 cls.ROI( 595 origin_x_um=roi_limits[0], 596 origin_y_um=roi_limits[1], 597 width_um=roi_limits[2] - roi_limits[0], 598 height_um=roi_limits[3] - roi_limits[1], 599 tile_rows=tile_rows, 600 tile_cols=tile_cols, 601 focus_points=focus_points, 602 ) 603 ) 604 # Sort based on the scene indices 605 scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))] 606 607 return scan 608 609 @classmethod 610 def load_txt(cls, input_path: str) -> Self: 611 """ 612 Loads a Scan object from a .txt file, usually slideinfo.txt, which originates 613 from the BZScanner. 
Some metadata is filled in or adjusted to fit 614 :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt 615 :return: a Scan object 616 """ 617 # Set paths 618 input_path = os.path.abspath(input_path) 619 if os.path.isdir(input_path): 620 input_path = os.path.join( 621 input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER] 622 ) 623 624 # Read in metadata as a dict 625 with open(input_path, "r") as file: 626 metadata_contents = file.read() 627 # Read each line, splitting on the = sign 628 metadata_dict = {} 629 for line in metadata_contents.splitlines(): 630 key, value = line.split("=") 631 metadata_dict[key] = value 632 633 # Populate metadata 634 scan = cls() 635 636 scan.slide_id = metadata_dict["SLIDEID"] 637 scan.slide_id = scan.slide_id.strip().upper() 638 639 scan.path = metadata_dict["SLIDEDIR"] 640 641 # Extract start and finish datetimes 642 date = metadata_dict["DATE"] 643 date_as_datetime = datetime.datetime.strptime( 644 date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER] 645 ) 646 date_as_datetime = date_as_datetime.astimezone( 647 zoneinfo.ZoneInfo("America/Los_Angeles") 648 ) # Hardcoded because BZScanners are here 649 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 650 scan.scan_time_s = 90 * 60 # estimated 90 minutes per scan 651 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 652 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 653 654 # Map the raw scanner ID (service ID) to our IDs 655 scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}' 656 scan.tray_pos = 0 # only one tray_pos in a BZScanner 657 scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1 # 1-indexed 658 659 # Get camera and magnifying info 660 scan.camera = "" 661 magnification = 10 662 aperture = 0 # TODO: find the actual aperture 663 scan.objective = f"{magnification}x-{aperture}" 664 scan.pixel_size_um = 0.591 # Estimated from image metadata 665 666 # Get tile 
information 667 scan.tile_width_px = 1362 # Known from image metadata 668 scan.tile_height_px = 1004 # Known from image metadata 669 scan.tile_x_offset_px = 0 # Already removed 670 scan.tile_y_offset_px = 0 # Already removed 671 scan.tile_overlap_proportion = 0 # Already removed 672 673 # Extract channels and create Channel objects from them 674 if "gain_applied" in metadata_dict: 675 gain_applied = True if metadata_dict["gain_applied"] == "1" else False 676 else: 677 gain_applied = True # Previous policy was always to apply gains 678 for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()): 679 channel_settings = metadata_dict[channel].split(",") 680 if channel_settings[0] == "0": 681 continue 682 scan.channels.append( 683 cls.Channel( 684 name=cls.BZSCANNER_CHANNEL_MAP[channel], 685 exposure_ms=float(channel_settings[1]), 686 intensity=float(channel_settings[2]), 687 gain_applied=gain_applied, 688 ) 689 ) 690 691 # Get focus points 692 focus_points = [] 693 for i in range(33): 694 focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",") 695 if focus_point[0] == "0": 696 break 697 focus_points.append( 698 [ 699 int(float(focus_point[1])), 700 int(float(focus_point[2])), 701 int(float(focus_point[3])), 702 ] 703 ) 704 705 # In the BZScanner, the slide is vertical instead of horizontal 706 # We put in nominal values for the ROI, which is oriented vertically as well 707 tile_rows = 96 708 tile_cols = 24 709 roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols) 710 roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows) 711 origin_x_um = 2500 + round((20000 - roi_width) / 2) 712 origin_y_um = 2500 + round((58000 - roi_height) / 2) 713 scan.roi.append( 714 cls.ROI( 715 origin_x_um=origin_x_um, 716 origin_y_um=origin_y_um, 717 width_um=roi_width, 718 height_um=roi_height, 719 tile_rows=tile_rows, 720 tile_cols=tile_cols, 721 focus_points=focus_points, 722 ) 723 ) 724 return scan 725 726 @classmethod 727 def load_from_folder(cls, 
input_path: str) -> Self: 728 """ 729 Load a Scan object from a folder that contains defaultly-named metadata files, 730 scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist 731 :param input_path: /path/to/folder 732 :return: a Scan object 733 """ 734 input_path = os.path.abspath(input_path) 735 if os.path.isfile( 736 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]) 737 ): 738 return cls.load_yaml(input_path) 739 elif os.path.isfile( 740 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]) 741 ): 742 return cls.load_txt(input_path) 743 else: 744 raise ValueError( 745 f"No scan metadata files " 746 f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, " 747 f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder " 748 f"{input_path}" 749 ) 750 pass 751 752 @classmethod 753 def make_placeholder( 754 cls, 755 slide_id: str, 756 n_tile: int = 2303, 757 n_roi: int = 0, 758 scanner_type: Type = Type.BZSCANNER, 759 ) -> Self: 760 """ 761 Make a placeholder Scan object with only basic required information filled in. 
762 :param slide_id: the slide ID 763 :param n_tile: the number of this tile, which will become the number of 764 tiles in the scan 765 :param n_roi: the number of ROIs in the scan 766 :param scanner_type: the scanner type 767 :return: a Scan object 768 """ 769 # Sanitize inputs here 770 slide_id = str(slide_id).strip().upper() 771 n_tile = int(n_tile) 772 n_roi = int(n_roi) 773 # Generate the object 774 scan = cls() 775 scan.slide_id = slide_id 776 if scanner_type == cls.Type.AXIOSCAN7: 777 scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder" 778 scan.roi = [cls.ROI() for _ in range(n_roi + 1)] 779 scan.roi[0].tile_rows = 17 780 scan.roi[0].tile_cols = (n_tile // 17) + 1 781 scan.channels = [ 782 cls.Channel(name="DAPI", exposure_ms=1.0, intensity=1.0), 783 cls.Channel(name="AF488", exposure_ms=1.0, intensity=1.0), 784 cls.Channel(name="AF555", exposure_ms=1.0, intensity=1.0), 785 cls.Channel(name="AF647", exposure_ms=1.0, intensity=1.0), 786 ] 787 elif scanner_type == cls.Type.BZSCANNER: 788 scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder" 789 scan.roi = [cls.ROI() for _ in range(n_roi + 1)] 790 scan.roi[0].tile_rows = 96 791 scan.roi[0].tile_cols = 24 792 scan.channels = [ 793 cls.Channel(name="DAPI", exposure_ms=1.0, intensity=1.0), 794 cls.Channel(name="AF555", exposure_ms=1.0, intensity=1.0), 795 cls.Channel(name="AF647", exposure_ms=1.0, intensity=1.0), 796 cls.Channel(name="AF488", exposure_ms=1.0, intensity=1.0), 797 ] 798 return scan
24class Scan(yaml.YAMLObject): 25 """ 26 Class that composes a whole scan's metadata. Contains some universal data, 27 plus lists for channels and ROIs. 28 29 .. include:: ../docs/coordinate_systems.md 30 """ 31 32 yaml_tag = "csi_images.csi_scans.Scan" 33 34 class Type(enum.Enum): 35 BZSCANNER = "bzscanner" 36 AXIOSCAN7 = "axioscan7" 37 38 SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"} 39 """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs""" 40 41 METADATA_FILE_NAME = { 42 Type.AXIOSCAN7: "scan.yaml", 43 Type.BZSCANNER: "slideinfo.txt", 44 } 45 STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" 46 DATETIME_FORMAT = { 47 Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT, 48 Type.BZSCANNER: "%a %b %d %H:%M:%S %Y", 49 } 50 51 # Actual channel names, from the BZScanner's default order 52 BZSCANNER_CHANNEL_MAP = { 53 "DAPI": "DAPI", 54 "TRITC": "AF555", 55 "CY5": "AF647", 56 "BF": "BRIGHT", 57 "FITC": "AF488", 58 } 59 60 class Channel(yaml.YAMLObject): 61 """ 62 Class that comprises a channel; we usually have multiple (2-5) per scan. 63 Contains three fields: 64 - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD) 65 - exposure_ms: the exposure time to capture a frame in milliseconds 66 - intensity: the light intensity used OR the gain applied to the channel 67 """ 68 69 yaml_tag = "csi_images.csi_scans.Scan.Channel" 70 71 def __init__( 72 self, 73 name: str = "", 74 exposure_ms: float = -1.0, 75 intensity: float = -1.0, 76 gain_applied: bool = True, 77 ): 78 self.name = name 79 self.exposure_ms = exposure_ms 80 self.intensity = intensity 81 self.gain_applied = gain_applied 82 83 def __repr__(self): 84 return yaml.dump(self, sort_keys=False) 85 86 def __eq__(self, other): 87 return self.__repr__() == other.__repr__() 88 89 class ROI(yaml.YAMLObject): 90 """ 91 Class that comprises an ROI; we usually have 1, but may have more in a scan. 
92 """ 93 94 yaml_tag = "csi_images.csi_scans.Scan.ROI" 95 96 def __init__( 97 self, 98 origin_x_um: int = -1, 99 origin_y_um: int = -1, 100 width_um: int = -1, 101 height_um: int = -1, 102 tile_rows: int = -1, 103 tile_cols: int = -1, 104 focus_points=None, 105 ): 106 if focus_points is None: 107 focus_points = [] 108 self.origin_x_um = origin_x_um 109 self.origin_y_um = origin_y_um 110 self.width_um = width_um 111 self.height_um = height_um 112 self.tile_rows = tile_rows 113 self.tile_cols = tile_cols 114 self.focus_points = focus_points 115 116 def __repr__(self): 117 return yaml.dump(self, sort_keys=False) 118 119 def __eq__(self, other): 120 return self.__repr__() == other.__repr__() 121 122 def similar(self, other): 123 return ( 124 self.origin_y_um == other.origin_y_um 125 and self.origin_x_um == other.origin_x_um 126 and self.width_um == other.width_um 127 and self.height_um == other.height_um 128 and self.tile_rows == other.tile_rows 129 and self.tile_cols == other.tile_cols 130 ) 131 132 def __init__( 133 self, 134 slide_id: str = "", 135 scanner_id: str = "", 136 path: str = "", 137 exists: bool = True, 138 start_datetime: str = "", 139 end_datetime: str = "", 140 scan_time_s: int = -1, 141 tray_pos: int = -1, 142 slide_pos: int = -1, 143 camera: str = "", 144 objective: str = "", 145 pixel_size_um: float = -1.0, 146 tile_width_px: int = -1, 147 tile_height_px: int = -1, 148 tile_x_offset_px: int = -1, 149 tile_y_offset_px: int = -1, 150 tile_overlap_proportion: int = -1, 151 channels: list[Channel] = None, 152 roi: list[ROI] = None, 153 ): 154 if roi is None: 155 roi = [] 156 if channels is None: 157 channels = [] 158 self.slide_id = slide_id 159 self.scanner_id = scanner_id 160 self.path = path 161 self.exists = exists 162 self.start_datetime = start_datetime 163 self.end_datetime = end_datetime 164 self.scan_time_s = scan_time_s 165 self.tray_pos = tray_pos 166 self.slide_pos = slide_pos 167 self.camera = camera 168 self.objective = objective 169 
self.pixel_size_um = pixel_size_um 170 self.tile_width_px = tile_width_px 171 self.tile_height_px = tile_height_px 172 self.tile_x_offset_px = tile_x_offset_px 173 self.tile_y_offset_px = tile_y_offset_px 174 self.tile_overlap_proportion = tile_overlap_proportion 175 self.channels = channels 176 self.roi = roi 177 178 def __key(self): 179 return ( 180 self.slide_id, 181 self.scanner_id, 182 self.path, 183 self.exists, 184 self.start_datetime, 185 self.end_datetime, 186 self.scan_time_s, 187 self.tray_pos, 188 self.slide_pos, 189 self.camera, 190 self.objective, 191 self.pixel_size_um, 192 self.tile_width_px, 193 self.tile_height_px, 194 self.tile_overlap_proportion, 195 tuple(self.channels), 196 tuple(self.roi), 197 ) 198 199 def __hash__(self): 200 return hash(self.__key()) 201 202 def __repr__(self): 203 return yaml.dump(self, sort_keys=False) 204 205 def __eq__(self, other): 206 return self.__repr__() == other.__repr__() 207 208 def has_same_profile(self, other): 209 return ( 210 self.camera == other.camera 211 and self.objective == other.objective 212 and self.pixel_size_um == other.pixel_size_um 213 and self.tile_width_px == other.tile_width_px 214 and self.tile_height_px == other.tile_height_px 215 and self.tile_x_offset_px == other.tile_x_offset_px 216 and self.tile_y_offset_px == other.tile_y_offset_px 217 and self.tile_overlap_proportion == other.tile_overlap_proportion 218 and self.channels == other.channels 219 and all(a.similar(b) for a, b in zip(self.roi, other.roi)) 220 ) 221 222 def get_channel_names(self) -> list[str]: 223 """ 224 Get the channel names in the scan's channel order. 225 :return: a list of channel names. 226 """ 227 return [channel.name for channel in self.channels] 228 229 def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]: 230 """ 231 Given a list of channel names, return the corresponding indices in the scan's 232 channel order. 
Will convert BZScanner channel names (TRITC, CY5, FITC) to the 233 actual AlexaFluor names (AF555, AF647, AF488). 234 If a list entry is not found or None, it will return -1 for that entry. 235 :param channel_names: a list of channel names. 236 :return: a list of channel indices. 237 """ 238 # Get the scan's channel name list 239 scan_channel_names = self.get_channel_names() 240 241 channel_indices = [] 242 for name in channel_names: 243 # Convert any BZScanner channel names to the actual channel names 244 if name in self.BZSCANNER_CHANNEL_MAP: 245 name = self.BZSCANNER_CHANNEL_MAP[name] 246 247 # Append the corresponding index if possible 248 if name in scan_channel_names: 249 channel_indices.append(scan_channel_names.index(name)) 250 else: 251 channel_indices.append(-1) 252 return channel_indices 253 254 def get_image_size(self) -> tuple[int, int]: 255 """ 256 Get the real size of the image in pixels after subtracting overlap. 257 :return: a tuple of (real_height, real_width) for easy comparison to arrays 258 """ 259 width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion) 260 height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion) 261 return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap 262 263 def save_yaml(self, output_path: str): 264 """ 265 Write the Scan object to a .yaml file. 
266 :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml 267 :return: nothing; will raise an error on failure 268 """ 269 # Create necessary folders 270 output_path = os.path.abspath(output_path) 271 if os.path.splitext(output_path)[1] == ".yaml": 272 os.makedirs(os.path.dirname(output_path), exist_ok=True) 273 else: 274 os.makedirs(output_path, exist_ok=True) 275 # Add the standard metadata file name to the path if needed 276 output_path = os.path.join( 277 output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7] 278 ) 279 280 # Populate the file 281 with open(output_path, "w") as file: 282 yaml.dump(self, stream=file, sort_keys=False) 283 284 @classmethod 285 def load_yaml(cls, input_path: str) -> Self: 286 """ 287 Load a Scan object from a .yaml file. 288 :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml 289 :return: a Scan object 290 """ 291 input_path = os.path.abspath(input_path) 292 if os.path.isdir(input_path): 293 input_path = os.path.join( 294 input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7] 295 ) 296 with open(input_path, "r") as file: 297 metadata_obj = yaml.load(file, Loader=yaml.Loader) 298 return metadata_obj 299 300 def to_dict(self) -> dict: 301 """ 302 Convert the Scan object to a dictionary with keys matching database columns 303 and values matching database entries 304 :return: a dictionary 305 """ 306 # Dump to json; then add indents and a top-level key 307 channels_json = json.dumps( 308 self.channels, default=lambda x: x.__dict__, indent=2 309 ) 310 channels_json = " ".join(channels_json.splitlines(True)) 311 channels_json = "{\n " + '"data": ' + channels_json + "\n}" 312 313 roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2) 314 roi_json = " ".join(roi_json.splitlines(True)) 315 roi_json = "{\n " + '"data": ' + roi_json + "\n}" 316 317 # Keys are named the same as database columns 318 return { 319 "scanner_id": self.scanner_id, 320 "slide_id": self.slide_id, 321 
"exists": self.exists, 322 "path": self.path, 323 "start_datetime": self.start_datetime, 324 "end_datetime": self.end_datetime, 325 "scan_time_s": self.scan_time_s, 326 "tray_pos": self.tray_pos, 327 "slide_pos": self.slide_pos, 328 "tile_width": self.tile_width_px, 329 "tile_height": self.tile_height_px, 330 "tile_x_offset": self.tile_x_offset_px, 331 "tile_y_offset": self.tile_y_offset_px, 332 "tile_overlap": self.tile_overlap_proportion, 333 "camera": self.camera, 334 "objective": self.objective, 335 "pixel_size": self.pixel_size_um, 336 "channels": channels_json, 337 "roi": roi_json, 338 } 339 340 @classmethod 341 def from_dict(cls, scan_dict) -> Self: 342 """ 343 Convert a dictionary from to_dict() or the database to a Scan object 344 :param scan_dict: a dictionary 345 :return: a Scan object 346 """ 347 result = cls( 348 scanner_id=scan_dict["scanner_id"], 349 slide_id=scan_dict["slide_id"], 350 path=scan_dict["path"], 351 exists=scan_dict["exists"], 352 start_datetime=scan_dict["start_datetime"], 353 end_datetime=scan_dict["end_datetime"], 354 scan_time_s=scan_dict["scan_time_s"], 355 tray_pos=scan_dict["tray_pos"], 356 slide_pos=scan_dict["slide_pos"], 357 camera=scan_dict["camera"], 358 objective=scan_dict["objective"], 359 pixel_size_um=scan_dict["pixel_size"], 360 tile_width_px=scan_dict["tile_width"], 361 tile_height_px=scan_dict["tile_height"], 362 tile_x_offset_px=scan_dict["tile_x_offset"], 363 tile_y_offset_px=scan_dict["tile_y_offset"], 364 tile_overlap_proportion=scan_dict["tile_overlap"], 365 ) 366 # Handle JSON and dictionaries 367 if isinstance(scan_dict["channels"], str): 368 channels_dict = json.loads(scan_dict["channels"])["data"] 369 else: 370 channels_dict = scan_dict["channels"]["data"] 371 for channel in channels_dict: 372 result.channels.append( 373 cls.Channel( 374 name=channel["name"], 375 exposure_ms=channel["exposure_ms"], 376 intensity=channel["intensity"], 377 gain_applied=channel["gain_applied"], 378 ) 379 ) 380 # Handle JSON and 
dictionaries 381 if isinstance(scan_dict["channels"], str): 382 roi_dict = json.loads(scan_dict["roi"])["data"] 383 else: 384 roi_dict = scan_dict["roi"]["data"] 385 for roi in roi_dict: 386 result.roi.append( 387 cls.ROI( 388 origin_x_um=roi["origin_x_um"], 389 origin_y_um=roi["origin_y_um"], 390 width_um=roi["width_um"], 391 height_um=roi["height_um"], 392 tile_rows=roi["tile_rows"], 393 tile_cols=roi["tile_cols"], 394 focus_points=roi["focus_points"], 395 ) 396 ) 397 return result 398 399 @classmethod 400 def load_czi(cls, input_path: str) -> Self: 401 """ 402 Extracts metadata from a .czi file, which is the output of the Axioscan 403 :param input_path: the path to the .czi file 404 :return: a Scan object 405 """ 406 if aicspylibczi is None: 407 raise ModuleNotFoundError( 408 "aicspylibczi library not installed. " 409 "Install csi-images with [imageio] option to resolve." 410 ) 411 412 # Normalize paths 413 input_path = os.path.abspath(input_path) 414 415 with open(input_path, "rb") as file: 416 # Read in metadata as XML elements 417 metadata_xml = aicspylibczi.CziFile(file).meta 418 # Read in shape metadata from binary 419 rois_shape = aicspylibczi.CziFile(file).get_dims_shape() 420 421 # Populate metadata 422 scan = cls() 423 424 scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text 425 if scan.slide_id is not None: 426 scan.slide_id = scan.slide_id.strip().upper() 427 # Map the raw scanner ID (service ID) to our IDs 428 scan.scanner_id = cls.SCANNER_IDS[ 429 metadata_xml.find(".//Microscope/UserDefinedName").text 430 ] 431 432 # Extract start and finish datetimes 433 date = metadata_xml.find(".//Document/CreationDate").text 434 # Strip out sub-second precision 435 date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :] 436 date_as_datetime = datetime.datetime.strptime( 437 date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7] 438 ) 439 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 440 
scan.scan_time_s = round( 441 float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000 442 ) 443 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 444 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 445 446 scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text) 447 scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1]) 448 449 # Get camera and magnifying info 450 scan.camera = ( 451 metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib 452 )["Name"] 453 magnification = metadata_xml.find( 454 ".//Objectives/Objective/NominalMagnification" 455 ) 456 aperture = metadata_xml.find(".//Objectives/Objective/LensNA") 457 scan.objective = f"{magnification.text}x-{aperture.text}" 458 scan.pixel_size_um = ( 459 float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6 460 ) 461 # Round off the pixel size to nanometers; might not be optimal, but this 462 # gets rounded when we send it to the database anyways (to 7 places) 463 scan.pixel_size_um = round(scan.pixel_size_um, 3) 464 465 # Get tile information 466 # Note: X Y is untested, could be flipped. I always forget. Just don't use 467 # non-square frames and we're all good. 
468 selected_detector = metadata_xml.find(".//SelectedDetector").text 469 detectors = metadata_xml.findall(".//Detectors/Detector") 470 for detector in detectors: 471 if detector.attrib["Id"] == selected_detector: 472 tile_info = detector.find(".//Frame") 473 break 474 # Convert to integers 475 tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")] 476 477 scan.tile_x_offset_px = tile_info[0] 478 scan.tile_y_offset_px = tile_info[1] 479 scan.tile_width_px = tile_info[2] 480 scan.tile_height_px = tile_info[3] 481 scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text) 482 483 # Extract channels and create Channel objects from them 484 channel_indices = [] 485 for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"): 486 channel_indices.append(int(channel.attrib["Id"][-1])) 487 intensity_xml = channel.find(".//Intensity") 488 if intensity_xml is None: 489 intensity = 0 490 else: 491 intensity = float(intensity_xml.text[:-2]) * 1e-2 492 scan.channels.append( 493 cls.Channel( 494 name=channel.attrib["Name"].upper(), 495 exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6, 496 intensity=intensity, 497 gain_applied=True, # In Axioscan, we will always use gain = 1 498 ) 499 ) 500 # Make sure the channels are sorted 501 scan.channels = [ 502 channel for _, channel in sorted(zip(channel_indices, scan.channels)) 503 ] 504 # Verify that the shape corresponds to the channels 505 for roi in rois_shape: 506 if roi["C"][1] != len(scan.channels): 507 raise ValueError( 508 f"Number of channels {len(scan.channels)} " 509 f"is not the same as the number of channels in an ROI: " 510 f"{roi['C'][1]}" 511 ) 512 513 # Get the real ROI limits; the metadata is not always correct 514 limits_xml = metadata_xml.findall(".//AllowedScanArea") 515 limits = [ 516 round(float(limits_xml[0].find("Center").text.split(",")[0])), 517 round(float(limits_xml[0].find("Center").text.split(",")[1])), 518 
round(float(limits_xml[0].find("Size").text.split(",")[0])), 519 round(float(limits_xml[0].find("Size").text.split(",")[1])), 520 ] 521 # Convert to top-left and bottom-right 522 limits = [ 523 round(limits[0] - limits[2] / 2), 524 round(limits[1] - limits[3] / 2), 525 round(limits[0] + limits[2] / 2), 526 round(limits[1] + limits[3] / 2), 527 ] 528 529 # Extract ROIs and create ROI objects from them 530 rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion") 531 scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene") 532 if len(rois_xml_metadata) != len(rois_shape): 533 raise ValueError( 534 f"Metadata and binary data from {input_path} " 535 f"do not match in number of ROIs" 536 ) 537 # We need both to determine the number of rows/columns because the XML lies 538 roi_indices = [] 539 for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape): 540 name = roi_xml.attrib["Name"] 541 # Determine the index of this scene 542 scene_index = -1 543 for scene in scenes_xml_metadata: 544 if scene.attrib["Name"] == name: 545 scene_index = int(scene.attrib["Index"]) 546 break 547 if scene_index == -1: 548 raise ValueError(f"ROI {name} does not correspond to any scenes") 549 else: 550 roi_indices.append(scene_index) 551 # Extract other metadata 552 roi_limits = [ 553 round(float(roi_xml.find("CenterPosition").text.split(",")[0])), 554 round(float(roi_xml.find("CenterPosition").text.split(",")[1])), 555 round(float(roi_xml.find("ContourSize").text.split(",")[0])), 556 round(float(roi_xml.find("ContourSize").text.split(",")[1])), 557 ] 558 # Convert to top-left and bottom-right 559 roi_limits = [ 560 round(roi_limits[0] - roi_limits[2] / 2), 561 round(roi_limits[1] - roi_limits[3] / 2), 562 round(roi_limits[0] + roi_limits[2] / 2), 563 round(roi_limits[1] + roi_limits[3] / 2), 564 ] 565 # Bound the ROI to the actual scan limits 566 roi_limits = [ 567 max(roi_limits[0], limits[0]), 568 max(roi_limits[1], limits[1]), 569 min(roi_limits[2], limits[2]), 570 
min(roi_limits[3], limits[3]), 571 ] 572 573 tile_rows = int(roi_xml.find("Rows").text) 574 # Current best way of reliably extracting; <Columns> entry can be wrong 575 if (roi_shape["M"][1] % tile_rows) != 0: 576 raise ValueError( 577 f"The number of tiles {roi_shape['M'][1]} is not " 578 f"divisible by the tile rows {tile_rows}; metadata " 579 f"must be messed up. Thanks Zeiss" 580 ) 581 else: 582 tile_cols = int(roi_shape["M"][1] / tile_rows) 583 # Support points are actually the relevant focus points for this ROI 584 focus_points = [] 585 for focus_point in roi_xml.findall("SupportPoints/SupportPoint"): 586 focus_points.append( 587 [ 588 int(float(focus_point.find("X").text)), 589 int(float(focus_point.find("Y").text)), 590 int(float(focus_point.find("Z").text)), 591 ] 592 ) 593 # Strip all sub-micron precision, it does not matter 594 scan.roi.append( 595 cls.ROI( 596 origin_x_um=roi_limits[0], 597 origin_y_um=roi_limits[1], 598 width_um=roi_limits[2] - roi_limits[0], 599 height_um=roi_limits[3] - roi_limits[1], 600 tile_rows=tile_rows, 601 tile_cols=tile_cols, 602 focus_points=focus_points, 603 ) 604 ) 605 # Sort based on the scene indices 606 scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))] 607 608 return scan 609 610 @classmethod 611 def load_txt(cls, input_path: str) -> Self: 612 """ 613 Loads a Scan object from a .txt file, usually slideinfo.txt, which originates 614 from the BZScanner. 
Some metadata is filled in or adjusted to fit 615 :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt 616 :return: a Scan object 617 """ 618 # Set paths 619 input_path = os.path.abspath(input_path) 620 if os.path.isdir(input_path): 621 input_path = os.path.join( 622 input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER] 623 ) 624 625 # Read in metadata as a dict 626 with open(input_path, "r") as file: 627 metadata_contents = file.read() 628 # Read each line, splitting on the = sign 629 metadata_dict = {} 630 for line in metadata_contents.splitlines(): 631 key, value = line.split("=") 632 metadata_dict[key] = value 633 634 # Populate metadata 635 scan = cls() 636 637 scan.slide_id = metadata_dict["SLIDEID"] 638 scan.slide_id = scan.slide_id.strip().upper() 639 640 scan.path = metadata_dict["SLIDEDIR"] 641 642 # Extract start and finish datetimes 643 date = metadata_dict["DATE"] 644 date_as_datetime = datetime.datetime.strptime( 645 date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER] 646 ) 647 date_as_datetime = date_as_datetime.astimezone( 648 zoneinfo.ZoneInfo("America/Los_Angeles") 649 ) # Hardcoded because BZScanners are here 650 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 651 scan.scan_time_s = 90 * 60 # estimated 90 minutes per scan 652 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 653 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 654 655 # Map the raw scanner ID (service ID) to our IDs 656 scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}' 657 scan.tray_pos = 0 # only one tray_pos in a BZScanner 658 scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1 # 1-indexed 659 660 # Get camera and magnifying info 661 scan.camera = "" 662 magnification = 10 663 aperture = 0 # TODO: find the actual aperture 664 scan.objective = f"{magnification}x-{aperture}" 665 scan.pixel_size_um = 0.591 # Estimated from image metadata 666 667 # Get tile 
information 668 scan.tile_width_px = 1362 # Known from image metadata 669 scan.tile_height_px = 1004 # Known from image metadata 670 scan.tile_x_offset_px = 0 # Already removed 671 scan.tile_y_offset_px = 0 # Already removed 672 scan.tile_overlap_proportion = 0 # Already removed 673 674 # Extract channels and create Channel objects from them 675 if "gain_applied" in metadata_dict: 676 gain_applied = True if metadata_dict["gain_applied"] == "1" else False 677 else: 678 gain_applied = True # Previous policy was always to apply gains 679 for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()): 680 channel_settings = metadata_dict[channel].split(",") 681 if channel_settings[0] == "0": 682 continue 683 scan.channels.append( 684 cls.Channel( 685 name=cls.BZSCANNER_CHANNEL_MAP[channel], 686 exposure_ms=float(channel_settings[1]), 687 intensity=float(channel_settings[2]), 688 gain_applied=gain_applied, 689 ) 690 ) 691 692 # Get focus points 693 focus_points = [] 694 for i in range(33): 695 focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",") 696 if focus_point[0] == "0": 697 break 698 focus_points.append( 699 [ 700 int(float(focus_point[1])), 701 int(float(focus_point[2])), 702 int(float(focus_point[3])), 703 ] 704 ) 705 706 # In the BZScanner, the slide is vertical instead of horizontal 707 # We put in nominal values for the ROI, which is oriented vertically as well 708 tile_rows = 96 709 tile_cols = 24 710 roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols) 711 roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows) 712 origin_x_um = 2500 + round((20000 - roi_width) / 2) 713 origin_y_um = 2500 + round((58000 - roi_height) / 2) 714 scan.roi.append( 715 cls.ROI( 716 origin_x_um=origin_x_um, 717 origin_y_um=origin_y_um, 718 width_um=roi_width, 719 height_um=roi_height, 720 tile_rows=tile_rows, 721 tile_cols=tile_cols, 722 focus_points=focus_points, 723 ) 724 ) 725 return scan 726 727 @classmethod 728 def load_from_folder(cls, 
input_path: str) -> Self: 729 """ 730 Load a Scan object from a folder that contains defaultly-named metadata files, 731 scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist 732 :param input_path: /path/to/folder 733 :return: a Scan object 734 """ 735 input_path = os.path.abspath(input_path) 736 if os.path.isfile( 737 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]) 738 ): 739 return cls.load_yaml(input_path) 740 elif os.path.isfile( 741 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]) 742 ): 743 return cls.load_txt(input_path) 744 else: 745 raise ValueError( 746 f"No scan metadata files " 747 f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, " 748 f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder " 749 f"{input_path}" 750 ) 751 pass 752 753 @classmethod 754 def make_placeholder( 755 cls, 756 slide_id: str, 757 n_tile: int = 2303, 758 n_roi: int = 0, 759 scanner_type: Type = Type.BZSCANNER, 760 ) -> Self: 761 """ 762 Make a placeholder Scan object with only basic required information filled in. 
763 :param slide_id: the slide ID 764 :param n_tile: the number of this tile, which will become the number of 765 tiles in the scan 766 :param n_roi: the number of ROIs in the scan 767 :param scanner_type: the scanner type 768 :return: a Scan object 769 """ 770 # Sanitize inputs here 771 slide_id = str(slide_id).strip().upper() 772 n_tile = int(n_tile) 773 n_roi = int(n_roi) 774 # Generate the object 775 scan = cls() 776 scan.slide_id = slide_id 777 if scanner_type == cls.Type.AXIOSCAN7: 778 scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder" 779 scan.roi = [cls.ROI() for _ in range(n_roi + 1)] 780 scan.roi[0].tile_rows = 17 781 scan.roi[0].tile_cols = (n_tile // 17) + 1 782 scan.channels = [ 783 cls.Channel(name="DAPI", exposure_ms=1.0, intensity=1.0), 784 cls.Channel(name="AF488", exposure_ms=1.0, intensity=1.0), 785 cls.Channel(name="AF555", exposure_ms=1.0, intensity=1.0), 786 cls.Channel(name="AF647", exposure_ms=1.0, intensity=1.0), 787 ] 788 elif scanner_type == cls.Type.BZSCANNER: 789 scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder" 790 scan.roi = [cls.ROI() for _ in range(n_roi + 1)] 791 scan.roi[0].tile_rows = 96 792 scan.roi[0].tile_cols = 24 793 scan.channels = [ 794 cls.Channel(name="DAPI", exposure_ms=1.0, intensity=1.0), 795 cls.Channel(name="AF555", exposure_ms=1.0, intensity=1.0), 796 cls.Channel(name="AF647", exposure_ms=1.0, intensity=1.0), 797 cls.Channel(name="AF488", exposure_ms=1.0, intensity=1.0), 798 ] 799 return scan
Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.
Scans & Scanners
Scans are the highest-level data structure, indicating the key parameters of a scan such as the area scanned, the dimensions of output images, and the channels used. Scans also include metadata such as the scanner ID, slide ID, where the images should be, etc.
Coordinate Frames
There are three levels of coordinate frames in a scan. From inside-out, we have:
- Tile coordinate frame. Events are provided with simple integer (x, y) pixel coordinates, which makes it easy to crop and manipulate images. The origin is at the top-left corner as with normal image axes.
- Scan coordinate frame. Each scanner has its own coordinate frame, which is determined by the scanner's hardware. The scan coordinate frame is used to convert between in-frame pixel coordinates and micrometers. The origin varies by scanner, but generally resides in the top-left of the scanner's movable stage. In cases where there are multiple slide slots, the origin is assumed to be at the top-left of the current slide. The slide may be oriented horizontally, vertically, or upside-down; this all depends on the scanner.
- Slide coordinate frame. This is a set coordinate frame where:
- Slide is active area up.
- Slide is oriented horizontally.
- Slide label area is on the left.
- Origin is at the top-left corner.
Generally speaking, we should always compare data from different scanners by first converting it to the slide coordinate frame. Event positions in the scan and slide coordinate frames are given in micrometers ($\mu$m).
def __init__(
    self,
    slide_id: str = "",
    scanner_id: str = "",
    path: str = "",
    exists: bool = True,
    start_datetime: str = "",
    end_datetime: str = "",
    scan_time_s: int = -1,
    tray_pos: int = -1,
    slide_pos: int = -1,
    camera: str = "",
    objective: str = "",
    pixel_size_um: float = -1.0,
    tile_width_px: int = -1,
    tile_height_px: int = -1,
    tile_x_offset_px: int = -1,
    tile_y_offset_px: int = -1,
    tile_overlap_proportion: float = -1,
    channels: list[Channel] | None = None,
    roi: list[ROI] | None = None,
):
    """
    Create a Scan; every argument is stored as an attribute of the same name.
    Numeric fields default to -1 and string fields to "" when not provided.
    :param slide_id: the slide ID
    :param scanner_id: the scanner ID
    :param path: path associated with the scan
    :param exists: stored as-is in `self.exists`
    :param start_datetime: scan start, as a string
    :param end_datetime: scan end, as a string
    :param scan_time_s: scan duration in seconds
    :param tray_pos: tray position
    :param slide_pos: slide position
    :param camera: camera name
    :param objective: objective description
    :param pixel_size_um: pixel size in micrometers
    :param tile_width_px: tile width in pixels
    :param tile_height_px: tile height in pixels
    :param tile_x_offset_px: tile x offset in pixels
    :param tile_y_offset_px: tile y offset in pixels
    :param tile_overlap_proportion: overlap between tiles as a proportion of
    the tile size; a fractional value, hence annotated as float
    :param channels: list of Channel objects; None gives a new empty list
    :param roi: list of ROI objects; None gives a new empty list
    """
    # None stands in for "empty" so that instances never share a default list
    if roi is None:
        roi = []
    if channels is None:
        channels = []
    # NOTE(review): keep the assignment order stable; the class dumps itself with
    # yaml.dump(..., sort_keys=False), so presumably this order is what ends up
    # in the YAML output
    self.slide_id = slide_id
    self.scanner_id = scanner_id
    self.path = path
    self.exists = exists
    self.start_datetime = start_datetime
    self.end_datetime = end_datetime
    self.scan_time_s = scan_time_s
    self.tray_pos = tray_pos
    self.slide_pos = slide_pos
    self.camera = camera
    self.objective = objective
    self.pixel_size_um = pixel_size_um
    self.tile_width_px = tile_width_px
    self.tile_height_px = tile_height_px
    self.tile_x_offset_px = tile_x_offset_px
    self.tile_y_offset_px = tile_y_offset_px
    self.tile_overlap_proportion = tile_overlap_proportion
    self.channels = channels
    self.roi = roi
Axioscan 7 scanner IDs (service number), mapped to our scanner IDs
def has_same_profile(self, other) -> bool:
    """
    Check whether two scans were acquired with the same profile: same camera,
    objective, pixel size, tile geometry, overlap, channels, and ROI layout.
    ROIs are compared pairwise, in order, through each ROI's `similar` method.
    :param other: the object to compare against; must expose the same attributes.
    :return: True if every compared field matches, False otherwise.
    """
    return (
        self.camera == other.camera
        and self.objective == other.objective
        and self.pixel_size_um == other.pixel_size_um
        and self.tile_width_px == other.tile_width_px
        and self.tile_height_px == other.tile_height_px
        and self.tile_x_offset_px == other.tile_x_offset_px
        and self.tile_y_offset_px == other.tile_y_offset_px
        and self.tile_overlap_proportion == other.tile_overlap_proportion
        and self.channels == other.channels
        # zip() stops at the shorter list, so the ROI counts must be compared
        # explicitly or a scan with fewer ROIs would match by truncation
        and len(self.roi) == len(other.roi)
        and all(a.similar(b) for a, b in zip(self.roi, other.roi))
    )
def get_channel_names(self) -> list[str]:
    """
    List the name of every channel, in the same order as `self.channels`.
    :return: a list of channel names.
    """
    names = []
    for entry in self.channels:
        names.append(entry.name)
    return names
Get the channel names in the scan's channel order.
Returns
a list of channel names.
229 def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]: 230 """ 231 Given a list of channel names, return the corresponding indices in the scan's 232 channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the 233 actual AlexaFluor names (AF555, AF647, AF488). 234 If a list entry is not found or None, it will return -1 for that entry. 235 :param channel_names: a list of channel names. 236 :return: a list of channel indices. 237 """ 238 # Get the scan's channel name list 239 scan_channel_names = self.get_channel_names() 240 241 channel_indices = [] 242 for name in channel_names: 243 # Convert any BZScanner channel names to the actual channel names 244 if name in self.BZSCANNER_CHANNEL_MAP: 245 name = self.BZSCANNER_CHANNEL_MAP[name] 246 247 # Append the corresponding index if possible 248 if name in scan_channel_names: 249 channel_indices.append(scan_channel_names.index(name)) 250 else: 251 channel_indices.append(-1) 252 return channel_indices
Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is not found or None, it will return -1 for that entry.
Parameters
- channel_names: a list of channel names.
Returns
a list of channel indices.
def get_image_size(self) -> tuple[int, int]:
    """
    Compute the tile size in pixels once the overlapping margin is removed.
    The overlap along each axis is the tile dimension multiplied by
    `tile_overlap_proportion`, rounded down.
    :return: a tuple of (real_height, real_width) for easy comparison to arrays
    """
    proportion = self.tile_overlap_proportion
    real_height = self.tile_height_px - math.floor(self.tile_height_px * proportion)
    real_width = self.tile_width_px - math.floor(self.tile_width_px * proportion)
    return real_height, real_width
Get the real size of the image in pixels after subtracting overlap.
Returns
a tuple of (real_height, real_width) for easy comparison to arrays
def save_yaml(self, output_path: str):
    """
    Dump this Scan to a .yaml file, creating any missing folders on the way.
    A path ending in ".yaml" is used as the file itself; any other path is
    treated as a folder that receives the default metadata file name.
    :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
    :return: nothing; will raise an error on failure
    """
    target = os.path.abspath(output_path)
    _, extension = os.path.splitext(target)
    if extension != ".yaml":
        # Folder given: create it, then place the standard file name inside
        os.makedirs(target, exist_ok=True)
        target = os.path.join(target, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7])
    else:
        # File given: only its parent folder has to exist
        os.makedirs(os.path.dirname(target), exist_ok=True)

    # Write the object out, keeping attribute order rather than sorting keys
    with open(target, "w") as handle:
        yaml.dump(self, stream=handle, sort_keys=False)
Write the Scan object to a .yaml file.
Parameters
- output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns
nothing; will raise an error on failure
@classmethod
def load_yaml(cls, input_path: str) -> Self:
    """
    Read a Scan object back from a .yaml file.
    When given a folder, the default metadata file name inside it is used.
    :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
    :return: a Scan object
    """
    source = os.path.abspath(input_path)
    if os.path.isdir(source):
        source = os.path.join(source, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7])
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so this
    # should only be pointed at trusted files; the result is returned as loaded,
    # without checking its type
    with open(source, "r") as handle:
        return yaml.load(handle, Loader=yaml.Loader)
Load a Scan object from a .yaml file.
Parameters
- input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns
a Scan object
def to_dict(self) -> dict:
    """
    Convert the Scan object to a dictionary with keys matching database columns
    and values matching database entries. Channels and ROIs are each serialized
    to a JSON string of the form {"data": [...]}, with every Channel/ROI object
    represented by its attribute dictionary.
    :return: a dictionary
    """

    def as_json(items) -> str:
        # Serialize the wrapped list in a single step so the output is always
        # well-formed, consistently indented JSON
        return json.dumps({"data": items}, default=lambda x: x.__dict__, indent=2)

    # Keys are named the same as database columns
    return {
        "scanner_id": self.scanner_id,
        "slide_id": self.slide_id,
        "exists": self.exists,
        "path": self.path,
        "start_datetime": self.start_datetime,
        "end_datetime": self.end_datetime,
        "scan_time_s": self.scan_time_s,
        "tray_pos": self.tray_pos,
        "slide_pos": self.slide_pos,
        "tile_width": self.tile_width_px,
        "tile_height": self.tile_height_px,
        "tile_x_offset": self.tile_x_offset_px,
        "tile_y_offset": self.tile_y_offset_px,
        "tile_overlap": self.tile_overlap_proportion,
        "camera": self.camera,
        "objective": self.objective,
        "pixel_size": self.pixel_size_um,
        "channels": as_json(self.channels),
        "roi": as_json(self.roi),
    }
Convert the Scan object to a dictionary with keys matching database columns and values matching database entries
Returns
a dictionary
340 @classmethod 341 def from_dict(cls, scan_dict) -> Self: 342 """ 343 Convert a dictionary from to_dict() or the database to a Scan object 344 :param scan_dict: a dictionary 345 :return: a Scan object 346 """ 347 result = cls( 348 scanner_id=scan_dict["scanner_id"], 349 slide_id=scan_dict["slide_id"], 350 path=scan_dict["path"], 351 exists=scan_dict["exists"], 352 start_datetime=scan_dict["start_datetime"], 353 end_datetime=scan_dict["end_datetime"], 354 scan_time_s=scan_dict["scan_time_s"], 355 tray_pos=scan_dict["tray_pos"], 356 slide_pos=scan_dict["slide_pos"], 357 camera=scan_dict["camera"], 358 objective=scan_dict["objective"], 359 pixel_size_um=scan_dict["pixel_size"], 360 tile_width_px=scan_dict["tile_width"], 361 tile_height_px=scan_dict["tile_height"], 362 tile_x_offset_px=scan_dict["tile_x_offset"], 363 tile_y_offset_px=scan_dict["tile_y_offset"], 364 tile_overlap_proportion=scan_dict["tile_overlap"], 365 ) 366 # Handle JSON and dictionaries 367 if isinstance(scan_dict["channels"], str): 368 channels_dict = json.loads(scan_dict["channels"])["data"] 369 else: 370 channels_dict = scan_dict["channels"]["data"] 371 for channel in channels_dict: 372 result.channels.append( 373 cls.Channel( 374 name=channel["name"], 375 exposure_ms=channel["exposure_ms"], 376 intensity=channel["intensity"], 377 gain_applied=channel["gain_applied"], 378 ) 379 ) 380 # Handle JSON and dictionaries 381 if isinstance(scan_dict["channels"], str): 382 roi_dict = json.loads(scan_dict["roi"])["data"] 383 else: 384 roi_dict = scan_dict["roi"]["data"] 385 for roi in roi_dict: 386 result.roi.append( 387 cls.ROI( 388 origin_x_um=roi["origin_x_um"], 389 origin_y_um=roi["origin_y_um"], 390 width_um=roi["width_um"], 391 height_um=roi["height_um"], 392 tile_rows=roi["tile_rows"], 393 tile_cols=roi["tile_cols"], 394 focus_points=roi["focus_points"], 395 ) 396 ) 397 return result
Convert a dictionary from to_dict() or the database to a Scan object
Parameters
- scan_dict: a dictionary
Returns
a Scan object
@classmethod
def load_czi(cls, input_path: str) -> Self:
    """
    Extracts metadata from a .czi file, which is the output of the Axioscan
    :param input_path: the path to the .czi file
    :return: a Scan object
    :raises ModuleNotFoundError: if the optional aicspylibczi dependency is missing
    :raises KeyError: if the microscope's name is not a key of SCANNER_IDS
    :raises ValueError: if no detector matches the selected detector, or if the
    XML metadata and the binary shape information disagree (channel count,
    ROI count, ROI/scene names, or tile count not divisible by tile rows)
    """
    if aicspylibczi is None:
        raise ModuleNotFoundError(
            "aicspylibczi library not installed. "
            "Install csi-images with [imageio] option to resolve."
        )

    # Normalize paths
    input_path = os.path.abspath(input_path)

    with open(input_path, "rb") as file:
        # Parse the file once and reuse the reader for both queries
        czi_file = aicspylibczi.CziFile(file)
        # Read in metadata as XML elements
        metadata_xml = czi_file.meta
        # Read in shape metadata from binary
        rois_shape = czi_file.get_dims_shape()

    # Populate metadata
    scan = cls()

    scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
    if scan.slide_id is not None:
        scan.slide_id = scan.slide_id.strip().upper()
    # Map the raw scanner ID (service ID) to our IDs
    scan.scanner_id = cls.SCANNER_IDS[
        metadata_xml.find(".//Microscope/UserDefinedName").text
    ]

    # Extract start and finish datetimes
    date = metadata_xml.find(".//Document/CreationDate").text
    # Strip out sub-second precision
    # NOTE(review): assumes the timestamp has a fractional-seconds part and a
    # numeric UTC offset (+hh:mm / -hh:mm) -- confirm against real files
    date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :]
    date_as_datetime = datetime.datetime.strptime(
        date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
    )
    scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
    # AcquisitionDuration is divided by 1000 to get seconds
    scan.scan_time_s = round(
        float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
    )
    date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
    scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

    scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
    # Only the last character of the position string is used
    scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

    # Get camera and magnifying info
    scan.camera = (
        metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
    )["Name"]
    magnification = metadata_xml.find(
        ".//Objectives/Objective/NominalMagnification"
    )
    aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
    scan.objective = f"{magnification.text}x-{aperture.text}"
    scan.pixel_size_um = (
        float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
    )
    # Round off the pixel size to nanometers; might not be optimal, but this
    # gets rounded when we send it to the database anyways (to 7 places)
    scan.pixel_size_um = round(scan.pixel_size_um, 3)

    # Get tile information
    # Note: X Y is untested, could be flipped. I always forget. Just don't use
    # non-square frames and we're all good.
    selected_detector = metadata_xml.find(".//SelectedDetector").text
    for detector in metadata_xml.findall(".//Detectors/Detector"):
        if detector.attrib["Id"] == selected_detector:
            tile_info = detector.find(".//Frame")
            break
    else:
        # Without this, a missing detector surfaced as an UnboundLocalError
        raise ValueError(
            f"Selected detector {selected_detector} was not found "
            f"among the detectors in {input_path}"
        )
    # Convert to integers
    tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

    scan.tile_x_offset_px = tile_info[0]
    scan.tile_y_offset_px = tile_info[1]
    scan.tile_width_px = tile_info[2]
    scan.tile_height_px = tile_info[3]
    scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

    # Extract channels and create Channel objects from them
    channel_indices = []
    for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
        # NOTE(review): only the last character of the Id is used as the index,
        # so this presumably breaks down with 10+ channels -- confirm
        channel_indices.append(int(channel.attrib["Id"][-1]))
        intensity_xml = channel.find(".//Intensity")
        if intensity_xml is None:
            intensity = 0
        else:
            # Drops the two trailing characters (presumably a " %" suffix)
            # and scales the percentage to a proportion
            intensity = float(intensity_xml.text[:-2]) * 1e-2
        scan.channels.append(
            cls.Channel(
                name=channel.attrib["Name"].upper(),
                exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                intensity=intensity,
                gain_applied=True,  # In Axioscan, we will always use gain = 1
            )
        )
    # Make sure the channels are sorted; sort on the index alone so that ties
    # never fall through to comparing Channel objects (which are not orderable)
    scan.channels = [
        channel
        for _, channel in sorted(
            zip(channel_indices, scan.channels), key=lambda pair: pair[0]
        )
    ]
    # Verify that the shape corresponds to the channels
    for roi in rois_shape:
        if roi["C"][1] != len(scan.channels):
            raise ValueError(
                f"Number of channels {len(scan.channels)} "
                f"is not the same as the number of channels in an ROI: "
                f"{roi['C'][1]}"
            )

    # Get the real ROI limits; the metadata is not always correct
    limits_xml = metadata_xml.findall(".//AllowedScanArea")
    limits_center = limits_xml[0].find("Center").text.split(",")
    limits_size = limits_xml[0].find("Size").text.split(",")
    limits = [
        round(float(limits_center[0])),
        round(float(limits_center[1])),
        round(float(limits_size[0])),
        round(float(limits_size[1])),
    ]
    # Convert to top-left and bottom-right
    limits = [
        round(limits[0] - limits[2] / 2),
        round(limits[1] - limits[3] / 2),
        round(limits[0] + limits[2] / 2),
        round(limits[1] + limits[3] / 2),
    ]

    # Extract ROIs and create ROI objects from them
    rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
    scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
    if len(rois_xml_metadata) != len(rois_shape):
        raise ValueError(
            f"Metadata and binary data from {input_path} "
            f"do not match in number of ROIs"
        )
    # We need both to determine the number of rows/columns because the XML lies
    roi_indices = []
    for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
        name = roi_xml.attrib["Name"]
        # Determine the index of this scene
        scene_index = -1
        for scene in scenes_xml_metadata:
            if scene.attrib["Name"] == name:
                scene_index = int(scene.attrib["Index"])
                break
        if scene_index == -1:
            raise ValueError(f"ROI {name} does not correspond to any scenes")
        roi_indices.append(scene_index)
        # Extract other metadata
        roi_center = roi_xml.find("CenterPosition").text.split(",")
        roi_size = roi_xml.find("ContourSize").text.split(",")
        roi_limits = [
            round(float(roi_center[0])),
            round(float(roi_center[1])),
            round(float(roi_size[0])),
            round(float(roi_size[1])),
        ]
        # Convert to top-left and bottom-right
        roi_limits = [
            round(roi_limits[0] - roi_limits[2] / 2),
            round(roi_limits[1] - roi_limits[3] / 2),
            round(roi_limits[0] + roi_limits[2] / 2),
            round(roi_limits[1] + roi_limits[3] / 2),
        ]
        # Bound the ROI to the actual scan limits
        roi_limits = [
            max(roi_limits[0], limits[0]),
            max(roi_limits[1], limits[1]),
            min(roi_limits[2], limits[2]),
            min(roi_limits[3], limits[3]),
        ]

        tile_rows = int(roi_xml.find("Rows").text)
        # Current best way of reliably extracting; <Columns> entry can be wrong
        if (roi_shape["M"][1] % tile_rows) != 0:
            raise ValueError(
                f"The number of tiles {roi_shape['M'][1]} is not "
                f"divisible by the tile rows {tile_rows}; metadata "
                f"must be messed up. Thanks Zeiss"
            )
        tile_cols = int(roi_shape["M"][1] // tile_rows)
        # Support points are actually the relevant focus points for this ROI
        # Strip all sub-micron precision, it does not matter
        focus_points = [
            [
                int(float(focus_point.find("X").text)),
                int(float(focus_point.find("Y").text)),
                int(float(focus_point.find("Z").text)),
            ]
            for focus_point in roi_xml.findall("SupportPoints/SupportPoint")
        ]
        scan.roi.append(
            cls.ROI(
                origin_x_um=roi_limits[0],
                origin_y_um=roi_limits[1],
                width_um=roi_limits[2] - roi_limits[0],
                height_um=roi_limits[3] - roi_limits[1],
                tile_rows=tile_rows,
                tile_cols=tile_cols,
                focus_points=focus_points,
            )
        )
    # Sort based on the scene indices (index only; ROI objects are not orderable)
    scan.roi = [
        roi
        for _, roi in sorted(zip(roi_indices, scan.roi), key=lambda pair: pair[0])
    ]

    return scan
Extracts metadata from a .czi file, which is the output of the Axioscan
Parameters
- input_path: the path to the .czi file
Returns
a Scan object
610 @classmethod 611 def load_txt(cls, input_path: str) -> Self: 612 """ 613 Loads a Scan object from a .txt file, usually slideinfo.txt, which originates 614 from the BZScanner. Some metadata is filled in or adjusted to fit 615 :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt 616 :return: a Scan object 617 """ 618 # Set paths 619 input_path = os.path.abspath(input_path) 620 if os.path.isdir(input_path): 621 input_path = os.path.join( 622 input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER] 623 ) 624 625 # Read in metadata as a dict 626 with open(input_path, "r") as file: 627 metadata_contents = file.read() 628 # Read each line, splitting on the = sign 629 metadata_dict = {} 630 for line in metadata_contents.splitlines(): 631 key, value = line.split("=") 632 metadata_dict[key] = value 633 634 # Populate metadata 635 scan = cls() 636 637 scan.slide_id = metadata_dict["SLIDEID"] 638 scan.slide_id = scan.slide_id.strip().upper() 639 640 scan.path = metadata_dict["SLIDEDIR"] 641 642 # Extract start and finish datetimes 643 date = metadata_dict["DATE"] 644 date_as_datetime = datetime.datetime.strptime( 645 date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER] 646 ) 647 date_as_datetime = date_as_datetime.astimezone( 648 zoneinfo.ZoneInfo("America/Los_Angeles") 649 ) # Hardcoded because BZScanners are here 650 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 651 scan.scan_time_s = 90 * 60 # estimated 90 minutes per scan 652 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 653 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 654 655 # Map the raw scanner ID (service ID) to our IDs 656 scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}' 657 scan.tray_pos = 0 # only one tray_pos in a BZScanner 658 scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1 # 1-indexed 659 660 # Get camera and magnifying info 661 scan.camera = "" 662 magnification = 10 663 
aperture = 0 # TODO: find the actual aperture 664 scan.objective = f"{magnification}x-{aperture}" 665 scan.pixel_size_um = 0.591 # Estimated from image metadata 666 667 # Get tile information 668 scan.tile_width_px = 1362 # Known from image metadata 669 scan.tile_height_px = 1004 # Known from image metadata 670 scan.tile_x_offset_px = 0 # Already removed 671 scan.tile_y_offset_px = 0 # Already removed 672 scan.tile_overlap_proportion = 0 # Already removed 673 674 # Extract channels and create Channel objects from them 675 if "gain_applied" in metadata_dict: 676 gain_applied = True if metadata_dict["gain_applied"] == "1" else False 677 else: 678 gain_applied = True # Previous policy was always to apply gains 679 for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()): 680 channel_settings = metadata_dict[channel].split(",") 681 if channel_settings[0] == "0": 682 continue 683 scan.channels.append( 684 cls.Channel( 685 name=cls.BZSCANNER_CHANNEL_MAP[channel], 686 exposure_ms=float(channel_settings[1]), 687 intensity=float(channel_settings[2]), 688 gain_applied=gain_applied, 689 ) 690 ) 691 692 # Get focus points 693 focus_points = [] 694 for i in range(33): 695 focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",") 696 if focus_point[0] == "0": 697 break 698 focus_points.append( 699 [ 700 int(float(focus_point[1])), 701 int(float(focus_point[2])), 702 int(float(focus_point[3])), 703 ] 704 ) 705 706 # In the BZScanner, the slide is vertical instead of horizontal 707 # We put in nominal values for the ROI, which is oriented vertically as well 708 tile_rows = 96 709 tile_cols = 24 710 roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols) 711 roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows) 712 origin_x_um = 2500 + round((20000 - roi_width) / 2) 713 origin_y_um = 2500 + round((58000 - roi_height) / 2) 714 scan.roi.append( 715 cls.ROI( 716 origin_x_um=origin_x_um, 717 origin_y_um=origin_y_um, 718 width_um=roi_width, 719 
height_um=roi_height, 720 tile_rows=tile_rows, 721 tile_cols=tile_cols, 722 focus_points=focus_points, 723 ) 724 ) 725 return scan
Loads a Scan object from a .txt file, usually slideinfo.txt, which originates from the BZScanner. Some metadata is filled in with nominal values or adjusted to fit the Scan format.
Parameters
- input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
Returns
a Scan object
727 @classmethod 728 def load_from_folder(cls, input_path: str) -> Self: 729 """ 730 Load a Scan object from a folder that contains defaultly-named metadata files, 731 scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist 732 :param input_path: /path/to/folder 733 :return: a Scan object 734 """ 735 input_path = os.path.abspath(input_path) 736 if os.path.isfile( 737 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]) 738 ): 739 return cls.load_yaml(input_path) 740 elif os.path.isfile( 741 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]) 742 ): 743 return cls.load_txt(input_path) 744 else: 745 raise ValueError( 746 f"No scan metadata files " 747 f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, " 748 f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder " 749 f"{input_path}" 750 ) 751 pass
Load a Scan object from a folder that contains a metadata file with its default name, scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.
Parameters
- input_path: /path/to/folder
Returns
a Scan object
@classmethod
def make_placeholder(
    cls,
    slide_id: str,
    n_tile: int = 2303,
    n_roi: int = 0,
    scanner_type: Type = Type.BZSCANNER,
) -> Self:
    """
    Build a stand-in Scan object carrying only the bare minimum of information.
    :param slide_id: the slide ID; it is stringified, stripped and upper-cased
    :param n_tile: a tile number; for Axioscan 7 placeholders the first ROI gets
    (n_tile // 17) + 1 tile columns, for BZScanner placeholders it is unused
    :param n_roi: an ROI number; n_roi + 1 default ROIs are created
    :param scanner_type: the scanner type; any type other than AXIOSCAN7 or
    BZSCANNER leaves everything except the slide ID at its defaults
    :return: a Scan object
    """
    # Sanitize inputs before anything is constructed
    clean_id = str(slide_id).strip().upper()
    tile_number = int(n_tile)
    roi_count = int(n_roi) + 1

    placeholder = cls()
    placeholder.slide_id = clean_id

    # Pick the scanner-specific grid and channel order
    if scanner_type == cls.Type.AXIOSCAN7:
        member = cls.Type.AXIOSCAN7
        rows, cols = 17, (tile_number // 17) + 1
        channel_names = ("DAPI", "AF488", "AF555", "AF647")
    elif scanner_type == cls.Type.BZSCANNER:
        member = cls.Type.BZSCANNER
        rows, cols = 96, 24
        channel_names = ("DAPI", "AF555", "AF647", "AF488")
    else:
        return placeholder

    placeholder.scanner_id = f"{member.value}_placeholder"
    placeholder.roi = [cls.ROI() for _ in range(roi_count)]
    # Only the first ROI receives a tile grid
    first_roi = placeholder.roi[0]
    first_roi.tile_rows = rows
    first_roi.tile_cols = cols
    placeholder.channels = [
        cls.Channel(name=channel_name, exposure_ms=1.0, intensity=1.0)
        for channel_name in channel_names
    ]
    return placeholder
Make a placeholder Scan object with only basic required information filled in.
Parameters
- slide_id: the slide ID
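- n_tile: a tile number that the placeholder scan should be able to contain; for Axioscan 7 placeholders the number of tile columns is derived from it ((n_tile // 17) + 1 columns of 17 rows), while BZScanner placeholders always use a fixed 96 x 24 grid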
- n_roi: an ROI number that the placeholder scan should be able to contain; n_roi + 1 placeholder ROIs are created
- scanner_type: the scanner type
Returns
a Scan object
class Channel(yaml.YAMLObject):
    """
    Describes one imaging channel; a scan usually has several (2-5) of them.
    Contains four fields:
    - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
    - exposure_ms: the exposure time to capture a frame in milliseconds
    - intensity: the light intensity used OR the gain applied to the channel
    - gain_applied: whether gain was applied to the channel (True by default)
    """

    yaml_tag = "csi_images.csi_scans.Scan.Channel"

    def __init__(
        self,
        name: str = "",
        exposure_ms: float = -1.0,
        intensity: float = -1.0,
        gain_applied: bool = True,
    ):
        # NOTE(review): the YAML dump presumably lists attributes in assignment
        # order (sort_keys=False), so the order below is kept on purpose
        self.name = name
        self.exposure_ms = exposure_ms
        self.intensity = intensity
        self.gain_applied = gain_applied

    def __repr__(self):
        # The textual form of a channel is its YAML dump, keys left unsorted
        return yaml.dump(self, sort_keys=False)

    def __eq__(self, other):
        # Two objects are equal when their textual representations match
        return repr(self) == repr(other)
Class that comprises a channel; we usually have multiple (2-5) per scan. Contains four fields:
- name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
- exposure_ms: the exposure time to capture a frame in milliseconds
- intensity: the light intensity used OR the gain applied to the channel
- gain_applied: whether gain was applied to the channel (True by default)
class ROI(yaml.YAMLObject):
    """
    Describes one region of interest (ROI); a scan usually has a single ROI,
    but may have more.
    """

    yaml_tag = "csi_images.csi_scans.Scan.ROI"

    def __init__(
        self,
        origin_x_um: int = -1,
        origin_y_um: int = -1,
        width_um: int = -1,
        height_um: int = -1,
        tile_rows: int = -1,
        tile_cols: int = -1,
        focus_points=None,
    ):
        # NOTE(review): the YAML dump presumably lists attributes in assignment
        # order (sort_keys=False), so the order below is kept on purpose
        self.origin_x_um = origin_x_um
        self.origin_y_um = origin_y_um
        self.width_um = width_um
        self.height_um = height_um
        self.tile_rows = tile_rows
        self.tile_cols = tile_cols
        # A fresh list per instance stands in for a missing focus point list
        self.focus_points = [] if focus_points is None else focus_points

    def __repr__(self):
        # The textual form of an ROI is its YAML dump, keys left unsorted
        return yaml.dump(self, sort_keys=False)

    def __eq__(self, other):
        # Two objects are equal when their textual representations match
        return repr(self) == repr(other)

    def similar(self, other):
        """
        Compare the geometry of two ROIs (origin, size and tile grid); the focus
        points are not part of the comparison.
        :param other: the ROI to compare against
        :return: True if all six geometry fields are equal
        """
        geometry_fields = (
            "origin_y_um",
            "origin_x_um",
            "width_um",
            "height_um",
            "tile_rows",
            "tile_cols",
        )
        return all(
            getattr(self, field) == getattr(other, field)
            for field in geometry_fields
        )
Class that comprises an ROI; we usually have 1, but may have more in a scan.
def __init__(
    self,
    origin_x_um: int = -1,
    origin_y_um: int = -1,
    width_um: int = -1,
    height_um: int = -1,
    tile_rows: int = -1,
    tile_cols: int = -1,
    focus_points=None,
):
    """
    Store the ROI's origin, size, tile grid and focus points on the instance.
    Every numeric field defaults to -1; a missing focus point list becomes a
    new empty list.
    """
    self.origin_x_um = origin_x_um
    self.origin_y_um = origin_y_um
    self.width_um = width_um
    self.height_um = height_um
    self.tile_rows = tile_rows
    self.tile_cols = tile_cols
    # A fresh list per instance, never a shared default
    self.focus_points = [] if focus_points is None else focus_points
def similar(self, other):
    """
    Compare the geometry of two ROIs (origin, size and tile grid); the focus
    points are not part of the comparison.
    :param other: the ROI to compare against
    :return: True if all six geometry fields are equal
    """
    geometry_fields = (
        "origin_y_um",
        "origin_x_um",
        "width_um",
        "height_um",
        "tile_rows",
        "tile_cols",
    )
    return all(
        getattr(self, field) == getattr(other, field) for field in geometry_fields
    )