csi_images.csi_scans
Contains the Scan class, which holds important metadata from a scan. This metadata can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan object can also be loaded from a .czi file or a .txt file.
"""
Contains the Scan class, which holds important metadata from a scan. This metadata
can be exported to a .yaml file, which can be loaded back into a Scan object. The Scan
object can also be loaded from a .czi file or a .txt file.
"""

import os
import re
import math
import enum
import datetime
import zoneinfo
from typing import Self, Iterable

import yaml
import json

try:
    import aicspylibczi
except ImportError:
    aicspylibczi = None


class Scan(yaml.YAMLObject):
    """
    Class that composes a whole scan's metadata. Contains some universal data,
    plus lists for channels and ROIs.

    Equality between scans (and between channels and ROIs) is defined by their
    YAML representation; see ``__repr__`` and ``__eq__``.

    .. include:: ../docs/coordinate_systems.md
    """

    yaml_tag = "csi_utils.scans.Scan"

    class Type(enum.Enum):
        """Scanner families that the loaders in this class know about."""

        BZSCANNER = "bzscanner"
        AXIOSCAN7 = "axioscan7"

    SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"}
    """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs"""

    # Default metadata file name for each scanner type
    METADATA_FILE_NAME = {
        Type.AXIOSCAN7: "scan.yaml",
        Type.BZSCANNER: "slideinfo.txt",
    }
    # Format used for every datetime string stored on a Scan object
    STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
    # Format of the datetime strings found in each scanner's raw metadata
    DATETIME_FORMAT = {
        Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT,
        Type.BZSCANNER: "%a %b %d %H:%M:%S %Y",
    }

    # Actual channel names, from the BZScanner's default order
    BZSCANNER_CHANNEL_MAP = {
        "DAPI": "DAPI",
        "TRITC": "AF555",
        "CY5": "AF647",
        "BF": "BRIGHT",
        "FITC": "AF488",
    }

    class Channel(yaml.YAMLObject):
        """
        Class that comprises a channel; we usually have multiple (2-5) per scan.
        Contains four fields:
        - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
        - exposure_ms: the exposure time to capture a frame in milliseconds
        - intensity: the light intensity used OR the gain applied to the channel
        - gain_applied: flag stored next to the intensity; set by the loaders
        """

        yaml_tag = "csi_utils.csi_scans.Scan.Channel"

        def __init__(
            self,
            name: str = "",
            exposure_ms: float = -1.0,
            intensity: float = -1.0,
            gain_applied: bool = False,
        ):
            self.name = name
            self.exposure_ms = exposure_ms
            self.intensity = intensity
            self.gain_applied = gain_applied

        def __repr__(self):
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            return self.__repr__() == other.__repr__()

        def __hash__(self):
            # Defining __eq__ alone sets __hash__ to None, which made any Scan
            # holding channels unhashable. Hash the same text that __eq__ compares.
            return hash(self.__repr__())

    class ROI(yaml.YAMLObject):
        """
        Class that comprises an ROI; we usually have 1, but may have more in a scan.
        Holds the ROI origin and size in micrometers, the tile grid dimensions, and
        a list of focus points (each an [x, y, z] list as built by the loaders).
        """

        yaml_tag = "csi_utils.csi_scans.Scan.ROI"

        def __init__(
            self,
            origin_x_um: int = -1,
            origin_y_um: int = -1,
            width_um: int = -1,
            height_um: int = -1,
            tile_rows: int = -1,
            tile_cols: int = -1,
            focus_points=None,
        ):
            # Avoid sharing one default list between instances
            if focus_points is None:
                focus_points = []
            self.origin_x_um = origin_x_um
            self.origin_y_um = origin_y_um
            self.width_um = width_um
            self.height_um = height_um
            self.tile_rows = tile_rows
            self.tile_cols = tile_cols
            self.focus_points = focus_points

        def __repr__(self):
            return yaml.dump(self, sort_keys=False)

        def __eq__(self, other):
            return self.__repr__() == other.__repr__()

        def __hash__(self):
            # Kept consistent with __eq__; see Channel.__hash__ for the rationale.
            return hash(self.__repr__())

        def similar(self, other):
            """
            Compare two ROIs on position, size, and tile grid, ignoring focus points.
            :param other: another ROI
            :return: True if all compared fields are equal
            """
            return (
                self.origin_y_um == other.origin_y_um
                and self.origin_x_um == other.origin_x_um
                and self.width_um == other.width_um
                and self.height_um == other.height_um
                and self.tile_rows == other.tile_rows
                and self.tile_cols == other.tile_cols
            )

    def __init__(
        self,
        slide_id: str = "",
        exists: bool = True,
        path: str = "",
        start_datetime: str = "",
        end_datetime: str = "",
        scan_time_s: int = -1,
        scanner_id: str = "",
        tray_pos: int = -1,
        slide_pos: int = -1,
        camera: str = "",
        objective: str = "",
        pixel_size_um: float = -1.0,
        tile_width_px: int = -1,
        tile_height_px: int = -1,
        tile_x_offset_px: int = -1,
        tile_y_offset_px: int = -1,
        tile_overlap_proportion: float = -1,
        channels: list[Channel] | None = None,
        roi: list[ROI] | None = None,
    ):
        # Avoid sharing default lists between instances
        if roi is None:
            roi = []
        if channels is None:
            channels = []
        self.slide_id = slide_id
        self.exists = exists
        self.path = path
        self.start_datetime = start_datetime
        self.end_datetime = end_datetime
        self.scan_time_s = scan_time_s
        self.scanner_id = scanner_id
        self.tray_pos = tray_pos
        self.slide_pos = slide_pos
        self.camera = camera
        self.objective = objective
        self.pixel_size_um = pixel_size_um
        self.tile_width_px = tile_width_px
        self.tile_height_px = tile_height_px
        self.tile_x_offset_px = tile_x_offset_px
        self.tile_y_offset_px = tile_y_offset_px
        self.tile_overlap_proportion = tile_overlap_proportion
        self.channels = channels
        self.roi = roi

    def __key(self):
        # Tuple of the fields that feed __hash__; channels and ROIs are hashable
        # through their own __hash__ methods.
        return (
            self.slide_id,
            self.exists,
            self.path,
            self.start_datetime,
            self.end_datetime,
            self.scan_time_s,
            self.scanner_id,
            self.tray_pos,
            self.slide_pos,
            self.camera,
            self.objective,
            self.pixel_size_um,
            self.tile_width_px,
            self.tile_height_px,
            self.tile_x_offset_px,
            self.tile_y_offset_px,
            self.tile_overlap_proportion,
            tuple(self.channels),
            tuple(self.roi),
        )

    def __hash__(self):
        return hash(self.__key())

    def __repr__(self):
        return yaml.dump(self, sort_keys=False)

    def __eq__(self, other):
        return self.__repr__() == other.__repr__()

    def has_same_profile(self, other):
        """
        Check whether two scans share the same acquisition profile: camera,
        objective, pixel size, tile geometry, channels, and similar ROIs.
        Note: ROIs are compared pairwise with zip(), so if the scans have different
        numbers of ROIs, the extra ROIs are not compared.
        :param other: another Scan object
        :return: True if the profiles match
        """
        return (
            self.camera == other.camera
            and self.objective == other.objective
            and self.pixel_size_um == other.pixel_size_um
            and self.tile_width_px == other.tile_width_px
            and self.tile_height_px == other.tile_height_px
            and self.tile_x_offset_px == other.tile_x_offset_px
            and self.tile_y_offset_px == other.tile_y_offset_px
            and self.tile_overlap_proportion == other.tile_overlap_proportion
            and self.channels == other.channels
            and all(a.similar(b) for a, b in zip(self.roi, other.roi))
        )

    def get_channel_names(self) -> list[str]:
        """
        Get the channel names in the scan's channel order.
        :return: a list of channel names.
        """
        return [channel.name for channel in self.channels]

    def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]:
        """
        Given a list of channel names, return the corresponding indices in the scan's
        channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the
        actual AlexaFluor names (AF555, AF647, AF488).
        If a list entry is None, it will return -1 for that entry.
        :param channel_names: a list of channel names.
        :return: a list of channel indices.
        :raises ValueError: if a name is not one of the scan's channels.
        """
        # Get the scan's channel name list
        scan_channel_names = self.get_channel_names()

        channel_indices = []
        for name in channel_names:
            # Convert any BZScanner channel names to the actual channel names
            if name in self.BZSCANNER_CHANNEL_MAP:
                name = self.BZSCANNER_CHANNEL_MAP[name]

            # Append the corresponding index if possible
            if name is None:
                channel_indices.append(-1)
            elif name in scan_channel_names:
                channel_indices.append(scan_channel_names.index(name))
            else:
                raise ValueError(
                    f"Channel name {name} not found in scan channels {scan_channel_names}"
                )
        return channel_indices

    def get_image_size(self) -> tuple[int, int]:
        """
        Get the real size of the image in pixels after subtracting overlap.
        :return: a tuple of (real_height, real_width) for easy comparison to arrays
        """
        width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion)
        height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion)
        return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap

    def save_yaml(self, output_path: str):
        """
        Write the Scan object to a .yaml file.
        :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
        :return: nothing; will raise an error on failure
        """
        # Create necessary folders
        output_path = os.path.abspath(output_path)
        if os.path.splitext(output_path)[1] == ".yaml":
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
        else:
            os.makedirs(output_path, exist_ok=True)
            # Add the standard metadata file name to the path if needed
            output_path = os.path.join(
                output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7]
            )

        # Populate the file
        with open(output_path, "w") as file:
            yaml.dump(self, stream=file, sort_keys=False)

    @classmethod
    def load_yaml(cls, input_path: str) -> Self:
        """
        Load a Scan object from a .yaml file.
        :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
        :return: a Scan object
        """
        input_path = os.path.abspath(input_path)
        if os.path.isdir(input_path):
            input_path = os.path.join(
                input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
            )
        with open(input_path, "r") as file:
            # NOTE(review): yaml.Loader can construct arbitrary Python objects, so
            # this must only be used on trusted files. It is kept because the
            # custom yaml_tag classes rely on it being able to build them.
            metadata_obj = yaml.load(file, Loader=yaml.Loader)
        return metadata_obj

    def to_dict(self) -> dict:
        """
        Convert the Scan object to a dictionary with keys matching database columns
        and values matching database entries
        :return: a dictionary
        """

        def as_json(items) -> str:
            # Serialize the objects' attributes under a top-level "data" key
            return json.dumps({"data": items}, default=lambda x: x.__dict__, indent=2)

        # Keys are named the same as database columns
        return {
            "scanner_id": self.scanner_id,
            "slide_id": self.slide_id,
            "exists": self.exists,
            "path": self.path,
            "start_datetime": self.start_datetime,
            "end_datetime": self.end_datetime,
            "tray_pos": self.tray_pos,
            "slide_pos": self.slide_pos,
            "tile_width": self.tile_width_px,
            "tile_height": self.tile_height_px,
            "tile_x_offset": self.tile_x_offset_px,
            "tile_y_offset": self.tile_y_offset_px,
            "tile_overlap": self.tile_overlap_proportion,
            "camera": self.camera,
            "objective": self.objective,
            "pixel_size": self.pixel_size_um,
            "channels": as_json(self.channels),
            "roi": as_json(self.roi),
        }

    @classmethod
    def _to_local_datetime(cls, value) -> datetime.datetime:
        """Parse a standard-format string if needed, then convert to local time."""
        if isinstance(value, str):
            value = datetime.datetime.strptime(value, cls.STANDARD_DATETIME_FORMAT)
        # astimezone() without arguments converts to the system local timezone and
        # does not depend on a "localtime" entry in the timezone database.
        return value.astimezone()

    @classmethod
    def from_dict(cls, scan_dict) -> Self:
        """
        Convert a dictionary from to_dict() or the database to a Scan object
        :param scan_dict: a dictionary; datetimes may be strings in the standard
                          format or datetime objects
        :return: a Scan object
        """
        start_datetime = cls._to_local_datetime(scan_dict["start_datetime"])
        end_datetime = cls._to_local_datetime(scan_dict["end_datetime"])
        # The scan time is not stored in the dictionary, so it is recomputed
        dt = (end_datetime - start_datetime).total_seconds()
        result = cls(
            scanner_id=scan_dict["scanner_id"],
            slide_id=scan_dict["slide_id"],
            exists=scan_dict["exists"],
            path=scan_dict["path"],
            start_datetime=start_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
            end_datetime=end_datetime.strftime(cls.STANDARD_DATETIME_FORMAT),
            scan_time_s=int(dt),
            tray_pos=scan_dict["tray_pos"],
            slide_pos=scan_dict["slide_pos"],
            tile_width_px=scan_dict["tile_width"],
            tile_height_px=scan_dict["tile_height"],
            tile_x_offset_px=scan_dict["tile_x_offset"],
            tile_y_offset_px=scan_dict["tile_y_offset"],
            tile_overlap_proportion=scan_dict["tile_overlap"],
            camera=scan_dict["camera"],
            objective=scan_dict["objective"],
            pixel_size_um=scan_dict["pixel_size"],
        )
        for channel_json in json.loads(scan_dict["channels"])["data"]:
            result.channels.append(
                cls.Channel(
                    name=channel_json["name"],
                    exposure_ms=channel_json["exposure_ms"],
                    intensity=channel_json["intensity"],
                    gain_applied=channel_json["gain_applied"],
                )
            )
        for roi_json in json.loads(scan_dict["roi"])["data"]:
            result.roi.append(
                cls.ROI(
                    origin_x_um=roi_json["origin_x_um"],
                    origin_y_um=roi_json["origin_y_um"],
                    width_um=roi_json["width_um"],
                    height_um=roi_json["height_um"],
                    tile_rows=roi_json["tile_rows"],
                    tile_cols=roi_json["tile_cols"],
                    focus_points=roi_json["focus_points"],
                )
            )
        return result

    @staticmethod
    def _corners_from_center_and_size(center_text: str, size_text: str) -> list[int]:
        """Convert "x,y" center and size strings to [left, top, right, bottom]."""
        center_x, center_y = (round(float(v)) for v in center_text.split(",")[:2])
        size_x, size_y = (round(float(v)) for v in size_text.split(",")[:2])
        return [
            round(center_x - size_x / 2),
            round(center_y - size_y / 2),
            round(center_x + size_x / 2),
            round(center_y + size_y / 2),
        ]

    @classmethod
    def load_czi(cls, input_path: str) -> Self:
        """
        Extracts metadata from a .czi file, which is the output of the Axioscan
        :param input_path: the path to the .czi file
        :return: a Scan object
        :raises ModuleNotFoundError: if aicspylibczi is not installed
        :raises ValueError: if the XML metadata and the binary data disagree
        """
        if aicspylibczi is None:
            raise ModuleNotFoundError(
                "aicspylibczi library not installed. "
                "Install csi-images with [imageio] option to resolve."
            )

        # Normalize paths
        input_path = os.path.abspath(input_path)

        with open(input_path, "rb") as file:
            czi_file = aicspylibczi.CziFile(file)
            # Read in metadata as XML elements
            metadata_xml = czi_file.meta
            # Read in shape metadata from binary
            rois_shape = czi_file.get_dims_shape()

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
        if scan.slide_id is not None:
            scan.slide_id = scan.slide_id.strip().upper()
        # Map the raw scanner ID (service ID) to our IDs
        scan.scanner_id = cls.SCANNER_IDS[
            metadata_xml.find(".//Microscope/UserDefinedName").text
        ]

        # Extract start and finish datetimes
        date = metadata_xml.find(".//Document/CreationDate").text
        # Strip out sub-second precision, if any, keeping the UTC offset
        date = re.sub(r"\.\d+", "", date, count=1)
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
        )
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        # The duration is divided by 1000 to obtain seconds
        scan.scan_time_s = round(
            float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
        )
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
        # Only the final character of the position entry is used
        scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

        # Get camera and magnifying info
        scan.camera = (
            metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
        )["Name"]
        magnification = metadata_xml.find(
            ".//Objectives/Objective/NominalMagnification"
        )
        aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
        scan.objective = f"{magnification.text}x-{aperture.text}"
        scan.pixel_size_um = (
            float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
        )
        # Round off the pixel size to nanometers; might not be optimal, but this
        # gets rounded when we send it to the database anyways (to 7 places)
        scan.pixel_size_um = round(scan.pixel_size_um, 3)

        # Get tile information
        # Note: X Y is untested, could be flipped. I always forget. Just don't use
        # non-square frames and we're all good.
        selected_detector = metadata_xml.find(".//SelectedDetector").text
        tile_info = None
        for detector in metadata_xml.findall(".//Detectors/Detector"):
            if detector.attrib["Id"] == selected_detector:
                tile_info = detector.find(".//Frame")
                break
        if tile_info is None:
            raise ValueError(
                f"No frame information found for detector {selected_detector} "
                f"in {input_path}"
            )
        # Convert to integers
        tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

        scan.tile_x_offset_px = tile_info[0]
        scan.tile_y_offset_px = tile_info[1]
        scan.tile_width_px = tile_info[2]
        scan.tile_height_px = tile_info[3]
        scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

        # Extract channels and create Channel objects from them
        channel_indices = []
        for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
            # The index is taken from the final character of the channel's Id
            channel_indices.append(int(channel.attrib["Id"][-1]))
            intensity_xml = channel.find(".//Intensity")
            if intensity_xml is None:
                intensity = 0
            else:
                # Drop the two-character suffix and scale by 1/100
                intensity = float(intensity_xml.text[:-2]) * 1e-2
            scan.channels.append(
                cls.Channel(
                    name=channel.attrib["Name"].upper(),
                    exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                    intensity=intensity,
                    gain_applied=True,  # In Axioscan, we will always use gain = 1
                )
            )
        # Make sure the channels are sorted; sort on the index only so that ties
        # never fall through to comparing Channel objects
        scan.channels = [
            channel
            for _, channel in sorted(
                zip(channel_indices, scan.channels), key=lambda pair: pair[0]
            )
        ]
        # Verify that the shape corresponds to the channels
        for roi in rois_shape:
            if roi["C"][1] != len(scan.channels):
                raise ValueError(
                    f"Number of channels {len(scan.channels)} "
                    f"is not the same as the number of channels in an ROI: "
                    f"{roi['C'][1]}"
                )

        # Get the real ROI limits; the metadata is not always correct
        limits_xml = metadata_xml.findall(".//AllowedScanArea")
        # Converted to top-left and bottom-right
        limits = cls._corners_from_center_and_size(
            limits_xml[0].find("Center").text, limits_xml[0].find("Size").text
        )

        # Extract ROIs and create ROI objects from them
        rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
        scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
        if len(rois_xml_metadata) != len(rois_shape):
            raise ValueError(
                f"Metadata and binary data from {input_path} "
                f"do not match in number of ROIs"
            )
        # We need both to determine the number of rows/columns because the XML lies
        roi_indices = []
        for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
            name = roi_xml.attrib["Name"]
            # Determine the index of this scene
            scene_index = -1
            for scene in scenes_xml_metadata:
                if scene.attrib["Name"] == name:
                    scene_index = int(scene.attrib["Index"])
                    break
            if scene_index == -1:
                raise ValueError(f"ROI {name} does not correspond to any scenes")
            roi_indices.append(scene_index)
            # Extract other metadata, converted to top-left and bottom-right
            roi_limits = cls._corners_from_center_and_size(
                roi_xml.find("CenterPosition").text, roi_xml.find("ContourSize").text
            )
            # Bound the ROI to the actual scan limits
            roi_limits = [
                max(roi_limits[0], limits[0]),
                max(roi_limits[1], limits[1]),
                min(roi_limits[2], limits[2]),
                min(roi_limits[3], limits[3]),
            ]

            tile_rows = int(roi_xml.find("Rows").text)
            # Current best way of reliably extracting; <Columns> entry can be wrong
            if (roi_shape["M"][1] % tile_rows) != 0:
                raise ValueError(
                    f"The number of tiles {roi_shape['M'][1]} is not "
                    f"divisible by the tile rows {tile_rows}; metadata "
                    f"must be messed up. Thanks Zeiss"
                )
            tile_cols = roi_shape["M"][1] // tile_rows
            # Support points are actually the relevant focus points for this ROI
            # Strip all sub-micron precision, it does not matter
            focus_points = [
                [
                    int(float(focus_point.find("X").text)),
                    int(float(focus_point.find("Y").text)),
                    int(float(focus_point.find("Z").text)),
                ]
                for focus_point in roi_xml.findall("SupportPoints/SupportPoint")
            ]
            scan.roi.append(
                cls.ROI(
                    origin_x_um=roi_limits[0],
                    origin_y_um=roi_limits[1],
                    width_um=roi_limits[2] - roi_limits[0],
                    height_um=roi_limits[3] - roi_limits[1],
                    tile_rows=tile_rows,
                    tile_cols=tile_cols,
                    focus_points=focus_points,
                )
            )
        # Sort based on the scene indices only (never compare ROI objects)
        scan.roi = [
            roi
            for _, roi in sorted(zip(roi_indices, scan.roi), key=lambda pair: pair[0])
        ]

        return scan

    @classmethod
    def load_txt(cls, input_path: str) -> Self:
        """
        Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
        from the BZScanner. Some metadata is filled in or adjusted to fit
        :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
        :return: a Scan object
        :raises ValueError: if a non-blank line has no "=" separator
        """
        # Set paths
        input_path = os.path.abspath(input_path)
        if os.path.isdir(input_path):
            input_path = os.path.join(
                input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
            )

        # Read in metadata as a dict
        with open(input_path, "r") as file:
            metadata_contents = file.read()
        # Read each line, splitting on the first = sign so values may contain "="
        metadata_dict = {}
        for line in metadata_contents.splitlines():
            if not line.strip():
                continue  # tolerate blank lines
            key, separator, value = line.partition("=")
            if not separator:
                raise ValueError(
                    f"Malformed metadata line in {input_path}: {line!r}"
                )
            metadata_dict[key] = value

        # Populate metadata
        scan = cls()

        scan.slide_id = metadata_dict["SLIDEID"]
        scan.slide_id = scan.slide_id.strip().upper()

        scan.path = metadata_dict["SLIDEDIR"]

        # Extract start and finish datetimes
        date = metadata_dict["DATE"]
        date_as_datetime = datetime.datetime.strptime(
            date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
        )
        # The timestamp has no timezone; attach the scanner's timezone directly.
        # Calling astimezone() on a naive datetime would interpret it in the local
        # timezone of whatever machine runs this code before converting.
        date_as_datetime = date_as_datetime.replace(
            tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")
        )  # Hardcoded because BZScanners are here
        scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
        scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
        date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
        scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

        # Build our scanner ID from the scanner type and instrument entry
        scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
        scan.tray_pos = 0  # only one tray_pos in a BZScanner
        scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed

        # Get camera and magnifying info
        scan.camera = ""
        magnification = 10
        aperture = 0  # TODO: find the actual aperture
        scan.objective = f"{magnification}x-{aperture}"
        scan.pixel_size_um = 0.591  # Estimated from image metadata

        # Get tile information
        scan.tile_width_px = 1362  # Known from image metadata
        scan.tile_height_px = 1004  # Known from image metadata
        scan.tile_x_offset_px = 0  # Already removed
        scan.tile_y_offset_px = 0  # Already removed
        scan.tile_overlap_proportion = 0  # Already removed

        # Extract channels and create Channel objects from them
        if "gain_applied" in metadata_dict:
            gain_applied = metadata_dict["gain_applied"] == "1"
        else:
            gain_applied = True  # Previous policy was always to apply gains
        for channel, channel_name in cls.BZSCANNER_CHANNEL_MAP.items():
            channel_settings = metadata_dict[channel].split(",")
            # A leading "0" means the channel is skipped
            if channel_settings[0] == "0":
                continue
            scan.channels.append(
                cls.Channel(
                    name=channel_name,
                    exposure_ms=float(channel_settings[1]),
                    intensity=float(channel_settings[2]),
                    gain_applied=gain_applied,
                )
            )

        # Get focus points; the list ends at the first entry with a leading "0"
        focus_points = []
        for i in range(33):
            focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
            if focus_point[0] == "0":
                break
            focus_points.append(
                [
                    int(float(focus_point[1])),
                    int(float(focus_point[2])),
                    int(float(focus_point[3])),
                ]
            )

        # In the BZScanner, the slide is vertical instead of horizontal
        # We put in nominal values for the ROI, which is oriented vertically as well
        tile_rows = 96
        tile_cols = 24
        roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
        roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
        origin_x_um = 2500 + round((20000 - roi_width) / 2)
        origin_y_um = 2500 + round((58000 - roi_height) / 2)
        scan.roi.append(
            cls.ROI(
                origin_x_um=origin_x_um,
                origin_y_um=origin_y_um,
                width_um=roi_width,
                height_um=roi_height,
                tile_rows=tile_rows,
                tile_cols=tile_cols,
                focus_points=focus_points,
            )
        )
        return scan

    @classmethod
    def load_from_folder(cls, input_path: str) -> Self:
        """
        Load a Scan object from a folder that contains defaultly-named metadata files,
        scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist
        :param input_path: /path/to/folder
        :return: a Scan object
        :raises ValueError: if neither metadata file exists in the folder
        """
        input_path = os.path.abspath(input_path)
        yaml_name = cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
        txt_name = cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
        if os.path.isfile(os.path.join(input_path, yaml_name)):
            return cls.load_yaml(input_path)
        if os.path.isfile(os.path.join(input_path, txt_name)):
            return cls.load_txt(input_path)
        raise ValueError(
            f"No scan metadata files "
            f"({yaml_name}, "
            f"{txt_name}) found in folder "
            f"{input_path}"
        )

    @classmethod
    def make_placeholder(
        cls,
        slide_id: str,
        n_tile: int = 2303,
        n_roi: int = 0,
        scanner_type: Type = Type.BZSCANNER,
    ) -> Self:
        """
        Make a placeholder Scan object with only basic required information filled in.
        :param slide_id: the slide ID
        :param n_tile: the number of this tile, which will become the number of
                       tiles in the scan
        :param n_roi: the number of ROIs in the scan
        :param scanner_type: the scanner type used to build the placeholder
                             scanner ID
        :return: a Scan object
        """
        # Sanitize inputs here
        slide_id = str(slide_id).strip().upper()
        n_tile = int(n_tile)
        n_roi = int(n_roi)
        # Generate the object
        scan = cls()
        scan.slide_id = slide_id
        if scanner_type == cls.Type.AXIOSCAN7:
            scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder"
        elif scanner_type == cls.Type.BZSCANNER:
            scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder"
        # n_roi and n_tile are treated as zero-based indices, hence the + 1
        scan.roi = [cls.ROI() for _ in range(n_roi + 1)]
        scan.roi[0].tile_rows = 1
        scan.roi[0].tile_cols = n_tile + 1
        return scan
24class Scan(yaml.YAMLObject): 25 """ 26 Class that composes a whole scan's metadata. Contains some universal data, 27 plus lists for channels and ROIs. 28 29 .. include:: ../docs/coordinate_systems.md 30 """ 31 32 yaml_tag = "csi_utils.scans.Scan" 33 34 class Type(enum.Enum): 35 BZSCANNER = "bzscanner" 36 AXIOSCAN7 = "axioscan7" 37 38 SCANNER_IDS = {"4661000426": f"{Type.AXIOSCAN7.value}_0"} 39 """Axioscan 7 scanner IDs (service number), mapped to our scanner IDs""" 40 41 METADATA_FILE_NAME = { 42 Type.AXIOSCAN7: "scan.yaml", 43 Type.BZSCANNER: "slideinfo.txt", 44 } 45 STANDARD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" 46 DATETIME_FORMAT = { 47 Type.AXIOSCAN7: STANDARD_DATETIME_FORMAT, 48 Type.BZSCANNER: "%a %b %d %H:%M:%S %Y", 49 } 50 51 # Actual channel names, from the BZScanner's default order 52 BZSCANNER_CHANNEL_MAP = { 53 "DAPI": "DAPI", 54 "TRITC": "AF555", 55 "CY5": "AF647", 56 "BF": "BRIGHT", 57 "FITC": "AF488", 58 } 59 60 class Channel(yaml.YAMLObject): 61 """ 62 Class that comprises a channel; we usually have multiple (2-5) per scan. 63 Contains three fields: 64 - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD) 65 - exposure_ms: the exposure time to capture a frame in milliseconds 66 - intensity: the light intensity used OR the gain applied to the channel 67 """ 68 69 yaml_tag = "csi_utils.csi_scans.Scan.Channel" 70 71 def __init__( 72 self, 73 name: str = "", 74 exposure_ms: float = -1.0, 75 intensity: float = -1.0, 76 gain_applied: bool = False, 77 ): 78 self.name = name 79 self.exposure_ms = exposure_ms 80 self.intensity = intensity 81 self.gain_applied = gain_applied 82 83 def __repr__(self): 84 return yaml.dump(self, sort_keys=False) 85 86 def __eq__(self, other): 87 return self.__repr__() == other.__repr__() 88 89 class ROI(yaml.YAMLObject): 90 """ 91 Class that comprises an ROI; we usually have 1, but may have more in a scan. 
92 """ 93 94 yaml_tag = "csi_utils.csi_scans.Scan.ROI" 95 96 def __init__( 97 self, 98 origin_x_um: int = -1, 99 origin_y_um: int = -1, 100 width_um: int = -1, 101 height_um: int = -1, 102 tile_rows: int = -1, 103 tile_cols: int = -1, 104 focus_points=None, 105 ): 106 if focus_points is None: 107 focus_points = [] 108 self.origin_x_um = origin_x_um 109 self.origin_y_um = origin_y_um 110 self.width_um = width_um 111 self.height_um = height_um 112 self.tile_rows = tile_rows 113 self.tile_cols = tile_cols 114 self.focus_points = focus_points 115 116 def __repr__(self): 117 return yaml.dump(self, sort_keys=False) 118 119 def __eq__(self, other): 120 return self.__repr__() == other.__repr__() 121 122 def similar(self, other): 123 return ( 124 self.origin_y_um == other.origin_y_um 125 and self.origin_x_um == other.origin_x_um 126 and self.width_um == other.width_um 127 and self.height_um == other.height_um 128 and self.tile_rows == other.tile_rows 129 and self.tile_cols == other.tile_cols 130 ) 131 132 def __init__( 133 self, 134 slide_id: str = "", 135 exists: bool = True, 136 path: str = "", 137 start_datetime: str = "", 138 end_datetime: str = "", 139 scan_time_s: int = -1, 140 scanner_id: str = "", 141 tray_pos: int = -1, 142 slide_pos: int = -1, 143 camera: str = "", 144 objective: str = "", 145 pixel_size_um: float = -1.0, 146 tile_width_px: int = -1, 147 tile_height_px: int = -1, 148 tile_x_offset_px: int = -1, 149 tile_y_offset_px: int = -1, 150 tile_overlap_proportion: int = -1, 151 channels: list[Channel] = None, 152 roi: list[ROI] = None, 153 ): 154 if roi is None: 155 roi = [] 156 if channels is None: 157 channels = [] 158 self.slide_id = slide_id 159 self.exists = exists 160 self.path = path 161 self.start_datetime = start_datetime 162 self.end_datetime = end_datetime 163 self.scan_time_s = scan_time_s 164 self.scanner_id = scanner_id 165 self.tray_pos = tray_pos 166 self.slide_pos = slide_pos 167 self.camera = camera 168 self.objective = objective 169 
self.pixel_size_um = pixel_size_um 170 self.tile_width_px = tile_width_px 171 self.tile_height_px = tile_height_px 172 self.tile_x_offset_px = tile_x_offset_px 173 self.tile_y_offset_px = tile_y_offset_px 174 self.tile_overlap_proportion = tile_overlap_proportion 175 self.channels = channels 176 self.roi = roi 177 178 def __key(self): 179 return ( 180 self.slide_id, 181 self.exists, 182 self.path, 183 self.start_datetime, 184 self.end_datetime, 185 self.scan_time_s, 186 self.scanner_id, 187 self.tray_pos, 188 self.slide_pos, 189 self.camera, 190 self.objective, 191 self.pixel_size_um, 192 self.tile_width_px, 193 self.tile_height_px, 194 self.tile_overlap_proportion, 195 tuple(self.channels), 196 tuple(self.roi), 197 ) 198 199 def __hash__(self): 200 return hash(self.__key()) 201 202 def __repr__(self): 203 return yaml.dump(self, sort_keys=False) 204 205 def __eq__(self, other): 206 return self.__repr__() == other.__repr__() 207 208 def has_same_profile(self, other): 209 return ( 210 self.camera == other.camera 211 and self.objective == other.objective 212 and self.pixel_size_um == other.pixel_size_um 213 and self.tile_width_px == other.tile_width_px 214 and self.tile_height_px == other.tile_height_px 215 and self.tile_x_offset_px == other.tile_x_offset_px 216 and self.tile_y_offset_px == other.tile_y_offset_px 217 and self.tile_overlap_proportion == other.tile_overlap_proportion 218 and self.channels == other.channels 219 and all(a.similar(b) for a, b in zip(self.roi, other.roi)) 220 ) 221 222 def get_channel_names(self) -> list[str]: 223 """ 224 Get the channel names in the scan's channel order. 225 :return: a list of channel names. 226 """ 227 return [channel.name for channel in self.channels] 228 229 def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]: 230 """ 231 Given a list of channel names, return the corresponding indices in the scan's 232 channel order. 
Will convert BZScanner channel names (TRITC, CY5, FITC) to the 233 actual AlexaFluor names (AF555, AF647, AF488). 234 If a list entry is None, it will return -1 for that entry. 235 :param channel_names: a list of channel names. 236 :return: a list of channel indices. 237 """ 238 # Get the scan's channel name list 239 scan_channel_names = self.get_channel_names() 240 241 channel_indices = [] 242 for name in channel_names: 243 # Convert any BZScanner channel names to the actual channel names 244 if name in self.BZSCANNER_CHANNEL_MAP: 245 name = self.BZSCANNER_CHANNEL_MAP[name] 246 247 # Append the corresponding index if possible 248 if name is None: 249 channel_indices.append(-1) 250 elif name in scan_channel_names: 251 channel_indices.append(scan_channel_names.index(name)) 252 else: 253 raise ValueError( 254 f"Channel name {name} not found in scan channels {scan_channel_names}" 255 ) 256 return channel_indices 257 258 def get_image_size(self) -> tuple[int, int]: 259 """ 260 Get the real size of the image in pixels after subtracting overlap. 261 :return: a tuple of (real_height, real_width) for easy comparison to arrays 262 """ 263 width_overlap = math.floor(self.tile_width_px * self.tile_overlap_proportion) 264 height_overlap = math.floor(self.tile_height_px * self.tile_overlap_proportion) 265 return self.tile_height_px - height_overlap, self.tile_width_px - width_overlap 266 267 def save_yaml(self, output_path: str): 268 """ 269 Write the Scan object to a .yaml file. 
270 :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml 271 :return: nothing; will raise an error on failure 272 """ 273 # Create necessary folders 274 output_path = os.path.abspath(output_path) 275 if os.path.splitext(output_path)[1] == ".yaml": 276 os.makedirs(os.path.dirname(output_path), exist_ok=True) 277 else: 278 os.makedirs(output_path, exist_ok=True) 279 # Add the standard metadata file name to the path if needed 280 output_path = os.path.join( 281 output_path, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7] 282 ) 283 284 # Populate the file 285 with open(output_path, "w") as file: 286 yaml.dump(self, stream=file, sort_keys=False) 287 288 @classmethod 289 def load_yaml(cls, input_path: str) -> Self: 290 """ 291 Load a Scan object from a .yaml file. 292 :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml 293 :return: a Scan object 294 """ 295 input_path = os.path.abspath(input_path) 296 if os.path.isdir(input_path): 297 input_path = os.path.join( 298 input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7] 299 ) 300 with open(input_path, "r") as file: 301 metadata_obj = yaml.load(file, Loader=yaml.Loader) 302 return metadata_obj 303 304 def to_dict(self) -> dict: 305 """ 306 Convert the Scan object to a dictionary with keys matching database columns 307 and values matching database entries 308 :return: a dictionary 309 """ 310 # Dump to json; then add indents and a top-level key 311 channels_json = json.dumps( 312 self.channels, default=lambda x: x.__dict__, indent=2 313 ) 314 channels_json = " ".join(channels_json.splitlines(True)) 315 channels_json = "{\n " + '"data": ' + channels_json + "\n}" 316 317 roi_json = json.dumps(self.roi, default=lambda x: x.__dict__, indent=2) 318 roi_json = " ".join(roi_json.splitlines(True)) 319 roi_json = "{\n " + '"data": ' + roi_json + "\n}" 320 321 # Keys are named the same as database columns 322 return { 323 "scanner_id": self.scanner_id, 324 "slide_id": self.slide_id, 325 
"exists": self.exists, 326 "path": self.path, 327 "start_datetime": self.start_datetime, 328 "end_datetime": self.end_datetime, 329 "tray_pos": self.tray_pos, 330 "slide_pos": self.slide_pos, 331 "tile_width": self.tile_width_px, 332 "tile_height": self.tile_height_px, 333 "tile_x_offset": self.tile_x_offset_px, 334 "tile_y_offset": self.tile_y_offset_px, 335 "tile_overlap": self.tile_overlap_proportion, 336 "camera": self.camera, 337 "objective": self.objective, 338 "pixel_size": self.pixel_size_um, 339 "channels": channels_json, 340 "roi": roi_json, 341 } 342 343 @classmethod 344 def from_dict(cls, scan_dict) -> Self: 345 """ 346 Convert a dictionary from to_dict() or the database to a Scan object 347 :param scan_dict: a dictionary 348 :return: a Scan object 349 """ 350 local_timezone = zoneinfo.ZoneInfo("localtime") 351 if isinstance(scan_dict["start_datetime"], str): 352 start_datetime = datetime.datetime.strptime( 353 scan_dict["start_datetime"], cls.STANDARD_DATETIME_FORMAT 354 ).astimezone(local_timezone) 355 else: 356 start_datetime = scan_dict["start_datetime"].astimezone(local_timezone) 357 if isinstance(scan_dict["end_datetime"], str): 358 end_datetime = datetime.datetime.strptime( 359 scan_dict["end_datetime"], cls.STANDARD_DATETIME_FORMAT 360 ).astimezone(local_timezone) 361 else: 362 end_datetime = scan_dict["end_datetime"].astimezone(local_timezone) 363 dt = (end_datetime - start_datetime).total_seconds() 364 result = cls( 365 scanner_id=scan_dict["scanner_id"], 366 slide_id=scan_dict["slide_id"], 367 exists=scan_dict["exists"], 368 path=scan_dict["path"], 369 start_datetime=start_datetime.strftime(cls.STANDARD_DATETIME_FORMAT), 370 end_datetime=end_datetime.strftime(cls.STANDARD_DATETIME_FORMAT), 371 scan_time_s=int(dt), 372 tray_pos=scan_dict["tray_pos"], 373 slide_pos=scan_dict["slide_pos"], 374 tile_width_px=scan_dict["tile_width"], 375 tile_height_px=scan_dict["tile_height"], 376 tile_x_offset_px=scan_dict["tile_x_offset"], 377 
tile_y_offset_px=scan_dict["tile_y_offset"], 378 tile_overlap_proportion=scan_dict["tile_overlap"], 379 camera=scan_dict["camera"], 380 objective=scan_dict["objective"], 381 pixel_size_um=scan_dict["pixel_size"], 382 ) 383 for channel_json in json.loads(scan_dict["channels"])["data"]: 384 result.channels.append( 385 cls.Channel( 386 name=channel_json["name"], 387 exposure_ms=channel_json["exposure_ms"], 388 intensity=channel_json["intensity"], 389 gain_applied=channel_json["gain_applied"], 390 ) 391 ) 392 for roi_json in json.loads(scan_dict["roi"])["data"]: 393 result.roi.append( 394 cls.ROI( 395 origin_x_um=roi_json["origin_x_um"], 396 origin_y_um=roi_json["origin_y_um"], 397 width_um=roi_json["width_um"], 398 height_um=roi_json["height_um"], 399 tile_rows=roi_json["tile_rows"], 400 tile_cols=roi_json["tile_cols"], 401 focus_points=roi_json["focus_points"], 402 ) 403 ) 404 return result 405 406 @classmethod 407 def load_czi(cls, input_path: str) -> Self: 408 """ 409 Extracts metadata from a .czi file, which is the output of the Axioscan 410 :param input_path: the path to the .czi file 411 :return: a Scan object 412 """ 413 if aicspylibczi is None: 414 raise ModuleNotFoundError( 415 "aicspylibczi library not installed. " 416 "Install csi-images with [imageio] option to resolve." 
417 ) 418 419 # Normalize paths 420 input_path = os.path.abspath(input_path) 421 422 with open(input_path, "rb") as file: 423 # Read in metadata as XML elements 424 metadata_xml = aicspylibczi.CziFile(file).meta 425 # Read in shape metadata from binary 426 rois_shape = aicspylibczi.CziFile(file).get_dims_shape() 427 428 # Populate metadata 429 scan = cls() 430 431 scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text 432 if scan.slide_id is not None: 433 scan.slide_id = scan.slide_id.strip().upper() 434 # Map the raw scanner ID (service ID) to our IDs 435 scan.scanner_id = cls.SCANNER_IDS[ 436 metadata_xml.find(".//Microscope/UserDefinedName").text 437 ] 438 439 # Extract start and finish datetimes 440 date = metadata_xml.find(".//Document/CreationDate").text 441 # Strip out sub-second precision 442 date = date[: date.find(".")] + date[max(date.rfind("-"), date.rfind("+")) :] 443 date_as_datetime = datetime.datetime.strptime( 444 date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7] 445 ) 446 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 447 scan.scan_time_s = round( 448 float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000 449 ) 450 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 451 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 452 453 scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text) 454 scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1]) 455 456 # Get camera and magnifying info 457 scan.camera = ( 458 metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib 459 )["Name"] 460 magnification = metadata_xml.find( 461 ".//Objectives/Objective/NominalMagnification" 462 ) 463 aperture = metadata_xml.find(".//Objectives/Objective/LensNA") 464 scan.objective = f"{magnification.text}x-{aperture.text}" 465 scan.pixel_size_um = ( 466 float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6 
467 ) 468 # Round off the pixel size to nanometers; might not be optimal, but this 469 # gets rounded when we send it to the database anyways (to 7 places) 470 scan.pixel_size_um = round(scan.pixel_size_um, 3) 471 472 # Get tile information 473 # Note: X Y is untested, could be flipped. I always forget. Just don't use 474 # non-square frames and we're all good. 475 selected_detector = metadata_xml.find(".//SelectedDetector").text 476 detectors = metadata_xml.findall(".//Detectors/Detector") 477 for detector in detectors: 478 if detector.attrib["Id"] == selected_detector: 479 tile_info = detector.find(".//Frame") 480 break 481 # Convert to integers 482 tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")] 483 484 scan.tile_x_offset_px = tile_info[0] 485 scan.tile_y_offset_px = tile_info[1] 486 scan.tile_width_px = tile_info[2] 487 scan.tile_height_px = tile_info[3] 488 scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text) 489 490 # Extract channels and create Channel objects from them 491 channel_indices = [] 492 for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"): 493 channel_indices.append(int(channel.attrib["Id"][-1])) 494 intensity_xml = channel.find(".//Intensity") 495 if intensity_xml is None: 496 intensity = 0 497 else: 498 intensity = float(intensity_xml.text[:-2]) * 1e-2 499 scan.channels.append( 500 cls.Channel( 501 name=channel.attrib["Name"].upper(), 502 exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6, 503 intensity=intensity, 504 gain_applied=True, # In Axioscan, we will always use gain = 1 505 ) 506 ) 507 # Make sure the channels are sorted 508 scan.channels = [ 509 channel for _, channel in sorted(zip(channel_indices, scan.channels)) 510 ] 511 # Verify that the shape corresponds to the channels 512 for roi in rois_shape: 513 if roi["C"][1] != len(scan.channels): 514 raise ValueError( 515 f"Number of channels {len(scan.channels)} " 516 f"is not the same as the number of 
channels in an ROI: " 517 f"{roi['C'][1]}" 518 ) 519 520 # Get the real ROI limits; the metadata is not always correct 521 limits_xml = metadata_xml.findall(".//AllowedScanArea") 522 limits = [ 523 round(float(limits_xml[0].find("Center").text.split(",")[0])), 524 round(float(limits_xml[0].find("Center").text.split(",")[1])), 525 round(float(limits_xml[0].find("Size").text.split(",")[0])), 526 round(float(limits_xml[0].find("Size").text.split(",")[1])), 527 ] 528 # Convert to top-left and bottom-right 529 limits = [ 530 round(limits[0] - limits[2] / 2), 531 round(limits[1] - limits[3] / 2), 532 round(limits[0] + limits[2] / 2), 533 round(limits[1] + limits[3] / 2), 534 ] 535 536 # Extract ROIs and create ROI objects from them 537 rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion") 538 scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene") 539 if len(rois_xml_metadata) != len(rois_shape): 540 raise ValueError( 541 f"Metadata and binary data from {input_path} " 542 f"do not match in number of ROIs" 543 ) 544 # We need both to determine the number of rows/columns because the XML lies 545 roi_indices = [] 546 for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape): 547 name = roi_xml.attrib["Name"] 548 # Determine the index of this scene 549 scene_index = -1 550 for scene in scenes_xml_metadata: 551 if scene.attrib["Name"] == name: 552 scene_index = int(scene.attrib["Index"]) 553 break 554 if scene_index == -1: 555 raise ValueError(f"ROI {name} does not correspond to any scenes") 556 else: 557 roi_indices.append(scene_index) 558 # Extract other metadata 559 roi_limits = [ 560 round(float(roi_xml.find("CenterPosition").text.split(",")[0])), 561 round(float(roi_xml.find("CenterPosition").text.split(",")[1])), 562 round(float(roi_xml.find("ContourSize").text.split(",")[0])), 563 round(float(roi_xml.find("ContourSize").text.split(",")[1])), 564 ] 565 # Convert to top-left and bottom-right 566 roi_limits = [ 567 round(roi_limits[0] - 
roi_limits[2] / 2), 568 round(roi_limits[1] - roi_limits[3] / 2), 569 round(roi_limits[0] + roi_limits[2] / 2), 570 round(roi_limits[1] + roi_limits[3] / 2), 571 ] 572 # Bound the ROI to the actual scan limits 573 roi_limits = [ 574 max(roi_limits[0], limits[0]), 575 max(roi_limits[1], limits[1]), 576 min(roi_limits[2], limits[2]), 577 min(roi_limits[3], limits[3]), 578 ] 579 580 tile_rows = int(roi_xml.find("Rows").text) 581 # Current best way of reliably extracting; <Columns> entry can be wrong 582 if (roi_shape["M"][1] % tile_rows) != 0: 583 raise ValueError( 584 f"The number of tiles {roi_shape['M'][1]} is not " 585 f"divisible by the tile rows {tile_rows}; metadata " 586 f"must be messed up. Thanks Zeiss" 587 ) 588 else: 589 tile_cols = int(roi_shape["M"][1] / tile_rows) 590 # Support points are actually the relevant focus points for this ROI 591 focus_points = [] 592 for focus_point in roi_xml.findall("SupportPoints/SupportPoint"): 593 focus_points.append( 594 [ 595 int(float(focus_point.find("X").text)), 596 int(float(focus_point.find("Y").text)), 597 int(float(focus_point.find("Z").text)), 598 ] 599 ) 600 # Strip all sub-micron precision, it does not matter 601 scan.roi.append( 602 cls.ROI( 603 origin_x_um=roi_limits[0], 604 origin_y_um=roi_limits[1], 605 width_um=roi_limits[2] - roi_limits[0], 606 height_um=roi_limits[3] - roi_limits[1], 607 tile_rows=tile_rows, 608 tile_cols=tile_cols, 609 focus_points=focus_points, 610 ) 611 ) 612 # Sort based on the scene indices 613 scan.roi = [roi for _, roi in sorted(zip(roi_indices, scan.roi))] 614 615 return scan 616 617 @classmethod 618 def load_txt(cls, input_path: str) -> Self: 619 """ 620 Loads a Scan object from a .txt file, usually slideinfo.txt, which originates 621 from the BZScanner. 
Some metadata is filled in or adjusted to fit 622 :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt 623 :return: a Scan object 624 """ 625 # Set paths 626 input_path = os.path.abspath(input_path) 627 if os.path.isdir(input_path): 628 input_path = os.path.join( 629 input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER] 630 ) 631 632 # Read in metadata as a dict 633 with open(input_path, "r") as file: 634 metadata_contents = file.read() 635 # Read each line, splitting on the = sign 636 metadata_dict = {} 637 for line in metadata_contents.splitlines(): 638 key, value = line.split("=") 639 metadata_dict[key] = value 640 641 # Populate metadata 642 scan = cls() 643 644 scan.slide_id = metadata_dict["SLIDEID"] 645 scan.slide_id = scan.slide_id.strip().upper() 646 647 scan.path = metadata_dict["SLIDEDIR"] 648 649 # Extract start and finish datetimes 650 date = metadata_dict["DATE"] 651 date_as_datetime = datetime.datetime.strptime( 652 date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER] 653 ) 654 date_as_datetime = date_as_datetime.astimezone( 655 zoneinfo.ZoneInfo("America/Los_Angeles") 656 ) # Hardcoded because BZScanners are here 657 scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 658 scan.scan_time_s = 90 * 60 # estimated 90 minutes per scan 659 date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s) 660 scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT) 661 662 # Map the raw scanner ID (service ID) to our IDs 663 scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}' 664 scan.tray_pos = 0 # only one tray_pos in a BZScanner 665 scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1 # 1-indexed 666 667 # Get camera and magnifying info 668 scan.camera = "" 669 magnification = 10 670 aperture = 0 # TODO: find the actual aperture 671 scan.objective = f"{magnification}x-{aperture}" 672 scan.pixel_size_um = 0.591 # Estimated from image metadata 673 674 # Get tile 
information 675 scan.tile_width_px = 1362 # Known from image metadata 676 scan.tile_height_px = 1004 # Known from image metadata 677 scan.tile_x_offset_px = 0 # Already removed 678 scan.tile_y_offset_px = 0 # Already removed 679 scan.tile_overlap_proportion = 0 # Already removed 680 681 # Extract channels and create Channel objects from them 682 if "gain_applied" in metadata_dict: 683 gain_applied = True if metadata_dict["gain_applied"] == "1" else False 684 else: 685 gain_applied = True # Previous policy was always to apply gains 686 for channel in list(cls.BZSCANNER_CHANNEL_MAP.keys()): 687 channel_settings = metadata_dict[channel].split(",") 688 if channel_settings[0] == "0": 689 continue 690 scan.channels.append( 691 cls.Channel( 692 name=cls.BZSCANNER_CHANNEL_MAP[channel], 693 exposure_ms=float(channel_settings[1]), 694 intensity=float(channel_settings[2]), 695 gain_applied=gain_applied, 696 ) 697 ) 698 699 # Get focus points 700 focus_points = [] 701 for i in range(33): 702 focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",") 703 if focus_point[0] == "0": 704 break 705 focus_points.append( 706 [ 707 int(float(focus_point[1])), 708 int(float(focus_point[2])), 709 int(float(focus_point[3])), 710 ] 711 ) 712 713 # In the BZScanner, the slide is vertical instead of horizontal 714 # We put in nominal values for the ROI, which is oriented vertically as well 715 tile_rows = 96 716 tile_cols = 24 717 roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols) 718 roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows) 719 origin_x_um = 2500 + round((20000 - roi_width) / 2) 720 origin_y_um = 2500 + round((58000 - roi_height) / 2) 721 scan.roi.append( 722 cls.ROI( 723 origin_x_um=origin_x_um, 724 origin_y_um=origin_y_um, 725 width_um=roi_width, 726 height_um=roi_height, 727 tile_rows=tile_rows, 728 tile_cols=tile_cols, 729 focus_points=focus_points, 730 ) 731 ) 732 return scan 733 734 @classmethod 735 def load_from_folder(cls, 
input_path: str) -> Self: 736 """ 737 Load a Scan object from a folder that contains defaultly-named metadata files, 738 scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist 739 :param input_path: /path/to/folder 740 :return: a Scan object 741 """ 742 input_path = os.path.abspath(input_path) 743 if os.path.isfile( 744 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]) 745 ): 746 return cls.load_yaml(input_path) 747 elif os.path.isfile( 748 os.path.join(input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]) 749 ): 750 return cls.load_txt(input_path) 751 else: 752 raise ValueError( 753 f"No scan metadata files " 754 f"({cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]}, " 755 f"{cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]}) found in folder " 756 f"{input_path}" 757 ) 758 pass 759 760 @classmethod 761 def make_placeholder( 762 cls, 763 slide_id: str, 764 n_tile: int = 2303, 765 n_roi: int = 0, 766 scanner_type: Type = Type.BZSCANNER, 767 ) -> Self: 768 """ 769 Make a placeholder Scan object with only basic required information filled in. 770 :param slide_id: the slide ID 771 :param n_tile: the number of this tile, which will become the number of 772 tiles in the scan 773 :param n_roi: the number of ROIs in the scan 774 :return: a Scan object 775 """ 776 # Sanitize inputs here 777 slide_id = str(slide_id).strip().upper() 778 n_tile = int(n_tile) 779 n_roi = int(n_roi) 780 # Generate the object 781 scan = cls() 782 scan.slide_id = slide_id 783 if scanner_type == cls.Type.AXIOSCAN7: 784 scan.scanner_id = f"{cls.Type.AXIOSCAN7.value}_placeholder" 785 elif scanner_type == cls.Type.BZSCANNER: 786 scan.scanner_id = f"{cls.Type.BZSCANNER.value}_placeholder" 787 scan.roi = [cls.ROI() for _ in range(n_roi + 1)] 788 scan.roi[0].tile_rows = 1 789 scan.roi[0].tile_cols = n_tile + 1 790 return scan
Class that composes a whole scan's metadata. Contains some universal data, plus lists for channels and ROIs.
Scans -> scan-level coordinate frames. Each scan maintains its own scan-level coordinate frames based on the scanner it was scanned with.
Slide-level coordinate frame (common, agreed-upon frame of reference for a slide).
def __init__(
    self,
    slide_id: str = "",
    exists: bool = True,
    path: str = "",
    start_datetime: str = "",
    end_datetime: str = "",
    scan_time_s: int = -1,
    scanner_id: str = "",
    tray_pos: int = -1,
    slide_pos: int = -1,
    camera: str = "",
    objective: str = "",
    pixel_size_um: float = -1.0,
    tile_width_px: int = -1,
    tile_height_px: int = -1,
    tile_x_offset_px: int = -1,
    tile_y_offset_px: int = -1,
    tile_overlap_proportion: float = -1,
    channels: "list[Channel] | None" = None,
    roi: "list[ROI] | None" = None,
):
    """
    Store the scan-level metadata on the instance; no validation is performed.
    Every argument is copied onto an attribute of the same name.
    :param slide_id: the slide ID
    :param exists: stored as-is (NOTE(review): presumably whether the scan data
    is still present at ``path`` -- confirm)
    :param path: the path associated with the scan
    :param start_datetime: scan start, as a string
    :param end_datetime: scan end, as a string
    :param scan_time_s: scan duration in seconds
    :param scanner_id: the ID of the scanner
    :param tray_pos: the tray position
    :param slide_pos: the slide position
    :param camera: the camera name
    :param objective: the objective description
    :param pixel_size_um: the pixel size in micrometers
    :param tile_width_px: the tile width in pixels
    :param tile_height_px: the tile height in pixels
    :param tile_x_offset_px: the tile x offset in pixels
    :param tile_y_offset_px: the tile y offset in pixels
    :param tile_overlap_proportion: the proportion of a tile that is overlap;
    may be fractional, hence annotated as a float
    :param channels: a list of Channel objects; None gives a new empty list
    :param roi: a list of ROI objects; None gives a new empty list
    """
    # None stands in for "empty" so that instances never share one default list
    self.slide_id = slide_id
    self.exists = exists
    self.path = path
    self.start_datetime = start_datetime
    self.end_datetime = end_datetime
    self.scan_time_s = scan_time_s
    self.scanner_id = scanner_id
    self.tray_pos = tray_pos
    self.slide_pos = slide_pos
    self.camera = camera
    self.objective = objective
    self.pixel_size_um = pixel_size_um
    self.tile_width_px = tile_width_px
    self.tile_height_px = tile_height_px
    self.tile_x_offset_px = tile_x_offset_px
    self.tile_y_offset_px = tile_y_offset_px
    self.tile_overlap_proportion = tile_overlap_proportion
    self.channels = [] if channels is None else channels
    self.roi = [] if roi is None else roi
Axioscan 7 scanner IDs (service number), mapped to our scanner IDs
def has_same_profile(self, other):
    """
    Check whether another scan was acquired with the same profile: camera,
    objective, pixel size, tile geometry (size, offsets, overlap), channels, and
    ROI layout. ROIs are compared pairwise with ROI.similar().
    :param other: the Scan object to compare against
    :return: True if the profiles match, False otherwise
    """
    return (
        self.camera == other.camera
        and self.objective == other.objective
        and self.pixel_size_um == other.pixel_size_um
        and self.tile_width_px == other.tile_width_px
        and self.tile_height_px == other.tile_height_px
        and self.tile_x_offset_px == other.tile_x_offset_px
        and self.tile_y_offset_px == other.tile_y_offset_px
        and self.tile_overlap_proportion == other.tile_overlap_proportion
        and self.channels == other.channels
        # zip() stops at the shorter list, so the ROI counts must be compared
        # explicitly or a scan with fewer (or no) ROIs would always match
        and len(self.roi) == len(other.roi)
        and all(a.similar(b) for a, b in zip(self.roi, other.roi))
    )
def get_channel_names(self) -> list[str]:
    """
    Collect the name of every channel, preserving the scan's channel order.
    :return: a list of channel names.
    """
    names = []
    for entry in self.channels:
        names.append(entry.name)
    return names
Get the channel names in the scan's channel order.
Returns
a list of channel names.
229 def get_channel_indices(self, channel_names: Iterable[str | None]) -> list[int]: 230 """ 231 Given a list of channel names, return the corresponding indices in the scan's 232 channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the 233 actual AlexaFluor names (AF555, AF647, AF488). 234 If a list entry is None, it will return -1 for that entry. 235 :param channel_names: a list of channel names. 236 :return: a list of channel indices. 237 """ 238 # Get the scan's channel name list 239 scan_channel_names = self.get_channel_names() 240 241 channel_indices = [] 242 for name in channel_names: 243 # Convert any BZScanner channel names to the actual channel names 244 if name in self.BZSCANNER_CHANNEL_MAP: 245 name = self.BZSCANNER_CHANNEL_MAP[name] 246 247 # Append the corresponding index if possible 248 if name is None: 249 channel_indices.append(-1) 250 elif name in scan_channel_names: 251 channel_indices.append(scan_channel_names.index(name)) 252 else: 253 raise ValueError( 254 f"Channel name {name} not found in scan channels {scan_channel_names}" 255 ) 256 return channel_indices
Given a list of channel names, return the corresponding indices in the scan's channel order. Will convert BZScanner channel names (TRITC, CY5, FITC) to the actual AlexaFluor names (AF555, AF647, AF488). If a list entry is None, it will return -1 for that entry.
Parameters
- channel_names: a list of channel names.
Returns
a list of channel indices.
def get_image_size(self) -> tuple[int, int]:
    """
    Compute the tile size in pixels once the overlap has been removed. The
    overlap along each axis is the tile dimension times the overlap proportion,
    rounded down.
    :return: a tuple of (real_height, real_width) for easy comparison to arrays
    """
    proportion = self.tile_overlap_proportion
    real_width = self.tile_width_px - math.floor(self.tile_width_px * proportion)
    real_height = self.tile_height_px - math.floor(self.tile_height_px * proportion)
    return real_height, real_width
Get the real size of the image in pixels after subtracting overlap.
Returns
a tuple of (real_height, real_width) for easy comparison to arrays
def save_yaml(self, output_path: str):
    """
    Serialize this Scan object into a .yaml file, creating any missing folders.
    A path ending in ".yaml" is used as the file itself; any other path is
    treated as a folder that receives the standard metadata file name.
    :param output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
    :return: nothing; will raise an error on failure
    """
    target = os.path.abspath(output_path)
    _, extension = os.path.splitext(target)
    if extension != ".yaml":
        # Folder given: create it and place the standard metadata file inside
        os.makedirs(target, exist_ok=True)
        target = os.path.join(target, self.METADATA_FILE_NAME[self.Type.AXIOSCAN7])
    else:
        # File given: only its parent folder needs to exist
        os.makedirs(os.path.dirname(target), exist_ok=True)

    # Write out the object
    with open(target, "w") as handle:
        yaml.dump(self, stream=handle, sort_keys=False)
Write the Scan object to a .yaml file.
Parameters
- output_path: /path/to/file.yaml or /path/to/folder to put scan.yaml
Returns
nothing; will raise an error on failure
@classmethod
def load_yaml(cls, input_path: str) -> Self:
    """
    Read a Scan object back from a .yaml file. When given a folder, the standard
    metadata file name inside that folder is used.
    :param input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
    :return: a Scan object
    """
    resolved = os.path.abspath(input_path)
    if os.path.isdir(resolved):
        default_name = cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
        resolved = os.path.join(resolved, default_name)
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so this
    # should only be pointed at trusted files. Presumably the full loader is
    # required to resolve the custom yaml_tag classes -- confirm before changing.
    with open(resolved, "r") as handle:
        return yaml.load(handle, Loader=yaml.Loader)
Load a Scan object from a .yaml file.
Parameters
- input_path: /path/to/file.yaml or /path/to/folder with scan.yaml
Returns
a Scan object
def to_dict(self) -> dict:
    """
    Build a dictionary representation of the Scan object whose keys match the
    database columns and whose values match the database entries. The channel
    and ROI lists are stored as JSON strings under a top-level "data" key.
    :return: a dictionary
    """

    def as_data_json(items) -> str:
        # Dump to json (objects via their __dict__), then indent the dump and
        # wrap it under a top-level key
        dumped = json.dumps(items, default=lambda x: x.__dict__, indent=2)
        indented = " ".join(dumped.splitlines(True))
        return "{\n " + '"data": ' + indented + "\n}"

    channels_json = as_data_json(self.channels)
    roi_json = as_data_json(self.roi)

    # Keys are named the same as database columns
    result = {}
    result["scanner_id"] = self.scanner_id
    result["slide_id"] = self.slide_id
    result["exists"] = self.exists
    result["path"] = self.path
    result["start_datetime"] = self.start_datetime
    result["end_datetime"] = self.end_datetime
    result["tray_pos"] = self.tray_pos
    result["slide_pos"] = self.slide_pos
    result["tile_width"] = self.tile_width_px
    result["tile_height"] = self.tile_height_px
    result["tile_x_offset"] = self.tile_x_offset_px
    result["tile_y_offset"] = self.tile_y_offset_px
    result["tile_overlap"] = self.tile_overlap_proportion
    result["camera"] = self.camera
    result["objective"] = self.objective
    result["pixel_size"] = self.pixel_size_um
    result["channels"] = channels_json
    result["roi"] = roi_json
    return result
Convert the Scan object to a dictionary with keys matching database columns and values matching database entries.
Returns
a dictionary
343 @classmethod 344 def from_dict(cls, scan_dict) -> Self: 345 """ 346 Convert a dictionary from to_dict() or the database to a Scan object 347 :param scan_dict: a dictionary 348 :return: a Scan object 349 """ 350 local_timezone = zoneinfo.ZoneInfo("localtime") 351 if isinstance(scan_dict["start_datetime"], str): 352 start_datetime = datetime.datetime.strptime( 353 scan_dict["start_datetime"], cls.STANDARD_DATETIME_FORMAT 354 ).astimezone(local_timezone) 355 else: 356 start_datetime = scan_dict["start_datetime"].astimezone(local_timezone) 357 if isinstance(scan_dict["end_datetime"], str): 358 end_datetime = datetime.datetime.strptime( 359 scan_dict["end_datetime"], cls.STANDARD_DATETIME_FORMAT 360 ).astimezone(local_timezone) 361 else: 362 end_datetime = scan_dict["end_datetime"].astimezone(local_timezone) 363 dt = (end_datetime - start_datetime).total_seconds() 364 result = cls( 365 scanner_id=scan_dict["scanner_id"], 366 slide_id=scan_dict["slide_id"], 367 exists=scan_dict["exists"], 368 path=scan_dict["path"], 369 start_datetime=start_datetime.strftime(cls.STANDARD_DATETIME_FORMAT), 370 end_datetime=end_datetime.strftime(cls.STANDARD_DATETIME_FORMAT), 371 scan_time_s=int(dt), 372 tray_pos=scan_dict["tray_pos"], 373 slide_pos=scan_dict["slide_pos"], 374 tile_width_px=scan_dict["tile_width"], 375 tile_height_px=scan_dict["tile_height"], 376 tile_x_offset_px=scan_dict["tile_x_offset"], 377 tile_y_offset_px=scan_dict["tile_y_offset"], 378 tile_overlap_proportion=scan_dict["tile_overlap"], 379 camera=scan_dict["camera"], 380 objective=scan_dict["objective"], 381 pixel_size_um=scan_dict["pixel_size"], 382 ) 383 for channel_json in json.loads(scan_dict["channels"])["data"]: 384 result.channels.append( 385 cls.Channel( 386 name=channel_json["name"], 387 exposure_ms=channel_json["exposure_ms"], 388 intensity=channel_json["intensity"], 389 gain_applied=channel_json["gain_applied"], 390 ) 391 ) 392 for roi_json in json.loads(scan_dict["roi"])["data"]: 393 
result.roi.append( 394 cls.ROI( 395 origin_x_um=roi_json["origin_x_um"], 396 origin_y_um=roi_json["origin_y_um"], 397 width_um=roi_json["width_um"], 398 height_um=roi_json["height_um"], 399 tile_rows=roi_json["tile_rows"], 400 tile_cols=roi_json["tile_cols"], 401 focus_points=roi_json["focus_points"], 402 ) 403 ) 404 return result
Convert a dictionary from to_dict() or the database to a Scan object.
Parameters
- scan_dict: a dictionary
Returns
a Scan object
@classmethod
def load_czi(cls, input_path: str) -> Self:
    """
    Extracts metadata from a .czi file, which is the output of the Axioscan.
    :param input_path: the path to the .czi file
    :return: a Scan object
    :raises ModuleNotFoundError: if the optional aicspylibczi library is missing
    :raises ValueError: if the XML metadata and the binary shape information
                        disagree (number of channels, ROIs, or tiles), or if an
                        ROI or the selected detector cannot be matched
    """
    if aicspylibczi is None:
        raise ModuleNotFoundError(
            "aicspylibczi library not installed. "
            "Install csi-images with [imageio] option to resolve."
        )

    # Normalize paths
    input_path = os.path.abspath(input_path)

    with open(input_path, "rb") as file:
        # Build the reader once and reuse it for both queries
        czi_file = aicspylibczi.CziFile(file)
        # Read in metadata as XML elements
        metadata_xml = czi_file.meta
        # Read in shape metadata from binary
        rois_shape = czi_file.get_dims_shape()

    # Populate metadata
    scan = cls()

    scan.slide_id = metadata_xml.find(".//Label/Barcodes/Barcode/Content").text
    if scan.slide_id is not None:
        scan.slide_id = scan.slide_id.strip().upper()
    # Map the raw scanner ID (service ID) to our IDs
    scan.scanner_id = cls.SCANNER_IDS[
        metadata_xml.find(".//Microscope/UserDefinedName").text
    ]

    # Extract start and finish datetimes
    date = metadata_xml.find(".//Document/CreationDate").text
    # Strip out sub-second precision: drop the "." and the digits after it,
    # keeping whatever follows (the UTC offset). Timestamps without a
    # fractional part are left untouched instead of losing a character.
    whole_seconds, dot, fraction_and_offset = date.partition(".")
    if dot:
        date = whole_seconds + fraction_and_offset.lstrip("0123456789")
    date_as_datetime = datetime.datetime.strptime(
        date, cls.DATETIME_FORMAT[cls.Type.AXIOSCAN7]
    )
    scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
    # NOTE(review): the / 1000 presumably converts milliseconds to seconds
    scan.scan_time_s = round(
        float(metadata_xml.find(".//Image/AcquisitionDuration").text) / 1000
    )
    date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
    scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

    scan.tray_pos = int(metadata_xml.find(".//SlotNumberOfLoadedTray").text)
    # Only the last character of the position text is used
    scan.slide_pos = int(metadata_xml.find(".//SlideScannerPosition").text[-1])

    # Get camera and magnifying info
    scan.camera = (
        metadata_xml.find(".//Information/Instrument/Detectors/Detector").attrib
    )["Name"]
    magnification = metadata_xml.find(
        ".//Objectives/Objective/NominalMagnification"
    )
    aperture = metadata_xml.find(".//Objectives/Objective/LensNA")
    scan.objective = f"{magnification.text}x-{aperture.text}"
    scan.pixel_size_um = (
        float(metadata_xml.find(".//Scaling/Items/Distance/Value").text) * 1e6
    )
    # Round off the pixel size to nanometers; might not be optimal, but this
    # gets rounded when we send it to the database anyways (to 7 places)
    scan.pixel_size_um = round(scan.pixel_size_um, 3)

    # Get tile information
    # Note: X Y is untested, could be flipped. I always forget. Just don't use
    # non-square frames and we're all good.
    selected_detector = metadata_xml.find(".//SelectedDetector").text
    tile_info = None
    for detector in metadata_xml.findall(".//Detectors/Detector"):
        if detector.attrib["Id"] == selected_detector:
            tile_info = detector.find(".//Frame")
            break
    if tile_info is None:
        # Without this check a missing detector surfaced as an unbound local
        raise ValueError(
            f"No frame information found for the selected detector "
            f"{selected_detector} in {input_path}"
        )
    # Convert to integers
    tile_info = [int(coordinate) for coordinate in tile_info.text.split(",")]

    scan.tile_x_offset_px = tile_info[0]
    scan.tile_y_offset_px = tile_info[1]
    scan.tile_width_px = tile_info[2]
    scan.tile_height_px = tile_info[3]
    scan.tile_overlap_proportion = float(metadata_xml.find(".//Overlap").text)

    # Extract channels and create Channel objects from them
    channel_indices = []
    for channel in metadata_xml.findall(".//Image/Dimensions/Channels/Channel"):
        # The channel index is taken from the last character of the Id
        channel_indices.append(int(channel.attrib["Id"][-1]))
        intensity_xml = channel.find(".//Intensity")
        if intensity_xml is None:
            intensity = 0
        else:
            # Drops the last two characters (presumably a " %" suffix --
            # TODO confirm) and rescales by 1/100
            intensity = float(intensity_xml.text[:-2]) * 1e-2
        scan.channels.append(
            cls.Channel(
                name=channel.attrib["Name"].upper(),
                # NOTE(review): the 1e-6 factor suggests nanoseconds -> ms
                exposure_ms=float(channel.find("./ExposureTime").text) * 1e-6,
                intensity=intensity,
                gain_applied=True,  # In Axioscan, we will always use gain = 1
            )
        )
    # Make sure the channels are sorted; sort on the index only so that tied
    # indices never fall back to comparing Channel objects
    scan.channels = [
        channel
        for _, channel in sorted(
            zip(channel_indices, scan.channels), key=lambda pair: pair[0]
        )
    ]
    # Verify that the shape corresponds to the channels
    for roi in rois_shape:
        if roi["C"][1] != len(scan.channels):
            raise ValueError(
                f"Number of channels {len(scan.channels)} "
                f"is not the same as the number of channels in an ROI: "
                f"{roi['C'][1]}"
            )

    # Get the real ROI limits; the metadata is not always correct
    limits_xml = metadata_xml.findall(".//AllowedScanArea")
    limits = [
        round(float(limits_xml[0].find("Center").text.split(",")[0])),
        round(float(limits_xml[0].find("Center").text.split(",")[1])),
        round(float(limits_xml[0].find("Size").text.split(",")[0])),
        round(float(limits_xml[0].find("Size").text.split(",")[1])),
    ]
    # Convert to top-left and bottom-right
    limits = [
        round(limits[0] - limits[2] / 2),
        round(limits[1] - limits[3] / 2),
        round(limits[0] + limits[2] / 2),
        round(limits[1] + limits[3] / 2),
    ]

    # Extract ROIs and create ROI objects from them
    rois_xml_metadata = metadata_xml.findall(".//TileRegions/TileRegion")
    scenes_xml_metadata = metadata_xml.findall(".//S/Scenes/Scene")
    if len(rois_xml_metadata) != len(rois_shape):
        raise ValueError(
            f"Metadata and binary data from {input_path} "
            f"do not match in number of ROIs"
        )
    # We need both to determine the number of rows/columns because the XML lies
    roi_indices = []
    for roi_xml, roi_shape in zip(rois_xml_metadata, rois_shape):
        name = roi_xml.attrib["Name"]
        # Determine the index of this scene
        scene_index = -1
        for scene in scenes_xml_metadata:
            if scene.attrib["Name"] == name:
                scene_index = int(scene.attrib["Index"])
                break
        if scene_index == -1:
            raise ValueError(f"ROI {name} does not correspond to any scenes")
        roi_indices.append(scene_index)
        # Extract other metadata
        roi_limits = [
            round(float(roi_xml.find("CenterPosition").text.split(",")[0])),
            round(float(roi_xml.find("CenterPosition").text.split(",")[1])),
            round(float(roi_xml.find("ContourSize").text.split(",")[0])),
            round(float(roi_xml.find("ContourSize").text.split(",")[1])),
        ]
        # Convert to top-left and bottom-right
        roi_limits = [
            round(roi_limits[0] - roi_limits[2] / 2),
            round(roi_limits[1] - roi_limits[3] / 2),
            round(roi_limits[0] + roi_limits[2] / 2),
            round(roi_limits[1] + roi_limits[3] / 2),
        ]
        # Bound the ROI to the actual scan limits
        roi_limits = [
            max(roi_limits[0], limits[0]),
            max(roi_limits[1], limits[1]),
            min(roi_limits[2], limits[2]),
            min(roi_limits[3], limits[3]),
        ]

        tile_rows = int(roi_xml.find("Rows").text)
        # Current best way of reliably extracting; <Columns> entry can be wrong
        if (roi_shape["M"][1] % tile_rows) != 0:
            raise ValueError(
                f"The number of tiles {roi_shape['M'][1]} is not "
                f"divisible by the tile rows {tile_rows}; metadata "
                f"must be messed up. Thanks Zeiss"
            )
        tile_cols = int(roi_shape["M"][1] / tile_rows)
        # Support points are actually the relevant focus points for this ROI
        focus_points = []
        for focus_point in roi_xml.findall("SupportPoints/SupportPoint"):
            focus_points.append(
                [
                    int(float(focus_point.find("X").text)),
                    int(float(focus_point.find("Y").text)),
                    int(float(focus_point.find("Z").text)),
                ]
            )
        # Strip all sub-micron precision, it does not matter
        scan.roi.append(
            cls.ROI(
                origin_x_um=roi_limits[0],
                origin_y_um=roi_limits[1],
                width_um=roi_limits[2] - roi_limits[0],
                height_um=roi_limits[3] - roi_limits[1],
                tile_rows=tile_rows,
                tile_cols=tile_cols,
                focus_points=focus_points,
            )
        )
    # Sort based on the scene indices (index only, never the ROI objects)
    scan.roi = [
        roi
        for _, roi in sorted(zip(roi_indices, scan.roi), key=lambda pair: pair[0])
    ]

    return scan
Extracts metadata from a .czi file, which is the output of the Axioscan
Parameters
- input_path: the path to the .czi file
Returns
a Scan object
@classmethod
def load_txt(cls, input_path: str) -> Self:
    """
    Loads a Scan object from a .txt file, usually slideinfo.txt, which originates
    from the BZScanner. Values that the file does not provide (scan duration,
    pixel size, tile size, ROI geometry, ...) are filled in with nominal values.
    :param input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
    :return: a Scan object
    :raises ValueError: if a non-blank line of the file has no "=" separator
    :raises KeyError: if an expected key is missing from the file
    """
    # Set paths
    input_path = os.path.abspath(input_path)
    if os.path.isdir(input_path):
        input_path = os.path.join(
            input_path, cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]
        )

    # Read in metadata as a dict
    with open(input_path, "r") as file:
        metadata_contents = file.read()
    # Read each line, splitting on the first = sign only so that values may
    # themselves contain "="; blank lines are ignored
    metadata_dict = {}
    for line in metadata_contents.splitlines():
        if not line.strip():
            continue
        key, separator, value = line.partition("=")
        if not separator:
            raise ValueError(
                f"Malformed metadata line in {input_path} (no '='): {line!r}"
            )
        metadata_dict[key] = value

    # Populate metadata
    scan = cls()

    scan.slide_id = metadata_dict["SLIDEID"]
    scan.slide_id = scan.slide_id.strip().upper()

    scan.path = metadata_dict["SLIDEDIR"]

    # Extract start and finish datetimes
    date = metadata_dict["DATE"]
    date_as_datetime = datetime.datetime.strptime(
        date, cls.DATETIME_FORMAT[cls.Type.BZSCANNER]
    )
    # The parsed timestamp is naive. Attach the timezone rather than convert:
    # astimezone() would first assume the *system* timezone and shift the time
    # on any machine that is not itself in Los Angeles.
    date_as_datetime = date_as_datetime.replace(
        tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles")
    )  # Hardcoded because BZScanners are here
    scan.start_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)
    scan.scan_time_s = 90 * 60  # estimated 90 minutes per scan
    date_as_datetime += datetime.timedelta(seconds=scan.scan_time_s)
    scan.end_datetime = date_as_datetime.strftime(cls.STANDARD_DATETIME_FORMAT)

    # Build our scanner ID from the scanner type and the instrument entry
    scan.scanner_id = f'{cls.Type.BZSCANNER.value}_{metadata_dict["INSTRUMENT"]}'
    scan.tray_pos = 0  # only one tray_pos in a BZScanner
    scan.slide_pos = int(metadata_dict["SLIDEPOS"]) - 1  # 1-indexed

    # Get camera and magnifying info
    scan.camera = ""
    magnification = 10
    aperture = 0  # TODO: find the actual aperture
    scan.objective = f"{magnification}x-{aperture}"
    scan.pixel_size_um = 0.591  # Estimated from image metadata

    # Get tile information
    scan.tile_width_px = 1362  # Known from image metadata
    scan.tile_height_px = 1004  # Known from image metadata
    scan.tile_x_offset_px = 0  # Already removed
    scan.tile_y_offset_px = 0  # Already removed
    scan.tile_overlap_proportion = 0  # Already removed

    # Extract channels and create Channel objects from them
    # Previous policy was always to apply gains, hence the "1" default
    gain_applied = metadata_dict.get("gain_applied", "1") == "1"
    for raw_name, channel_name in cls.BZSCANNER_CHANNEL_MAP.items():
        channel_settings = metadata_dict[raw_name].split(",")
        # A leading "0" means that the channel is skipped
        if channel_settings[0] == "0":
            continue
        scan.channels.append(
            cls.Channel(
                name=channel_name,
                exposure_ms=float(channel_settings[1]),
                intensity=float(channel_settings[2]),
                gain_applied=gain_applied,
            )
        )

    # Get focus points; reading stops at the first entry flagged with "0"
    focus_points = []
    for i in range(33):
        focus_point = metadata_dict["FOCUSPOS" + str(i)].split(",")
        if focus_point[0] == "0":
            break
        focus_points.append(
            [
                int(float(focus_point[1])),
                int(float(focus_point[2])),
                int(float(focus_point[3])),
            ]
        )

    # In the BZScanner, the slide is vertical instead of horizontal
    # We put in nominal values for the ROI, which is oriented vertically as well
    tile_rows = 96
    tile_cols = 24
    roi_width = round(scan.pixel_size_um * scan.tile_width_px * tile_cols)
    roi_height = round(scan.pixel_size_um * scan.tile_height_px * tile_rows)
    # NOTE(review): 2500 / 20000 / 58000 look like a nominal margin and
    # scannable area in micrometers, with the ROI centered inside -- confirm
    origin_x_um = 2500 + round((20000 - roi_width) / 2)
    origin_y_um = 2500 + round((58000 - roi_height) / 2)
    scan.roi.append(
        cls.ROI(
            origin_x_um=origin_x_um,
            origin_y_um=origin_y_um,
            width_um=roi_width,
            height_um=roi_height,
            tile_rows=tile_rows,
            tile_cols=tile_cols,
            focus_points=focus_points,
        )
    )
    return scan
Loads a Scan object from a .txt file, usually slideinfo.txt, which originates from the BZScanner. Metadata that the file does not provide is filled in with nominal values or adjusted to fit the Scan format.
Parameters
- input_path: /path/to/file.txt or /path/to/folder containing slideinfo.txt
Returns
a Scan object
@classmethod
def load_from_folder(cls, input_path: str) -> Self:
    """
    Load a Scan object from a folder that contains a metadata file with a default
    name, scan.yaml or slideinfo.txt. Prefers scan.yaml if both exist.
    :param input_path: /path/to/folder
    :return: a Scan object
    :raises ValueError: if neither metadata file exists in the folder
    """
    input_path = os.path.abspath(input_path)
    yaml_name = cls.METADATA_FILE_NAME[cls.Type.AXIOSCAN7]
    txt_name = cls.METADATA_FILE_NAME[cls.Type.BZSCANNER]

    # scan.yaml is checked first, so it wins when both files are present
    if os.path.isfile(os.path.join(input_path, yaml_name)):
        return cls.load_yaml(input_path)
    if os.path.isfile(os.path.join(input_path, txt_name)):
        return cls.load_txt(input_path)

    raise ValueError(
        f"No scan metadata files "
        f"({yaml_name}, "
        f"{txt_name}) found in folder "
        f"{input_path}"
    )
Load a Scan object from a folder that contains a metadata file with a default name (scan.yaml or slideinfo.txt). Prefers scan.yaml if both exist.
Parameters
- input_path: /path/to/folder
Returns
a Scan object
@classmethod
def make_placeholder(
    cls,
    slide_id: str,
    n_tile: int = 2303,
    n_roi: int = 0,
    scanner_type: Type = Type.BZSCANNER,
) -> Self:
    """
    Build a stand-in Scan object that carries only the bare minimum of
    information.
    :param slide_id: the slide ID; converted to a stripped, upper-case string
    :param n_tile: the index of a tile; the first ROI gets one row of
                   n_tile + 1 tiles
    :param n_roi: the index of an ROI; n_roi + 1 default ROIs are created
    :param scanner_type: the scanner type used for the placeholder scanner ID
    :return: a Scan object
    """
    # Coerce every input before anything is built
    clean_slide_id = str(slide_id).strip().upper()
    tile_index = int(n_tile)
    roi_index = int(n_roi)

    placeholder = cls()
    placeholder.slide_id = clean_slide_id
    # Only the known scanner types receive a placeholder scanner ID;
    # anything else leaves the default scanner ID untouched
    for known_type in (cls.Type.AXIOSCAN7, cls.Type.BZSCANNER):
        if scanner_type == known_type:
            placeholder.scanner_id = f"{known_type.value}_placeholder"
            break

    placeholder.roi = [cls.ROI() for _ in range(roi_index + 1)]
    first_roi = placeholder.roi[0]
    first_roi.tile_rows = 1
    first_roi.tile_cols = tile_index + 1
    return placeholder
Make a placeholder Scan object with only basic required information filled in.
Parameters
- slide_id: the slide ID
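- n_tile: the index of a tile; the first ROI of the placeholder is given a single row of n_tile + 1 tiles so that this index exists
- n_roi: the index of an ROI; the placeholder is given n_roi + 1 ROIs so that this index exists
- scanner_type: the scanner type (BZSCANNER by default) used to build the placeholder scanner ID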
Returns
a Scan object
class Channel(yaml.YAMLObject):
    """
    Metadata for one channel of a scan; a scan usually has several (2-5).
    Contains four fields:
    - name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
    - exposure_ms: the exposure time to capture a frame in milliseconds
    - intensity: the light intensity used OR the gain applied to the channel
    - gain_applied: whether gain was applied to the channel
    """

    yaml_tag = "csi_utils.csi_scans.Scan.Channel"

    def __init__(
        self,
        name: str = "",
        exposure_ms: float = -1.0,
        intensity: float = -1.0,
        gain_applied: bool = False,
    ):
        # Assigned left to right, which fixes the field order of the YAML dump
        self.name, self.exposure_ms, self.intensity, self.gain_applied = (
            name,
            exposure_ms,
            intensity,
            gain_applied,
        )

    def __repr__(self):
        # The representation is the YAML dump, fields in assignment order
        as_yaml = yaml.dump(self, sort_keys=False)
        return as_yaml

    def __eq__(self, other):
        # Two objects are equal when their representations match
        own_text = self.__repr__()
        other_text = other.__repr__()
        return own_text == other_text
Class that comprises a channel; we usually have multiple (2-5) per scan. Contains four fields:
- name: the name of the channel (e.g. DAPI, AF647, AF555, AF488, BRIGHTFIELD)
- exposure_ms: the exposure time to capture a frame in milliseconds
- intensity: the light intensity used OR the gain applied to the channel
- gain_applied: whether gain was applied to the channel
class ROI(yaml.YAMLObject):
    """
    Metadata for one region of interest (ROI); a scan usually has a single ROI,
    but it may have more. Stores the origin and size of the ROI (the _um
    fields), its tile grid (tile_rows, tile_cols) and its focus points.
    """

    yaml_tag = "csi_utils.csi_scans.Scan.ROI"

    def __init__(
        self,
        origin_x_um: int = -1,
        origin_y_um: int = -1,
        width_um: int = -1,
        height_um: int = -1,
        tile_rows: int = -1,
        tile_cols: int = -1,
        focus_points=None,
    ):
        # Set in this exact order, which fixes the field order of the YAML dump
        initial_values = (
            ("origin_x_um", origin_x_um),
            ("origin_y_um", origin_y_um),
            ("width_um", width_um),
            ("height_um", height_um),
            ("tile_rows", tile_rows),
            ("tile_cols", tile_cols),
            # A fresh list per instance when no focus points are given
            ("focus_points", [] if focus_points is None else focus_points),
        )
        for attribute, value in initial_values:
            setattr(self, attribute, value)

    def __repr__(self):
        # The representation is the YAML dump, fields in assignment order
        as_yaml = yaml.dump(self, sort_keys=False)
        return as_yaml

    def __eq__(self, other):
        # Two objects are equal when their representations match
        own_text = self.__repr__()
        other_text = other.__repr__()
        return own_text == other_text

    def similar(self, other):
        """
        Check whether two ROIs share origin, size and tile grid; the focus
        points are not compared.
        :param other: the ROI to compare against
        :return: True if all six geometry fields match, False otherwise
        """
        geometry_fields = (
            "origin_y_um",
            "origin_x_um",
            "width_um",
            "height_um",
            "tile_rows",
            "tile_cols",
        )
        # Lazy generator: stops at the first field that differs
        return all(
            getattr(self, field) == getattr(other, field)
            for field in geometry_fields
        )
Class that comprises an ROI; we usually have 1, but may have more in a scan.
def __init__(
    self,
    origin_x_um: int = -1,
    origin_y_um: int = -1,
    width_um: int = -1,
    height_um: int = -1,
    tile_rows: int = -1,
    tile_cols: int = -1,
    focus_points=None,
):
    """
    Store the ROI fields on the instance; every field defaults to -1, and
    focus_points defaults to a new empty list.
    """
    # Set in this exact order so that the attribute order stays the same
    initial_values = (
        ("origin_x_um", origin_x_um),
        ("origin_y_um", origin_y_um),
        ("width_um", width_um),
        ("height_um", height_um),
        ("tile_rows", tile_rows),
        ("tile_cols", tile_cols),
        # A fresh list per instance when no focus points are given
        ("focus_points", [] if focus_points is None else focus_points),
    )
    for attribute, value in initial_values:
        setattr(self, attribute, value)
def similar(self, other):
    """
    Check whether two ROIs share origin, size and tile grid; the focus points
    are not compared.
    :param other: the ROI to compare against
    :return: True if all six geometry fields match, False otherwise
    """
    geometry_fields = (
        "origin_y_um",
        "origin_x_um",
        "width_um",
        "height_um",
        "tile_rows",
        "tile_cols",
    )
    # Lazy generator: stops at the first field that differs
    return all(
        getattr(self, field) == getattr(other, field) for field in geometry_fields
    )