Source code for biscot.Map

import Misc

import copy
import logging


class Map:
    """
    Holds the labels of a map on up to two enzyme channels, together with
    the alignments attached to it

    Labels are stored as ``(label_id, label_position)`` tuples, where
    ``label_id`` is the 1-based rank at which the label was added.
    """

    def __init__(self, map_id, labels_1, labels_2, alignments=None):
        """
        Creates a Map

        :param map_id: Identifier of the map
        :param labels_1: Channel 1 labels as (label_id, position) tuples
        :type labels_1: list(tuple(int, int))
        :param labels_2: Channel 2 labels as (label_id, position) tuples
        :type labels_2: list(tuple(int, int))
        :param alignments: Initial alignments, deep-copied on construction.
            None (the default) yields an empty list
        :type alignments: list(Alignment) or None
        """
        self.map_id = map_id
        # The label lists are stored as given (not copied)
        self.labels_1 = labels_1
        self.labels_2 = labels_2
        self.nb_channel_1_labels = len(labels_1)
        self.nb_channel_2_labels = len(labels_2)
        # None stands in for the former mutable `[]` default; an explicit None
        # no longer leaves `self.alignments` unusable for `add_alignment`
        if alignments is None:
            self.alignments = []
        else:
            self.alignments = copy.deepcopy(alignments)
        self.contig_maps = []

    def add_channel_1_label(self, label_position):
        """
        Adds a label to the list of channel 1 labels

        :param label_position: Position of the label on the map
        :type label_position: int
        """
        self.nb_channel_1_labels += 1
        self.labels_1.append((self.nb_channel_1_labels, label_position))

    def add_channel_2_label(self, label_position):
        """
        Adds a label to the list of channel 2 labels

        :param label_position: Position of the label on the map
        :type label_position: int
        """
        self.nb_channel_2_labels += 1
        self.labels_2.append((self.nb_channel_2_labels, label_position))

    def get_label_position(self, label_id, channel):
        """
        Returns a label position on a map based on its id and channel

        :param label_id: Label id to look for
        :type label_id: int
        :param channel: Enzyme channel of the searched label (1 or 2)
        :type channel: int
        :raises Exception: If the label couldn't be found, or if the channel
            is neither 1 nor 2
        :return: Searched label position
        :rtype: int
        """
        if channel == 1:
            labels = self.labels_1
        elif channel == 2:
            labels = self.labels_2
        else:
            # Unknown channel: nothing to search, fall through to the error
            labels = ()

        for label, position in labels:
            if label == label_id:
                return position

        raise Exception(
            f"Didn't find label {label_id} on map {self.map_id} on channel {channel}."
        )

    def add_alignment(self, aln):
        """
        Adds an Alignment object to the list of alignments and records its
        map id in ``contig_maps``

        :param aln: Alignment to add
        :type aln: Alignment
        """
        self.alignments.append(aln)
        self.contig_maps.append(aln.map_id)

    def sort_alignments(self):
        """
        Sorts the list of alignments by ascending reference_start

        The attribute is rebound to a new sorted list.
        """
        self.alignments = sorted(
            self.alignments, key=lambda alignment: alignment.reference_start
        )

    def check_containment(self):
        """
        Parses the list of alignments in search of alignments that are
        strictly contained into another one, i.e.
        reference_start_aln_1 < reference_start_aln_2 and
        reference_end_aln_1 > reference_end_aln_2

        Every unordered pair of alignments is compared once, in both
        directions. Alignments sharing a start or an end coordinate are not
        reported.

        :return: List of tuples with the containing alignment at the first
            position and the contained alignment at the second position
        :rtype: list(tuple(Alignment, Alignment))
        """
        contained_alns = []
        for i, aln_1 in enumerate(self.alignments):
            for aln_2 in self.alignments[i + 1:]:
                if (
                    aln_1.reference_start < aln_2.reference_start
                    and aln_1.reference_end > aln_2.reference_end
                ):
                    logging.debug(
                        f"Map {aln_2.map_id} ({aln_2.reference_start} -> {aln_2.reference_end}) is contained in map {aln_1.map_id} ({aln_1.reference_start} -> {aln_1.reference_end}) on anchor {aln_1.reference_id} channel {aln_1.channel}"
                    )
                    contained_alns.append((aln_1, aln_2))
                elif (
                    aln_2.reference_start < aln_1.reference_start
                    and aln_2.reference_end > aln_1.reference_end
                ):
                    logging.debug(
                        f"Map {aln_1.map_id} ({aln_1.reference_start} -> {aln_1.reference_end}) is contained in map {aln_2.map_id} ({aln_2.reference_start} -> {aln_2.reference_end}) on anchor {aln_2.reference_id} channel {aln_2.channel}"
                    )
                    contained_alns.append((aln_2, aln_1))
        return contained_alns

    def print_alignments(self):
        """
        Prints the alignments of a Map object, one per line
        """
        for aln in self.alignments:
            print(aln, flush=True)

    def __str__(self):
        """
        Returns the map id, both label counts and the alignment list,
        separated by tabs
        """
        return (
            f"{self.map_id}\t{self.nb_channel_1_labels}"
            f"\t{self.nb_channel_2_labels}\t{self.alignments}"
        )
def parse_reference_cmap(reference_cmap_file_path):
    """
    Parses a reference CMAP file to extract anchor labels

    Lines starting with ``#`` and blank lines are ignored. Every other line
    is split on tabs: column 1 is read as the map id, column 5 as the label
    channel and column 6 as the label position.

    :param reference_cmap_file_path: Path to a CMAP file
    :type reference_cmap_file_path: str
    :raises ValueError: If a map id, channel or position is not numeric
    :raises IndexError: If a data line has fewer than six columns
    :return: Dict containing anchor maps, keyed by map id
    :rtype: dict(int, Map)
    """
    reference_maps_dict = {}

    # The context manager closes the file even when a line fails to parse
    with open(reference_cmap_file_path) as reference_cmap_file:
        for line in reference_cmap_file:
            # Blank lines used to crash on int(''); skip them with the headers
            if line.startswith("#") or not line.strip():
                continue

            fields = line.rstrip("\n").split("\t")
            map_id = int(fields[0])
            label_channel = int(fields[4])
            # Keep only the part of the position before the decimal point
            label_position = int(fields[5].split(".")[0])

            # The map is registered even when the channel is neither 1 nor 2;
            # such lines simply add no label
            if map_id not in reference_maps_dict:
                reference_maps_dict[map_id] = Map(map_id, [], [])

            if label_channel == 1:
                reference_maps_dict[map_id].add_channel_1_label(label_position)
            elif label_channel == 2:
                reference_maps_dict[map_id].add_channel_2_label(label_position)

    return reference_maps_dict
def parse_contig_cmap(cmap_1_path, cmap_2_path):
    """
    Parses one or two contig CMAP files to extract contig labels

    Labels of the first file are stored as channel 1 labels, labels of the
    second file (when given) as channel 2 labels. Lines starting with ``#``
    and blank lines are ignored.

    :param cmap_1_path: Path to a CMAP file
    :type cmap_1_path: str
    :param cmap_2_path: Path to a second CMAP file; skipped when falsy
        (None or empty string)
    :type cmap_2_path: str or None
    :raises ValueError: If a map id or position is not numeric
    :raises IndexError: If a data line has fewer than six columns
    :return: Dict containing contig maps, keyed by map id
    :rtype: dict(int, Map)
    """
    contigs_map_dict = {}
    _add_contig_labels(cmap_1_path, contigs_map_dict, 1)
    if cmap_2_path:
        _add_contig_labels(cmap_2_path, contigs_map_dict, 2)
    return contigs_map_dict


def _add_contig_labels(cmap_path, contigs_map_dict, channel):
    """
    Reads one contig CMAP file and adds its labels to the maps of
    contigs_map_dict, on channel 1 when channel == 1, else on channel 2

    :param cmap_path: Path to a CMAP file
    :type cmap_path: str
    :param contigs_map_dict: Dict of contig maps, updated in place
    :type contigs_map_dict: dict(int, Map)
    :param channel: Channel on which the labels are stored
    :type channel: int
    """
    # The context manager closes the file even when a line fails to parse
    with open(cmap_path) as cmap_file:
        for line in cmap_file:
            # Blank lines used to crash on int(''); skip them with the headers
            if line.startswith("#") or not line.strip():
                continue

            fields = line.rstrip("\n").split("\t")
            map_id = int(fields[0])
            # Keep only the part of the position before the decimal point
            label_position = int(fields[5].split(".")[0])

            if map_id not in contigs_map_dict:
                contigs_map_dict[map_id] = Map(map_id, [], [])

            if channel == 1:
                contigs_map_dict[map_id].add_channel_1_label(label_position)
            else:
                contigs_map_dict[map_id].add_channel_2_label(label_position)
def sort_map_alignments(reference_maps_dict):
    """
    Asks every Map of the dict to sort its own alignments

    The ordering itself is delegated to each map's ``sort_alignments``
    method; nothing is returned.

    :param reference_maps_dict: Dict containing anchor maps
    :type reference_maps_dict: dict(int, Map)
    """
    logging.info("Sorting alignments")
    for anchor_map in reference_maps_dict.values():
        anchor_map.sort_alignments()
def check_map_containment(reference_maps_dict):
    """
    Collects, for every anchor map, the contained alignments it reports

    Each map's ``check_containment`` result is kept as one entry of the
    returned list; maps reporting nothing are left out.

    :param reference_maps_dict: Dict containing anchor maps
    :type reference_maps_dict: dict(int, Map)
    :return: One list of (containing, contained) alignment tuples per anchor
        that has at least one contained alignment
    :rtype: list(list(tuple(Alignment, Alignment)))
    """
    logging.info("Looking for contained maps")
    per_anchor_results = []
    for anchor_map in reference_maps_dict.values():
        found = anchor_map.check_containment()
        if len(found) == 0:
            continue
        per_anchor_results.append(found)
    return per_anchor_results