# Source code for pacbio_data_processing.parameters

#######################################################################
#
# Copyright (C) 2021 David Palao
#
# This file is part of PacBio data processing.
#
#  PacBioDataProcessing is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  PacBio data processing is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with PacBioDataProcessing. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################

from pathlib import Path
import logging

from pkg_resources import Requirement, resource_filename

from .filters import filter_mappings_binary, filter_mappings_ratio
from . import __version__ as VERSION


class ParametersBase:
    """Thin facade that exposes the attributes of a wrapped input object.

    Any attribute that is not found on the instance or its class is
    looked up on the wrapped ``cl_input`` object (presumably the parsed
    command-line arguments -- confirm against the callers).  Subclasses
    add properties for the values that need post-processing.
    """

    def __init__(self, cl_input):
        """Store *cl_input*, the object whose attributes are exposed."""
        self._cl_input = cl_input

    def __getattr__(self, attr):
        """Delegate a lookup that failed normally to the wrapped object.

        Raises:
            AttributeError: if the wrapped object has no attribute *attr*,
                or if the wrapped object has not been set on this
                instance yet.
        """
        # ``__getattr__`` only runs after the regular lookup failed.  If
        # the missing name is ``_cl_input`` itself (instance built without
        # ``__init__``, as ``copy`` and ``pickle`` do), delegating would
        # re-enter this method endlessly and die with RecursionError.
        if attr == "_cl_input":
            raise AttributeError(attr)
        return getattr(self._cl_input, attr)
class BamFilteringParameters(ParametersBase):
    """Parameters of a BAM filtering run, derived from the wrapped input.

    On top of the plain attribute delegation of the base class, this
    class computes the output file name, the mappings restriction, the
    mapping filter to use and a clamped mapping ratio.
    """

    def __str__(self):
        """Return a multi-line, human-readable summary of the run."""
        header = (
            f"Filtering '{self.input_bam_file}' to produce "
            f"'{self.out_bam_file}' with:"
        )
        details = (
            f" minimun DNA sequence length: {self.min_dna_seq_length}",
            f" minimun subreads per molecule: "
            f"{self.min_subreads_per_molecule}",
            f" quality of sequencing: {self.quality_threshold}",
            f" mappings: {self.mappings}",
            f" min mapping ratio: {self.min_relative_mapping_ratio}",
        )
        # Every line, including the last one, is newline-terminated.
        return "\n".join((header, *details)) + "\n"

    @property
    def out_bam_file(self):
        """Output path: the input file name prefixed with ``parsed.``.

        The file lives in the same directory as the input BAM file.
        """
        source = self.input_bam_file
        return source.parent / f"parsed.{source.name}"

    @property
    def limit_mappings(self):
        """Raw ``mappings`` value, or ``None`` when it is ``"all"``."""
        requested = self._cl_input.mappings
        return requested if requested != "all" else None

    @property
    def filter_mappings(self):
        """Filter function selected by the (clamped) mapping ratio.

        A truthy ratio selects ``filter_mappings_ratio``; otherwise
        ``filter_mappings_binary`` is used.
        """
        if not self.min_relative_mapping_ratio:
            return filter_mappings_binary
        return filter_mappings_ratio

    @property
    def min_relative_mapping_ratio(self):
        """Raw mapping ratio, clamped to the closed interval [0, 1]."""
        raw = self._cl_input.min_relative_mapping_ratio
        if raw > 1:
            return 1.0
        if raw < 0:
            return 0.0
        return raw
class SingleMoleculeAnalysisParameters(ParametersBase):
    """Parameters of an 'sm-analysis' run, derived from the wrapped input.

    Adds output file names, the resolved ipd model and the parsed
    partition on top of the attribute delegation of the base class.
    """

    def _make_out_filename(self, *, suff: str, pref: str = "") -> Path:
        """Build an output path in the directory of the input BAM file.

        The file name is the input name prefixed with ``sm-analysis.``,
        preceded by ``partition_<i>of<n>.`` when a valid partition is set,
        and finally by *pref*.  A trailing ``.bam`` is replaced by *suff*;
        any other trailing suffix is kept and *suff* is appended to it.

        Args:
            suff: suffix (with leading dot) of the file to produce.
            pref: optional text put in front of the whole file name.
        """
        new_base = "sm-analysis." + self.input_bam_file.name
        # The property re-parses the raw input on every access: read once.
        partition_info = self.partition
        if partition_info:
            partition, partitions = partition_info
            new_base = f"partition_{partition}of{partitions}." + new_base
        new_name = self.input_bam_file.parent / (pref + new_base)
        if new_name.suffix != ".bam":
            # ``with_suffix`` replaces the last suffix, so it must be
            # repeated in front of the new one to be preserved.
            suff = new_name.suffix + suff
        return new_name.with_suffix(suff)

    @property
    def joint_gff_filename(self):
        """Path of the ``.gff`` output file."""
        return self._make_out_filename(suff=".gff")

    @property
    def one_line_per_mod_filename(self):
        """Path of the ``.csv`` output file."""
        return self._make_out_filename(suff=".csv")

    @property
    def summary_report_html_filename(self):
        """Path of the ``.html`` output file, prefixed with ``summary.``."""
        return self._make_out_filename(suff=".html", pref="summary.")

    def _resolve_model_from_resources(self, model_name):
        """Return the path of *model_name* inside kineticsTools' resources.

        The path points to ``kineticsTools/resources/<model_name>.npz.gz``;
        its existence is not checked here.
        """
        r = Requirement.parse("kineticsTools")
        return Path(
            resource_filename(
                r, f"kineticsTools/resources/{model_name}.npz.gz")
        )

    @property
    def ipd_model(self):
        """Path of the requested ipd model, or ``None``.

        The raw value is first tried as a path to an existing file and
        then as the name of a model in kineticsTools' resources.
        ``None`` is returned when no model was requested or when neither
        candidate is an existing file.
        """
        raw_model = self._cl_input.ipd_model
        if not raw_model:
            return None
        model = Path(raw_model)
        if model.is_file():
            return model
        model = self._resolve_model_from_resources(raw_model)
        return model if model.is_file() else None

    @property
    def partition(self):
        """Parsed ``(partition, partitions)`` tuple, or ``None``.

        The raw value must be a string of the form ``"<int>:<int>"`` with
        the first number not greater than the second; anything else
        (including a missing value) yields ``None``.  No lower bound is
        enforced on either number.
        """
        try:
            partition, partitions = self._cl_input.partition.split(":")
        except (ValueError, AttributeError):
            # Wrong number of fields, or the raw value is not a string.
            return
        try:
            partition = int(partition)
            partitions = int(partitions)
        except ValueError:
            return
        if partition <= partitions:
            return partition, partitions

    def __str__(self):
        """Return a multi-line, human-readable summary of the run.

        Side effect: when a model was requested but could not be found,
        an error is logged and the model line is left out of the summary.
        """
        s = (
            f"Starting 'sm-analysis' (version {VERSION}) with:\n"
            f" Input BAM file: '{self.input_bam_file}'\n"
            f" Reference file: '{self.fasta}'\n"
            f" ipd program: '{self.ipdsummary_path}'\n"
            f" # ipd program instances: {self.num_simultaneous_ipdsummarys}"
            f"\n"
            f" # workers per ipd instance: {self.num_workers_per_ipdsummary}"
            f"\n"
            f" modification types: {self.modification_types}\n"
            f" aligner: '{self.blasr_path}'\n"
            f" # workers blasr: {self.nprocs_blasr}\n"
            f" indexer: '{self.pbindex_path}'\n"
        )
        # Resolve the model once: every access to the property repeats the
        # file lookups, and the value printed must be the value tested.
        ipd_model = self.ipd_model
        if ipd_model:
            s += f" ipd model: {ipd_model}\n"
        elif self._cl_input.ipd_model:
            # In this case the user entered the model but it wasn't found:
            logging.error(
                f"Model '{self._cl_input.ipd_model}' "
                "not found. Using default model"
            )
        partition_info = self.partition
        if partition_info:
            s += f" partition: {partition_info[0]} of {partition_info[1]}\n"
        if self.aligned_CCS_bam_file:
            s += f" aligned CCS bam file: '{self.aligned_CCS_bam_file}'\n"
        if self.CCS_bam_file:
            s += f" CCS bam file: '{self.CCS_bam_file}'\n"
        if self.keep_temp_dir:
            s += " keep temp dir: yes\n"
        if self.only_produce_methylation_report:
            s += " only produce methylation report: yes\n"
        return s