Source code for pacbio_data_processing.external

#######################################################################
#
# Copyright (C) 2021, 2022 David Palao
#
# This file is part of PacBioDataProcessing.
#
#  PacBioDataProcessing is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  PacBioDataProcessing is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with PacBioDataProcessing. If not, see <http://www.gnu.org/licenses/>.
#
#######################################################################

import subprocess
import logging
from pathlib import Path
from typing import Optional

from .types import PathOrStr, ReturnCode
from .sentinel import Sentinel, SentinelFileFound, SentinelFileNotFound


class ExternalProgram:
    """A base class with common functionality to all external programs'
    classes that:

    1. produce an output file, and
    2. its production is to be protected by a ``Sentinel``.

    This base class provides the interface and the ``Sentinel``
    protection. Subclasses are expected to implement ``_run``,
    ``_log_ok_computation`` and ``_log_err_computation``.
    """

    def __init__(self, path: PathOrStr) -> None:
        """Store the executable and derive the program's name.

        :param path: the executable to be run.
        """
        self.program = path
        # Lower-cased class name; it is used to tag log messages.
        self.program_name = self.__class__.__name__.lower()

    def _run(self, *args, **kwargs) -> tuple[ReturnCode, str]:
        """Abstract internal method used by ``__call__``.

        Implementations must return a pair: the return code of the
        process and the error message it produced.
        """
        raise NotImplementedError

    def _log_ok_computation(self, outfile: PathOrStr) -> None:
        """Abstract internal method used by ``__call__``.

        It is called when ``_run`` reported a return code equal to 0.
        """
        raise NotImplementedError

    def _log_err_computation(
            self, infile: PathOrStr, outfile: PathOrStr, err_msg: str
    ) -> None:
        """Abstract internal method used by ``__call__``.

        It is called when ``_run`` reported a return code different
        from 0.
        """
        raise NotImplementedError

    def __call__(
            self, infile: PathOrStr, outfile: PathOrStr, *args, **kwargs
    ) -> Optional[ReturnCode]:
        """It runs the executable, with the given parameters.

        The return code of the associated process is returned by this
        method *if* the executable could run at all, else ``None`` is
        returned. One case where the executable cannot run is when
        the sentinel file is there *before* the executable process is
        run.

        If ``SentinelFileNotFound`` is raised, only warnings are logged
        (neither ``_log_ok_computation`` nor ``_log_err_computation``
        is called) and whatever ``_run`` returned as return code, if
        anything, is handed back.

        :param infile: input file, forwarded to ``_run``.
        :param outfile: output file, forwarded to ``_run``; the
            ``Sentinel`` is built from ``Path(outfile)``.
        :param args: extra positional arguments forwarded to ``_run``.
        :param kwargs: extra keyword arguments forwarded to ``_run``.
        :return: the return code given by ``_run`` or ``None``.
        """
        # Bound up front so that ``return result`` is always well
        # defined, even if ``SentinelFileNotFound`` is raised before
        # ``_run`` had the chance to deliver a value (e.g. while another
        # exception is unwinding the ``with`` block).
        result = None
        try:
            sentinel = Sentinel(Path(outfile))
            with sentinel:
                result, err_msg = self._run(infile, outfile, *args, **kwargs)
        except SentinelFileFound:
            result = None
            logging.warning(
                f"Sentinel file '{sentinel.path}' detected! "
                f"Delaying {self.program_name} computation."
            )
        except SentinelFileNotFound:
            logging.warning(
                f"Sentinel file '{sentinel.path}' disappeared before "
                f"{self.program_name} finished its computation!"
            )
            logging.warning(
                " ...some other person/process is probably carrying out a "
                "similar computation in the same directory and messing up."
            )
            logging.warning(
                " The integrity of the results may be compromised!"
            )
        else:
            # Only reached when the sentinel-protected run ended without
            # sentinel related exceptions.
            if result == 0:
                self._log_ok_computation(outfile)
            else:
                self._log_err_computation(infile, outfile, err_msg)
        return result


class Blasr(ExternalProgram):
    """An object to interact with the ``blasr`` aligner."""

    def __call__(
            self,
            in_bamfile: PathOrStr,
            fasta: PathOrStr,
            out_bamfile: PathOrStr,
            nprocs: int = 1
    ) -> Optional[ReturnCode]:
        """Align ``in_bamfile`` against ``fasta`` into ``out_bamfile``.

        The arguments are reordered to match the
        ``(infile, outfile, *args)`` convention of
        ``ExternalProgram.__call__``.

        :param in_bamfile: input BAM file.
        :param fasta: passed to the executable right after the input
            file.
        :param out_bamfile: output file (given to ``--out``).
        :param nprocs: value given to the ``--nproc`` option.
        :return: whatever ``ExternalProgram.__call__`` returns.
        """
        return super().__call__(in_bamfile, out_bamfile, fasta, nprocs)

    def _run(
            self,
            in_bamfile: PathOrStr,
            out_bamfile: PathOrStr,
            fasta: PathOrStr,
            nprocs: int = 1
    ) -> tuple[ReturnCode, str]:
        """Execute the aligner and report its return code and stderr."""
        command = (
            self.program, in_bamfile, fasta,
            "--nproc", f"{nprocs}",
            "--bam",
            "--out", out_bamfile,
        )
        blasr_proc = subprocess.run(command, capture_output=True)
        # The stderr of an external tool is not guaranteed to be valid
        # UTF-8: undecodable bytes are replaced instead of letting a
        # ``UnicodeDecodeError`` hide the return code and the message.
        err_msg = blasr_proc.stderr.decode(errors="replace")
        return (blasr_proc.returncode, err_msg)

    def _log_ok_computation(self, outfile: PathOrStr) -> None:
        """Log (level: info) that the aligned file was generated."""
        logging.info(
            f"[{self.program_name}] Aligned file '{outfile}' "
            "generated"
        )

    def _log_err_computation(
            self, infile: PathOrStr, outfile: PathOrStr, err_msg: str
    ) -> None:
        """Log (level: error) the failed alignment and its message."""
        logging.error(
            f"'{self.program}' could not align the input file "
            f"'{infile}'")
        logging.error(f" ...the error was: {err_msg}")


class CCS(ExternalProgram):
    """An object to interact with the ``ccs`` program."""

    def __call__(
            self,
            in_bamfile: PathOrStr,
            out_bamfile: PathOrStr,
    ) -> Optional[ReturnCode]:
        """Run the program on ``in_bamfile`` to produce ``out_bamfile``.

        :param in_bamfile: input file, first argument of the command.
        :param out_bamfile: output file, second argument of the command.
        :return: whatever ``ExternalProgram.__call__`` returns.
        """
        return super().__call__(in_bamfile, out_bamfile)

    def _run(
            self,
            in_bamfile: PathOrStr,
            out_bamfile: PathOrStr,
    ) -> tuple[ReturnCode, str]:
        """Execute the program and report its return code and stderr."""
        command = (self.program, in_bamfile, out_bamfile)
        ccs_proc = subprocess.run(command, capture_output=True)
        # The stderr of an external tool is not guaranteed to be valid
        # UTF-8: undecodable bytes are replaced instead of letting a
        # ``UnicodeDecodeError`` hide the return code and the message.
        err_msg = ccs_proc.stderr.decode(errors="replace")
        return (ccs_proc.returncode, err_msg)

    def _log_ok_computation(self, outfile: PathOrStr) -> None:
        """Log (level: info) that the output file was generated."""
        logging.info(f"[{self.program_name}] File '{outfile}' generated")

    def _log_err_computation(
            self, infile: PathOrStr, outfile: PathOrStr, err_msg: str
    ) -> None:
        """Log (level: error) the failure, the command and its message."""
        logging.error(
            f"[{self.program_name}] During the execution of '{self.program}' "
            "an error occurred"
        )
        logging.error(
            f"[{self.program_name}] The following command was issued:"
        )
        logging.error(f" '{self.program} {infile} {outfile}'")
        logging.error(f"[{self.program_name}] ...the error was: {err_msg}")