Module xi_covutils.blast.wrappers

BlastP classes and functions

Expand source code
"""
BlastP classes and functions
"""
from io import StringIO
import subprocess
from typing import Any, Literal, Optional, Union

import pandas as pd

# Task names accepted by BlastP.with_task.
Blastptask = Union[
  Literal["blastp"], Literal["blastp-fast"], Literal["blastp-short"]
]

# Task names accepted by BlastN.with_task.
Blastntask = Union[
  Literal["blastn"], Literal["blastn-short"], Literal["dc-megablast"],
  Literal["megablast"], Literal["rmblastn"]
]

class BlastResult:
  """
  BlastResult. Represents the results of a blast search.

  Hits are stored as rows in ``data``; ``headers`` holds one column name per
  field, so every stored row has exactly ``len(headers)`` values.
  """

  def __init__(self):
    # One list of parsed field values per hit.
    self.data:list[list[Any]] = []
    # Column names; empty until set_headers is called.
    self.headers:list[str] = []

  def __len__(self) -> int:
    """
    Returns the number of data rows in the BlastResult object.
    """
    return len(self.data)

  def set_headers(self, headers:list[str]):
    """
    Sets the headers for the data fields in the BlastResult object.

    Args:
      headers (list[str]): The headers for the data fields.
    """
    self.headers = headers

  def has_headers(self) -> bool:
    """
    Checks if the BlastResult object has headers.

    Returns:
      bool: True if headers are set, False otherwise.
    """
    return bool(self.headers)

  def add(self, row:list[Any]):
    """
    Adds a row of data to the BlastResult object.

    Args:
      row (list[Any]): A row of data.

    Raises:
      ValueError: If the row does not have exactly one value per header.
    """
    if len(row) != len(self.headers):
      raise ValueError(
        f"Data has {len(row)} fields, header has {len(self.headers)} fields"
      )
    self.data.append(row)

  def exclude(self, header:str, values:set[Any]) -> "BlastResult":
    """
    Removes those hits from the BlastResult that match any of the
    given values in the given header.

    If the header is not one of the current headers nothing is removed.

    Args:
      header (str): A header name.
      values (set[Any]): A set of values to exclude.

    Returns:
      BlastResult: The modified BlastResult object (self).
    """
    try:
      col_index = self.headers.index(header)
    except ValueError:
      # Unknown column: leave the data untouched.
      return self
    self.data = [
      row
      for row in self.data
      if row[col_index] not in values
    ]
    return self

  def as_dataframe(self) -> pd.DataFrame:
    """
    Converts the results to a pandas DataFrame.

    Returns:
      pd.DataFrame: A DataFrame with one row per hit and the headers as
        column names.
    """
    return pd.DataFrame(self.data, columns=self.headers)

  @staticmethod
  def _parse_fields(line:str) -> list[Union[str, int, float]]:
    """
    Parses a tab separated line of data and returns a list of fields.

    Each field is converted to int when possible, otherwise to float, and
    is kept as a string when neither conversion succeeds.

    Args:
      line (str): A line of data.

    Returns:
      list[Union[str, int, float]]: A list of fields parsed from the line of
        data.
    """
    fields: list[Union[str, int, float]] = []
    for c_field in line.split("\t"):
      # int() and float() accept "_" as a digit separator, so an identifier
      # such as "1_2" would silently become the number 12. Keep any field
      # containing an underscore as text.
      if "_" in c_field:
        fields.append(c_field)
        continue
      for converter in (int, float):
        try:
          fields.append(converter(c_field))
          break
        except ValueError:
          continue
      else:
        # Neither conversion worked: keep the raw text.
        fields.append(c_field)
    return fields

  @staticmethod
  def from_tabular_with_headers(input_data) -> "BlastResult":
    """
    Creates a BlastResult object from tabular data with headers.

    Headers are taken from the first line starting with "# Fields: ";
    later "# Fields: " lines and every other line starting with "#" are
    ignored. Blank lines are skipped.

    Args:
      input_data: An iterable of text lines with the tabular data.

    Returns:
      BlastResult: A BlastResult object.

    Raises:
      ValueError: If a data row does not match the number of headers.
    """
    result = BlastResult()
    for line in input_data:
      line = line.rstrip()
      if not line:
        # A blank line carries no hit; parsing it would produce a bogus
        # one-field row and fail the field count check.
        continue
      if line.startswith("# Fields: "):
        if not result.has_headers():
          line = line[10:]
          fields = [x.strip() for x in line.split(",")]
          result.set_headers(fields)
        continue
      if line.startswith("#"):
        continue
      result.add(BlastResult._parse_fields(line))
    return result

  @staticmethod
  def from_tabular_without_headers(
      headers: list[str],
      input_data
    ) -> "BlastResult":
    """
    Creates a BlastResult object from tabular data without headers.

    Blank lines are skipped.

    Args:
      headers (list[str]): The headers for the data fields.
      input_data: An iterable of text lines with the tabular data.

    Returns:
      BlastResult: A BlastResult object.

    Raises:
      ValueError: If a data row does not match the number of headers.
    """
    result = BlastResult()
    result.set_headers(headers)
    for line in input_data:
      line = line.rstrip()
      if not line:
        # Skip blank lines instead of failing the field count check.
        continue
      result.add(BlastResult._parse_fields(line))
    return result

class BlastWrapper: # pylint: disable=too-many-instance-attributes
  """
  An abstract wrapper for blast commands.
  Do not use this class. Use BlastP or BlastN classes.

  The ``with_*`` methods configure the search and return the wrapper itself
  so calls can be chained. ``running_command`` builds the argument list and
  ``run`` executes it.
  """

  def __init__(self):
    # Executable name or path; set by subclasses or with_executable_command.
    self.command: str = ""
    # Query given as a file path (mutually exclusive with query_string).
    self.query: Optional[str] = None
    # Query given as text, sent to the process through stdin.
    self.query_string: Optional[str] = None
    # Search target: a database prefix or a subject file, never both.
    self.database: Optional[str] = None
    self.subject: Optional[str] = None
    # Extra command line arguments appended verbatim.
    self.args: list[str] = []
    # Field specifiers written after "7" in the -outfmt value.
    self.fields: list[str] = ["std"]
    self.evalue: float = 1E-5
    # When set, results are written to this file instead of being parsed.
    self.outfile: Optional[str] = None
    self.task: str = ""

  def with_executable_command(self, blast_command:str) -> "BlastWrapper":
    """
    Sets the path of the current blast executable.

    Args:
      blast_command (str): Path to the blast command.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.command = blast_command
    return self

  def with_query_string(self, query:str) -> "BlastWrapper":
    """
    Sets the query sequence as a string.

    Args:
      query (str): The input sequence.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.query_string = query
    return self

  def with_query(self, query_file:str) -> "BlastWrapper":
    """
    Sets the query file.

    Args:
      query_file (str): The path of the query file.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.query = query_file
    return self

  def with_db(self, db_path:str) -> "BlastWrapper":
    """
    Sets the database to search.
    Incompatible with subject: any subject previously set is discarded.

    Args:
      db_path (str): The path prefix to the database.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.database = db_path
    self.subject = None
    return self

  def with_subject(self, subject_path:str) -> "BlastWrapper":
    """
    Sets the subject file for the search.
    Incompatible with db: any database previously set is discarded.

    Args:
      subject_path (str): Path to a file with the subjects.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.subject = subject_path
    # Mirror with_db: the subject replaces the database. The query must be
    # left untouched, it is unrelated to the search target.
    self.database = None
    return self

  def with_evalue(self, evalue:float) -> "BlastWrapper":
    """
    Sets the e-value.

    Args:
      evalue (float): The e-value threshold for saving hits.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.evalue = evalue
    return self

  def with_arguments(self, *args:Any) -> "BlastWrapper":
    """
    Adds additional arguments to the blast command.

    Every argument is converted with ``str`` and appended to the arguments
    already collected.

    Args:
      *args (Any): Additional arguments for the blast command.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.args.extend(str(x) for x in args)
    return self

  def with_output_file(self, outfile:str) -> "BlastWrapper":
    """
    Sets the output file for the blast results.

    Args:
      outfile (str): Path to the output file.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.outfile = outfile
    return self

  def with_output_fields(self, fields:list[str]) -> "BlastWrapper":
    """
    Sets the output fields for the blast results.

    Args:
      fields (list[str]): List of fields to include in the output.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = fields
    return self

  def with_output_fields_std(self) -> "BlastWrapper":
    """
    Sets the output fields to the standard fields.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = ["std"]
    return self

  def with_output_fields_std_plus(self, fields:list[str]) -> "BlastWrapper":
    """
    Sets the output fields to the standard fields plus additional specified
      fields.

    Args:
      fields (list[str]): Additional fields to include in the output.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = ["std"] + fields
    return self

  def with_output_fields_recommended(self) -> "BlastWrapper":
    """
    Sets the output fields to the standard fields plus qcovs and staxid.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = ["std", "qcovs", "staxid"]
    return self

  def _run_with_query_string(self) -> subprocess.CompletedProcess:
    """
    Executes the blast command with the query sequence provided as a string.

    The query string is sent to the process through its standard input.

    Returns:
      CompletedProcess: An object that represents the completed execution of
        the blast command.
    """
    return subprocess.run(
      self.running_command(),
      check=False,
      capture_output=True,
      input=self.query_string,
      text=True,
    )

  def _run_with_input_file(self) -> subprocess.CompletedProcess:
    """
    Executes the blast command with the query sequence provided as an input
      file.

    Returns:
      CompletedProcess: An object that represents the completed execution of
        the blast command.
    """
    return subprocess.run(
      self.running_command(),
      check=False,
      capture_output=True,
      text=True,
    )

  def run(self) -> "tuple[Optional[BlastResult], str]":
    """
    Runs the blast command and returns the result and any errors.

    The process is started with ``check=False``, so a failing command does
    not raise; whatever it wrote to stderr is returned as the second item.

    Returns:
      tuple[Optional[BlastResult], str]: A tuple containing the BlastResult
        object (or None if results are saved to a file) and any error
        messages.

    Raises:
      ValueError: If the search target or the query are not properly set
        (see running_command).
    """
    if self.query_string:
      proc = self._run_with_query_string()
    else:
      proc = self._run_with_input_file()
    error = proc.stderr
    if self.outfile:
      # Results went to the output file; there is nothing to parse.
      return None, error
    bresult = BlastResult.from_tabular_with_headers(
      StringIO(proc.stdout)
    )
    return bresult, error

  def running_command_string(self) -> str:
    """
    Generates the full string of the blast command that will be executed.

    The arguments are joined with single spaces and are not shell-quoted.

    Returns:
      str: A string of the blast command that will be executed.
    """
    return " ".join(self.running_command())

  def running_command(self) -> list[str]:
    """
    Generates the command list for the blast process.

    Returns:
      list[str]: A list containing the blast command and its arguments that
        will be executed.

    Raises:
      ValueError: If neither a database nor a subject, or both, are provided.
      ValueError: If neither a query file nor a query string, or both, are
        provided.
    """
    # Exactly one of database/subject and one of query/query_string must be
    # set, so the truthiness of each pair has to differ.
    if bool(self.database) == bool(self.subject):
      raise ValueError("Blast Wrapper needs a db value or a subject.")
    if bool(self.query) == bool(self.query_string):
      raise ValueError("Blast Wrapper needs a query or a query_string")
    cmd = [
      self.command,
      '-query',
      # "-" is used when the query is supplied as a string through stdin.
      self.query if self.query else "-",
    ]
    if self.database:
      cmd.extend(['-db', self.database])
    if self.subject:
      cmd.extend(['-subject', self.subject])
    cmd.extend(["-outfmt", f"7 {' '.join(self.fields)}"])
    if self.outfile:
      # The option is "-out"; a bare "out" would be passed to blast as a
      # stray positional argument.
      cmd.extend(["-out", self.outfile])
    cmd.extend(self.args)
    cmd.extend(["-task", self.task])
    cmd.extend(["-evalue", str(self.evalue)])
    return cmd

class BlastP(BlastWrapper): # pylint: disable=too-many-instance-attributes
  """
  Wrapper for the local blastp command.

  Builds the command line to perform a blast search and can execute the
  command.

  Does not make an extensive check of all blastp arguments before running.
  """
  def __init__(self):
    super().__init__()
    # Defaults: the "blastp" executable running the "blastp" task.
    self.command: str = "blastp"
    self.task: Blastptask = "blastp"

  def with_task(self, task: Blastptask) -> "BlastP":
    """
    Selects which BLASTP task is run.

    Args:
      task (Blastptask): The BLASTP task to perform.

    Returns:
      'BlastP': This same wrapper, so calls can be chained.
    """
    self.task = task
    return self

class BlastN(BlastWrapper): # pylint: disable=too-many-instance-attributes
  """
  Wrapper for the local blastn command.

  Builds the command line to perform a blast search and can execute the
  command.

  Does not make an extensive check of all blastn arguments before running.
  """
  def __init__(self):
    super().__init__()
    # Defaults: the "blastn" executable running the "blastn" task.
    self.command: str = "blastn"
    self.task: Blastntask = "blastn"

  def with_task(self, task: Blastntask) -> "BlastN":
    """
    Selects which BLASTN task is run.

    Args:
      task (Blastntask): The BLASTN task to perform.

    Returns:
      'BlastN': This same wrapper, so calls can be chained.
    """
    self.task = task
    return self

Classes

class BlastN

Wrapper for local blastn command.

Creates the command line to perform a blast search and can execute the command.

Does not make an extensive check of all blastn arguments before running.

Expand source code
class BlastN(BlastWrapper): # pylint: disable=too-many-instance-attributes
  """
  Wrapper for the local blastn command.

  Builds the command line to perform a blast search and can execute the
  command.

  Does not make an extensive check of all blastn arguments before running.
  """
  def __init__(self):
    super().__init__()
    # Defaults: the "blastn" executable running the "blastn" task.
    self.command: str = "blastn"
    self.task: Blastntask = "blastn"

  def with_task(self, task: Blastntask) -> "BlastN":
    """
    Selects which BLASTN task is run.

    Args:
      task (Blastntask): The BLASTN task to perform.

    Returns:
      'BlastN': This same wrapper, so calls can be chained.
    """
    self.task = task
    return self

Ancestors

Methods

def with_task(self, task: Union[Literal['blastn'], Literal['blastn-short'], Literal['dc-megablast'], Literal['megablast'], Literal['rmblastn']]) ‑> BlastN

Sets the BLASTN task type.

Args

task : Blastntask
The BLASTN task to perform.

Returns

'BlastN': A BlastN command wrapper class.

Expand source code
def with_task(self, task: Blastntask) -> "BlastN":
  """
  Selects which BLASTN task is run.

  Args:
    task (Blastntask): The BLASTN task to perform.

  Returns:
    'BlastN': This same wrapper, so calls can be chained.
  """
  self.task = task
  return self
class BlastP

Wrapper for local blastp command.

Creates the command line to perform a blast search and can execute the command.

Does not make an extensive check of all blastp arguments before running.

Expand source code
class BlastP(BlastWrapper): # pylint: disable=too-many-instance-attributes
  """
  Wrapper for the local blastp command.

  Builds the command line to perform a blast search and can execute the
  command.

  Does not make an extensive check of all blastp arguments before running.
  """
  def __init__(self):
    super().__init__()
    # Defaults: the "blastp" executable running the "blastp" task.
    self.command: str = "blastp"
    self.task: Blastptask = "blastp"

  def with_task(self, task: Blastptask) -> "BlastP":
    """
    Selects which BLASTP task is run.

    Args:
      task (Blastptask): The BLASTP task to perform.

    Returns:
      'BlastP': This same wrapper, so calls can be chained.
    """
    self.task = task
    return self

Ancestors

Methods

def with_task(self, task: Union[Literal['blastp'], Literal['blastp-fast'], Literal['blastp-short']]) ‑> BlastP

Sets the BLASTP task type.

Args

task : Blastptask
The BLASTP task to perform.

Returns

'BlastP': A BlastP command wrapper class.

Expand source code
def with_task(self, task: Blastptask) -> "BlastP":
  """
  Selects which BLASTP task is run.

  Args:
    task (Blastptask): The BLASTP task to perform.

  Returns:
    'BlastP': This same wrapper, so calls can be chained.
  """
  self.task = task
  return self
class BlastResult

BlastResult. Represent the results of a blast search.

Expand source code
class BlastResult:
  """
  BlastResult. Represents the results of a blast search.

  Hits are stored as rows in ``data``; ``headers`` holds one column name per
  field, so every stored row has exactly ``len(headers)`` values.
  """

  def __init__(self):
    # One list of parsed field values per hit.
    self.data:list[list[Any]] = []
    # Column names; empty until set_headers is called.
    self.headers:list[str] = []

  def __len__(self) -> int:
    """
    Returns the number of data rows in the BlastResult object.
    """
    return len(self.data)

  def set_headers(self, headers:list[str]):
    """
    Sets the headers for the data fields in the BlastResult object.

    Args:
      headers (list[str]): The headers for the data fields.
    """
    self.headers = headers

  def has_headers(self) -> bool:
    """
    Checks if the BlastResult object has headers.

    Returns:
      bool: True if headers are set, False otherwise.
    """
    return bool(self.headers)

  def add(self, row:list[Any]):
    """
    Adds a row of data to the BlastResult object.

    Args:
      row (list[Any]): A row of data.

    Raises:
      ValueError: If the row does not have exactly one value per header.
    """
    if len(row) != len(self.headers):
      raise ValueError(
        f"Data has {len(row)} fields, header has {len(self.headers)} fields"
      )
    self.data.append(row)

  def exclude(self, header:str, values:set[Any]) -> "BlastResult":
    """
    Removes those hits from the BlastResult that match any of the
    given values in the given header.

    If the header is not one of the current headers nothing is removed.

    Args:
      header (str): A header name.
      values (set[Any]): A set of values to exclude.

    Returns:
      BlastResult: The modified BlastResult object (self).
    """
    try:
      col_index = self.headers.index(header)
    except ValueError:
      # Unknown column: leave the data untouched.
      return self
    self.data = [
      row
      for row in self.data
      if row[col_index] not in values
    ]
    return self

  def as_dataframe(self) -> pd.DataFrame:
    """
    Converts the results to a pandas DataFrame.

    Returns:
      pd.DataFrame: A DataFrame with one row per hit and the headers as
        column names.
    """
    return pd.DataFrame(self.data, columns=self.headers)

  @staticmethod
  def _parse_fields(line:str) -> list[Union[str, int, float]]:
    """
    Parses a tab separated line of data and returns a list of fields.

    Each field is converted to int when possible, otherwise to float, and
    is kept as a string when neither conversion succeeds.

    Args:
      line (str): A line of data.

    Returns:
      list[Union[str, int, float]]: A list of fields parsed from the line of
        data.
    """
    fields: list[Union[str, int, float]] = []
    for c_field in line.split("\t"):
      # int() and float() accept "_" as a digit separator, so an identifier
      # such as "1_2" would silently become the number 12. Keep any field
      # containing an underscore as text.
      if "_" in c_field:
        fields.append(c_field)
        continue
      for converter in (int, float):
        try:
          fields.append(converter(c_field))
          break
        except ValueError:
          continue
      else:
        # Neither conversion worked: keep the raw text.
        fields.append(c_field)
    return fields

  @staticmethod
  def from_tabular_with_headers(input_data) -> "BlastResult":
    """
    Creates a BlastResult object from tabular data with headers.

    Headers are taken from the first line starting with "# Fields: ";
    later "# Fields: " lines and every other line starting with "#" are
    ignored. Blank lines are skipped.

    Args:
      input_data: An iterable of text lines with the tabular data.

    Returns:
      BlastResult: A BlastResult object.

    Raises:
      ValueError: If a data row does not match the number of headers.
    """
    result = BlastResult()
    for line in input_data:
      line = line.rstrip()
      if not line:
        # A blank line carries no hit; parsing it would produce a bogus
        # one-field row and fail the field count check.
        continue
      if line.startswith("# Fields: "):
        if not result.has_headers():
          line = line[10:]
          fields = [x.strip() for x in line.split(",")]
          result.set_headers(fields)
        continue
      if line.startswith("#"):
        continue
      result.add(BlastResult._parse_fields(line))
    return result

  @staticmethod
  def from_tabular_without_headers(
      headers: list[str],
      input_data
    ) -> "BlastResult":
    """
    Creates a BlastResult object from tabular data without headers.

    Blank lines are skipped.

    Args:
      headers (list[str]): The headers for the data fields.
      input_data: An iterable of text lines with the tabular data.

    Returns:
      BlastResult: A BlastResult object.

    Raises:
      ValueError: If a data row does not match the number of headers.
    """
    result = BlastResult()
    result.set_headers(headers)
    for line in input_data:
      line = line.rstrip()
      if not line:
        # Skip blank lines instead of failing the field count check.
        continue
      result.add(BlastResult._parse_fields(line))
    return result

Static methods

def from_tabular_with_headers(input_data) ‑> BlastResult

Creates a BlastResult object from tabular data with headers.

Args

input_data
The tabular data with headers.

Returns

BlastResult
A BlastResult object.
Expand source code
@staticmethod
def from_tabular_with_headers(input_data) -> "BlastResult":
  """
  Creates a BlastResult object from tabular data with headers.

  Headers are taken from the first line starting with "# Fields: ";
  later "# Fields: " lines and every other line starting with "#" are
  ignored. Blank lines are skipped.

  Args:
    input_data: An iterable of text lines with the tabular data.

  Returns:
    BlastResult: A BlastResult object.

  Raises:
    ValueError: If a data row does not match the number of headers.
  """
  result = BlastResult()
  for line in input_data:
    line = line.rstrip()
    if not line:
      # A blank line carries no hit; parsing it would produce a bogus
      # one-field row and fail the field count check.
      continue
    if line.startswith("# Fields: "):
      if not result.has_headers():
        line = line[10:]
        fields = [x.strip() for x in line.split(",")]
        result.set_headers(fields)
      continue
    if line.startswith("#"):
      continue
    result.add(BlastResult._parse_fields(line))
  return result
def from_tabular_without_headers(headers: list, input_data) ‑> BlastResult

Creates a BlastResult object from tabular data without headers.

Args

headers : list[str]
The headers for the data fields.
input_data
The tabular data without headers.

Returns

BlastResult
A BlastResult object.
Expand source code
@staticmethod
def from_tabular_without_headers(
    headers: list[str],
    input_data
  ) -> "BlastResult":
  """
  Creates a BlastResult object from tabular data without headers.

  Blank lines are skipped.

  Args:
    headers (list[str]): The headers for the data fields.
    input_data: An iterable of text lines with the tabular data.

  Returns:
    BlastResult: A BlastResult object.

  Raises:
    ValueError: If a data row does not match the number of headers.
  """
  result = BlastResult()
  result.set_headers(headers)
  for line in input_data:
    line = line.rstrip()
    if not line:
      # Skip blank lines instead of failing the field count check.
      continue
    result.add(BlastResult._parse_fields(line))
  return result

Methods

def add(self, row: list)

Adds a row of data to the BlastResult object.

Args

row : list[Any]
A row of data.
Expand source code
def add(self, row:list[Any]):
  """
  Appends one row of data to the BlastResult object.

  Args:
    row (list[Any]): A row of data, one value per header.

  Raises:
    ValueError: If the row length differs from the number of headers.
  """
  n_fields = len(row)
  n_headers = len(self.headers)
  if n_fields != n_headers:
    raise ValueError(
      f"Data has {n_fields} fields, header has {n_headers} fields"
    )
  self.data.append(row)
def as_dataframe(self) ‑> pandas.core.frame.DataFrame

Convert the results as a pandas DataFrame.

Returns

pd.DataFrame
A pandas Dataframe.
Expand source code
def as_dataframe(self) -> pd.DataFrame:
  """
  Builds a pandas DataFrame from the stored results.

  Returns:
    pd.DataFrame: A DataFrame with the data rows and the headers as column
      names.
  """
  return pd.DataFrame(self.data, columns=self.headers)
def exclude(self, header: str, values: set) ‑> BlastResult

Removes those hits from the BlastResult that match any of the given values in the given header.

Args

header : str
A header name.
values : set[Any]
A set of values to exclude.

Returns

BlastResult
The modified BlastResult object.
Expand source code
def exclude(self, header:str, values:set[Any]) -> "BlastResult":
  """
  Drops every hit whose value in the given header is one of the given
  values. Nothing is removed when the header is not a known header.

  Args:
    header (str): A header name.
    values (set[Any]): A set of values to exclude.

  Returns:
    BlastResult: The modified BlastResult object.
  """
  if header not in self.headers:
    return self
  position = self.headers.index(header)
  kept = []
  for hit in self.data:
    if hit[position] in values:
      continue
    kept.append(hit)
  self.data = kept
  return self
def has_headers(self) ‑> bool

Checks if the BlastResult object has headers.

Returns

bool
True if headers are set, False otherwise.
Expand source code
def has_headers(self) -> bool:
  """
  Tells whether headers have been set on the BlastResult object.

  Returns:
    bool: True if headers are set, False otherwise.
  """
  return True if self.headers else False
def set_headers(self, headers: list)

Sets the headers for the data fields in the BlastResult object.

Args

headers : list[str]
The headers for the data fields.
Expand source code
def set_headers(self, headers:list[str]):
  """
  Stores the column names of the data fields in the BlastResult object.

  Args:
    headers (list[str]): The headers for the data fields.
  """
  self.headers = headers
class BlastWrapper

An abstract wrapper for blast commands. Do not use this class. Use BlastP or BlastN classes.

Expand source code
class BlastWrapper: # pylint: disable=too-many-instance-attributes
  """
  An abstract wrapper for blast commands.
  Do not use this class. Use BlastP or BlastN classes.

  The ``with_*`` methods configure the search and return the wrapper itself
  so calls can be chained. ``running_command`` builds the argument list and
  ``run`` executes it.
  """

  def __init__(self):
    # Executable name or path; set by subclasses or with_executable_command.
    self.command: str = ""
    # Query given as a file path (mutually exclusive with query_string).
    self.query: Optional[str] = None
    # Query given as text, sent to the process through stdin.
    self.query_string: Optional[str] = None
    # Search target: a database prefix or a subject file, never both.
    self.database: Optional[str] = None
    self.subject: Optional[str] = None
    # Extra command line arguments appended verbatim.
    self.args: list[str] = []
    # Field specifiers written after "7" in the -outfmt value.
    self.fields: list[str] = ["std"]
    self.evalue: float = 1E-5
    # When set, results are written to this file instead of being parsed.
    self.outfile: Optional[str] = None
    self.task: str = ""

  def with_executable_command(self, blast_command:str) -> "BlastWrapper":
    """
    Sets the path of the current blast executable.

    Args:
      blast_command (str): Path to the blast command.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.command = blast_command
    return self

  def with_query_string(self, query:str) -> "BlastWrapper":
    """
    Sets the query sequence as a string.

    Args:
      query (str): The input sequence.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.query_string = query
    return self

  def with_query(self, query_file:str) -> "BlastWrapper":
    """
    Sets the query file.

    Args:
      query_file (str): The path of the query file.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.query = query_file
    return self

  def with_db(self, db_path:str) -> "BlastWrapper":
    """
    Sets the database to search.
    Incompatible with subject: any subject previously set is discarded.

    Args:
      db_path (str): The path prefix to the database.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.database = db_path
    self.subject = None
    return self

  def with_subject(self, subject_path:str) -> "BlastWrapper":
    """
    Sets the subject file for the search.
    Incompatible with db: any database previously set is discarded.

    Args:
      subject_path (str): Path to a file with the subjects.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.subject = subject_path
    # Mirror with_db: the subject replaces the database. The query must be
    # left untouched, it is unrelated to the search target.
    self.database = None
    return self

  def with_evalue(self, evalue:float) -> "BlastWrapper":
    """
    Sets the e-value.

    Args:
      evalue (float): The e-value threshold for saving hits.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.evalue = evalue
    return self

  def with_arguments(self, *args:Any) -> "BlastWrapper":
    """
    Adds additional arguments to the blast command.

    Every argument is converted with ``str`` and appended to the arguments
    already collected.

    Args:
      *args (Any): Additional arguments for the blast command.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.args.extend(str(x) for x in args)
    return self

  def with_output_file(self, outfile:str) -> "BlastWrapper":
    """
    Sets the output file for the blast results.

    Args:
      outfile (str): Path to the output file.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.outfile = outfile
    return self

  def with_output_fields(self, fields:list[str]) -> "BlastWrapper":
    """
    Sets the output fields for the blast results.

    Args:
      fields (list[str]): List of fields to include in the output.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = fields
    return self

  def with_output_fields_std(self) -> "BlastWrapper":
    """
    Sets the output fields to the standard fields.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = ["std"]
    return self

  def with_output_fields_std_plus(self, fields:list[str]) -> "BlastWrapper":
    """
    Sets the output fields to the standard fields plus additional specified
      fields.

    Args:
      fields (list[str]): Additional fields to include in the output.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = ["std"] + fields
    return self

  def with_output_fields_recommended(self) -> "BlastWrapper":
    """
    Sets the output fields to the standard fields plus qcovs and staxid.

    Returns:
      'BlastWrapper': A BlastWrapper command wrapper class.
    """
    self.fields = ["std", "qcovs", "staxid"]
    return self

  def _run_with_query_string(self) -> subprocess.CompletedProcess:
    """
    Executes the blast command with the query sequence provided as a string.

    The query string is sent to the process through its standard input.

    Returns:
      CompletedProcess: An object that represents the completed execution of
        the blast command.
    """
    return subprocess.run(
      self.running_command(),
      check=False,
      capture_output=True,
      input=self.query_string,
      text=True,
    )

  def _run_with_input_file(self) -> subprocess.CompletedProcess:
    """
    Executes the blast command with the query sequence provided as an input
      file.

    Returns:
      CompletedProcess: An object that represents the completed execution of
        the blast command.
    """
    return subprocess.run(
      self.running_command(),
      check=False,
      capture_output=True,
      text=True,
    )

  def run(self) -> "tuple[Optional[BlastResult], str]":
    """
    Runs the blast command and returns the result and any errors.

    The process is started with ``check=False``, so a failing command does
    not raise; whatever it wrote to stderr is returned as the second item.

    Returns:
      tuple[Optional[BlastResult], str]: A tuple containing the BlastResult
        object (or None if results are saved to a file) and any error
        messages.

    Raises:
      ValueError: If the search target or the query are not properly set
        (see running_command).
    """
    if self.query_string:
      proc = self._run_with_query_string()
    else:
      proc = self._run_with_input_file()
    error = proc.stderr
    if self.outfile:
      # Results went to the output file; there is nothing to parse.
      return None, error
    bresult = BlastResult.from_tabular_with_headers(
      StringIO(proc.stdout)
    )
    return bresult, error

  def running_command_string(self) -> str:
    """
    Generates the full string of the blast command that will be executed.

    The arguments are joined with single spaces and are not shell-quoted.

    Returns:
      str: A string of the blast command that will be executed.
    """
    return " ".join(self.running_command())

  def running_command(self) -> list[str]:
    """
    Generates the command list for the blast process.

    Returns:
      list[str]: A list containing the blast command and its arguments that
        will be executed.

    Raises:
      ValueError: If neither a database nor a subject, or both, are provided.
      ValueError: If neither a query file nor a query string, or both, are
        provided.
    """
    # Exactly one of database/subject and one of query/query_string must be
    # set, so the truthiness of each pair has to differ.
    if bool(self.database) == bool(self.subject):
      raise ValueError("Blast Wrapper needs a db value or a subject.")
    if bool(self.query) == bool(self.query_string):
      raise ValueError("Blast Wrapper needs a query or a query_string")
    cmd = [
      self.command,
      '-query',
      # "-" is used when the query is supplied as a string through stdin.
      self.query if self.query else "-",
    ]
    if self.database:
      cmd.extend(['-db', self.database])
    if self.subject:
      cmd.extend(['-subject', self.subject])
    cmd.extend(["-outfmt", f"7 {' '.join(self.fields)}"])
    if self.outfile:
      # The option is "-out"; a bare "out" would be passed to blast as a
      # stray positional argument.
      cmd.extend(["-out", self.outfile])
    cmd.extend(self.args)
    cmd.extend(["-task", self.task])
    cmd.extend(["-evalue", str(self.evalue)])
    return cmd

Subclasses

Methods

def run(self) ‑> tuple

Runs the blast command and returns the result and any errors.

Returns

tuple[Optional[BlastResult], str]
A tuple containing the BlastResult object (or None if results are saved to a file) and any error messages.
Expand source code
def run(self) -> tuple[Optional[BlastResult], str]:
  """
  Executes the blast command and collects its outcome.

  The process is launched through _run_with_query_string when a query string
  is set, and through _run_with_input_file otherwise. When no output file is
  configured, the captured standard output is parsed with
  BlastResult.from_tabular_with_headers.

  Returns:
    tuple[Optional[BlastResult], str]: The parsed BlastResult (or None if the
      results are saved to a file) together with the captured standard error
      of the process.
  """
  launcher = (
    self._run_with_query_string
    if self.query_string
    else self._run_with_input_file
  )
  completed = launcher()
  stderr_text = completed.stderr
  if not self.outfile:
    parsed = BlastResult.from_tabular_with_headers(
      StringIO(completed.stdout)
    )
    return parsed, stderr_text
  # Results went to the output file, so there is nothing to parse here.
  return None, stderr_text
def running_command(self) ‑> list

Generates the command list for the blast process.

Returns

list[str]
A list containing the blast command and its arguments that will be executed.

Raises

ValueError
If neither a database nor a subject, or both, are provided.
ValueError
If neither a query file nor a query string, or both, are provided.
Expand source code
def running_command(self) -> list[str]:
  """
  Generates the command list for the blast process.

  The list is built in this order: executable, query, database or subject,
  output format, optional output file, extra arguments, task and e-value.

  Returns:
    list[str]: A list containing the blast command and its arguments that
      will be executed.

  Raises:
    ValueError: If neither a database nor a subject, or both, are provided.
    ValueError: If neither a query file nor a query string, or both, are
      provided.
  """
  # Exactly one search target is allowed: a database or a subject file.
  if bool(self.database) == bool(self.subject):
    raise ValueError("Blast Wrapper needs a db value or a subject.")
  # Exactly one query source is allowed: a query file or a query string.
  if bool(self.query) == bool(self.query_string):
    raise ValueError("Blast Wrapper needs a query or a query_string")
  cmd = [
    self.command,
    "-query",
    # "-" replaces the file name when the query is given as a string
    # (presumably supplied through stdin by the runner - confirm).
    self.query if self.query else "-",
  ]
  if self.database:
    cmd.extend(["-db", self.database])
  else:
    cmd.extend(["-subject", self.subject])
  cmd.extend(["-outfmt", f"7 {' '.join(self.fields)}"])
  if self.outfile:
    # The flag needs its leading dash; a bare "out" would be passed to
    # blast as a stray positional argument and no output file would be set.
    cmd.extend(["-out", self.outfile])
  cmd.extend(self.args)
  cmd.extend(["-task", self.task])
  cmd.extend(["-evalue", str(self.evalue)])
  return cmd
def running_command_string(self) ‑> str

Generates the full string of the blast command that will be executed.

Returns

str
A string of the blast command that will be executed.
Expand source code
def running_command_string(self) -> str:
  """
  Renders the blast command as a single space separated string.

  The string is obtained by joining the elements returned by
  running_command with single spaces; no shell quoting is applied.

  Returns:
    str: The blast command line that will be executed.
  """
  command_parts = self.running_command()
  return " ".join(command_parts)
def with_arguments(self, *args: Any) ‑> BlastWrapper

Adds additional arguments to the BlastWrapper command.

Args

*args : Any
Additional arguments for the BlastWrapper command.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_arguments(self, *args:Any) -> "BlastWrapper":
  """
  Appends extra arguments to the ones already stored for the command.

  Each argument is converted to its string representation before being
  stored, and previously added arguments are kept.

  Args:
    *args (Any): Extra arguments for the blast command.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  converted = list(map(str, args))
  self.args.extend(converted)
  return self
def with_db(self, db_path: str) ‑> BlastWrapper

Set the Database to search. Incompatible with subject.

Args

db_path : str
The path prefix to the database.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_db(self, db_path:str) -> "BlastWrapper":
  """
  Selects the database to search against.
  Incompatible with subject: any subject set before is discarded.

  Args:
    db_path (str): The path prefix to the database.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.database, self.subject = db_path, None
  return self
def with_evalue(self, evalue: float) ‑> BlastWrapper

Sets the e-value.

Args

evalue : float
The e-value threshold for saving hits.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_evalue(self, evalue:float) -> "BlastWrapper":
  """
  Stores the e-value to be used by the blast command.

  Args:
    evalue (float): The e-value threshold for saving hits.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.evalue = evalue
  return self
def with_executable_command(self, blast_command: str) ‑> BlastWrapper

Sets the path of the current blast executable.

Args

blast_command : str
Path to the blast command.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_executable_command(self, blast_command:str) -> "BlastWrapper":
  """
  Replaces the blast executable used as first element of the command.

  Args:
    blast_command (str): Path to the blast command.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.command = blast_command
  return self
def with_output_fields(self, fields: list) ‑> BlastWrapper

Sets the output fields for the BlastWrapper results.

Args

fields : list[str]
List of fields to include in the output.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_output_fields(self, fields:list[str]) -> "BlastWrapper":
  """
  Replaces the output fields requested from blast.

  The given list is stored as is (not copied).

  Args:
    fields (list[str]): List of fields to include in the output.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.fields = fields
  return self

def with_output_fields_recommended(self) ‑> BlastWrapper

Sets the output fields to the standard fields plus qcovs and staxid.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_output_fields_recommended(self) -> "BlastWrapper":
  """
  Requests the standard output fields followed by qcovs and staxid.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  recommended = ["std", "qcovs", "staxid"]
  self.fields = recommended
  return self
def with_output_fields_std(self) ‑> BlastWrapper

Sets the output fields to the standard fields.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_output_fields_std(self) -> "BlastWrapper":
  """
  Requests only the standard ("std") output fields.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  standard_only = ["std"]
  self.fields = standard_only
  return self
def with_output_fields_std_plus(self, fields: list) ‑> BlastWrapper

Sets the output fields to the standard fields plus additional specified fields.

Args

fields : list[str]
Additional fields to include in the output.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_output_fields_std_plus(self, fields:list[str]) -> "BlastWrapper":
  """
  Requests the standard ("std") output fields followed by the given ones.

  A new list is stored; the list passed in is not modified.

  Args:
    fields (list[str]): Additional fields to include in the output.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  standard_prefix = ["std"]
  self.fields = standard_prefix + fields
  return self
def with_output_file(self, outfile: str) ‑> BlastWrapper

Sets the output file for the BlastWrapper results.

Args

outfile : str
Path to the output file.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_output_file(self, outfile:str) -> "BlastWrapper":
  """
  Stores the path of the file where blast results should be written.

  Args:
    outfile (str): Path to the output file.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.outfile = outfile
  return self
def with_query(self, query_file: str) ‑> BlastWrapper

Sets the query file.

Args

query_file : str
The path of the query file.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_query(self, query_file:str) -> "BlastWrapper":
  """
  Stores the path of the file that holds the query.

  Args:
    query_file (str): The path of the query file.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.query = query_file
  return self
def with_query_string(self, query: str) ‑> BlastWrapper

Set the query sequence as a string.

Args

query : str
The input sequence.

Returns

'BlastWrapper': A BlastWrapper command wrapper class.

Expand source code
def with_query_string(self, query:str) -> "BlastWrapper":
  """
  Stores the query sequence given directly as a string.

  Args:
    query (str): The input sequence.

  Returns:
    'BlastWrapper': This same wrapper, to allow chaining calls.
  """
  self.query_string = query
  return self
def with_subject(self, subject_path: str) ‑> BlastWrapper

Set the subject file for the search. Incompatible with db.

Args

subject_path : str
Path to a file with the subjects.

Returns

'BlastWrapper': A BlastWrapper wrapper class.

Expand source code
def with_subject(self, subject_path:str) -> "BlastWrapper":
  """
  Set the subject file for the search.
  Incompatible with db: any database set before is discarded.

  Args:
    subject_path (str): Path to a file with the subjects.

  Returns:
    'BlastWrapper': A BlastWrapper wrapper class.
  """
  self.subject = subject_path
  # Subject and database are mutually exclusive, so the database is cleared
  # here in the same way with_db clears the subject. The query is left
  # untouched: it is unrelated to the choice of search target.
  self.database = None
  return self