Module xi_covutils.fastq

A simple module to work with fastq files

Expand source code
"""
A simple module to work with fastq files
"""

from dataclasses import dataclass
from io import TextIOWrapper
import re
from typing import Iterator, Optional, Tuple, cast

@dataclass
class FastqEntry:
  """
  One record of a fastq file.

  The four fields are stored as plain strings; no validation is done on
  them (for example, sequence and quality are not required to have the same
  length).
  """
  # Name of the record.
  identifier: str
  # Free text that accompanies the identifier.
  description: str
  # The sequence itself.
  sequence: str
  # The quality string associated with the sequence.
  quality: str

  def __len__(self) -> int:
    """
    Give the number of characters in the sequence.

    Returns:
      int: The length of the `sequence` field.
    """
    sequence_length = len(self.sequence)
    return sequence_length

class FastqReader:
  """
  FastqReader: A simple fastq file reader.

  Records are read four lines at a time (header, sequence, separator,
  quality). Reading stops silently at the first record that is incomplete or
  whose header line does not match `first_line_pattern`.
  """
  # Header line: '@', an identifier without whitespace and, optionally, a
  # space followed by a free-text description.
  first_line_pattern = re.compile(r"^@([^\s]+)( (.+))*$")

  @staticmethod
  def fastq_entry_from_lines(
    lines:Tuple[str, str, str, str]
  ) -> Optional[FastqEntry]:
    """
    Create a FastqEntry from a tuple with the 4 text lines of a fastq
      format record.

    Args:
      lines (Tuple[str, str, str, str]): The text lines from a fastq file for a
        single entry: header, sequence, separator and quality. The separator
        line is ignored.

    Returns:
      Optional[FastqEntry]: The resulting FastqEntry, or None if the header
        line does not match `first_line_pattern`. The description is an
        empty string when the header only carries an identifier.
    """
    line1, line2, _, line4 = lines
    matching = FastqReader.first_line_pattern.match(line1)
    if matching is None:
      return None
    identifier = matching.group(1)
    # The description group does not take part in the match when the header
    # has no description; group() then gives None, which would break the
    # `str` type declared by FastqEntry.
    description = matching.group(3) or ""
    return FastqEntry(identifier, description, line2, line4)

  def read_next_fastq_entry(
    self,
    text_wrapper: TextIOWrapper
  ) -> Optional[FastqEntry]:
    """
    Consumes four lines from a TextIOWrapper and tries to get a FastqEntry from
      them.

    Args:
      text_wrapper (TextIOWrapper): The input text source, generated by open
        function. Only `next()` is called on it.

    Returns:
      Optional[FastqEntry]: The resulting FastqEntry, or None if fewer than
        four lines could be read, one of them is an empty string, or the
        header line is malformed.
    """
    # Always request four lines; a missing line shows up as None.
    lines = [next(text_wrapper, None) for _ in range(4)]
    if any(line is None or line == "" for line in lines):
      return None
    # Surrounding whitespace, including the line terminator, is dropped.
    header, sequence, separator, quality = map(
      str.strip, cast(list[str], lines)
    )
    return FastqReader.fastq_entry_from_lines(
      (header, sequence, separator, quality)
    )

  def read_fastq_from_file(self, filename: str) -> Iterator[FastqEntry]:
    """
    Reads a fastq file and produces an iterator of fastq entries.

    The file is opened as UTF-8 text and stays open while the returned
    generator is being consumed.

    Args:
      filename (str): The input fastq file.

    Yields:
      Iterator[FastqEntry]: An iterator of the FastqEntry objects read.
    """
    with open(filename, 'r', encoding='utf8') as f_in:
      # Same reading loop as for any other text source.
      yield from self.read_fastq_from_text_source(f_in)

  def read_fastq_from_text_source(
    self,
    text_source: TextIOWrapper
  ) -> Iterator[FastqEntry]:
    """
    Reads fastq entries from a text source.

    Args:
      text_source (TextIOWrapper): The text source. It is not closed by this
        method.

    Yields:
      Iterator[FastqEntry]: An iterator of the FastqEntry objects read, ending
        at the first record that cannot be read.
    """
    while (
      next_entry := self.read_next_fastq_entry(text_source)
    ) is not None:
      yield next_entry

class FastqWriter:
  """
  Writes fastq entries to a file.

  The output file is opened on construction. Use the writer as a context
  manager, or call `close()` when done, so the file gets closed.
  """
  def __init__(self, outfile:str):
    """
    Opens the output file for writing as UTF-8 text.

    Args:
      outfile (str): The path of the file to write. Existing content is
        overwritten.
    """
    self.fp:TextIOWrapper = open(outfile, "w", encoding="utf-8")
  def __enter__(self):
    return self
  def __exit__(self, type, value, traceback):
    self.close()
  def close(self):
    """
    Closes the output file.
    """
    if self.fp:
      self.fp.close()
  def write(self, entry:FastqEntry):
    """
    Writes a fastq entry as four lines: header, sequence, '+' and quality.

    Args:
      entry (FastqEntry): A single fastq entry.
    """
    if not self.fp:
      return
    header = f"@{entry.identifier}"
    # Only add the description when there is one, so the header does not end
    # with a trailing space or carry the text "None".
    if entry.description:
      header = f"{header} {entry.description}"
    self.fp.write(f"{header}\n")
    self.fp.write(f"{entry.sequence}\n")
    self.fp.write("+\n")
    self.fp.write(f"{entry.quality}\n")

Classes

class FastqEntry (identifier: str, description: str, sequence: str, quality: str)

Fastq Entry.

Expand source code
@dataclass
class FastqEntry:
  """
  One record of a fastq file.

  The four fields are stored as plain strings; no validation is done on
  them (for example, sequence and quality are not required to have the same
  length).
  """
  # Name of the record.
  identifier: str
  # Free text that accompanies the identifier.
  description: str
  # The sequence itself.
  sequence: str
  # The quality string associated with the sequence.
  quality: str

  def __len__(self) -> int:
    """
    Give the number of characters in the sequence.

    Returns:
      int: The length of the `sequence` field.
    """
    sequence_length = len(self.sequence)
    return sequence_length

Class variables

var description : str
var identifier : str
var quality : str
var sequence : str
class FastqReader

FastqReader: A simple fastq file reader.

Expand source code
class FastqReader:
  """
  FastqReader: A simple fastq file reader.

  Records are read four lines at a time (header, sequence, separator,
  quality). Reading stops silently at the first record that is incomplete or
  whose header line does not match `first_line_pattern`.
  """
  # Header line: '@', an identifier without whitespace and, optionally, a
  # space followed by a free-text description.
  first_line_pattern = re.compile(r"^@([^\s]+)( (.+))*$")

  @staticmethod
  def fastq_entry_from_lines(
    lines:Tuple[str, str, str, str]
  ) -> Optional[FastqEntry]:
    """
    Create a FastqEntry from a tuple with the 4 text lines of a fastq
      format record.

    Args:
      lines (Tuple[str, str, str, str]): The text lines from a fastq file for a
        single entry: header, sequence, separator and quality. The separator
        line is ignored.

    Returns:
      Optional[FastqEntry]: The resulting FastqEntry, or None if the header
        line does not match `first_line_pattern`. The description is an
        empty string when the header only carries an identifier.
    """
    line1, line2, _, line4 = lines
    matching = FastqReader.first_line_pattern.match(line1)
    if matching is None:
      return None
    identifier = matching.group(1)
    # The description group does not take part in the match when the header
    # has no description; group() then gives None, which would break the
    # `str` type declared by FastqEntry.
    description = matching.group(3) or ""
    return FastqEntry(identifier, description, line2, line4)

  def read_next_fastq_entry(
    self,
    text_wrapper: TextIOWrapper
  ) -> Optional[FastqEntry]:
    """
    Consumes four lines from a TextIOWrapper and tries to get a FastqEntry from
      them.

    Args:
      text_wrapper (TextIOWrapper): The input text source, generated by open
        function. Only `next()` is called on it.

    Returns:
      Optional[FastqEntry]: The resulting FastqEntry, or None if fewer than
        four lines could be read, one of them is an empty string, or the
        header line is malformed.
    """
    # Always request four lines; a missing line shows up as None.
    lines = [next(text_wrapper, None) for _ in range(4)]
    if any(line is None or line == "" for line in lines):
      return None
    # Surrounding whitespace, including the line terminator, is dropped.
    header, sequence, separator, quality = map(
      str.strip, cast(list[str], lines)
    )
    return FastqReader.fastq_entry_from_lines(
      (header, sequence, separator, quality)
    )

  def read_fastq_from_file(self, filename: str) -> Iterator[FastqEntry]:
    """
    Reads a fastq file and produces an iterator of fastq entries.

    The file is opened as UTF-8 text and stays open while the returned
    generator is being consumed.

    Args:
      filename (str): The input fastq file.

    Yields:
      Iterator[FastqEntry]: An iterator of the FastqEntry objects read.
    """
    with open(filename, 'r', encoding='utf8') as f_in:
      # Same reading loop as for any other text source.
      yield from self.read_fastq_from_text_source(f_in)

  def read_fastq_from_text_source(
    self,
    text_source: TextIOWrapper
  ) -> Iterator[FastqEntry]:
    """
    Reads fastq entries from a text source.

    Args:
      text_source (TextIOWrapper): The text source. It is not closed by this
        method.

    Yields:
      Iterator[FastqEntry]: An iterator of the FastqEntry objects read, ending
        at the first record that cannot be read.
    """
    while (
      next_entry := self.read_next_fastq_entry(text_source)
    ) is not None:
      yield next_entry

Class variables

var first_line_pattern

Static methods

def fastq_entry_from_lines(lines: Tuple[str, str, str, str]) ‑> Optional[FastqEntry]

Create a FastqEntry from a tuple with the 4 text lines of the fastq format file.

Args

lines : Tuple[str, str, str, str]
The text lines from a fastq file for a single entry.

Returns

Optional[FastqEntry]
The resulting FastqEntry or None if fail.
Expand source code
@staticmethod
def fastq_entry_from_lines(
  lines:Tuple[str, str, str, str]
) -> Optional[FastqEntry]:
  """
  Create a FastqEntry from a tuple with the 4 text lines of a fastq
    format record.

  Args:
    lines (Tuple[str, str, str, str]): The text lines from a fastq file for a
      single entry: header, sequence, separator and quality. The separator
      line is ignored.

  Returns:
    Optional[FastqEntry]: The resulting FastqEntry, or None if the header
      line does not match `FastqReader.first_line_pattern`. The description
      is an empty string when the header only carries an identifier.
  """
  line1, line2, _, line4 = lines
  matching = FastqReader.first_line_pattern.match(line1)
  if matching is None:
    return None
  identifier = matching.group(1)
  # The description group does not take part in the match when the header
  # has no description; group() then gives None, which would break the
  # `str` type declared by FastqEntry.
  description = matching.group(3) or ""
  return FastqEntry(identifier, description, line2, line4)

Methods

def read_fastq_from_file(self, filename: str) ‑> Iterator[FastqEntry]

Reads a fastq file and produces an iterator of fastq entries.

Args

filename : str
The input fastq file.

Yields

Iterator[FastqEntry]
An iterator of the generated FastqEntry objects.
Expand source code
def read_fastq_from_file(self, filename: str) -> Iterator[FastqEntry]:
  """
  Opens a fastq file and lazily yields the entries it contains.

  The file is read as UTF-8 text. Iteration ends as soon as
  `read_next_fastq_entry` gives None.

  Args:
    filename (str): Path of the fastq file to read.

  Yields:
    Iterator[FastqEntry]: The entries, in file order.
  """
  with open(filename, 'r', encoding='utf8') as f_in:
    while (
      current_entry := self.read_next_fastq_entry(f_in)
    ) is not None:
      yield current_entry
def read_fastq_from_text_source(self, text_source: _io.TextIOWrapper) ‑> Iterator[FastqEntry]

Reads fastq entries from a text source.

Args

text_source : TextIOWrapper
The text source.

Yields

Iterator[FastqEntry]
An iterator of produced FastqEntry's.
Expand source code
def read_fastq_from_text_source(
  self,
  text_source: TextIOWrapper
) -> Iterator[FastqEntry]:
  """
  Lazily yields the fastq entries found in an already opened text source.

  Iteration ends as soon as `read_next_fastq_entry` gives None. The text
  source is not closed here.

  Args:
    text_source (TextIOWrapper): The text source to read from.

  Yields:
    Iterator[FastqEntry]: The entries, in reading order.
  """
  while (
    current_entry := self.read_next_fastq_entry(text_source)
  ) is not None:
    yield current_entry
def read_next_fastq_entry(self, text_wrapper: _io.TextIOWrapper) ‑> Optional[FastqEntry]

Consumes four lines from a TextIOWrapper and tries to get a FastqEntry from them.

Args

text_wrapper : TextIOWrapper
The input text source, generated by open function.

Returns

Optional[FastqEntry]
The resulting FastqEntry or None if fail.
Expand source code
def read_next_fastq_entry(
  self,
  text_wrapper: TextIOWrapper
) -> Optional[FastqEntry]:
  """
  Takes the next four lines of a text source and builds a FastqEntry with
    them.

  Args:
    text_wrapper (TextIOWrapper): The input text source, as returned by the
      open function. Four lines are always requested from it.

  Returns:
    Optional[FastqEntry]: The entry built from the four lines, or None when
      a line is missing, a line is an empty string, or the entry cannot be
      built from them.
  """
  # A line that cannot be read is represented by None.
  fetched = [next(text_wrapper, None) for _ in range(4)]
  for text in fetched:
    if text is None or text == "":
      return None
  # Strip surrounding whitespace, which includes the line terminator.
  header, bases, separator, scores = (
    str.strip(text) for text in cast(list[str], fetched)
  )
  return FastqReader.fastq_entry_from_lines(
    (header, bases, separator, scores)
  )
class FastqWriter (outfile: str)

Writes fastq entries to a file.

Expand source code
class FastqWriter:
  """
  Writes fastq entries to a file.

  The output file is opened on construction. Use the writer as a context
  manager, or call `close()` when done, so the file gets closed.
  """
  def __init__(self, outfile:str):
    """
    Opens the output file for writing as UTF-8 text.

    Args:
      outfile (str): The path of the file to write. Existing content is
        overwritten.
    """
    self.fp:TextIOWrapper = open(outfile, "w", encoding="utf-8")
  def __enter__(self):
    return self
  def __exit__(self, type, value, traceback):
    self.close()
  def close(self):
    """
    Closes the output file.
    """
    if self.fp:
      self.fp.close()
  def write(self, entry:FastqEntry):
    """
    Writes a fastq entry as four lines: header, sequence, '+' and quality.

    Args:
      entry (FastqEntry): A single fastq entry.
    """
    if not self.fp:
      return
    header = f"@{entry.identifier}"
    # Only add the description when there is one, so the header does not end
    # with a trailing space or carry the text "None".
    if entry.description:
      header = f"{header} {entry.description}"
    self.fp.write(f"{header}\n")
    self.fp.write(f"{entry.sequence}\n")
    self.fp.write("+\n")
    self.fp.write(f"{entry.quality}\n")

Methods

def write(self, entry: FastqEntry)

Writes a fastq entry.

Args

entry : FastqEntry
A single fastq entry.
Expand source code
def write(self, entry:FastqEntry):
  """
  Writes a fastq entry as four lines: header, sequence, '+' and quality.

  Nothing is written when `self.fp` is falsy.

  Args:
    entry (FastqEntry): A single fastq entry.
  """
  if self.fp:
    header = f"@{entry.identifier}"
    # Only add the description when there is one, so the header does not end
    # with a trailing space or carry the text "None".
    if entry.description:
      header = f"{header} {entry.description}"
    self.fp.write(f"{header}\n")
    self.fp.write(f"{entry.sequence}\n")
    self.fp.write("+\n")
    self.fp.write(f"{entry.quality}\n")