Module xi_covutils.fastq
A simple module to work with fastq files
Expand source code
"""
A simple module to work with fastq files
"""
from dataclasses import dataclass
from io import TextIOWrapper
import re
from typing import Iterator, Optional, Tuple, cast
@dataclass
class FastqEntry:
    """
    A single record of a fastq file.

    Attributes:
        identifier (str): The record identifier.
        description (str): The free text that follows the identifier.
        sequence (str): The sequence of the record.
        quality (str): The quality string of the record.
    """
    identifier: str
    description: str
    sequence: str
    quality: str

    def __len__(self) -> int:
        """
        Give the number of characters in the sequence.

        Returns:
            int: The length of the ``sequence`` attribute.
        """
        return len(self.sequence)
class FastqReader:
    """
    FastqReader: A simple fastq file reader.

    Every record is read as four consecutive lines: header, sequence,
    separator and quality. The separator line is read but its content is
    not checked.
    """
    # Group 1 is the identifier (everything after "@" up to the first
    # whitespace); group 3 is the optional description that follows a single
    # space. The optional group is written with "?" instead of "*": both forms
    # accept exactly the same headers, but a repeated group wrapping ".+" can
    # backtrack exponentially on input that does not match.
    first_line_pattern = re.compile(r"^@([^\s]+)( (.+))?$")

    @staticmethod
    def fastq_entry_from_lines(
        lines: Tuple[str, str, str, str]
    ) -> Optional[FastqEntry]:
        """
        Create a FastqEntry from a tuple with the 4 text lines of a fastq
        format record.

        Args:
            lines (Tuple[str, str, str, str]): The text lines from a fastq file
                for a single entry, in file order. The third line (separator)
                is ignored.

        Returns:
            Optional[FastqEntry]: The resulting FastqEntry, or None if the
                first line does not match ``first_line_pattern``.
        """
        line1, line2, _, line4 = lines
        matching = FastqReader.first_line_pattern.match(line1)
        if matching is None:
            return None
        identifier = matching.group(1)
        # group(3) is None when the header carries no description; normalise
        # it so that FastqEntry.description is always a str.
        description = matching.group(3) or ""
        return FastqEntry(identifier, description, line2, line4)

    def read_next_fastq_entry(
        self,
        text_wrapper: TextIOWrapper
    ) -> Optional[FastqEntry]:
        """
        Consumes four lines from a TextIOWrapper and tries to get a FastqEntry
        from them.

        Args:
            text_wrapper (TextIOWrapper): The input text source, generated by
                the open function. Only ``next()`` is called on it.

        Returns:
            Optional[FastqEntry]: The resulting FastqEntry, or None if fewer
                than four lines are available, any of them is an empty string,
                or the header line cannot be parsed.
        """
        # Always pull four lines, even if the source runs out part-way:
        # next() then yields None for the missing ones.
        raw_lines = [next(text_wrapper, None) for _ in range(4)]
        if any(not line for line in raw_lines):
            return None
        header, sequence, separator, quality = (
            cast(str, line).strip() for line in raw_lines
        )
        return FastqReader.fastq_entry_from_lines(
            (header, sequence, separator, quality)
        )

    def read_fastq_from_file(self, filename: str) -> Iterator[FastqEntry]:
        """
        Reads a fastq file and produces an iterator of fastq entries.

        The file is opened as UTF-8 text and stays open while the returned
        iterator is being consumed.

        Args:
            filename (str): The input fastq file.

        Yields:
            Iterator[FastqEntry]: An iterator of the FastqEntry objects read
                from the file.
        """
        with open(filename, 'r', encoding='utf8') as f_in:
            yield from self.read_fastq_from_text_source(f_in)

    def read_fastq_from_text_source(
        self,
        text_source: TextIOWrapper
    ) -> Iterator[FastqEntry]:
        """
        Reads fastq entries from a text source.

        Iteration ends at the first record that cannot be read, whether
        because the source is exhausted, the record is incomplete or its
        header is malformed.

        Args:
            text_source (TextIOWrapper): The text source.

        Yields:
            Iterator[FastqEntry]: An iterator of the FastqEntry objects read
                from the source.
        """
        while (entry := self.read_next_fastq_entry(text_source)) is not None:
            yield entry
class FastqWriter:
    """
    Writes fastq entries to a file.

    The output file is opened (and truncated) when the writer is created.
    Use the writer as a context manager, or call ``close()`` when done, so
    that the file handle is released.
    """
    def __init__(self, outfile: str):
        """
        Opens the output file for writing as UTF-8 text.

        Args:
            outfile (str): The path of the file to write.
        """
        self.fp: TextIOWrapper = open(outfile, "w", encoding="utf-8")

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Returns None, so an exception raised inside the with block is
        # propagated after the file is closed.
        self.close()

    def close(self):
        """
        Closes the output file. Calling it more than once is harmless.
        """
        if self.fp and not self.fp.closed:
            self.fp.close()

    def write(self, entry: FastqEntry):
        """
        Writes a fastq entry as four lines: header, sequence, "+" and
        quality.

        Args:
            entry (FastqEntry): A single fastq entry.
        """
        if not self.fp:
            return
        header = f"@{entry.identifier}"
        # Only append the description when there is one, so that no trailing
        # space (or the text "None") ends up in the header line.
        if entry.description:
            header = f"{header} {entry.description}"
        # One write call per record instead of four small ones.
        self.fp.write(f"{header}\n{entry.sequence}\n+\n{entry.quality}\n")
Classes
class FastqEntry (identifier: str, description: str, sequence: str, quality: str)
-
Fastq Entry.
Expand source code
@dataclass class FastqEntry: """ Fastq Entry. """ identifier:str description:str sequence:str quality:str def __len__(self) -> int: """ Return the sequence length. """ return len(self.sequence)
Class variables
var description : str
var identifier : str
var quality : str
var sequence : str
class FastqReader
-
FastqReader: A simple fastq file reader.
Expand source code
class FastqReader: """ FastqReader: A simple fastq file reader. """ first_line_pattern = re.compile(r"^@([^\s]+)( (.+))*$") @staticmethod def fastq_entry_from_lines( lines:Tuple[str, str, str, str] ) -> Optional[FastqEntry]: """ Create a FastqEntry from the a tuple withg the 4 text lines of the fastq format file. Args: lines (Tuple[str, str, str, str]): The text lines from a fastq file for a single entry. Returns: Optional[FastqEntry]: The resulting FastqEntry or None if fail. """ identifier:str = "" description:str = "" line1, line2, _, line4 = lines if not (matching := re.match(FastqReader.first_line_pattern, line1)): return None identifier = matching.group(1) description = matching.group(3) return FastqEntry(identifier, description, line2, line4) def read_next_fastq_entry( self, text_wrapper: TextIOWrapper ) -> Optional[FastqEntry]: """ Consumes four lines from a TextIOWrapper and tries to get a FastqEntry from them. Args: text_wrapper (TextIOWrapper): The input text source, generated by open function. Returns: Optional[FastqEntry]: The resulting FastqEntry or None if fail. """ lines = [ next(text_wrapper, None) for _ in range(4) ] if any(line is None or line =="" for line in lines): return None lines = cast(list[str], lines) lines = tuple(map(str.strip, lines)) assert len(lines) == 4 return FastqReader.fastq_entry_from_lines(lines) def read_fastq_from_file(self, filename: str) -> Iterator[FastqEntry]: """ Reads a fastq file and produces a iterator of fastq entries. Args: filename (str): The input fastq file. Yields: Iterator[FastqEntry]: An iterator of generator FastqEntry's. """ with open(filename, 'r', encoding='utf8') as f_in: while True: next_entry = self.read_next_fastq_entry(f_in) if next_entry is None: return yield next_entry def read_fastq_from_text_source( self, text_source: TextIOWrapper ) -> Iterator[FastqEntry]: """ Reads fastq entries from a text source. Args: text_source (TextIOWrapper): The text source. 
Yields: Iterator[FastqEntry]: An iterator of produced FastqEntry's. """ while True: next_entry = self.read_next_fastq_entry(text_source) if next_entry is None: return yield next_entry
Class variables
var first_line_pattern
Static methods
def fastq_entry_from_lines(lines: Tuple[str, str, str, str]) ‑> Optional[FastqEntry]
-
Create a FastqEntry from a tuple with the 4 text lines of a fastq format record.
Args
lines
:Tuple[str, str, str, str]
- The text lines from a fastq file for a single entry.
Returns
Optional[FastqEntry]
- The resulting FastqEntry, or None on failure.
Expand source code
@staticmethod def fastq_entry_from_lines( lines:Tuple[str, str, str, str] ) -> Optional[FastqEntry]: """ Create a FastqEntry from the a tuple withg the 4 text lines of the fastq format file. Args: lines (Tuple[str, str, str, str]): The text lines from a fastq file for a single entry. Returns: Optional[FastqEntry]: The resulting FastqEntry or None if fail. """ identifier:str = "" description:str = "" line1, line2, _, line4 = lines if not (matching := re.match(FastqReader.first_line_pattern, line1)): return None identifier = matching.group(1) description = matching.group(3) return FastqEntry(identifier, description, line2, line4)
Methods
def read_fastq_from_file(self, filename: str) ‑> Iterator[FastqEntry]
-
Reads a fastq file and produces an iterator of fastq entries.
Args
filename
:str
- The input fastq file.
Yields
Iterator[FastqEntry]
- An iterator of the generated FastqEntry objects.
Expand source code
def read_fastq_from_file(self, filename: str) -> Iterator[FastqEntry]: """ Reads a fastq file and produces a iterator of fastq entries. Args: filename (str): The input fastq file. Yields: Iterator[FastqEntry]: An iterator of generator FastqEntry's. """ with open(filename, 'r', encoding='utf8') as f_in: while True: next_entry = self.read_next_fastq_entry(f_in) if next_entry is None: return yield next_entry
def read_fastq_from_text_source(self, text_source: _io.TextIOWrapper) ‑> Iterator[FastqEntry]
-
Reads fastq entries from a text source.
Args
text_source
:TextIOWrapper
- The text source.
Yields
Iterator[FastqEntry]
- An iterator of the produced FastqEntry objects.
Expand source code
def read_fastq_from_text_source( self, text_source: TextIOWrapper ) -> Iterator[FastqEntry]: """ Reads fastq entries from a text source. Args: text_source (TextIOWrapper): The text source. Yields: Iterator[FastqEntry]: An iterator of produced FastqEntry's. """ while True: next_entry = self.read_next_fastq_entry(text_source) if next_entry is None: return yield next_entry
def read_next_fastq_entry(self, text_wrapper: _io.TextIOWrapper) ‑> Optional[FastqEntry]
-
Consumes four lines from a TextIOWrapper and tries to get a FastqEntry from them.
Args
text_wrapper
:TextIOWrapper
- The input text source, generated by open function.
Returns
Optional[FastqEntry]
- The resulting FastqEntry, or None on failure.
Expand source code
def read_next_fastq_entry( self, text_wrapper: TextIOWrapper ) -> Optional[FastqEntry]: """ Consumes four lines from a TextIOWrapper and tries to get a FastqEntry from them. Args: text_wrapper (TextIOWrapper): The input text source, generated by open function. Returns: Optional[FastqEntry]: The resulting FastqEntry or None if fail. """ lines = [ next(text_wrapper, None) for _ in range(4) ] if any(line is None or line =="" for line in lines): return None lines = cast(list[str], lines) lines = tuple(map(str.strip, lines)) assert len(lines) == 4 return FastqReader.fastq_entry_from_lines(lines)
class FastqWriter (outfile: str)
-
Writes fastq entries to a file.
Expand source code
class FastqWriter: """ Writes fastq entries to a file. """ def __init__(self, outfile:str): self.fp:TextIOWrapper = open(outfile, "w", encoding="utf-8") def __enter__(self): return self def __exit__(self, type, value, traceback): if self.fp: self.fp.close() def write(self, entry:FastqEntry): """ Writes a fastq entry. Args: entry (FastqEntry): A single fastq entry. """ if self.fp: self.fp.write(f"@{entry.identifier} {entry.description}\n") self.fp.write(f"{entry.sequence}\n") self.fp.write("+\n") self.fp.write(f"{entry.quality}\n")
Methods
def write(self, entry: FastqEntry)
-
Expand source code
def write(self, entry:FastqEntry): """ Writes a fastq entry. Args: entry (FastqEntry): A single fastq entry. """ if self.fp: self.fp.write(f"@{entry.identifier} {entry.description}\n") self.fp.write(f"{entry.sequence}\n") self.fp.write("+\n") self.fp.write(f"{entry.quality}\n")