from os import getcwd
from os.path import join
from pathlib import Path
import subprocess as sp
import uuid
import click
from hicberg import logger
[docs]
def hic_build_index(genome : str, output_dir : str = None , cpus : int = 1 , verbose : bool = False) -> None:
"""
Building of bowtie2 index (.bt2l files) for read alignment.
Parameters
----------
genome : str
Path to the genome file along which reads are going to be aligned.
cpus : int, optional
Number of threads allocated for the alignment, by default 1
output_dir : str, optional
Path where the Bowtie2 index files should be stored, by default None
verbose : bool, optional
Set wether or not the shell command should be printed, by default False
"""
logger.info("Start building index for alignment")
try:
sp.check_output(["bowtie2-build", "-h"])
except OSError:
raise RuntimeError(
"bowtie2-build not found; check if it is installed and in $PATH\n install Bowtie2 with : conda install bowtie2"
)
genome_path = Path(genome)
if not genome_path.is_file():
raise ValueError(f"Genome file {genome} not found")
if output_dir is None:
output_path = Path(getcwd())
else :
output_path = Path(output_dir)
if not output_path.exists():
raise ValueError(f"Output path {output_path} does not exist. Please provide existing ouput path.")
sample = Path(genome).stem
index_path = Path(output_dir, sample)
cmd_index = f"bowtie2-build -q -f --threads {cpus} --large-index {genome} {index_path}"
if verbose:
logger.info(cmd_index)
sp.run([cmd_index], shell=True)
logger.info(f"Index built at {index_path}")
return index_path
[docs]
def hic_align(index : str, fq_for : str, fq_rev : str, sensitivity : str = 'very-sensitive', max_alignment : int = None, cpus : int = 1, output_dir : str = None, verbose : bool = False) -> None:
"""
Alignment of reads from HiC experiments along an indexed genome.
Parameters
----------
index : str
Path to the index of the genome along which reads are going to be aligned (path to .bt2l files). Default to None, index files are searched to sample_name/data/index/sample_name.
fq_for : str
Path to .fasta containing set of reads to align (forward mate).
fq_rev : str
Path to .fasta containing set of reads to align (forward mate).
sensitivity : str, optional
Sensitivity of the alignment., by default 'very_sensitive'
max_alignment : int, optional
Maximum number of alignments to be returned, by default None
cpus : int, optional
Number of threads allocated for the alignment, by default 1
output_dir : str, optional
Path where the alignment files (.sam) should be stored, by default None
verbose : bool, optional
Set wether or not the shell command should be printed, by default False
"""
logger.info("Start aligning reads")
fq_for_path, fq_rev_path = Path(fq_for), Path(fq_rev)
if not fq_for_path.is_file() or not fq_rev_path.is_file():
raise IOError(f"Wrong path to fastq files : {fq_for_path} or {fq_rev_path} given. \
Pease provide existing files.")
if output_dir is None:
output_path = Path(getcwd())
else :
output_path = Path(output_dir)
if not output_path.exists():
raise ValueError(f"Output path {output_path} does not exist. Please provide existing output path.")
index_path = Path(output_path / index)
if max_alignment is None or max_alignment == -1:
cmd_alignment_rev = f"bowtie2 --{sensitivity} -p {cpus} -a -x {index_path} -S {output_path / '2.sam'} {fq_for}"
cmd_alignment_for = f"bowtie2 --{sensitivity} -p {cpus} -a -x {index_path} -S {output_path / '1.sam'} {fq_rev}"
elif max_alignment is not None:
cmd_alignment_for = f"bowtie2 --{sensitivity} -p {cpus} -k {max_alignment} -p {cpus} -x {index_path} -S {output_path / '1.sam'} {fq_for}"
cmd_alignment_rev = f"bowtie2 --{sensitivity} -p {cpus} -k {max_alignment} -p {cpus} -x {index_path} -S {output_path / '2.sam'} {fq_rev}"
if verbose :
logger.info(cmd_alignment_for)
logger.info(cmd_alignment_rev)
p_for = sp.Popen([cmd_alignment_for], shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
stdout_for, stderr_for = p_for.communicate()
p_rev = sp.Popen([cmd_alignment_rev], shell=True, stdout = sp.PIPE, stderr = sp.PIPE)
stdout_rev, stderr_rev = p_rev.communicate()
if stdout_for :
logger.info(stdout_for.decode('ascii'))
if stderr_for :
logger.info(stderr_for.decode('ascii'))
if stdout_rev:
logger.info(stdout_rev.decode('ascii'))
if stderr_rev:
logger.info(stderr_rev.decode('ascii'))
logger.info(f"Alignment saved at {output_path}")
[docs]
def hic_view(sam_for : str = "1.sam", sam_rev : str = "2.sam", cpus : int = 1, output_dir : str = None, verbose : bool = False) -> None:
"""
Conversion of .sam alignment files to .bam alignment format (using samtools).
Parameters
----------
sam_for : str, optional
Path to forward .sam alignment file, by default "1.sam"
sam_rev : str, optional
Path to reverse .sam alignment file, by default "2.sam"
cpus : int, optional
Number of threads allocated for the alignment, by default 1
output_dir : str, optional
Path where the alignment files (.bam) should be stored, by default None
verbose : bool, optional
Set wether or not the shell command should be printed, by default False
"""
logger.info("Start converting .sam to .bam")
try:
sp.check_output(["samtools", "--help"])
except OSError:
raise RuntimeError(
"Samtools not found; check if it is installed and in $PATH\n install Samtools with : conda install samtools"
)
if output_dir is None:
output_path = Path(getcwd())
else :
output_path = Path(output_dir)
if not output_path.exists():
raise ValueError(f"Output path {output_path} does not exist. Please provide existing output path.")
cmd_view_for = f"samtools view -h -b {output_path / sam_for} -o {output_path / '1.bam'} --threads {cpus}"
cmd_view_rev = f"samtools view -h -b {output_path / sam_rev} -o {output_path / '2.bam'} --threads {cpus}"
if verbose:
logger.info(cmd_view_for)
logger.info(cmd_view_rev)
sp_for = sp.Popen([cmd_view_for], shell=True)
sp_for.communicate()
sp_rev = sp.Popen([cmd_view_rev], shell=True)
sp_rev.communicate()
# Delete .sam files after .bam conversion
(output_path / sam_for).unlink()
(output_path / sam_rev).unlink()
logger.info(f"Compressed alignment done at {output_path}")
[docs]
def hic_sort(bam_for : str = "1.bam", bam_rev : str = "2.bam", cpus : int = 1, output_dir : str = None, verbose : bool = False) -> None:
"""
Sort .bam alignment files by read_name (using samtools).
Parameters
----------
bam_for : str, optional
Forward alignment file to be sorted, by default "1.bam"
bam_rev : str, optional
Reverse alignment file to be sorted, by default "2.bam"
cpus : int, optional
Number of threads allocated for the alignment, by default 1
output_dir : str, optional
Path where the alignment files (.bam) should be stored, by default None
verbose : bool, optional
Set wether or not the shell command should be printed, by default False
"""
logger.info("Start sorting .bam alignment files")
try:
sp.check_output(["samtools", "--help"])
except OSError:
raise RuntimeError(
"Samtools not found; check if it is installed and in $PATH\n install Samtools with : conda install samtools"
)
if output_dir is None:
output_path = Path(getcwd())
else :
output_path = Path(output_dir)
if not output_path.exists():
raise ValueError(f"Output path {output_path} does not exist. Please provide existing ouput path.")
id_for = uuid.uuid4()
id_rev = uuid.uuid4()
cmd_sort_for = f"samtools sort -n -T {id_for} {output_path / '1.bam'} -o {output_path / '1.sorted.bam'} --threads {cpus}"
cmd_sort_rev = f"samtools sort -n -T {id_rev} {output_path / '2.bam'} -o {output_path / '2.sorted.bam'} --threads {cpus}"
if verbose:
logger.info(cmd_sort_for)
logger.info(cmd_sort_rev)
sp_for = sp.Popen([cmd_sort_for], shell=True)
sp_for.communicate()
sp_rev = sp.Popen([cmd_sort_rev], shell=True)
sp_rev.communicate()
(output_path / '1.bam').unlink()
(output_path / '2.bam').unlink()
logger.info(f"Sorted alignment done at {output_path}")
[docs]
def hic_index(bam_for : str = "1.sorted.bam", bam_rev : str = "2.sorted.bam", cpus : int = 1, output_dir : str = None, verbose : bool = False) -> None:
"""
Index a coordinate-sorted BGZIP-compressed SAM, BAM or CRAM file for fast random access.
Parameters
----------
bam_for : str, optional
Forward alignment file to be indexed, by default "1.sorted.bam"
bam_rev : str, optional
Reverse alignment file to be indexed,, by default "2.sorted.bam"
cpus : int, optional
Number of threads allocated for the alignment, by default 1
output_dir : str, optional
Path where the alignment files (.bam) should be stored, by default None
verbose : bool, optional
Set wether or not the shell command should be printed, by default False
"""
try:
sp.check_output(["samtools", "--help"])
except OSError:
raise RuntimeError(
"Samtools not found; check if it is installed and in $PATH\n install Samtools with : conda install samtools"
)
if output_dir is None:
output_path = Path(getcwd())
else :
output_path = Path(output_dir)
if not output_path.exists():
raise ValueError(f"Output path {output_path} does not exist. Please provide existing output path.")
cmd_index_for = f"samtools index -b {bam_for} -@ {cpus}"
cmd_index_rev = f"samtools index -b {bam_rev} -@ {cpus}"
if verbose:
logger.info(cmd_index_for)
logger.info(cmd_index_rev)
sp.run([cmd_index_for], shell=True)
sp.run([cmd_index_rev], shell=True)
logger.info(f"Indexed alignment done at {output_path}")