Module xi_covutils.pdb_align

This module has function to align PDB structures.

Expand source code
"""
This module has function to align PDB structures.
"""

from copy import deepcopy
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass

from Bio.PDB.Atom import Atom
from Bio.PDB.Structure import Structure
from Bio.PDB.Superimposer import Superimposer

from xi_covutils.pdbbank import PDBSource, pdb_structure_from
from xi_covutils.pdbmapper import (
  align_sequence_to_sequence,
  build_pdb_sequence
)

@dataclass
class PDBAlignmentResult:
  """
  The result of an alignment of two PDB structures.
  """
  structure: Structure
  """The aligned Biopython Structure."""
  rms: float
  """The rms of the aligned residues."""
  aligned_residues_1: List[int]
  """The list of the aligned residues of the first chain."""
  aligned_residues_2: List[int]
  """The list of the aligned residues of the second chain."""

def align_pdbs(
    pdbsrc1: PDBSource,
    pdbsrc2: PDBSource,
    chain1: str = "A",
    chain2: str = "A"
  ) -> PDBAlignmentResult:
  """
  Align two PDB structures.

  The first structure is considere fixed, and the second is mobile.
  The alignment is made with the Carbon Alpha of alignable residues between
  the two selected chains.

  Args:
    pdbsrc1 (xi_covutils.pdbbank.PDB_SOURCE): Fixed PDB structure.
    pdbsrc2 (xi_covutils.pdbbank.PDB_SOURCE): Mobile PDB structure.
    chain1 (str): Chain ID of the fisrt PDB structure. Defaults to "A".
    chain2 (str): Chain ID of the second PDB structure. Defaults to "A".

  Returns:
    PDBAlignmentResult: A new structure for the second PDBs with the rotation
      and translations applied.
  """
  return align_partial_pdbs(
    pdbsrc1=pdbsrc1,
    pdbsrc2=pdbsrc2,
    region1=None,
    region2=None,
    chain1=chain1,
    chain2=chain2,
  )

# pylint: disable=too-many-arguments, too-many-locals
def align_partial_pdbs(
    pdbsrc1: PDBSource,
    pdbsrc2: PDBSource,
    region1: Optional[List[int]],
    region2: Optional[List[int]],
    chain1: str = "A",
    chain2: str = "A",
  ) -> PDBAlignmentResult:
  """
  Align two PDB structures.

  The first structure is considere fixed, and the second is mobile.
  The alignment is made with the Carbon Alpha of alignable residues between
  the two selected chains and residues.

  Args:
    pdbsrc1 (xi_covutils.pdbbank.PDB_SOURCE): Fixed PDB structure.
    pdbsrc2 (xi_covutils.pdbbank.PDB_SOURCE): Mobile PDB structure.
    region1 (Optional[List[int]]): Residue numbers of the first protein to be
      aligned.
    region2 (Optional[List[int]]): Residue numbers of the second protein to be
      aligned.
    chain1 (str): Chain ID of the fisrt PDB structure. Defaults to "A".
    chain2 (str): Chain ID of the second PDB structure. Defaults to "A".

  Returns:
    PDBAlignmentResult: A new structure for the
      second PDBs with the rotation and translations applied.

  """
  def _select_residue_atoms_ca_in_chain(
      pdb_strucure: Structure,
      residues: List[int],
      chain:str
    ) -> List[Atom]:
    atoms = []
    for res in pdb_strucure[0][chain].get_residues():
      res_n = res.id[1]
      if res_n in residues:
        atoms.append(res["CA"])
    return atoms
  def _get_seq_and_res(
      seq_data: Dict[int, str]
    ) -> Tuple[str, List[int]]:
    res = sorted(seq_data.keys())
    seq = "".join(seq_data[r] for r in res)
    return seq, res
  def _filter_regions(
      aln_res_1: List[int],
      aln_res_2: List[int],
      region1: Optional[List[int]],
      region2: Optional[List[int]],
    ):
    for i, region in enumerate([region1, region2]):
      if region is None:
        continue
      set_r = set(region)
      aln_res = zip(
        *(
          x
          for x in zip(aln_res_1, aln_res_2)
          if x[i] in set_r
        )
      )
      aln_res_1, aln_res_2 = [list(x) for x in aln_res]
    return aln_res_1, aln_res_2
  pdb1 = pdb_structure_from(pdbsrc1)
  if not pdb1:
    raise ValueError("PDB cannot be interpreted")
  pdb2 = pdb_structure_from(pdbsrc2)
  if not pdb2:
    raise ValueError("PDB cannot be interpreted")
  # Build amino acid sequences from pdb as dict
  # from residue number to amino acid.
  seq_1_data = build_pdb_sequence(pdb1, chain1)
  seq_2_data = build_pdb_sequence(pdb2, chain2)
  # Converts sequence data into a string sequence
  # and a list of residues numbers.
  seq1, res1 = _get_seq_and_res(seq_1_data)
  seq2, res2 = _get_seq_and_res(seq_2_data)
  # Align sequences, gets a dict that matches position in seq1 to seq2.
  aln = align_sequence_to_sequence(seq1, seq2)
  # Maps sequence positions to pdb residue numbers.
  aln_res_1 = [res1[i-1] for i in sorted(aln)]
  aln_res_2 = [res2[aln[i]-1] for i in sorted(aln)]
  # Removes residues outside the regions to align
  aln_res_1, aln_res_2 = _filter_regions(
    aln_res_1,
    aln_res_2,
    region1,
    region2
  )
  # There should be at least three residues to align
  if len(aln_res_1) < 3:
    raise ValueError("Alignment requires at least three residues.")
  # Select the atoms corresponding to the selected residues.
  atoms_1 = _select_residue_atoms_ca_in_chain(pdb1, aln_res_1, chain1)
  atoms_2 = _select_residue_atoms_ca_in_chain(pdb2, aln_res_2, chain2)
  # Makes the structural alignment
  sup = Superimposer()
  sup.set_atoms(atoms_1, atoms_2)
  # Makes a new copy of the second structure to avoid
  # affecting the original structure. And applys the
  # geometrical transformations.
  pdb2_copy = deepcopy(pdb2)
  sup.apply(pdb2_copy)
  rms = sup.rms if isinstance(sup.rms, float) else 0
  return PDBAlignmentResult(
    pdb2_copy,
    rms,
    aln_res_1,
    aln_res_2
  )

Functions

def align_partial_pdbs(pdbsrc1: Union[str, Bio.PDB.Structure.Structure], pdbsrc2: Union[str, Bio.PDB.Structure.Structure], region1: Optional[List[int]], region2: Optional[List[int]], chain1: str = 'A', chain2: str = 'A') ‑> PDBAlignmentResult

Align two PDB structures.

The first structure is considere fixed, and the second is mobile. The alignment is made with the Carbon Alpha of alignable residues between the two selected chains and residues.

Args

pdbsrc1 : xi_covutils.pdbbank.PDB_SOURCE
Fixed PDB structure.
pdbsrc2 : xi_covutils.pdbbank.PDB_SOURCE
Mobile PDB structure.
region1 : Optional[List[int]]
Residue numbers of the first protein to be aligned.
region2 : Optional[List[int]]
Residue numbers of the second protein to be aligned.
chain1 : str
Chain ID of the fisrt PDB structure. Defaults to "A".
chain2 : str
Chain ID of the second PDB structure. Defaults to "A".

Returns

PDBAlignmentResult
A new structure for the second PDBs with the rotation and translations applied.
Expand source code
def align_partial_pdbs(
    pdbsrc1: PDBSource,
    pdbsrc2: PDBSource,
    region1: Optional[List[int]],
    region2: Optional[List[int]],
    chain1: str = "A",
    chain2: str = "A",
  ) -> PDBAlignmentResult:
  """
  Align two PDB structures.

  The first structure is considere fixed, and the second is mobile.
  The alignment is made with the Carbon Alpha of alignable residues between
  the two selected chains and residues.

  Args:
    pdbsrc1 (xi_covutils.pdbbank.PDB_SOURCE): Fixed PDB structure.
    pdbsrc2 (xi_covutils.pdbbank.PDB_SOURCE): Mobile PDB structure.
    region1 (Optional[List[int]]): Residue numbers of the first protein to be
      aligned.
    region2 (Optional[List[int]]): Residue numbers of the second protein to be
      aligned.
    chain1 (str): Chain ID of the fisrt PDB structure. Defaults to "A".
    chain2 (str): Chain ID of the second PDB structure. Defaults to "A".

  Returns:
    PDBAlignmentResult: A new structure for the
      second PDBs with the rotation and translations applied.

  """
  def _select_residue_atoms_ca_in_chain(
      pdb_strucure: Structure,
      residues: List[int],
      chain:str
    ) -> List[Atom]:
    atoms = []
    for res in pdb_strucure[0][chain].get_residues():
      res_n = res.id[1]
      if res_n in residues:
        atoms.append(res["CA"])
    return atoms
  def _get_seq_and_res(
      seq_data: Dict[int, str]
    ) -> Tuple[str, List[int]]:
    res = sorted(seq_data.keys())
    seq = "".join(seq_data[r] for r in res)
    return seq, res
  def _filter_regions(
      aln_res_1: List[int],
      aln_res_2: List[int],
      region1: Optional[List[int]],
      region2: Optional[List[int]],
    ):
    for i, region in enumerate([region1, region2]):
      if region is None:
        continue
      set_r = set(region)
      aln_res = zip(
        *(
          x
          for x in zip(aln_res_1, aln_res_2)
          if x[i] in set_r
        )
      )
      aln_res_1, aln_res_2 = [list(x) for x in aln_res]
    return aln_res_1, aln_res_2
  pdb1 = pdb_structure_from(pdbsrc1)
  if not pdb1:
    raise ValueError("PDB cannot be interpreted")
  pdb2 = pdb_structure_from(pdbsrc2)
  if not pdb2:
    raise ValueError("PDB cannot be interpreted")
  # Build amino acid sequences from pdb as dict
  # from residue number to amino acid.
  seq_1_data = build_pdb_sequence(pdb1, chain1)
  seq_2_data = build_pdb_sequence(pdb2, chain2)
  # Converts sequence data into a string sequence
  # and a list of residues numbers.
  seq1, res1 = _get_seq_and_res(seq_1_data)
  seq2, res2 = _get_seq_and_res(seq_2_data)
  # Align sequences, gets a dict that matches position in seq1 to seq2.
  aln = align_sequence_to_sequence(seq1, seq2)
  # Maps sequence positions to pdb residue numbers.
  aln_res_1 = [res1[i-1] for i in sorted(aln)]
  aln_res_2 = [res2[aln[i]-1] for i in sorted(aln)]
  # Removes residues outside the regions to align
  aln_res_1, aln_res_2 = _filter_regions(
    aln_res_1,
    aln_res_2,
    region1,
    region2
  )
  # There should be at least three residues to align
  if len(aln_res_1) < 3:
    raise ValueError("Alignment requires at least three residues.")
  # Select the atoms corresponding to the selected residues.
  atoms_1 = _select_residue_atoms_ca_in_chain(pdb1, aln_res_1, chain1)
  atoms_2 = _select_residue_atoms_ca_in_chain(pdb2, aln_res_2, chain2)
  # Makes the structural alignment
  sup = Superimposer()
  sup.set_atoms(atoms_1, atoms_2)
  # Makes a new copy of the second structure to avoid
  # affecting the original structure. And applys the
  # geometrical transformations.
  pdb2_copy = deepcopy(pdb2)
  sup.apply(pdb2_copy)
  rms = sup.rms if isinstance(sup.rms, float) else 0
  return PDBAlignmentResult(
    pdb2_copy,
    rms,
    aln_res_1,
    aln_res_2
  )
def align_pdbs(pdbsrc1: Union[str, Bio.PDB.Structure.Structure], pdbsrc2: Union[str, Bio.PDB.Structure.Structure], chain1: str = 'A', chain2: str = 'A') ‑> PDBAlignmentResult

Align two PDB structures.

The first structure is considere fixed, and the second is mobile. The alignment is made with the Carbon Alpha of alignable residues between the two selected chains.

Args

pdbsrc1 : xi_covutils.pdbbank.PDB_SOURCE
Fixed PDB structure.
pdbsrc2 : xi_covutils.pdbbank.PDB_SOURCE
Mobile PDB structure.
chain1 : str
Chain ID of the fisrt PDB structure. Defaults to "A".
chain2 : str
Chain ID of the second PDB structure. Defaults to "A".

Returns

PDBAlignmentResult
A new structure for the second PDBs with the rotation and translations applied.
Expand source code
def align_pdbs(
    pdbsrc1: PDBSource,
    pdbsrc2: PDBSource,
    chain1: str = "A",
    chain2: str = "A"
  ) -> PDBAlignmentResult:
  """
  Align two PDB structures.

  The first structure is considere fixed, and the second is mobile.
  The alignment is made with the Carbon Alpha of alignable residues between
  the two selected chains.

  Args:
    pdbsrc1 (xi_covutils.pdbbank.PDB_SOURCE): Fixed PDB structure.
    pdbsrc2 (xi_covutils.pdbbank.PDB_SOURCE): Mobile PDB structure.
    chain1 (str): Chain ID of the fisrt PDB structure. Defaults to "A".
    chain2 (str): Chain ID of the second PDB structure. Defaults to "A".

  Returns:
    PDBAlignmentResult: A new structure for the second PDBs with the rotation
      and translations applied.
  """
  return align_partial_pdbs(
    pdbsrc1=pdbsrc1,
    pdbsrc2=pdbsrc2,
    region1=None,
    region2=None,
    chain1=chain1,
    chain2=chain2,
  )

Classes

class PDBAlignmentResult (structure: Bio.PDB.Structure.Structure, rms: float, aligned_residues_1: List[int], aligned_residues_2: List[int])

The result of an alignment of two PDB structures.

Expand source code
@dataclass
class PDBAlignmentResult:
  """
  The result of an alignment of two PDB structures.
  """
  structure: Structure
  """The aligned Biopython Structure."""
  rms: float
  """The rms of the aligned residues."""
  aligned_residues_1: List[int]
  """The list of the aligned residues of the first chain."""
  aligned_residues_2: List[int]
  """The list of the aligned residues of the second chain."""

Class variables

var aligned_residues_1 : List[int]

The list of the aligned residues of the first chain.

var aligned_residues_2 : List[int]

The list of the aligned residues of the second chain.

var rms : float

The rms of the aligned residues.

var structure : Bio.PDB.Structure.Structure

The aligned Biopython Structure.