Module xi_covutils.mkdssp

Run mkdssp to get secondary structure information from a pdb.

Structure one letter code: G = 3-turn helix (3₁₀ helix). Min length 3 residues. H = 4-turn helix (α helix). Minimum length 4 residues. I = 5-turn helix (π helix). Minimum length 5 residues. T = hydrogen bonded turn (3, 4 or 5 turn). E = extended strand in parallel and/or anti-parallel β-sheet conformation. Min length 2 residues. B = residue in isolated β-bridge (single pair β-sheet hydrogen bond formation). S = bend (the only non-hydrogen-bond based assignment). C = coil (residues which are not in any of the above conformations).

Expand source code
"""
  Run mkdssp to get secondary structure information from a pdb.

  Structure one letter code.
  G = 3-turn helix (310 helix). Min length 3 residues.
  H = 4-turn helix (α helix). Minimum length 4 residues.
  I = 5-turn helix (π helix). Minimum length 5 residues.
  T = hydrogen bonded turn (3, 4 or 5 turn)
  E = extended strand in parallel and/or anti-parallel β-sheet conformation.
      Min length 2 residues.
  B = residue in isolated β-bridge (single pair β-sheet hydrogen bond formation)
  S = bend (the only non-hydrogen-bond based assignment).
  C = coil (residues which are not in any of the above conformations).

"""
from os import environ
from os.path import exists
from subprocess import check_output
from shutil import which
import re
from typing import Any, Optional

# Regular expression matching one whole fixed-width residue line.
# Every named group captures an exact number of characters (one "." per
# column), so the groups split the line purely by position. The pattern is
# anchored at both ends; only the last two groups (chain and autochain) are
# optional, so a line must have one of the resulting total widths to match.
MKDSSP_LINE_PATTERN = re.compile(
  "^(?P<index>.....)" # DSSP residue number (5 characters)
  "(?P<pdb_num>......)" # PDB residue number (6 characters)
  "(?P<pdb_chain>..)" # Chain (2 characters)
  "(?P<aa>...)" # Amino acid (3 characters)
  "(?P<structure>.)" # Structure one letter code (1 character)
  "(?P<st_desc>........)" # Additional structure descriptions
  "(?P<BP1>....)" # Bridge pair candidate 1
  "(?P<BP2>....)" # Bridge pair candidate 2
  "(?P<acc>.....)" # Solvent accessibility
  "(?P<hb1>............)" # Hydrogen bond pair 1 (12 characters, one more than hb2-hb4)
  "(?P<hb2>...........)" # Hydrogen bond pair 2
  "(?P<hb3>...........)" # Hydrogen bond pair 3
  "(?P<hb4>...........)" # Hydrogen bond pair 4
  "(?P<tco>........)" # TCO, cosine of C=O angles between two adjacent residues.
  "(?P<kappa>......)" # Kappa: the virtual bond angle.
  "(?P<alpha>......)" # Some value named alpha.
  "(?P<phi>......)" # Dihedral phi angle
  "(?P<psi>......)" # Dihedral psi angle
  "(?P<xca>.......)" # Alpha carbon x coordinate
  "(?P<yca>.......)" # Alpha carbon y coordinate
  "(?P<zca>.......)" # Alpha carbon z coordinate
  "(?P<chain>.................)?" # Chain (optional trailing column)
  "(?P<autochain>..........)?" # Autochain (optional trailing column)
  "$" # End of line
)

def _get_mkdssp_exec():
  if "MKDSSP_PATH" in environ and exists(environ["MKDSSP_PATH"]):
    return environ["MKDSSP_PATH"]
  mk_dssp = which("mkdssp")
  if mk_dssp:
    return mk_dssp
  raise(ValueError(
    "mkdssp program should be on path or MKDSSP_PATH"+
    "enviromental variable should be set."))

def _parse_mkdssp_line(line:str) -> Optional[dict[str, Any]]:
  """
  Parse a single residue line of the mkdssp output.

  Args:
    line (str): One line of text, without the trailing newline.

  Returns:
    Optional[dict[str, Any]]: A dict with the keys 'chain', 'pdb_num',
      'index', 'aa' and 'structure', or None when the line does not match
      MKDSSP_LINE_PATTERN or its PDB residue number is not an integer.
  """
  found = MKDSSP_LINE_PATTERN.match(line)
  if found is None:
    return None
  # Whitespace padding of the fixed-width columns is removed up front.
  fields = {
    name: found.group(name).strip()
    for name in ('pdb_num', 'pdb_chain', 'index', 'aa', 'structure')
  }
  try:
    residue_number = int(fields['pdb_num'])
  except ValueError:
    # Lines whose PDB residue number column is not a plain integer are
    # skipped rather than reported as errors.
    return None
  return {
    'chain': fields['pdb_chain'],
    'pdb_num': residue_number,
    'index': int(fields['index']),
    'aa': fields['aa'],
    'structure': fields['structure']
  }

def _parse_mkdssp_output(
    content:bytes
  ) -> dict[tuple[str, int], dict[str, Any]]:
  decoded = content.decode().split("\n")
  results = {}
  in_data_section = False
  for line in decoded:
    if in_data_section:
      parsed = _parse_mkdssp_line(line)
      if parsed:
        chain = str(parsed['chain'])
        pdb_num = int(parsed['pdb_num'])
        results[(chain, pdb_num)] = parsed
    else:
      in_data_section = in_data_section or line.startswith("  #  RESIDUE")
  return results

def mkdssp(pdb_file:str) -> dict[tuple[str, int], dict[str, Any]]:
  """
  Run the mkdssp program on a pdb file and parse what it prints.

  Args:
    pdb_file (str): the path of the input pdb file.

  Returns:
    dict[tuple[str, int], dict[str, Any]]: The parsed output of the mkdssp
      program, as produced by _parse_mkdssp_output.
  """
  # The command is passed as an argument list, so no shell is involved.
  raw_output = check_output(
    [_get_mkdssp_exec(), '--output-format', 'dssp', '-i', pdb_file]
  )
  return _parse_mkdssp_output(raw_output)

Functions

def mkdssp(pdb_file: str) ‑> dict

Run mkdssp program.

Args

pdb_file : str
the path of the input pdb file.

Returns

dict
The parsed output of the mkdssp program, keyed by (chain, PDB residue number).
Expand source code
def mkdssp(pdb_file:str) -> dict[tuple[str, int], dict[str, Any]]:
  """
  Run the mkdssp program on a pdb file and parse what it prints.

  Args:
    pdb_file (str): the path of the input pdb file.

  Returns:
    dict[tuple[str, int], dict[str, Any]]: The parsed output of the mkdssp
      program, as produced by _parse_mkdssp_output.
  """
  # The command is passed as an argument list, so no shell is involved.
  raw_output = check_output(
    [_get_mkdssp_exec(), '--output-format', 'dssp', '-i', pdb_file]
  )
  return _parse_mkdssp_output(raw_output)