Source code for manim_chemistry.utils.parsers.asnt_parser

import os
from collections import deque
from typing import Any, Deque, Dict, List, Tuple, Union

import numpy as np

from .base_parser import BaseParser

# Maps the textual bond-order token found in an ASNT file to the integer bond
# order stored in the parsed bonds data.
BOND_TYPE_MAPPING = {
    # PubChem's ASN.1 text output names the order-1 bond "single"; listing it
    # explicitly means single bonds no longer depend on a lookup default.
    "single": 1,
    # Historical spelling kept so existing lookups keep working.
    "simple": 1,
    "double": 2,
    "triple": 3,
}


class ASNTParser(BaseParser):
    """Parser for molecule files written in the ASNT format.

    The format looks JSON-like but is not valid JSON, so the file is
    processed line by line. Only files that describe a single molecule are
    supported.

    Examples
    --------
    .. code-block:: python

        parsed_asnt = ASNTParser(filename="acetone_2d.asnt")

        print(parsed_asnt.atoms_data)
        print(parsed_asnt.bonds_data)

        >>> {
            1: {"element": "O", "coords": array([3.732, 0.75, 0.0])},
            2: {"element": "C", "coords": array([2.866, 0.25, 0.0])},
            3: {"element": "C", "coords": array([2.0, 0.75, 0.0])},
            4: {"element": "C", "coords": array([2.866, -0.75, 0.0])},
            5: {"element": "H", "coords": array([2.31, 1.2869, 0.0])},
            6: {"element": "H", "coords": array([1.4631, 1.06, 0.0])},
            7: {"element": "H", "coords": array([1.69, 0.2131, 0.0])},
            8: {"element": "H", "coords": array([2.246, -0.75, 0.0])},
            9: {"element": "H", "coords": array([2.866, -1.37, 0.0])},
            10: {"element": "H", "coords": array([3.486, -0.75, 0.0])},
        }
        >>> {
            0: {"from_atom_index": 1, "to_atom_index": 2, "bond_type": 2},
            1: {"from_atom_index": 2, "to_atom_index": 3, "bond_type": 1},
            2: {"from_atom_index": 2, "to_atom_index": 4, "bond_type": 1},
            3: {"from_atom_index": 3, "to_atom_index": 5, "bond_type": 1},
            4: {"from_atom_index": 3, "to_atom_index": 6, "bond_type": 1},
            5: {"from_atom_index": 3, "to_atom_index": 7, "bond_type": 1},
            6: {"from_atom_index": 4, "to_atom_index": 8, "bond_type": 1},
            7: {"from_atom_index": 4, "to_atom_index": 9, "bond_type": 1},
            8: {"from_atom_index": 4, "to_atom_index": 10, "bond_type": 1},
        }
    """

    @staticmethod
    def read_file(filename: Union[str, bytes, os.PathLike]) -> List[str]:
        """Reads the file and returns its lines.

        Args:
            filename: Path of the ASNT file to read.

        Returns:
            List[str]: The lines of the file, line endings included.
        """
        with open(filename, "r") as asnt_file:
            return asnt_file.readlines()

    @staticmethod
    def replace_stuff(line: str) -> str:
        """Rewrites one raw ASNT line into a quoted, JSON-like form.

        Whitespace-separated tokens are wrapped in double quotes, a
        ``name {`` header becomes ``"name": {`` and a ``key value`` pair
        becomes ``"key": "value"``. Lines of the form ``{ a, b, c }`` have
        their braces turned into square brackets, and lines that only open
        or close a block are returned stripped.

        Quotes already present in the input are swapped for placeholder
        characters first so they are not confused with the quotes added
        here; on the generic path both kinds come back as single quotes.
        Lines returned through the brace shortcuts keep the placeholders.

        Args:
            line (str): Line being read at that moment.

        Returns:
            str: Line with replaced strings.
        """
        line = line.strip().replace("\n", "")
        # Hide the original quotes behind placeholders.
        line = line.replace('"', "¿")
        line = line.replace("'", "¡")

        if line.startswith("{"):
            if len(line) == 1:
                # The line is a lone "{".
                return line
            if line.endswith(("}", "},")):
                # The line is an inline group: { value, value, value }
                line = line.replace("{", "[")
                line = line.replace("}", "]")
                return line

        if line.startswith("}"):
            # Closing brace, with or without a trailing comma.
            return line

        # Generic path: quote every whitespace-separated token.
        line = '"' + line.replace(" ", '" "')
        if line.endswith("{"):
            line = line.replace(' "{', ": {")

        if line.endswith(","):
            line = line.replace(",", '",')
        else:
            line = line + '"'

        if line.endswith(('}"', '{"')):
            # A brace ended the line; drop the quote appended just above.
            line = line[:-1]

        line = line.replace('" "', '": "')
        line = line.replace("¿", "'")
        line = line.replace("¡", "'")
        return line

    @staticmethod
    def _skip_until(lines: Deque[str], prefix: str) -> None:
        """Consumes lines up to and including the first one that, once
        rewritten by ``replace_stuff``, starts with ``prefix``."""
        while not ASNTParser.replace_stuff(lines.popleft()).startswith(prefix):
            pass

    @staticmethod
    def _read_braced_values(lines: Deque[str]) -> List[str]:
        """Consumes value lines up to and including the closing ``}`` line
        and returns the values with commas and double quotes removed."""
        values = []
        line = ASNTParser.replace_stuff(lines.popleft())
        while not line.startswith("}"):
            values.append(line.replace(",", "").replace('"', ""))
            line = ASNTParser.replace_stuff(lines.popleft())
        return values

    @staticmethod
    def _read_coordinate_block(lines: Deque[str], atom_indices: Any) -> Dict:
        """Consumes one coordinate line per atom index, plus the line that
        follows the last of them, and returns ``{atom_index: value}``."""
        coords = {}
        line = ASNTParser.replace_stuff(lines.popleft())
        for atom_index in atom_indices:
            parts = (
                line.replace("[", "")
                .replace("]", "")
                .replace(",", "")
                .strip()
                .split(" ")
            )
            # Each line holds three integers combined as first * second ** third.
            coords[atom_index] = int(parts[0]) * int(parts[1]) ** int(parts[2])
            line = ASNTParser.replace_stuff(lines.popleft())
        return coords

    @staticmethod
    def data_parser(
        data: Any,
    ) -> Union[Tuple[Dict, Dict], List[Tuple[Dict, Dict]]]:
        """Parses the atoms and bonds data and returns a tuple of dictionaries
        with each data.

        The atom data follows the structure:
        {<atom_index>: {"element": <atom_element>, "coords": array([<x>, <y>, 0])}}

        The bond data follows the structure:
        {<bond_index>: {"from_atom_index": <from_atom_index>,
        "to_atom_index": <to_atom_index>, "bond_type": <bond_type>}}

        The sections are read positionally, in this order: atom indices,
        atom elements, bond start atoms, bond end atoms, bond orders, and
        then the x and y coordinate blocks that follow the "conformers" line.

        Args:
            data: The lines of the file, as returned by ``read_file``. The
                sequence is copied and left unmodified.

        Returns:
            Tuple[Dict, Dict]: ``(atoms_data, bonds_data)``.

        Raises:
            IndexError: If the lines run out before every section is read.
            ValueError: If an index or coordinate is not a valid number.
        """
        # Work on a copy: the caller's list stays intact and every pop is O(1).
        lines = deque(data)

        ASNTParser._skip_until(lines, '"atoms"')
        lines.popleft()  # Header of the atom indices block
        atoms_indices = ASNTParser._read_braced_values(lines)
        lines.popleft()  # Header of the elements block
        atoms_elements = ASNTParser._read_braced_values(lines)

        atoms_data = {
            int(float(atom_index.strip())): {"element": atom_element.capitalize()}
            for atom_index, atom_element in zip(atoms_indices, atoms_elements)
        }

        lines.popleft()  # Line closing the atoms section
        lines.popleft()  # Header of the bonds section
        lines.popleft()  # Header of the bond start atoms block
        from_atom_index = [
            int(value) for value in ASNTParser._read_braced_values(lines)
        ]
        lines.popleft()  # Header of the bond end atoms block
        to_atom_index = [
            int(value) for value in ASNTParser._read_braced_values(lines)
        ]
        lines.popleft()  # Header of the bond orders block
        bond_type_list = ASNTParser._read_braced_values(lines)

        bonds_data = {
            bond_index: {
                "from_atom_index": from_atom,
                "to_atom_index": to_atom,
                # Tokens missing from BOND_TYPE_MAPPING fall back to 1.
                "bond_type": BOND_TYPE_MAPPING.get(bond_type, 1),
            }
            for bond_index, (from_atom, to_atom, bond_type) in enumerate(
                zip(from_atom_index, to_atom_index, bond_type_list)
            )
        }

        ASNTParser._skip_until(lines, '"conformers"')
        lines.popleft()  # Line opening the conformer
        lines.popleft()  # Header of the x block
        coords_x = ASNTParser._read_coordinate_block(lines, atoms_data)
        lines.popleft()  # Header of the y block
        coords_y = ASNTParser._read_coordinate_block(lines, atoms_data)

        for index, atom in atoms_data.items():
            # Only x and y are read from the file; z is always set to 0.
            atom["coords"] = np.array([coords_x[index], coords_y[index], 0])

        return atoms_data, bonds_data