# coding: utf-8
"""
This module implements basic kinds of jobs for QChem runs.
"""
import math
import os
import shutil
import copy
import subprocess
import numpy as np
from pymatgen.core import Molecule
from pymatgen.io.qchem.inputs import QCInput
from pymatgen.io.qchem.outputs import QCOutput, check_for_structure_changes
from custodian.custodian import Job
__author__ = "Samuel Blau, Brandon Wood, Shyam Dwaraknath"
__copyright__ = "Copyright 2018, The Materials Project"
__version__ = "0.1"
__maintainer__ = "Samuel Blau"
__email__ = "samblau1@gmail.com"
__status__ = "Alpha"
__date__ = "3/20/18"
__credits__ = "Xiaohui Qu"
[docs]class QCJob(Job):
"""
A basic QChem Job.
"""
def __init__(
self,
qchem_command,
max_cores,
multimode="openmp",
input_file="mol.qin",
output_file="mol.qout",
qclog_file="mol.qclog",
suffix="",
calc_loc=None,
save_scratch=False,
backup=True
):
"""
Args:
qchem_command (str): Command to run QChem.
max_cores (int): Maximum number of cores to parallelize over.
multimode (str): Parallelization scheme, either openmp or mpi.
input_file (str): Name of the QChem input file.
output_file (str): Name of the QChem output file.
qclog_file (str): Name of the file to redirect the standard output
to. None means not to record the standard output.
suffix (str): String to append to the file in postprocess.
calc_loc (str): Path where Q-Chem should run. Defaults to None, in
which case Q-Chem will run in the system-defined QCLOCALSCR.
save_scratch (bool): Whether to save full scratch directory contents.
Defaults to False.
backup (bool): Whether to backup the initial input file. If True, the
input will be copied with a ".orig" appended. Defaults to True.
"""
self.qchem_command = qchem_command.split(" ")
self.multimode = multimode
self.input_file = input_file
self.output_file = output_file
self.max_cores = max_cores
self.qclog_file = qclog_file
self.suffix = suffix
self.calc_loc = calc_loc
self.save_scratch = save_scratch
self.backup = backup
@property
def current_command(self):
"""
The command to run QChem
"""
multi = {"openmp": "-nt", "mpi": "-np"}
if self.multimode not in multi:
raise RuntimeError("ERROR: Multimode should only be set to openmp or mpi")
command = [
multi[self.multimode],
str(self.max_cores),
self.input_file,
self.output_file,
"scratch"
]
command = self.qchem_command + command
com_str = ""
for part in command:
com_str = com_str + " " + part
return com_str
[docs] def setup(self):
"""
Sets up environment variables necessary to efficiently run QChem
"""
if self.backup:
shutil.copy(self.input_file, "{}.orig".format(self.input_file))
if self.multimode == 'openmp':
os.environ['QCTHREADS'] = str(self.max_cores)
os.environ['OMP_NUM_THREADS'] = str(self.max_cores)
os.environ["QCSCRATCH"] = os.getcwd()
if self.calc_loc is not None:
os.environ["QCLOCALSCR"] = self.calc_loc
[docs] def postprocess(self):
"""
Renames and removes scratch files after running QChem
"""
scratch_dir = os.path.join(os.environ["QCSCRATCH"], "scratch")
for file in ["HESS", "GRAD", "plots/dens.0.cube"]:
file_path = os.path.join(scratch_dir, file)
if os.path.exists(file_path):
shutil.copy(file_path, os.getcwd())
if self.suffix != "":
shutil.move(self.input_file, self.input_file + self.suffix)
shutil.move(self.output_file, self.output_file + self.suffix)
shutil.move(self.qclog_file, self.qclog_file + self.suffix)
for file in ["HESS", "GRAD", "dens.0.cube"]:
if os.path.exists(file):
shutil.move(file, file + self.suffix)
if not self.save_scratch:
shutil.rmtree(scratch_dir)
[docs] def run(self):
"""
Perform the actual QChem run.
Returns:
(subprocess.Popen) Used for monitoring.
"""
local_scratch = os.path.join(os.environ["QCLOCALSCR"], "scratch")
if os.path.exists(local_scratch):
shutil.rmtree(local_scratch)
qclog = open(self.qclog_file, 'w')
p = subprocess.Popen(self.current_command, stdout=qclog, shell=True)
return p
[docs] @classmethod
def opt_with_frequency_flattener(
cls,
qchem_command,
multimode="openmp",
input_file="mol.qin",
output_file="mol.qout",
qclog_file="mol.qclog",
max_iterations=10,
max_molecule_perturb_scale=0.3,
check_connectivity=True,
linked=True,
save_final_scratch=False,
**QCJob_kwargs
):
"""
Optimize a structure and calculate vibrational frequencies to check if the
structure is in a true minima. If a frequency is negative, iteratively
perturbe the geometry, optimize, and recalculate frequencies until all are
positive, aka a true minima has been found.
Args:
qchem_command (str): Command to run QChem.
multimode (str): Parallelization scheme, either openmp or mpi.
input_file (str): Name of the QChem input file.
output_file (str): Name of the QChem output file.
max_iterations (int): Number of perturbation -> optimization -> frequency
iterations to perform. Defaults to 10.
max_molecule_perturb_scale (float): The maximum scaled perturbation that
can be applied to the molecule. Defaults to 0.3.
check_connectivity (bool): Whether to check differences in connectivity
introduced by structural perturbation. Defaults to True.
linked (bool): Whether or not to use the linked flattener. Defaults to True.
save_final_scratch (bool): Whether to save full scratch directory contents
at the end of the flattening. Defaults to False.
**QCJob_kwargs: Passthrough kwargs to QCJob. See
:class:`custodian.qchem.jobs.QCJob`.
"""
if not os.path.exists(input_file):
raise AssertionError("Input file must be present!")
if linked:
energy_diff_cutoff = 0.0000001
orig_input = QCInput.from_file(input_file)
freq_rem = copy.deepcopy(orig_input.rem)
freq_rem["job_type"] = "freq"
opt_rem = copy.deepcopy(orig_input.rem)
opt_rem["geom_opt_hessian"] = "read"
opt_rem["scf_guess_always"] = True
first = True
energy_history = []
for ii in range(max_iterations):
yield (
QCJob(
qchem_command=qchem_command,
multimode=multimode,
input_file=input_file,
output_file=output_file,
qclog_file=qclog_file,
suffix=".opt_" + str(ii),
save_scratch=True,
backup=first,
**QCJob_kwargs
)
)
opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
opt_indata = QCInput.from_file(input_file + ".opt_" + str(ii))
if opt_indata.rem["scf_algorithm"] != freq_rem["scf_algorithm"]:
freq_rem["scf_algorithm"] = opt_indata.rem["scf_algorithm"]
opt_rem["scf_algorithm"] = opt_indata.rem["scf_algorithm"]
first = False
if (
opt_outdata["structure_change"] == "unconnected_fragments"
and not opt_outdata["completion"]
):
print(
"Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
)
break
energy_history.append(opt_outdata.get("final_energy"))
freq_QCInput = QCInput(
molecule=opt_outdata.get("molecule_from_optimized_geometry"),
rem=freq_rem,
opt=orig_input.opt,
pcm=orig_input.pcm,
solvent=orig_input.solvent,
smx=orig_input.smx,
)
freq_QCInput.write_file(input_file)
yield (
QCJob(
qchem_command=qchem_command,
multimode=multimode,
input_file=input_file,
output_file=output_file,
qclog_file=qclog_file,
suffix=".freq_" + str(ii),
save_scratch=True,
backup=first,
**QCJob_kwargs
)
)
outdata = QCOutput(output_file + ".freq_" + str(ii)).data
indata = QCInput.from_file(input_file + ".freq_" + str(ii))
if indata.rem["scf_algorithm"] != freq_rem["scf_algorithm"]:
freq_rem["scf_algorithm"] = indata.rem["scf_algorithm"]
opt_rem["scf_algorithm"] = indata.rem["scf_algorithm"]
errors = outdata.get("errors")
if len(errors) != 0:
raise AssertionError(
"No errors should be encountered while flattening frequencies!"
)
if outdata.get("frequencies")[0] > 0.0:
print("All frequencies positive!")
break
if (
abs(outdata.get("frequencies")[0]) < 15.0
and outdata.get("frequencies")[1] > 0.0
):
print(
"One negative frequency smaller than 15.0 - not worth further flattening!"
)
break
if len(energy_history) > 1:
if (
abs(energy_history[-1] - energy_history[-2])
< energy_diff_cutoff
):
print("Energy change below cutoff!")
break
opt_QCInput = QCInput(
molecule=opt_outdata.get(
"molecule_from_optimized_geometry"
),
rem=opt_rem,
opt=orig_input.opt,
pcm=orig_input.pcm,
solvent=orig_input.solvent,
smx=orig_input.smx,
)
opt_QCInput.write_file(input_file)
if not save_final_scratch:
shutil.rmtree(os.path.join(os.getcwd(), "scratch"))
else:
if not os.path.exists(input_file):
raise AssertionError("Input file must be present!")
orig_opt_input = QCInput.from_file(input_file)
orig_opt_rem = copy.deepcopy(orig_opt_input.rem)
orig_freq_rem = copy.deepcopy(orig_opt_input.rem)
orig_freq_rem["job_type"] = "freq"
first = True
history = []
for ii in range(max_iterations):
yield (
QCJob(
qchem_command=qchem_command,
multimode=multimode,
input_file=input_file,
output_file=output_file,
qclog_file=qclog_file,
suffix=".opt_" + str(ii),
backup=first,
**QCJob_kwargs
)
)
opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
if first:
orig_species = copy.deepcopy(opt_outdata.get("species"))
orig_charge = copy.deepcopy(opt_outdata.get("charge"))
orig_multiplicity = copy.deepcopy(opt_outdata.get("multiplicity"))
orig_energy = copy.deepcopy(opt_outdata.get("final_energy"))
first = False
if (
opt_outdata["structure_change"] == "unconnected_fragments"
and not opt_outdata["completion"]
):
print(
"Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
)
break
freq_QCInput = QCInput(
molecule=opt_outdata.get("molecule_from_optimized_geometry"),
rem=orig_freq_rem,
opt=orig_opt_input.opt,
pcm=orig_opt_input.pcm,
solvent=orig_opt_input.solvent,
smx=orig_opt_input.smx,
)
freq_QCInput.write_file(input_file)
yield (
QCJob(
qchem_command=qchem_command,
multimode=multimode,
input_file=input_file,
output_file=output_file,
qclog_file=qclog_file,
suffix=".freq_" + str(ii),
backup=first,
**QCJob_kwargs
)
)
outdata = QCOutput(output_file + ".freq_" + str(ii)).data
errors = outdata.get("errors")
if len(errors) != 0:
raise AssertionError(
"No errors should be encountered while flattening frequencies!"
)
if outdata.get("frequencies")[0] > 0.0:
print("All frequencies positive!")
if opt_outdata.get("final_energy") > orig_energy:
print(
"WARNING: Energy increased during frequency flattening!"
)
break
hist = {}
hist["molecule"] = copy.deepcopy(
outdata.get("initial_molecule")
)
hist["geometry"] = copy.deepcopy(
outdata.get("initial_geometry")
)
hist["frequencies"] = copy.deepcopy(outdata.get("frequencies"))
hist["frequency_mode_vectors"] = copy.deepcopy(
outdata.get("frequency_mode_vectors")
)
hist["num_neg_freqs"] = sum(
1 for freq in outdata.get("frequencies") if freq < 0
)
hist["energy"] = copy.deepcopy(opt_outdata.get("final_energy"))
hist["index"] = len(history)
hist["children"] = []
history.append(hist)
ref_mol = history[-1]["molecule"]
geom_to_perturb = history[-1]["geometry"]
negative_freq_vecs = history[-1]["frequency_mode_vectors"][0]
reversed_direction = False
standard = True
# If we've found one or more negative frequencies in two consecutive iterations, let's dig in
# deeper:
if len(history) > 1:
# Start by finding the latest iteration's parent:
if history[-1]["index"] in history[-2]["children"]:
parent_hist = history[-2]
history[-1]["parent"] = parent_hist["index"]
elif history[-1]["index"] in history[-3]["children"]:
parent_hist = history[-3]
history[-1]["parent"] = parent_hist["index"]
else:
raise AssertionError(
"ERROR: your parent should always be one or two iterations behind you! Exiting..."
)
# if the number of negative frequencies has remained constant or increased from parent to
# child,
if (
history[-1]["num_neg_freqs"]
>= parent_hist["num_neg_freqs"]
):
# check to see if the parent only has one child, aka only the positive perturbation has
# been tried,
# in which case just try the negative perturbation from the same parent
if len(parent_hist["children"]) == 1:
ref_mol = parent_hist["molecule"]
geom_to_perturb = parent_hist["geometry"]
negative_freq_vecs = parent_hist[
"frequency_mode_vectors"
][0]
reversed_direction = True
standard = False
parent_hist["children"].append(len(history))
# If the parent has two children, aka both directions have been tried, then we have to
# get creative:
elif len(parent_hist["children"]) == 2:
# If we're dealing with just one negative frequency,
if parent_hist["num_neg_freqs"] == 1:
make_good_child_next_parent = False
if (
history[parent_hist["children"][0]][
"energy"
]
< history[-1]["energy"]
):
good_child = copy.deepcopy(
history[parent_hist["children"][0]]
)
else:
good_child = copy.deepcopy(history[-1])
if good_child["num_neg_freqs"] > 1:
raise Exception(
"ERROR: Child with lower energy has more negative frequencies! "
"Exiting..."
)
if (
good_child["energy"] < parent_hist["energy"]
):
make_good_child_next_parent = True
elif (
vector_list_diff(
good_child["frequency_mode_vectors"][0],
parent_hist["frequency_mode_vectors"][
0
],
)
> 0.2
):
make_good_child_next_parent = True
else:
raise Exception(
"ERROR: Good child not good enough! Exiting..."
)
if make_good_child_next_parent:
good_child["index"] = len(history)
history.append(good_child)
ref_mol = history[-1]["molecule"]
geom_to_perturb = history[-1]["geometry"]
negative_freq_vecs = history[-1][
"frequency_mode_vectors"
][0]
else:
raise Exception(
"ERROR: Can't deal with multiple neg frequencies yet! Exiting..."
)
else:
raise AssertionError(
"ERROR: Parent cannot have more than two childen! Exiting..."
)
# Implicitly, if the number of negative frequencies decreased from parent to child,
# continue normally.
if standard:
history[-1]["children"].append(len(history))
min_molecule_perturb_scale = 0.1
scale_grid = 10
perturb_scale_grid = (
max_molecule_perturb_scale - min_molecule_perturb_scale
) / scale_grid
structure_successfully_perturbed = False
for molecule_perturb_scale in np.arange(
max_molecule_perturb_scale,
min_molecule_perturb_scale,
-perturb_scale_grid,
):
new_coords = perturb_coordinates(
old_coords=geom_to_perturb,
negative_freq_vecs=negative_freq_vecs,
molecule_perturb_scale=molecule_perturb_scale,
reversed_direction=reversed_direction,
)
new_molecule = Molecule(
species=orig_species,
coords=new_coords,
charge=orig_charge,
spin_multiplicity=orig_multiplicity,
)
if check_connectivity:
structure_successfully_perturbed = (
check_for_structure_changes(ref_mol, new_molecule)
== "no_change"
)
if structure_successfully_perturbed:
break
if not structure_successfully_perturbed:
raise Exception(
"ERROR: Unable to perturb coordinates to remove negative frequency without changing "
"the connectivity! Exiting..."
)
new_opt_QCInput = QCInput(
molecule=new_molecule,
rem=orig_opt_rem,
opt=orig_opt_input.opt,
pcm=orig_opt_input.pcm,
solvent=orig_opt_input.solvent,
smx=orig_opt_input.smx,
)
new_opt_QCInput.write_file(input_file)
[docs]def perturb_coordinates(
old_coords, negative_freq_vecs, molecule_perturb_scale, reversed_direction
):
"""
Perturbs a structure along the imaginary mode vibrational frequency vectors
"""
max_dis = max([math.sqrt(sum([x ** 2 for x in vec])) for vec in negative_freq_vecs])
scale = molecule_perturb_scale / max_dis
normalized_vecs = [[x * scale for x in vec] for vec in negative_freq_vecs]
direction = 1.0
if reversed_direction:
direction = -1.0
return [
[c + v * direction for c, v in zip(coord, vec)]
for coord, vec in zip(old_coords, normalized_vecs)
]
[docs]def vector_list_diff(vecs1, vecs2):
"""
Calculates the summed difference of two vectors
"""
diff = 0.0
if len(vecs1) != len(vecs2):
raise AssertionError("ERROR: Vectors must be of equal length! Exiting...")
for ii, vec1 in enumerate(vecs1):
diff += np.linalg.norm(vecs2[ii] - vec1)
return diff