Coverage for src/pchemdb/crc.py: 100%
97 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-19 09:48 -0700
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-19 09:48 -0700
"""Utilities for parsing CRC data.

Example: Parse CRC CSV into data structure

>>> from csv import DictReader
>>> from pathlib import Path
>>> from pchemdb.crc import parse_crc
>>> with Path(...).open(mode="r", encoding=...) as file:
...     reader = DictReader(file)
...     data = []
...     for row in reader:
...         data.extend(parse_crc(row))
"""
14from importlib.resources import files
15import json
16import logging
17import re
18from typing import Any
19from typing import NamedTuple
21from pint import Quantity
22from pyEQL import ureg
24from pchemdb.utils import formula_to_salt
logger = logging.getLogger(__name__)

# Optional fractional coefficient (e.g. "1/2") followed by the formula text.
formula_re = re.compile(r"(?P<coeff>\d+/\d+)?(?P<formula>.+)")
# aqueous HBr/HCl: molar conductivity header carrying a temperature.
# ``sign`` only accepts a non-digit character. A bare ``.`` is greedy and
# consumed the first digit of multi-digit temperatures, so a header ending in
# "20 <deg>C" was captured as sign="2", temp="0 <deg>C".
molar_cond_temp_re = re.compile(
    "<i>\u039b</i>/"
    r"S cm<sup>2</sup> mol<sup>-1</sup><br/>(?:(?P<sign>\D)?(?P<temp>\d+.+C))"
)
# aqueous hydro-halogen acids: molar conductivity header carrying a
# decimal concentration after the line break
molar_cond_conc_re1 = re.compile(
    "<i>\u039b</i>/"
    r"S cm<sup>2</sup> mol<sup>-1</sup><br/>(?P<conc>\d+\.\d+)"
)
# aqueous electrolytes: molar conductivity header carrying a decimal
# concentration followed by " M" in parentheses
molar_cond_conc_re2 = re.compile(
    "<i>\u039b</i>"
    r"<sup></sup>\((?P<conc>\d+\.\d+) M\)/S cm<sup>2 </sup>mol<sup>-1</sup>"
)
# kappa header carrying an integer or decimal percentage in parentheses
cond_conc_re = re.compile("<i>\u03ba</i>" r"\((?P<conc>\d+(\.\d+)?)%\)")
# gamma header carrying a decimal concentration followed by " m"
activity_conc_re = re.compile("<i>\u03b3</i>" r"\((?P<conc>\d+\.\d+) m\)")
# Any opening or closing markup tag
xml_tags_re = re.compile(r"<(/)?[^>]+>")
# Temperature assigned to datasets whose headers do not carry one
DEFAULT_TEMPERATURE = "298.15 K"
# Row key holding the concentration for temperature-dependent datasets
_CONDUCTIVITY_CONC_KEY = "<i>c</i>/M"
_CONDUCTIVITY_UNITS = "S/m"
_CONCENTRATION_UNITS = "mol/L"
_TEMPERATURE_UNITS = "K"
# File name of the serialized database inside the package
DB_FILE = "crc.json"
class _ParseResult(NamedTuple):
    """Single property value parsed from one data column of a CRC row."""

    # Property name; the parsers in this module emit "conductivity" or
    # "mean_activity_coefficient"
    prop: str
    # Solute concentration the value refers to (already scaled by the
    # formula's fractional coefficient)
    conc: Quantity
    # Temperature the value refers to
    temp: Quantity
    # Value of the property
    value: Quantity
def _parse_temperature_dependent_molar_conductivity(
    d: dict[str, str], factor: float, temp: str, v: str
) -> _ParseResult:
    """Convert a molar conductivity reported at a given temperature.

    Args:
        d: The CRC row; the concentration is read from its
            ``_CONDUCTIVITY_CONC_KEY`` entry.
        factor: Multiplier applied to the concentration.
        temp: Temperature string taken from the column header.
        v: Molar conductivity in S cm^2/mol, as text.

    Returns:
        A "conductivity" result whose value is the product of the scaled
        concentration and the molar conductivity, expressed in
        ``_CONDUCTIVITY_UNITS``.
    """
    concentration = (
        ureg.Quantity(float(d[_CONDUCTIVITY_CONC_KEY]), _CONCENTRATION_UNITS)
        * factor
    )
    molar_conductivity = ureg.Quantity(float(v), "S cm ** 2 /mol")
    conductivity = concentration * molar_conductivity
    temperature = ureg.Quantity(temp)

    return _ParseResult(
        prop="conductivity",
        conc=concentration,
        temp=temperature,
        value=conductivity.to(_CONDUCTIVITY_UNITS),
    )
def _parse_concentration_dependent_molar_conductivity(
    factor: float, conc_mag: float, v: str
) -> _ParseResult:
    """Convert a molar conductivity reported at a given concentration.

    Args:
        factor: Multiplier applied to the concentration.
        conc_mag: Concentration magnitude in ``_CONCENTRATION_UNITS``.
        v: Molar conductivity in S cm^2/mol, as text.

    Returns:
        A "conductivity" result at ``DEFAULT_TEMPERATURE`` whose value is the
        product of the scaled concentration and the molar conductivity,
        expressed in ``_CONDUCTIVITY_UNITS``.
    """
    concentration = ureg.Quantity(conc_mag, _CONCENTRATION_UNITS) * factor
    molar_conductivity = ureg.Quantity(float(v), "S cm ** 2 /mol")
    conductivity = concentration * molar_conductivity
    temperature = ureg.Quantity(DEFAULT_TEMPERATURE)

    return _ParseResult(
        prop="conductivity",
        conc=concentration,
        temp=temperature,
        value=conductivity.to(_CONDUCTIVITY_UNITS),
    )
def _parse_concentration_dependent_conductivity(
    factor: float, target_conc: float, v: str
) -> _ParseResult:
    """Convert a conductivity reported at a given weight percent.

    Args:
        factor: Multiplier applied to the concentration.
        target_conc: Reported weight percent of the solute.
        v: Conductivity in mS/cm, as text.

    Returns:
        A "conductivity" result at 20 degC whose value is the reported
        conductivity expressed in ``_CONDUCTIVITY_UNITS``.

    Raises:
        ZeroDivisionError: If ``target_conc`` is 100.
    """
    prop = "conductivity"
    prop_units = "mS / cm"

    temp = "20 degC"
    # Store concentration not as true weight percent, but as wt% of
    # solution required to be added to achieve reported wt%
    # This is required because when adding solutes based on weight
    # percent, pyEQL adds the solute in an amount equal to the weight
    # percent instead of adding the amount of solute required for
    # the solute to attain the specified weight percent
    # NOTE(review): this ratio is a mass fraction (e.g. 5 -> 0.0526) yet it
    # is labelled "%"; confirm against pyEQL whether it should be scaled
    # by 100.
    conc_mag = target_conc / (100 - target_conc)
    conc_units = "%"
    conc = ureg.Quantity(conc_mag, conc_units) * factor
    # The reported value is already a conductivity, not a molar
    # conductivity, so it must not be multiplied by the concentration.
    value = ureg.Quantity(float(v), prop_units)

    return _ParseResult(
        prop=prop,
        conc=conc,
        temp=ureg.Quantity(temp),
        value=value.to(_CONDUCTIVITY_UNITS),
    )
def _parse_mean_activity_coefficient(
    factor: float, conc_mag: float, v: str
) -> _ParseResult:
    """Wrap a mean activity coefficient reported at a given molality.

    Args:
        factor: Multiplier applied to the concentration.
        conc_mag: Concentration magnitude in mol/kg.
        v: Mean activity coefficient, as text.

    Returns:
        A "mean_activity_coefficient" result at ``DEFAULT_TEMPERATURE`` with
        a dimensionless value.
    """
    molality = ureg.Quantity(conc_mag, "mol/kg") * factor
    coefficient = ureg.Quantity(float(v), "dimensionless")
    temperature = ureg.Quantity(DEFAULT_TEMPERATURE)

    return _ParseResult(
        prop="mean_activity_coefficient",
        conc=molality,
        temp=temperature,
        value=coefficient,
    )
def parse_crc(
    d: dict[str, Any],
) -> list[
    tuple[
        dict[str, Any], dict[str, list[tuple[str, str]]], list[tuple[str, str]]
    ]
]:
    """Parse data from CRC.

    Args:
        d: A dictionary corresponding to a row in a CRC .csv file.

    Returns:
        A list of 3-tuples (``solution``, ``solute_data``, ``solution_data``),
        where each item represents a property entry. ``solution`` is a
        dictionary mapping :class:`pyEQL.solution.Solution` constructor
        parameter names to their values. ``solute_data`` is a dictionary mapping
        solutes formulae to list of property-value pairs. ``solution_data`` is a
        list of property-value pairs.

    Raises:
        ValueError: If no formula can be parsed from the row's compound entry.
    """
    dataset: list[
        tuple[
            dict[str, Any],
            dict[str, list[tuple[str, str]]],
            list[tuple[str, str]],
        ]
    ] = []
    # Hyphen-minus, minus sign and en dash: characters that mark a
    # negative temperature in a column header.
    minus_signs = ("-", "\u2212", "\u2013")

    compound = str(d.get("Mol. form.", d.get("Compound")))
    solution = xml_tags_re.sub("", compound)
    formula_match = formula_re.search(solution)

    if not formula_match:
        msg = f"Unable to parse formula: {solution}"
        raise ValueError(msg)

    # A leading "n/m" coefficient scales every concentration of the row.
    num, denom = (formula_match.group("coeff") or "1/1").split("/")
    factor = int(num) / int(denom)
    formula = formula_match.group("formula")
    salt = formula_to_salt(formula)

    for k, v in d.items():
        if k is None or not v:
            continue

        # If data key in temperature-dependent dataset,
        # read concentration from "<i>c</i>/M" key
        # read temperature from molar_cond_temp_re
        if temp_match := molar_cond_temp_re.search(k):
            res = _parse_temperature_dependent_molar_conductivity(
                d, factor, temp_match.group("temp"), v
            )
            # The sign is captured separately from the magnitude; apply it
            # so that sub-zero columns are not stored as positive
            # temperatures.
            if temp_match.group("sign") in minus_signs:
                res = res._replace(temp=-res.temp)

        # If data key in concentration-dependent dataset,
        # read concentration from conc_re (activity or conductivity)
        # use dataset temperature
        elif (conc_match := molar_cond_conc_re1.search(k)) or (
            conc_match := molar_cond_conc_re2.search(k)
        ):
            res = _parse_concentration_dependent_molar_conductivity(
                factor, float(conc_match.group("conc")), v
            )
        elif conc_match := cond_conc_re.search(k):
            res = _parse_concentration_dependent_conductivity(
                factor, float(conc_match.group("conc")), v
            )
        elif conc_match := activity_conc_re.search(k):
            res = _parse_mean_activity_coefficient(
                factor, float(conc_match.group("conc")), v
            )
        else:
            # Not a recognized data column (e.g. the compound name itself)
            continue

        # Split the salt concentration between its ions using the
        # stoichiometric coefficients.
        solutes = {
            salt.cation: f"{res.conc.m * salt.nu_cation} {res.conc.units}",
            salt.anion: f"{res.conc.m * salt.nu_anion} {res.conc.units}",
        }
        soln = {
            "solutes": solutes,
            "temperature": str(res.temp.to(_TEMPERATURE_UNITS)),
        }
        solute_data: dict[str, list[tuple[str, str]]] = {}
        soln_data = [(res.prop, f"{res.value.m} {res.value.units}")]
        entry = (soln, solute_data, soln_data)
        dataset.append(entry)

    return dataset
def load_crc_database() -> list[
    tuple[dict[str, str], dict[str, list[str]], list[str]]
]:
    """Read the packaged CRC database and return its decoded JSON content."""
    resource = files("pchemdb") / "_database" / DB_FILE

    with resource.open(mode="r", encoding="utf-8") as handle:
        database = json.load(handle)
    return database