Coverage for src/pchemdb/utils.py: 84%

39 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-19 09:48 -0700

1"""Utilities for parsing molar conductivity data from CRC."""

2 

3import re 

4 

5from pyEQL.salt_ion_match import Salt 

6 

# Unbracketed ion: "NH4" or an element symbol (one capital letter plus an
# optional lowercase letter), followed by an optional numeric subscript.
ion_re1 = re.compile(
    r"(?P<ion>((NH4)|([A-Z][a-z]?)))(?P<ion_sub>\d+)?"
)

# Ion wrapped in parentheses, followed by an optional numeric subscript.
ion_re2 = re.compile(
    r"\((?P<ion>[][A-Za-z0-9]+)\)(?P<ion_sub>\d+)?"
)

# Ion wrapped in square brackets, followed by an optional numeric subscript.
ion_re3 = re.compile(
    r"\[(?P<ion>[][A-Za-z0-9]+)\](?P<ion_sub>\d+)?"
)

# NOTE(review): presumably Kelvin (25 degrees Celsius) -- confirm with callers.
DEFAULT_TEMPERATURE = 298.15

# Signed oxidation state keyed by ion formula. The entries are grouped by
# charge; the resulting key order matches the order they are listed in.
ION_TO_OXIDATION_STATE: dict[str, int] = {
    # Singly charged anions.
    **dict.fromkeys(
        (
            "F",
            "Cl",
            "Br",
            "I",
            "NO3",
            "NO2",
            "ClO4",
            "ClO3",
            "ClO2",
            "ClO",
            "HCO3",
            "OH",
        ),
        -1,
    ),
    # Doubly and triply charged anions.
    **dict.fromkeys(("CO3", "SO4"), -2),
    "PO4": -3,
    # Singly charged cations.
    **dict.fromkeys(("H", "Li", "Na", "K", "Rb", "Cs", "Fr"), 1),
    # Doubly charged cations.
    **dict.fromkeys(("Be", "Mg", "Ca", "Sr", "Ba", "Ra"), 2),
    # The only polyatomic cation in the table.
    "NH4": 1,
}

42 

43 

def formula_to_salt(formula: str) -> Salt:
    """Convert a chemical formula into a Salt.

    The ion at the start of ``formula`` is taken as the cation and the rest
    of the string as the anion. One oxidation state is looked up in
    ``ION_TO_OXIDATION_STATE`` (the cation's if known, otherwise the
    anion's) and the other is derived from charge balance using the parsed
    subscripts.

    Args:
        formula: A chemical formula written with the cation first (e.g., KCl,
            Na2SO4, etc.).

    Returns:
        A Salt built from the signed ion strings (e.g., ``Na+1`` and
        ``SO4-2`` for ``Na2SO4``).

    Raises:
        ValueError: If no cation can be parsed from the start of ``formula``,
            if nothing is left over to serve as the anion, or if neither
            ion has a known oxidation state.

    Warning:
        This function does not work for formulas with polyatomic cations other
        than NH4+.
    """

    def _parse_ion(
        ion: str, regexes: list[re.Pattern[str]]
    ) -> re.Match[str] | None:
        # Every pattern is tried; when several match, the last one wins.
        match = None
        for ion_re in regexes:
            match = ion_re.match(ion) or match

        return match

    cation_match = _parse_ion(formula, [ion_re1, ion_re2, ion_re3])

    if cation_match is None:
        msg = f"Unable to parse formula: {formula} to salt"
        raise ValueError(msg)

    cation = cation_match.group("ion")
    cation_sub = int(cation_match.group("ion_sub") or 1)
    # Everything after the cation (and its subscript) is the anion text.
    anion = formula.removeprefix(cation_match[0])

    if not anion:
        # A lone ion such as "Na" would otherwise produce an anion string
        # consisting of nothing but a charge.
        msg = f"Unable to parse formula: {formula} to salt"
        raise ValueError(msg)

    # The unbracketed pattern is anchored with "$" so it only accepts an
    # anion that is a single symbol plus subscript (e.g. "Cl2"). Polyatomic
    # text such as "SO4" fails to match and is kept whole by the else branch.
    anion_match = _parse_ion(
        anion, [re.compile(ion_re1.pattern + "$"), ion_re2, ion_re3]
    )

    if anion_match:
        anion = anion_match.group("ion")
        anion_sub = int(anion_match.group("ion_sub") or 1)
    else:
        anion_sub = 1

    cation_ox_state = ION_TO_OXIDATION_STATE.get(cation)

    if cation_ox_state is None:
        # Unknown cation: fall back to the anion's tabulated state.
        anion_ox_state = ION_TO_OXIDATION_STATE.get(anion)
    else:
        # Known cation: the anion charge follows from charge balance.
        anion_ox_state = -int(cation_ox_state * cation_sub / anion_sub)

    if anion_ox_state is None:
        msg = f"Unable to determine oxidation states for formula: {formula}"
        raise ValueError(msg)

    # Recompute the cation charge from the anion so both values are set on
    # either path above.
    cation_ox_state = -int(anion_ox_state * anion_sub / cation_sub)

    return Salt(
        cation=f"{cation}{cation_ox_state:+}",
        anion=f"{anion}{anion_ox_state:+}",
    )

102 

103 

104# TODO: Condense 

def condense(
    dataset: list[tuple[dict[str, str], dict[str, list[str]], list[str]]],
) -> list[tuple[dict[str, str], dict[str, list[str]], list[str]]]:
    """Condense a solution dataset (placeholder, not yet implemented).

    Args:
        dataset: A list of Solution, SoluteData, SolutionData 3-tuples.

    Returns:
        Currently always a new empty list, whatever ``dataset`` contains.

    Note:
        Intended behavior: combine data with the same anion and cation into
        a single entry.
    """
    condensed: list[
        tuple[dict[str, str], dict[str, list[str]], list[str]]
    ] = []

    if not dataset:
        # Nothing to condense.
        return condensed

    # Merging is not implemented yet, so non-empty input also yields [].
    return condensed

117 return []