Coverage for tests/test_regex_template.py: 100%

216 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-13 11:02 -0700

1# SPDX-FileCopyrightText: 2025-present Trey Hunner 

2# 

3# SPDX-License-Identifier: MIT 

4import re 

5import pytest 

6from string.templatelib import Template 

7from hypothesis import given, strategies as st 

8 

9import regex_template as ret 

10 

11 

class TestCompileWithRegularStrings:
    """Verify that ret.compile() refuses inputs that are not t-string Templates."""

    def test_compile_rejects_regular_string(self):
        """A plain str argument raises TypeError mentioning t-string Templates."""
        plain = "hello"
        with pytest.raises(TypeError, match="only accepts t-string Templates"):
            ret.compile(plain)

    def test_compile_rejects_various_types(self):
        """An int, a list and None are each rejected with the same TypeError."""
        for bad_input in (123, ["hello"], None):
            with pytest.raises(TypeError, match="only accepts t-string Templates"):
                ret.compile(bad_input)

    def test_error_message_suggests_re_compile(self):
        """The TypeError message points the caller at re.compile()."""
        expected_hint = "Use re.compile\\(\\) for regular strings"
        with pytest.raises(TypeError, match=expected_hint):
            ret.compile("hello world")

33 

34 

35class TestCompileWithTStrings: 

36 """Test ret.compile() function with t-string Template inputs.""" 

37 

38 def test_compile_tstring_basic(self): 

39 text = "hello" 

40 pattern = ret.compile(t"{text}") 

41 assert pattern.match("hello") 

42 assert not pattern.match("world") 

43 

44 def test_compile_tstring_with_flags(self): 

45 text = "Hello" 

46 pattern = ret.compile(t"{text}", flags=re.IGNORECASE, verbose=False) 

47 assert pattern.match("hello") 

48 assert pattern.match("HELLO") 

49 assert pattern.match("Hello") 

50 

51 def test_compile_tstring_verbose_mode_default(self): 

52 # Verbose mode should be enabled by default 

53 text = "hello" 

54 pattern = ret.compile(t""" 

55 {text} # Match hello 

56 \\s+ # One or more spaces 

57 world # Match world 

58 """) 

59 assert pattern.match("hello world") 

60 assert pattern.match("hello world") 

61 

62 def test_compile_tstring_verbose_mode_disabled(self): 

63 pattern = ret.compile(t"hello world", verbose=False) 

64 assert pattern.match("hello world") 

65 assert not pattern.match("hello world") 

66 

67 def test_compile_tstring_empty_interpolation(self): 

68 empty = "" 

69 pattern = ret.compile(t"start{empty}end") 

70 assert pattern.match("startend") 

71 

72 def test_compile_tstring_regex_special_chars_safe(self): 

73 digits = r"\d+" 

74 pattern = ret.compile(t"{digits:safe}") 

75 assert pattern.match("123") 

76 assert not pattern.match("abc") 

77 

78 def test_compile_tstring_auto_escape(self): 

79 special_chars = ".+*?[]{}()|^$\\" 

80 pattern = ret.compile(t"{special_chars}") 

81 # Should match literally, not as regex special chars 

82 assert pattern.match(".+*?[]{}()|^$\\") 

83 assert not pattern.match("anything") 

84 

85 def test_compile_tstring_mixed_pattern(self): 

86 filename = "test.txt" 

87 pattern = ret.compile(t"^{filename}$") 

88 assert pattern.match("test.txt") 

89 assert not pattern.match("testxtxt") # . should be escaped 

90 

91 def test_compile_tstring_safe_format_spec(self): 

92 regex_part = r"\d+" 

93 literal_part = "file.txt" 

94 pattern = ret.compile(t"{regex_part:safe}_{literal_part}") 

95 assert pattern.match("123_file.txt") 

96 assert not pattern.match("abc_file.txt") 

97 assert not pattern.match("123_filextxt") # . should be escaped in literal_part 

98 

99 def test_compile_tstring_format_specifiers(self): 

100 number = 42 

101 pattern = ret.compile(t"value_{number:03d}") 

102 assert pattern.match("value_042") 

103 assert not pattern.match("value_42") 

104 

105 def test_compile_tstring_format_spec_then_escape(self): 

106 # Format spec should be applied first, then escaping 

107 value = 3.14 

108 pattern = ret.compile(t"{value:.1f}") # Should format to "3.1" then escape 

109 assert pattern.match("3.1") 

110 

111 def test_compile_tstring_conversion_specifiers(self): 

112 value = "hello" 

113 pattern = ret.compile(t"{value!r}") # Should convert to "'hello'" then escape 

114 assert pattern.match("'hello'") 

115 

116 def test_compile_tstring_multiple_interpolations(self): 

117 start = "^" 

118 filename = "test.txt" 

119 end = "$" 

120 pattern = ret.compile(t"{start:safe}{filename}{end:safe}") 

121 assert pattern.match("test.txt") 

122 assert not pattern.match("prefix_test.txt") 

123 assert not pattern.match("test.txt_suffix") 

124 

125 def test_compile_tstring_verbose_mode(self): 

126 username = "john_doe" 

127 domain = "example.com" 

128 pattern = ret.compile(t""" 

129 ^ # Start of string 

130 {username} # Username (escaped) 

131 @ # Literal @ 

132 {domain} # Domain (escaped) 

133 $ # End of string 

134 """) 

135 assert pattern.match("john_doe@example.com") 

136 assert not pattern.match("john_doe@examplexcom") # . should be escaped 

137 

138 def test_compile_tstring_numeric_interpolation(self): 

139 number = 123 

140 pattern = ret.compile(t"id_{number}") 

141 assert pattern.match("id_123") 

142 assert not pattern.match("id_456") 

143 

144 

145class TestSafeFormatSpecifier: 

146 """Test the :safe format specifier that bypasses escaping.""" 

147 

148 def test_safe_regex_patterns(self): 

149 digit_pattern = r"\d+" 

150 word_pattern = r"\w+" 

151 pattern = ret.compile(t"{digit_pattern:safe}-{word_pattern:safe}") 

152 assert pattern.match("123-abc") 

153 assert not pattern.match("abc-123") 

154 

155 def test_safe_vs_escaped_comparison(self): 

156 regex_chars = ".+" # Valid regex pattern 

157 

158 # With :safe - should be treated as regex 

159 safe_pattern = ret.compile(t"{regex_chars:safe}") 

160 assert safe_pattern.match("abcd") # .+ matches multiple chars 

161 

162 # Without :safe - should be escaped 

163 escaped_pattern = ret.compile(t"{regex_chars}") 

164 assert escaped_pattern.match(".+") # Literal match 

165 assert not escaped_pattern.match("abcd") 

166 

167 def test_safe_with_other_format_specs(self): 

168 # :safe should work with other format specifiers 

169 pattern_template = r"\d" 

170 count = 3 

171 pattern = ret.compile(t"^{pattern_template:safe}{ {count}} $") 

172 # Should create ^\d{3}$ 

173 assert pattern.match("123") 

174 assert not pattern.match("12") 

175 assert not pattern.match("1234") 

176 

177 

178class TestRegexFlags: 

179 """Test regex flags functionality.""" 

180 

181 def test_custom_flags(self): 

182 text = "Hello" 

183 pattern = ret.compile(t"{text}", flags=re.IGNORECASE, verbose=False) 

184 assert pattern.match("hello") 

185 assert pattern.match("HELLO") 

186 assert pattern.match("Hello") 

187 

188 def test_verbose_flag_override(self): 

189 # When verbose=True (default), re.VERBOSE should be added 

190 comment_pattern = "hello # comment" 

191 pattern = ret.compile(t"{comment_pattern:safe}") 

192 assert pattern.match("hello") 

193 

194 # When verbose=False, comments should not be ignored 

195 pattern = ret.compile(t"{comment_pattern:safe}", verbose=False) 

196 assert not pattern.match("hello") 

197 assert pattern.match("hello # comment") 

198 

199 def test_multiline_flag(self): 

200 text = "test" 

201 pattern = ret.compile(t"^{text}$", flags=re.MULTILINE, verbose=False) 

202 assert pattern.match("test") 

203 assert pattern.search("prefix\ntest\nsuffix") 

204 

205 

206class TestEdgeCases: 

207 """Test edge cases and complex scenarios.""" 

208 

209 def test_literal_braces_in_template(self): 

210 value = "test" 

211 # Double braces should be literal 

212 pattern = ret.compile(t"{ {value}} ") # Should create {test} 

213 assert pattern.match("{test}") 

214 

215 def test_complex_regex_with_interpolation(self): 

216 username = "john.doe" 

217 domain = "example.com" 

218 # Complex email validation pattern 

219 pattern = ret.compile(t""" 

220 ^ # Start 

221 {username} # Username (escaped) 

222 @ # Literal @ 

223 {domain} # Domain (escaped)  

224 $ # End 

225 """) 

226 assert pattern.match("john.doe@example.com") 

227 # Dots should be escaped, so this shouldn't match 

228 assert not pattern.match("johnXdoe@exampleXcom") 

229 

230 def test_nested_quantifiers(self): 

231 char_class = "[a-z]" 

232 pattern = ret.compile(t"^{char_class:safe}{ 2,4} $") # ^[a-z]{2,4}$ 

233 assert pattern.match("ab") 

234 assert pattern.match("abcd") 

235 assert not pattern.match("a") 

236 assert not pattern.match("abcde") 

237 

238 def test_interpolation_with_backslashes(self): 

239 escape_seq = r"\n" 

240 pattern = ret.compile(t"line1{escape_seq}") 

241 # Should match literal \n, not newline 

242 assert pattern.match("line1\\n") 

243 assert not pattern.match("line1\n") 

244 

245 def test_unicode_in_interpolation(self): 

246 unicode_text = "café" 

247 pattern = ret.compile(t"{unicode_text}") 

248 assert pattern.match("café") 

249 

250 def test_very_long_interpolation(self): 

251 long_text = "x" * 1000 

252 pattern = ret.compile(t"{long_text}") 

253 assert pattern.match("x" * 1000) 

254 assert not pattern.match("x" * 999) 

255 

256 def test_multiple_safe_interpolations(self): 

257 start = "^" 

258 middle = r"\d+" 

259 end = "$" 

260 pattern = ret.compile(t"{start:safe}{middle:safe}{end:safe}") 

261 assert pattern.match("123") 

262 assert not pattern.match("abc") 

263 assert not pattern.match("123a") 

264 

265 def test_mixed_safe_and_escaped(self): 

266 boundary = r"\b" 

267 word = "hello.world" 

268 pattern = ret.compile(t"{boundary:safe}{word}{boundary:safe}") 

269 # Should match literal "hello.world" with word boundaries 

270 assert pattern.search("say hello.world please") 

271 # Should not match "helloxworld" since . is escaped 

272 assert not pattern.search("say helloxworld please") 

273 

274 

275class TestErrorHandling: 

276 """Test error handling and invalid inputs.""" 

277 

278 def test_invalid_format_spec(self): 

279 # This should still work - invalid format specs are handled by Python 

280 with pytest.raises(ValueError): 

281 value = 42 

282 ret.compile(t"{value:invalid_spec}") 

283 

284 def test_compilation_errors(self): 

285 # Invalid regex should raise re.error 

286 invalid_pattern = "[invalid" 

287 with pytest.raises(re.error): 

288 ret.compile(t"{invalid_pattern:safe}") 

289 

290 def test_tstring_compilation_errors(self): 

291 # Invalid regex in t-string should also raise re.error 

292 with pytest.raises(re.error): 

293 pattern = "[invalid" 

294 ret.compile(t"{pattern:safe}") 

295 

296 

297class TestPropertyBasedTests: 

298 """Property-based tests using Hypothesis.""" 

299 

300 @given(st.text(min_size=0, max_size=100)) 

301 def test_escaped_text_matches_literally(self, text): 

302 """Property test: any text interpolated without :safe should match literally.""" 

303 pattern = ret.compile(t"{text}") 

304 assert pattern.match(text) 

305 

306 @given(st.text(alphabet=st.characters(blacklist_characters=r"\.+*?[]{}()|^$\r\n\t "), min_size=1, max_size=50)) 

307 def test_safe_text_without_special_chars(self, text): 

308 """Property test: text without regex special chars should work the same with/without :safe.""" 

309 # Use verbose=False to avoid issues with whitespace characters 

310 safe_pattern = ret.compile(t"{text:safe}", verbose=False) 

311 escaped_pattern = ret.compile(t"{text}", verbose=False) 

312 # Both should match the text 

313 assert safe_pattern.match(text) 

314 assert escaped_pattern.match(text) 

315 

316 @given(st.integers(min_value=0, max_value=999)) 

317 def test_numeric_interpolation(self, number): 

318 """Property test: numeric interpolation should always work.""" 

319 pattern = ret.compile(t"{number}") 

320 assert pattern.match(str(number)) 

321 

322 @given(st.text(alphabet="abcdefghijklmnopqrstuvwxyz", min_size=1, max_size=20)) 

323 def test_alphabetic_interpolation(self, text): 

324 """Property test: alphabetic text should always match literally.""" 

325 pattern = ret.compile(t"{text}") 

326 assert pattern.match(text)