Coverage for tests/test_regex_template.py: 100%
216 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-13 11:02 -0700
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-13 11:02 -0700
1# SPDX-FileCopyrightText: 2025-present Trey Hunner
2#
3# SPDX-License-Identifier: MIT
4import re
5import pytest
6from string.templatelib import Template
7from hypothesis import given, strategies as st
9import regex_template as ret
class TestCompileWithRegularStrings:
    """Verify that compile() refuses anything that is not a t-string Template."""

    def test_compile_rejects_regular_string(self):
        """A plain str argument raises TypeError."""
        expected_message = "only accepts t-string Templates"
        with pytest.raises(TypeError, match=expected_message):
            ret.compile("hello")

    def test_compile_rejects_various_types(self):
        """An int, a list and None are each rejected with the same TypeError."""
        expected_message = "only accepts t-string Templates"
        for bad_input in (123, ["hello"], None):
            with pytest.raises(TypeError, match=expected_message):
                ret.compile(bad_input)

    def test_error_message_suggests_re_compile(self):
        """The TypeError message points the caller at re.compile()."""
        expected_hint = "Use re.compile\\(\\) for regular strings"
        with pytest.raises(TypeError, match=expected_hint):
            ret.compile("hello world")
35class TestCompileWithTStrings:
36 """Test ret.compile() function with t-string Template inputs."""
38 def test_compile_tstring_basic(self):
39 text = "hello"
40 pattern = ret.compile(t"{text}")
41 assert pattern.match("hello")
42 assert not pattern.match("world")
44 def test_compile_tstring_with_flags(self):
45 text = "Hello"
46 pattern = ret.compile(t"{text}", flags=re.IGNORECASE, verbose=False)
47 assert pattern.match("hello")
48 assert pattern.match("HELLO")
49 assert pattern.match("Hello")
51 def test_compile_tstring_verbose_mode_default(self):
52 # Verbose mode should be enabled by default
53 text = "hello"
54 pattern = ret.compile(t"""
55 {text} # Match hello
56 \\s+ # One or more spaces
57 world # Match world
58 """)
59 assert pattern.match("hello world")
60 assert pattern.match("hello world")
62 def test_compile_tstring_verbose_mode_disabled(self):
63 pattern = ret.compile(t"hello world", verbose=False)
64 assert pattern.match("hello world")
65 assert not pattern.match("hello world")
67 def test_compile_tstring_empty_interpolation(self):
68 empty = ""
69 pattern = ret.compile(t"start{empty}end")
70 assert pattern.match("startend")
72 def test_compile_tstring_regex_special_chars_safe(self):
73 digits = r"\d+"
74 pattern = ret.compile(t"{digits:safe}")
75 assert pattern.match("123")
76 assert not pattern.match("abc")
78 def test_compile_tstring_auto_escape(self):
79 special_chars = ".+*?[]{}()|^$\\"
80 pattern = ret.compile(t"{special_chars}")
81 # Should match literally, not as regex special chars
82 assert pattern.match(".+*?[]{}()|^$\\")
83 assert not pattern.match("anything")
85 def test_compile_tstring_mixed_pattern(self):
86 filename = "test.txt"
87 pattern = ret.compile(t"^{filename}$")
88 assert pattern.match("test.txt")
89 assert not pattern.match("testxtxt") # . should be escaped
91 def test_compile_tstring_safe_format_spec(self):
92 regex_part = r"\d+"
93 literal_part = "file.txt"
94 pattern = ret.compile(t"{regex_part:safe}_{literal_part}")
95 assert pattern.match("123_file.txt")
96 assert not pattern.match("abc_file.txt")
97 assert not pattern.match("123_filextxt") # . should be escaped in literal_part
99 def test_compile_tstring_format_specifiers(self):
100 number = 42
101 pattern = ret.compile(t"value_{number:03d}")
102 assert pattern.match("value_042")
103 assert not pattern.match("value_42")
105 def test_compile_tstring_format_spec_then_escape(self):
106 # Format spec should be applied first, then escaping
107 value = 3.14
108 pattern = ret.compile(t"{value:.1f}") # Should format to "3.1" then escape
109 assert pattern.match("3.1")
111 def test_compile_tstring_conversion_specifiers(self):
112 value = "hello"
113 pattern = ret.compile(t"{value!r}") # Should convert to "'hello'" then escape
114 assert pattern.match("'hello'")
116 def test_compile_tstring_multiple_interpolations(self):
117 start = "^"
118 filename = "test.txt"
119 end = "$"
120 pattern = ret.compile(t"{start:safe}{filename}{end:safe}")
121 assert pattern.match("test.txt")
122 assert not pattern.match("prefix_test.txt")
123 assert not pattern.match("test.txt_suffix")
125 def test_compile_tstring_verbose_mode(self):
126 username = "john_doe"
127 domain = "example.com"
128 pattern = ret.compile(t"""
129 ^ # Start of string
130 {username} # Username (escaped)
131 @ # Literal @
132 {domain} # Domain (escaped)
133 $ # End of string
134 """)
135 assert pattern.match("john_doe@example.com")
136 assert not pattern.match("john_doe@examplexcom") # . should be escaped
138 def test_compile_tstring_numeric_interpolation(self):
139 number = 123
140 pattern = ret.compile(t"id_{number}")
141 assert pattern.match("id_123")
142 assert not pattern.match("id_456")
145class TestSafeFormatSpecifier:
146 """Test the :safe format specifier that bypasses escaping."""
148 def test_safe_regex_patterns(self):
149 digit_pattern = r"\d+"
150 word_pattern = r"\w+"
151 pattern = ret.compile(t"{digit_pattern:safe}-{word_pattern:safe}")
152 assert pattern.match("123-abc")
153 assert not pattern.match("abc-123")
155 def test_safe_vs_escaped_comparison(self):
156 regex_chars = ".+" # Valid regex pattern
158 # With :safe - should be treated as regex
159 safe_pattern = ret.compile(t"{regex_chars:safe}")
160 assert safe_pattern.match("abcd") # .+ matches multiple chars
162 # Without :safe - should be escaped
163 escaped_pattern = ret.compile(t"{regex_chars}")
164 assert escaped_pattern.match(".+") # Literal match
165 assert not escaped_pattern.match("abcd")
167 def test_safe_with_other_format_specs(self):
168 # :safe should work with other format specifiers
169 pattern_template = r"\d"
170 count = 3
171 pattern = ret.compile(t"^{pattern_template:safe}{ {count}} $")
172 # Should create ^\d{3}$
173 assert pattern.match("123")
174 assert not pattern.match("12")
175 assert not pattern.match("1234")
178class TestRegexFlags:
179 """Test regex flags functionality."""
181 def test_custom_flags(self):
182 text = "Hello"
183 pattern = ret.compile(t"{text}", flags=re.IGNORECASE, verbose=False)
184 assert pattern.match("hello")
185 assert pattern.match("HELLO")
186 assert pattern.match("Hello")
188 def test_verbose_flag_override(self):
189 # When verbose=True (default), re.VERBOSE should be added
190 comment_pattern = "hello # comment"
191 pattern = ret.compile(t"{comment_pattern:safe}")
192 assert pattern.match("hello")
194 # When verbose=False, comments should not be ignored
195 pattern = ret.compile(t"{comment_pattern:safe}", verbose=False)
196 assert not pattern.match("hello")
197 assert pattern.match("hello # comment")
199 def test_multiline_flag(self):
200 text = "test"
201 pattern = ret.compile(t"^{text}$", flags=re.MULTILINE, verbose=False)
202 assert pattern.match("test")
203 assert pattern.search("prefix\ntest\nsuffix")
206class TestEdgeCases:
207 """Test edge cases and complex scenarios."""
209 def test_literal_braces_in_template(self):
210 value = "test"
211 # Double braces should be literal
212 pattern = ret.compile(t"{ {value}} ") # Should create {test}
213 assert pattern.match("{test}")
215 def test_complex_regex_with_interpolation(self):
216 username = "john.doe"
217 domain = "example.com"
218 # Complex email validation pattern
219 pattern = ret.compile(t"""
220 ^ # Start
221 {username} # Username (escaped)
222 @ # Literal @
223 {domain} # Domain (escaped)
224 $ # End
225 """)
226 assert pattern.match("john.doe@example.com")
227 # Dots should be escaped, so this shouldn't match
228 assert not pattern.match("johnXdoe@exampleXcom")
230 def test_nested_quantifiers(self):
231 char_class = "[a-z]"
232 pattern = ret.compile(t"^{char_class:safe}{ 2,4} $") # ^[a-z]{2,4}$
233 assert pattern.match("ab")
234 assert pattern.match("abcd")
235 assert not pattern.match("a")
236 assert not pattern.match("abcde")
238 def test_interpolation_with_backslashes(self):
239 escape_seq = r"\n"
240 pattern = ret.compile(t"line1{escape_seq}")
241 # Should match literal \n, not newline
242 assert pattern.match("line1\\n")
243 assert not pattern.match("line1\n")
245 def test_unicode_in_interpolation(self):
246 unicode_text = "café"
247 pattern = ret.compile(t"{unicode_text}")
248 assert pattern.match("café")
250 def test_very_long_interpolation(self):
251 long_text = "x" * 1000
252 pattern = ret.compile(t"{long_text}")
253 assert pattern.match("x" * 1000)
254 assert not pattern.match("x" * 999)
256 def test_multiple_safe_interpolations(self):
257 start = "^"
258 middle = r"\d+"
259 end = "$"
260 pattern = ret.compile(t"{start:safe}{middle:safe}{end:safe}")
261 assert pattern.match("123")
262 assert not pattern.match("abc")
263 assert not pattern.match("123a")
265 def test_mixed_safe_and_escaped(self):
266 boundary = r"\b"
267 word = "hello.world"
268 pattern = ret.compile(t"{boundary:safe}{word}{boundary:safe}")
269 # Should match literal "hello.world" with word boundaries
270 assert pattern.search("say hello.world please")
271 # Should not match "helloxworld" since . is escaped
272 assert not pattern.search("say helloxworld please")
275class TestErrorHandling:
276 """Test error handling and invalid inputs."""
278 def test_invalid_format_spec(self):
279 # This should still work - invalid format specs are handled by Python
280 with pytest.raises(ValueError):
281 value = 42
282 ret.compile(t"{value:invalid_spec}")
284 def test_compilation_errors(self):
285 # Invalid regex should raise re.error
286 invalid_pattern = "[invalid"
287 with pytest.raises(re.error):
288 ret.compile(t"{invalid_pattern:safe}")
290 def test_tstring_compilation_errors(self):
291 # Invalid regex in t-string should also raise re.error
292 with pytest.raises(re.error):
293 pattern = "[invalid"
294 ret.compile(t"{pattern:safe}")
297class TestPropertyBasedTests:
298 """Property-based tests using Hypothesis."""
300 @given(st.text(min_size=0, max_size=100))
301 def test_escaped_text_matches_literally(self, text):
302 """Property test: any text interpolated without :safe should match literally."""
303 pattern = ret.compile(t"{text}")
304 assert pattern.match(text)
306 @given(st.text(alphabet=st.characters(blacklist_characters=r"\.+*?[]{}()|^$\r\n\t "), min_size=1, max_size=50))
307 def test_safe_text_without_special_chars(self, text):
308 """Property test: text without regex special chars should work the same with/without :safe."""
309 # Use verbose=False to avoid issues with whitespace characters
310 safe_pattern = ret.compile(t"{text:safe}", verbose=False)
311 escaped_pattern = ret.compile(t"{text}", verbose=False)
312 # Both should match the text
313 assert safe_pattern.match(text)
314 assert escaped_pattern.match(text)
316 @given(st.integers(min_value=0, max_value=999))
317 def test_numeric_interpolation(self, number):
318 """Property test: numeric interpolation should always work."""
319 pattern = ret.compile(t"{number}")
320 assert pattern.match(str(number))
322 @given(st.text(alphabet="abcdefghijklmnopqrstuvwxyz", min_size=1, max_size=20))
323 def test_alphabetic_interpolation(self, text):
324 """Property test: alphabetic text should always match literally."""
325 pattern = ret.compile(t"{text}")
326 assert pattern.match(text)