pytermgui.highlighters
This module provides the Highlighter
class, and some pre-configured instances.
"""This module provides the `Highlighter` class, and some pre-configured instances."""

from __future__ import annotations

import builtins
import keyword
import re
from dataclasses import dataclass, field
from functools import lru_cache
from typing import TYPE_CHECKING, Callable, Generator, Match, Pattern, Protocol

from .markup import Token, consume_tag, escape
from .regex import RE_MARKUP

if TYPE_CHECKING:
    from .fancy_repr import FancyYield

__all__ = [
    "Highlighter",
    "RegexHighlighter",
    "highlight_tim",
    "highlight_python",
]


class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """The protocol for highlighters."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text.

        Args:
            text: The text to highlight.
            cache: If set (default), results will be stored, keyed by their respective
                inputs, and retrieved the next time the same key is given.

        Returns:
            The highlighted text.
        """


@dataclass
class RegexHighlighter:
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern of named alternatives."""

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                if one is set.
            cache: If set (default), the result is looked up in, and stored into, the
                instance's cache, keyed by the pre-formatted text. When unset, the
                cache is neither read nor written.

        Returns:
            The text with every match wrapped in its style's markup tag.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        def _insert_style(matchobj: Match) -> str:
            """Returns the match inserted into a markup style."""

            name = matchobj.lastgroup

            # No named group took part in the match. This only happens when `styles`
            # is empty (the combined pattern is then empty and matches everywhere),
            # so the matched text is handed back untouched.
            if name is None:
                return matchobj.group(0)

            content = matchobj.group(name)

            if self.match_formatter is not None:
                content = self.match_formatter(matchobj, content)

            if content == "":
                return ""

            tag = f"{self.prefix}{name}"

            # Built directly rather than through `str.format`, so braces inside the
            # tag are not treated as replacement fields.
            return f"[{tag}]{content}[/{tag}]"

        highlighted = self._pattern.sub(_insert_style, text)

        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted

    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
        """Yields some fancy looking repr text."""

        preview = self("highlight_python()") + "\x1b[0m"
        pattern = self._pattern.pattern

        # Keep the repr short; the combined pattern can get very long.
        if len(pattern) > 40:
            pattern = pattern[:38] + "..."

        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
        yield {"text": str(preview), "highlight": False}

        yield ">"


# Kept at module level so the cache persists between `highlight_tim` calls; a cached
# function created inside `highlight_tim` would start out empty on every call.
@lru_cache(1048)
def _highlight_tim_cached(txt: str) -> str:
    """Builds the highlighted version of some TIM code.

    Walks over every `RE_MARKUP` match in `txt`, emitting the text between matches
    wrapped in the currently active markup, and each tag as its token's
    `prettified_markup`.
    """

    output = ""
    cursor = 0
    active_tokens: list[Token] = []

    def _get_active_markup() -> str:
        active_markup = " ".join(tkn.markup for tkn in active_tokens)

        if active_markup == "":
            return ""

        return f"[{active_markup}]"

    for matchobj in RE_MARKUP.finditer(txt):
        start, end = matchobj.span()

        # Plain text between the previous tag group and this one.
        if cursor < start:
            if cursor > 0:
                # Closes the tag group left open by the previous iteration.
                output += "]"

            output += _get_active_markup()
            output += f"{txt[cursor:start]}[/]"

        *_, tags = matchobj.groups()

        output += "["
        for tag in tags.split():
            token = consume_tag(tag)
            output += f"{token.prettified_markup} "

            if Token.is_clear(token):
                active_tokens = [
                    tkn for tkn in active_tokens if not token.targets(tkn)
                ]

            else:
                active_tokens.append(token)

        output = output.rstrip()
        cursor = end

    # Anything after the last tag group, including a single trailing character.
    if cursor < len(txt):
        if cursor > 0:
            output += "]"

        output += _get_active_markup()
        output += f"{txt[cursor:]}"

        if len(active_tokens) > 0:
            output += "[/]"

    # A tag group at the very end of the text is still open at this point.
    if output.count("[") != output.count("]"):
        output += "]"

    return output


def highlight_tim(text: str, cache: bool = True) -> str:
    """Highlights some TIM code.

    Args:
        text: The TIM markup text to highlight.
        cache: If set (default), results are served from, and stored into, an LRU
            cache shared by all calls. When unset, the cache is bypassed.

    Returns:
        The highlighted text.
    """

    if cache:
        return _highlight_tim_cached(text)

    return _highlight_tim_cached.__wrapped__(text)


_BUILTIN_NAMES = "|".join(f"(?:{item})" for item in dir(builtins))
_KEYWORD_NAMES = "|".join(
    f"(?:{keyw})" for keyw in list(keyword.kwlist) + ["builtin", "function", "module"]
)

highlight_python = RegexHighlighter(
    pre_formatter=escape,
    prefix="code.",
    styles=[
        ("multiline_str", r"([frbu]*)\"{3}([\s\S]*?)(?<!\\)\"{3}"),
        (
            "str",
            r"([frbu]*(\".*?(?<!\\)\")|(\'.*?(?<!\\)\'))",
        ),
        ("comment", "(#.*)"),
        ("keyword", rf"\b(?<![\.\-])()({_KEYWORD_NAMES}+)\b"),
        ("builtin", rf"\b(?<!\.)({_BUILTIN_NAMES})\b"),
        ("identifier", r"([^ \.=]+)(?=\()"),
        ("global", r"(?<=\b)([A-Z]\w+)"),
        ("number", r"(?<=\b)((?:0x[\da-zA-Z]+)|(?:\d+))"),
    ],
)
class Highlighter(Protocol):  # pylint: disable=too-few-public-methods
    """Structural type for any callable that can highlight a string."""

    def __call__(self, text: str, cache: bool = True) -> str:
        """Return a highlighted version of `text`.

        Args:
            text: The string to apply highlighting to.
            cache: When set (the default), the result is stored under its input
                and handed back the next time the same input is given.

        Returns:
            The highlighted string.
        """
The protocol for highlighters.
def _no_init_or_replace_init(self, *args, **kwargs):
    """Placeholder `__init__` that swaps itself out for the real one on first use.

    Raises `TypeError` when the instance's class is flagged as a protocol.
    Otherwise, if the class still has this function as its `__init__`, the MRO
    is searched for the first different `__init__`, which is installed on the
    class and then called with the given arguments.
    """
    klass = type(self)

    if klass._is_protocol:
        raise TypeError('Protocols cannot be instantiated')

    # The class already has its own `__init__`, so there is nothing to resolve.
    # Resolving again here can lead to RecursionError. See bpo-45121.
    if klass.__init__ is not _no_init_or_replace_init:
        return

    # Lazily walk the MRO, looking at what each class itself defines as
    # `__init__`; classes that define none count as this placeholder.
    declared_inits = (
        base.__dict__.get('__init__', _no_init_or_replace_init)
        for base in klass.__mro__
    )

    # The first non-placeholder `__init__` replaces the placeholder on the
    # class, so later instantiations go straight to it. The `object.__init__`
    # fallback should not be needed in practice.
    klass.__init__ = next(
        (init for init in declared_inits if init is not _no_init_or_replace_init),
        object.__init__,
    )

    klass.__init__(self, *args, **kwargs)
@dataclass
class RegexHighlighter:
    """A class to highlight strings using regular expressions.

    This class must be provided with a list of styles. These styles are really just a
    tuple of the markup alias name, and their associated RE patterns. If *all* aliases
    in the instance use the same prefix, it can be given under the `prefix` key and
    omitted from the style names.

    On construction, the instance will combine all of its patterns into a monster regex
    including named capturing groups. The general format is something like:

        (?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...

    Calling this instance will then replace all matches, going in the order of
    definition, with style-injected versions. These follow the format:

        [{prefix?}{name}]{content}[/{prefix}{name}]

    Oddities to keep in mind:
    - Regex replace goes in the order of the defined groups, and is non-overlapping. Two
        groups cannot match the same text.
    - Because of how capturing groups work, everything within the patterns will be
        matched. To look for context around a match, look-around assertions can be used.
    """

    styles: list[tuple[str, str]]
    """A list of tuples of (style_alias, pattern_str)."""

    prefix: str = ""
    """Some string to insert before each style alias."""

    pre_formatter: Callable[[str], str] | None = None
    """A callable that formats the input string, before any highlighting is done to it."""

    match_formatter: Callable[[Match, str], str] | None = None
    """A callable of (match, content) that gets called on every match.

    Its return value will be used as the content that the already set highlighting will apply
    to. Useful to trim text, or apply other transformations before inserting it back.
    """

    re_flags: int = 0
    """All regex flags to apply when compiling the generated pattern, OR-d (|) together."""

    _pattern: Pattern = field(init=False)
    _highlight_cache: dict[str, str] = field(init=False, default_factory=dict)

    def __post_init__(self) -> None:
        """Combines all styles into one pattern of named alternatives."""

        pattern = "|".join(f"(?P<{name}>{ptrn})" for name, ptrn in self.styles)

        self._pattern = re.compile(pattern, flags=self.re_flags)

    def __call__(self, text: str, cache: bool = True) -> str:
        """Highlights the given text, using the combined regex pattern.

        Args:
            text: The text to highlight. It is passed through `pre_formatter` first,
                if one is set.
            cache: If set (default), the result is looked up in, and stored into, the
                instance's cache, keyed by the pre-formatted text. When unset, the
                cache is neither read nor written.

        Returns:
            The text with every match wrapped in its style's markup tag.
        """

        if self.pre_formatter is not None:
            text = self.pre_formatter(text)

        if cache and text in self._highlight_cache:
            return self._highlight_cache[text]

        def _insert_style(matchobj: Match) -> str:
            """Returns the match inserted into a markup style."""

            name = matchobj.lastgroup

            # No named group took part in the match. This only happens when `styles`
            # is empty (the combined pattern is then empty and matches everywhere),
            # so the matched text is handed back untouched.
            if name is None:
                return matchobj.group(0)

            content = matchobj.group(name)

            if self.match_formatter is not None:
                content = self.match_formatter(matchobj, content)

            if content == "":
                return ""

            tag = f"{self.prefix}{name}"

            # Built directly rather than through `str.format`, so braces inside the
            # tag are not treated as replacement fields.
            return f"[{tag}]{content}[/{tag}]"

        highlighted = self._pattern.sub(_insert_style, text)

        if cache:
            self._highlight_cache[text] = highlighted

        return highlighted

    def __fancy_repr__(self) -> Generator[FancyYield, None, None]:
        """Yields some fancy looking repr text."""

        preview = self("highlight_python()") + "\x1b[0m"
        pattern = self._pattern.pattern

        # Keep the repr short; the combined pattern can get very long.
        if len(pattern) > 40:
            pattern = pattern[:38] + "..."

        yield f"<{type(self).__name__} pattern: {pattern!r}, preview: "
        yield {"text": str(preview), "highlight": False}

        yield ">"
A class to highlight strings using regular expressions.
This class must be provided with a list of styles. These styles are really just a
tuple of the markup alias name, and their associated RE patterns. If all aliases
in the instance use the same prefix, it can be given under the prefix
key and
omitted from the style names.
On construction, the instance will combine all of its patterns into a monster regex including named capturing groups. The general format is something like:
(?P<{name1}>{pattern1})|(?P<{name2}>{pattern2})|...
Calling this instance will then replace all matches, going in the order of definition, with style-injected versions. These follow the format:
[{prefix?}{name}]{content}[/{prefix}{name}]
Oddities to keep in mind:
- Regex replace goes in the order of the defined groups, and is non-overlapping. Two groups cannot match the same text.
- Because of how capturing groups work, everything within the patterns will be matched. To look for context around a match, look-around assertions can be used.
A callable that formats the input string, before any highlighting is done to it.
# Kept at module level so the cache persists between `highlight_tim` calls; a cached
# function created inside `highlight_tim` would start out empty on every call.
@lru_cache(1048)
def _highlight_tim_cached(txt: str) -> str:
    """Builds the highlighted version of some TIM code.

    Walks over every `RE_MARKUP` match in `txt`, emitting the text between matches
    wrapped in the currently active markup, and each tag as its token's
    `prettified_markup`.
    """

    output = ""
    cursor = 0
    active_tokens: list[Token] = []

    def _get_active_markup() -> str:
        active_markup = " ".join(tkn.markup for tkn in active_tokens)

        if active_markup == "":
            return ""

        return f"[{active_markup}]"

    for matchobj in RE_MARKUP.finditer(txt):
        start, end = matchobj.span()

        # Plain text between the previous tag group and this one.
        if cursor < start:
            if cursor > 0:
                # Closes the tag group left open by the previous iteration.
                output += "]"

            output += _get_active_markup()
            output += f"{txt[cursor:start]}[/]"

        *_, tags = matchobj.groups()

        output += "["
        for tag in tags.split():
            token = consume_tag(tag)
            output += f"{token.prettified_markup} "

            if Token.is_clear(token):
                active_tokens = [
                    tkn for tkn in active_tokens if not token.targets(tkn)
                ]

            else:
                active_tokens.append(token)

        output = output.rstrip()
        cursor = end

    # Anything after the last tag group, including a single trailing character.
    if cursor < len(txt):
        if cursor > 0:
            output += "]"

        output += _get_active_markup()
        output += f"{txt[cursor:]}"

        if len(active_tokens) > 0:
            output += "[/]"

    # A tag group at the very end of the text is still open at this point.
    if output.count("[") != output.count("]"):
        output += "]"

    return output


def highlight_tim(text: str, cache: bool = True) -> str:
    """Highlights some TIM code.

    Args:
        text: The TIM markup text to highlight.
        cache: If set (default), results are served from, and stored into, an LRU
            cache shared by all calls. When unset, the cache is bypassed.

    Returns:
        The highlighted text.
    """

    if cache:
        return _highlight_tim_cached(text)

    return _highlight_tim_cached.__wrapped__(text)
Highlights some TIM code.