phml.core.nodes.nodes
1from __future__ import annotations 2 3from functools import cached_property, lru_cache 4from typing import Optional, overload 5 6__all__ = [ 7 "Element", 8 "Root", 9 "Node", 10 "DocType", 11 "Parent", 12 "PI", 13 "Comment", 14 "Literal", 15 "Point", 16 "Position", 17 "Text", 18 "NODE" 19] 20 21def leading_spaces(content: str | list[str]) -> int: 22 """Get the leading offset of the first line of the string.""" 23 content = content.split("\n") if isinstance(content, str) else content 24 return len(content[0]) - len(content[0].lstrip()) 25 26def strip_blank_lines(data_lines: list[str]) -> list[str]: 27 """Strip the blank lines at the start and end of a list.""" 28 data_lines = [line.replace("\r\n", "\n") for line in data_lines] 29 # remove leading blank lines 30 for idx in range(0, len(data_lines)): # pylint: disable=consider-using-enumerate 31 if data_lines[idx].strip() != "": 32 data_lines = data_lines[idx:] 33 break 34 if idx == len(data_lines) - 1: 35 data_lines = [] 36 break 37 38 # Remove trailing blank lines 39 if len(data_lines) > 0: 40 for idx in range(len(data_lines) - 1, -1, -1): 41 if data_lines[idx].replace("\n", " ").strip() != "": 42 data_lines = data_lines[: idx + 1] 43 break 44 45 return data_lines 46 47def normalize_indent(content: str, indent: int = 0) -> str: 48 """Normalize the indent between all lines. 49 50 Args: 51 content (str): The content to normalize the indent for 52 indent (bool): The amount of offset to add to each line after normalization. 53 54 Returns: 55 str: The normalized string 56 """ 57 58 content = strip_blank_lines(str(content).split("\n")) 59 if len(content) > 0: 60 offset = len(content[0]) - len(content[0].lstrip()) 61 lines = [] 62 for line in content: 63 if len(line) > 0 and leading_spaces(line) >= offset: 64 lines.append(" " * indent + line[offset:]) 65 else: 66 lines.append(line) 67 return "\n".join(lines) 68 return "" 69 70class Point: 71 """Represents one place in a source file. 
72 73 The line field (1-indexed integer) represents a line in a source file. The column field 74 (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer) 75 represents a character in a source file. 76 """ 77 78 def __init__(self, line: int, column: int, offset: Optional[int] = None): 79 if line is None or line < 0: 80 raise IndexError(f"Point.line must be >= 0 but was {line}") 81 82 self.line = line 83 84 if column is None or column < 0: 85 raise IndexError(f"Point.column must be >= 0 but was {column}") 86 87 self.column = column 88 89 if offset is not None and offset < 0: 90 raise IndexError(f"Point.offset must be >= 0 or None but was {line}") 91 92 self.offset = offset 93 94 def __eq__(self, obj) -> bool: 95 return bool( 96 obj is not None 97 and isinstance(obj, self.__class__) 98 and self.line == obj.line 99 and self.column == obj.column 100 ) 101 102 def __repr__(self) -> str: 103 return f"point(line: {self.line}, column: {self.column}, offset: {self.offset})" 104 105 def __str__(self) -> str: 106 return f"{self.line}:{self.column}" 107 108class Position: 109 """Position represents the location of a node in a source file. 110 111 The `start` field of `Position` represents the place of the first character 112 of the parsed source region. The `end` field of Position represents the place 113 of the first character after the parsed source region, whether it exists or not. 114 The value of the `start` and `end` fields implement the `Point` interface. 115 116 The `indent` field of `Position` represents the start column at each index 117 (plus start line) in the source region, for elements that span multiple lines. 118 119 If the syntactic unit represented by a node is not present in the source file at 120 the time of parsing, the node is said to be `generated` and it must not have positional 121 information. 
122 """ 123 124 @overload 125 def __init__( 126 self, 127 start: tuple[int, int, int | None], 128 end: tuple[int, int, int | None], 129 indent: Optional[int] = None, 130 ): 131 """ 132 Args: 133 start (tuple[int, int, int | None]): Tuple representing the line, column, and optional 134 offset of the start point. 135 end (tuple[int, int, int | None]): Tuple representing the line, column, and optional 136 offset of the end point. 137 indent (Optional[int], optional): The indent amount for the start of the position. 138 """ 139 ... 140 141 def __init__(self, start: Point, end: Point, indent: Optional[int] = None): 142 """ 143 Args: 144 start (Point): Starting point of the position. 145 end (Point): End point of the position. 146 indent (int | None): The indent amount for the start of the position. 147 """ 148 149 self.start = ( 150 Point(start[0], start[1], start[2] if len(start) == 3 else None) 151 if isinstance(start, tuple) 152 else start 153 ) 154 self.end = ( 155 Point(end[0], end[1], end[2] if len(end) == 3 else None) 156 if isinstance(end, tuple) 157 else end 158 ) 159 160 if indent is not None and indent < 0: 161 raise IndexError(f"Position.indent value must be >= 0 or None but was {indent}") 162 163 self.indent = indent 164 165 def __eq__(self, obj) -> bool: 166 return bool( 167 obj is not None 168 and isinstance(obj, Position) 169 and self.start == obj.start 170 and self.end == obj.end 171 ) 172 173 def as_dict(self) -> dict: 174 """Convert the position object to a dict.""" 175 return { 176 "start": { 177 "line": self.start.line, 178 "column": self.start.column, 179 "offset": self.start.offset, 180 }, 181 "end": {"line": self.end.line, "column": self.end.column, "offset": self.end.offset}, 182 "indent": self.indent, 183 } 184 185 def __repr__(self) -> str: 186 indent = f" ~ {self.indent}" if self.indent is not None else "" 187 return f"<{self.start}-{self.end}{indent}>" 188 189 def __str__(self) -> str: 190 return repr(self) 191 192class Node: # pylint: 
disable=too-few-public-methods 193 """All node values can be expressed in JSON as: string, number, 194 object, array, true, false, or null. This means that the syntax tree should 195 be able to be converted to and from JSON and produce the same tree. 196 For example, in JavaScript, a tree can be passed through JSON.parse(JSON.phml(tree)) 197 and result in the same tree. 198 """ 199 200 position: Position 201 """The location of a node in a source document. 202 The value of the position field implements the Position interface. 203 The position field must not be present if a node is generated. 204 """ 205 206 def __init__( 207 self, 208 position: Optional[Position] = None, 209 ): 210 self.position = position 211 212 @property 213 def type(self) -> str: 214 """Non-empty string representing the variant of a node. 215 This field can be used to determine the type a node implements.""" 216 return self.__class__.__name__.lower() 217 218class Parent(Node): # pylint: disable=too-few-public-methods 219 """Parent (UnistParent) represents a node in hast containing other nodes (said to be children). 220 221 Its content is limited to only other hast content. 
222 """ 223 224 def __init__(self, position: Optional[Position] = None, children: Optional[list] = None): 225 super().__init__(position) 226 227 if children is not None: 228 for child in children: 229 if hasattr(child, "type") and child.type in [ 230 "element", 231 "text", 232 "doctype", 233 "root", 234 "comment", 235 ]: 236 child.parent = self 237 238 self.children: list[Element | DocType | Comment | Text] = children or [] 239 240 def append(self, node: NODE): 241 """Add a node to the nested children of the current parent node.""" 242 node.parent = self 243 self.children.append(node) 244 245 def extend(self, nodes: list[NODE]): 246 """Add a node to the nested children of the current parent node.""" 247 for node in nodes: 248 self.append(node) 249 250 def insert(self, index: int, node: NODE): 251 """Insert a node into a specific position in the current parent node's children.""" 252 node.parent = self 253 self.children.insert(index, node) 254 255 def remove(self, node: NODE): 256 """Remove a specific node from the current parent node's children.""" 257 self.children.remove(node) 258 259class Root(Parent): 260 """Root (Parent) represents a document. 261 262 Root can be used as the root of a tree, or as a value 263 of the content field on a 'template' Element, never as a child. 264 """ 265 266 def __init__( 267 self, 268 position: Optional[Position] = None, 269 children: Optional[list] = None, 270 ): 271 super().__init__(position, children) 272 self.parent = None 273 274 def __eq__(self, obj) -> bool: 275 return bool( 276 obj is not None 277 and isinstance(obj, Root) 278 and len(self.children) == len(obj.children) 279 and all(child == obj_child for child, obj_child in zip(self.children, obj.children)) 280 ) 281 282 def __repr__(self) -> str: 283 return f"root [{len(self.children)}]" 284 285class Element(Parent): 286 """Element (Parent) represents an Element ([DOM]). 287 288 A tagName field must be present. It represents the element's local name ([DOM]). 
289 290 The properties field represents information associated with the element. 291 The value of the properties field implements the Properties interface. 292 293 If the tagName field is 'template', a content field can be present. The value 294 of the content field implements the Root interface. 295 296 If the tagName field is 'template', the element must be a leaf. 297 298 If the tagName field is 'noscript', its children should be represented as if 299 scripting is disabled ([HTML]). 300 301 302 For example, the following HTML: 303 304 ```html 305 <a href="https://alpha.com" class="bravo" download></a> 306 ``` 307 308 Yields: 309 310 ```javascript 311 { 312 type: 'element', 313 tagName: 'a', 314 properties: { 315 href: 'https://alpha.com', 316 className: ['bravo'], 317 download: true 318 }, 319 children: [] 320 } 321 ``` 322 """ 323 324 def __init__( 325 self, 326 tag: str = "element", 327 properties: Optional[dict[str, str]] = None, 328 parent: Optional[Element | Root] = None, 329 startend: bool = False, 330 **kwargs, 331 ): 332 super().__init__(**kwargs) 333 self.properties = properties or {} 334 self.tag = tag 335 self.startend = startend 336 self.parent = parent 337 self.context = {} 338 339 def __contains__(self, index: str) -> str: 340 return index in self.properties 341 342 def __getitem__(self, index: str) -> str: 343 return self.properties[index] 344 345 def __setitem__(self, index: str, value: str): 346 if not isinstance(index, str) or not isinstance(value, (str, bool)): 347 raise TypeError("Index must be a str and value must be either str or bool.") 348 349 self.properties[index] = value 350 351 def __delitem__(self, index: str): 352 if index in self.properties: 353 self.properties.pop(index, None) 354 355 def __eq__(self, obj) -> bool: 356 return bool( 357 obj is not None 358 and isinstance(obj, Element) 359 and self.tag == obj.tag 360 and self.startend == obj.startend 361 and self.properties == obj.properties 362 and len(self.children) == 
len(obj.children) 363 and all(child == obj_child for child, obj_child in zip(self.children, obj.children)) 364 ) 365 366 def start_tag(self) -> str: 367 """Builds the open/start tag for the element. 368 369 Note: 370 It will return `/>` if the tag is self closing. 371 372 Returns: 373 str: Built element start tag. 374 """ 375 opening = f"<{self.tag}" 376 377 attributes = [] 378 for prop in self.properties: 379 if isinstance(self[prop], bool) or self[prop] in ["yes", "no"]: 380 if self[prop] == "yes" or self[prop]: 381 attributes.append(prop) 382 else: 383 attributes.append(f'{prop}="{self[prop]}"') 384 if len(attributes) > 0: 385 attributes = " " + " ".join(attributes) 386 else: 387 attributes = "" 388 389 closing = f"{'/' if self.startend else ''}>" 390 391 if closing == "/>" and attributes != "": 392 return opening + attributes + " " + closing 393 return opening + attributes + closing 394 395 def end_tag(self) -> str: 396 """Build the elements end tag. 397 398 Returns: 399 str: Built element end tag. 400 """ 401 return f"</{self.tag}>" if not self.startend else "" 402 403 def __repr__(self) -> str: 404 out = f"{self.type}(tag: {self.tag}, properties: {self.properties}, \ 405startend: {self.startend}, children: {len(self.children)})" 406 return out 407 408class PI(Node): 409 """A processing instruction node. Mainly used for XML.""" 410 411 def __init__(self, tag: str, properties: dict, position: Optional[Position] = None) -> None: 412 super().__init__(position) 413 self.tag = tag 414 self.properties = properties 415 416 def stringify(self, indent: int = 0): # pylint: disable=unused-argument 417 """Construct the string representation of the processing instruction node.""" 418 attributes = " ".join(f'{key}="{value}"' for key, value in self.properties.items()) 419 return f"<?{self.tag} {attributes}?>" 420 421class DocType(Node): 422 """Doctype (Node) represents a DocumentType ([DOM]). 
423 424 Example: 425 426 ```html 427 <!doctype html> 428 ``` 429 430 Yields: 431 432 ```javascript 433 {type: 'doctype'} 434 ``` 435 """ 436 437 def __init__( 438 self, 439 lang: Optional[str] = None, 440 parent: Optional[Element | Root] = None, 441 position: Optional[Position] = None, 442 ): 443 super().__init__(position) 444 self.parent = parent 445 self.lang = lang or 'html' 446 447 def __eq__(self, obj) -> bool: 448 if obj is None: 449 return False 450 451 if hasattr(obj, "type") and obj.type == self.type: 452 if self.lang == obj.lang: 453 return True 454 return False 455 456 def stringify(self, indent: int = 0) -> str: # pylint: disable=unused-argument 457 """Build indented html string of html doctype element. 458 459 Returns: 460 str: Built html of doctype element 461 """ 462 return f"<!DOCTYPE {self.lang or 'html'}>" 463 464 def __repr__(self) -> str: 465 return f"node.doctype({self.lang or 'html'})" 466 467class Literal(Node): 468 """Literal (UnistLiteral) represents a node in hast containing a value.""" 469 470 position: Position 471 """The location of a node in a source document. 472 The value of the position field implements the Position interface. 473 The position field must not be present if a node is generated. 474 """ 475 476 value: str 477 """The Literal nodes value. 
All literal values must be strings""" 478 479 def __init__( 480 self, 481 value: str = "", 482 parent: Optional[Element | Root] = None, 483 position: Optional[Position] = None, 484 ): 485 super().__init__(position) 486 self.value = str(value) 487 self.parent = parent 488 489 def __eq__(self, obj) -> bool: 490 return bool(obj is not None and self.type == obj.type and self.value == obj.value) 491 492 def normalized(self, indent: int = 0) -> str: 493 """Get the normalized indented value with leading and trailing blank lines stripped.""" 494 return normalize_indent(self.value, indent) 495 496 def stringify(self, indent: int = 0) -> str: 497 if "pre" in self.get_ancestry(): 498 return self.value 499 return self.normalized(indent).strip() 500 501 def get_ancestry(self) -> list[str]: 502 """Get the ancestry of the literal node. 503 504 Used to validate whether there is a `pre` element in the ancestry. 505 """ 506 507 def get_parent(parent) -> list[str]: 508 result = [] 509 510 if parent is not None and hasattr(parent, "tag"): 511 result.append(parent.tag) 512 513 if parent.parent is not None: 514 result.extend(get_parent(parent.parent)) 515 516 return result 517 518 return get_parent(self.parent) 519 520class Text(Literal): 521 """Text (Literal) represents a Text ([DOM]). 522 523 Example: 524 525 ```html 526 <span>Foxtrot</span> 527 ``` 528 529 Yields: 530 531 ```javascript 532 { 533 type: 'element', 534 tagName: 'span', 535 properties: {}, 536 children: [{type: 'text', value: 'Foxtrot'}] 537 } 538 ``` 539 """ 540 541 @cached_property 542 def num_lines(self) -> int: 543 """Determine the number of lines the text has.""" 544 return len([line for line in str(self.value).split("\n") if line.strip() != ""]) 545 546 def __repr__(self) -> str: 547 return f"literal.text('{self.value}')" 548 549class Comment(Literal): 550 """Comment (Literal) represents a Comment ([DOM]). 
551 552 Example: 553 ```html 554 <!--Charlie--> 555 ``` 556 """ 557 558 def stringify(self, indent: int = 0) -> str: 559 """Build indented html string of html comment. 560 561 Returns: 562 str: Built html of comment 563 """ 564 lines = [line for line in self.value.split("\n") if line.strip() != ""] 565 if len(lines) > 1: 566 start = f"{' ' * indent}<!--{lines[0].rstrip()}" 567 end = f"{' ' * indent}{lines[-1].lstrip()}-->" 568 for i in range(1, len(lines) - 1): 569 lines[i] = (' ' * indent) + lines[i].strip() 570 lines = [start, *lines[1:-1], end] 571 return "\n".join(lines) 572 return ' ' * indent + f"<!--{self.value}-->" 573 574 def __repr__(self) -> str: 575 return f"literal.comment(value: {self.value})" 576 577NODE = Root | Element | Text | Comment | DocType | Parent | Node | Literal
class Element(Parent):
    """Element (Parent) represents an Element ([DOM]).

    A tagName field must be present. It represents the element's local name ([DOM]).

    The properties field represents information associated with the element.
    The value of the properties field implements the Properties interface.

    If the tagName field is 'template', a content field can be present. The value
    of the content field implements the Root interface.

    If the tagName field is 'template', the element must be a leaf.

    If the tagName field is 'noscript', its children should be represented as if
    scripting is disabled ([HTML]).


    For example, the following HTML:

    ```html
    <a href="https://alpha.com" class="bravo" download></a>
    ```

    Yields:

    ```javascript
    {
        type: 'element',
        tagName: 'a',
        properties: {
            href: 'https://alpha.com',
            className: ['bravo'],
            download: true
        },
        children: []
    }
    ```
    """

    def __init__(
        self,
        tag: str = "element",
        properties: Optional[dict[str, str]] = None,
        parent: Optional[Element | Root] = None,
        startend: bool = False,
        **kwargs,
    ):
        """
        Args:
            tag (str): The element's tag name.
            properties (dict | None): Attribute name to value mapping.
            parent (Element | Root | None): The node that owns this element.
            startend (bool): Whether the element is self closing.
            **kwargs: Forwarded to the parent class initializer.
        """
        super().__init__(**kwargs)
        self.properties = properties or {}
        self.tag = tag
        self.startend = startend
        self.parent = parent
        self.context = {}

    def __contains__(self, index: str) -> bool:
        return index in self.properties

    def __getitem__(self, index: str) -> str:
        return self.properties[index]

    def __setitem__(self, index: str, value: str):
        if not isinstance(index, str) or not isinstance(value, (str, bool)):
            raise TypeError("Index must be a str and value must be either str or bool.")

        self.properties[index] = value

    def __delitem__(self, index: str):
        # Deleting a missing property is deliberately a no-op.
        self.properties.pop(index, None)

    def __eq__(self, obj) -> bool:
        return bool(
            obj is not None
            and isinstance(obj, Element)
            and self.tag == obj.tag
            and self.startend == obj.startend
            and self.properties == obj.properties
            and len(self.children) == len(obj.children)
            and all(child == obj_child for child, obj_child in zip(self.children, obj.children))
        )

    def start_tag(self) -> str:
        """Builds the open/start tag for the element.

        Boolean style properties (`True`/`False`, `"yes"`/`"no"`) are rendered
        as a bare attribute name when on and are omitted when off. Every other
        value is rendered as `name="value"` without escaping.

        Note:
            The tag ends with `/>` if the element is self closing.

        Returns:
            str: Built element start tag.
        """
        opening = f"<{self.tag}"

        attributes = []
        for prop, value in self.properties.items():
            if isinstance(value, bool) or value in ("yes", "no"):
                # "no" is a non-empty (truthy) string, so test the "on"
                # values explicitly instead of relying on truthiness.
                if value is True or value == "yes":
                    attributes.append(prop)
            else:
                attributes.append(f'{prop}="{value}"')
        rendered = " " + " ".join(attributes) if attributes else ""

        closing = "/>" if self.startend else ">"

        if self.startend and rendered != "":
            return opening + rendered + " " + closing
        return opening + rendered + closing

    def end_tag(self) -> str:
        """Build the elements end tag.

        Returns:
            str: Built element end tag, or an empty string for self closing elements.
        """
        return f"</{self.tag}>" if not self.startend else ""

    def __repr__(self) -> str:
        return (
            f"{self.type}(tag: {self.tag}, properties: {self.properties}, "
            f"startend: {self.startend}, children: {len(self.children)})"
        )
Element (Parent) represents an Element ([DOM]).
A tagName field must be present. It represents the element's local name ([DOM]).
The properties field represents information associated with the element. The value of the properties field implements the Properties interface.
If the tagName field is 'template', a content field can be present. The value of the content field implements the Root interface.
If the tagName field is 'template', the element must be a leaf.
If the tagName field is 'noscript', its children should be represented as if scripting is disabled ([HTML]).
For example, the following HTML:
<a href="https://alpha.com" class="bravo" download></a>
Yields:
{
type: 'element',
tagName: 'a',
properties: {
href: 'https://alpha.com',
className: ['bravo'],
download: true
},
children: []
}
def __init__(
    self,
    tag: str = "element",
    properties: Optional[dict[str, str]] = None,
    parent: Optional[Element | Root] = None,
    startend: bool = False,
    **kwargs,
):
    """Create an element node.

    Args:
        tag (str): The element's tag name.
        properties (dict | None): Attribute name to value mapping; a falsy
            value is replaced with a new empty dict.
        parent (Element | Root | None): The node that owns this element.
        startend (bool): Whether the element is self closing.
        **kwargs: Forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)
    self.tag = tag
    self.startend = startend
    self.properties = properties if properties else {}
    self.parent = parent
    self.context = {}
def start_tag(self) -> str:
    """Builds the open/start tag for the element.

    Boolean style properties (`True`/`False`, `"yes"`/`"no"`) are rendered
    as a bare attribute name when on and are omitted when off. Every other
    value is rendered as `name="value"` without escaping.

    Note:
        The tag ends with `/>` if the element is self closing.

    Returns:
        str: Built element start tag.
    """
    opening = f"<{self.tag}"

    attributes = []
    for prop, value in self.properties.items():
        if isinstance(value, bool) or value in ("yes", "no"):
            # "no" is a non-empty (truthy) string, so test the "on" values
            # explicitly instead of relying on truthiness.
            if value is True or value == "yes":
                attributes.append(prop)
        else:
            attributes.append(f'{prop}="{value}"')
    rendered = " " + " ".join(attributes) if attributes else ""

    closing = "/>" if self.startend else ">"

    if self.startend and rendered != "":
        return opening + rendered + " " + closing
    return opening + rendered + closing
Builds the open/start tag for the element.
Note:
The returned tag ends with `/>` if the element is self closing.
Returns:
str: Built element start tag.
class Root(Parent):
    """Root (Parent) represents a document.

    A root sits at the top of a tree, or is the value of the content field on
    a 'template' Element. It is never a child, so its `parent` is always None.
    """

    def __init__(
        self,
        position: Optional[Position] = None,
        children: Optional[list] = None,
    ):
        super().__init__(position, children)
        self.parent = None

    def __eq__(self, obj) -> bool:
        # Two roots are equal when their children match pairwise, in order.
        if not isinstance(obj, Root):
            return False
        if len(self.children) != len(obj.children):
            return False
        return all(mine == theirs for mine, theirs in zip(self.children, obj.children))

    def __repr__(self) -> str:
        count = len(self.children)
        return f"root [{count}]"
Root (Parent) represents a document.
Root can be used as the root of a tree, or as a value of the content field on a 'template' Element, never as a child.
class Node:  # pylint: disable=too-few-public-methods
    """All node values can be expressed in JSON as: string, number,
    object, array, true, false, or null. This means that the syntax tree should
    be able to be converted to and from JSON and produce the same tree.
    For example, in JavaScript, a tree can be passed through JSON.parse(JSON.stringify(tree))
    and result in the same tree.
    """

    position: Optional[Position]
    """The location of a node in a source document.
    The value of the position field implements the Position interface.
    The position field must not be present if a node is generated.
    """

    def __init__(
        self,
        position: Optional[Position] = None,
    ):
        """
        Args:
            position (Position | None): Location of the node in the source;
                None for generated nodes.
        """
        self.position = position

    @property
    def type(self) -> str:
        """Non-empty string representing the variant of a node.
        This field can be used to determine the type a node implements.

        It is the lowercased name of the concrete class."""
        return type(self).__name__.lower()
All node values can be expressed in JSON as: string, number, object, array, true, false, or null. This means that the syntax tree should be able to be converted to and from JSON and produce the same tree. For example, in JavaScript, a tree can be passed through JSON.parse(JSON.stringify(tree)) and result in the same tree.
The location of a node in a source document. The value of the position field implements the Position interface. The position field must not be present if a node is generated.
class DocType(Node):
    """Doctype (Node) represents a DocumentType ([DOM]).

    Example:

    ```html
    <!doctype html>
    ```

    Yields:

    ```javascript
    {type: 'doctype'}
    ```
    """

    def __init__(
        self,
        lang: Optional[str] = None,
        parent: Optional[Element | Root] = None,
        position: Optional[Position] = None,
    ):
        super().__init__(position)
        self.parent = parent
        # A missing or empty language falls back to html.
        self.lang = lang if lang else 'html'

    def __eq__(self, obj) -> bool:
        # Equal when the other object is also a doctype with the same language.
        if obj is None or not hasattr(obj, "type"):
            return False
        if obj.type != self.type:
            return False
        return bool(self.lang == obj.lang)

    def stringify(self, indent: int = 0) -> str:  # pylint: disable=unused-argument
        """Build the html string of the doctype element.

        Returns:
            str: Built html of doctype element
        """
        lang = self.lang or 'html'
        return f"<!DOCTYPE {lang}>"

    def __repr__(self) -> str:
        lang = self.lang or 'html'
        return f"node.doctype({lang})"
Doctype (Node) represents a DocumentType ([DOM]).
Example:
<!doctype html>
Yields:
{type: 'doctype'}
def stringify(self, indent: int = 0) -> str:  # pylint: disable=unused-argument
    """Build the html string of the doctype element.

    The `indent` argument is accepted but not used.

    Returns:
        str: Built html of doctype element; an empty language falls back to html.
    """
    lang = self.lang or 'html'
    return f"<!DOCTYPE {lang}>"
Build indented html string of html doctype element.
Returns:
str: Built html of doctype element
class Parent(Node):  # pylint: disable=too-few-public-methods
    """Parent (UnistParent) represents a node in hast containing other nodes (said to be children).

    Its content is limited to only other hast content.
    """

    def __init__(self, position: Optional[Position] = None, children: Optional[list] = None):
        super().__init__(position)

        # Only these node types get their `parent` pointed at this node.
        adoptable = ("element", "text", "doctype", "root", "comment")
        if children is not None:
            for child in children:
                if hasattr(child, "type") and child.type in adoptable:
                    child.parent = self

        self.children: list[Element | DocType | Comment | Text] = children or []

    def append(self, node: NODE):
        """Adopt a node and add it to the end of this parent's children."""
        node.parent = self
        siblings = self.children
        siblings.append(node)

    def extend(self, nodes: list[NODE]):
        """Adopt every given node, in order, via `append`."""
        for item in nodes:
            self.append(item)

    def insert(self, index: int, node: NODE):
        """Adopt a node and place it at `index` within this parent's children."""
        node.parent = self
        siblings = self.children
        siblings.insert(index, node)

    def remove(self, node: NODE):
        """Remove the first child that compares equal to `node`."""
        siblings = self.children
        siblings.remove(node)
Parent (UnistParent) represents a node in hast containing other nodes (said to be children).
Its content is limited to only other hast content.
def __init__(self, position: Optional[Position] = None, children: Optional[list] = None):
    """Create a parent node and adopt the given children.

    Args:
        position (Position | None): Location of the node in the source.
        children (list | None): Initial children; a falsy value results in a
            new empty list.
    """
    super().__init__(position)

    # Only these node types get their `parent` pointed at this node.
    adoptable = ("element", "text", "doctype", "root", "comment")
    if children is not None:
        for child in children:
            if hasattr(child, "type") and child.type in adoptable:
                child.parent = self

    self.children: list[Element | DocType | Comment | Text] = children or []
def append(self, node: NODE):
    """Adopt a node and add it to the end of this parent's children."""
    node.parent = self
    siblings = self.children
    siblings.append(node)
Add a node to the nested children of the current parent node.
def extend(self, nodes: list[NODE]):
    """Add multiple nodes to the children of the current parent node.

    Each node is added through `append`, in the order given.

    Args:
        nodes (list[NODE]): The nodes to add.
    """
    for node in nodes:
        self.append(node)
Add multiple nodes to the children of the current parent node.
def insert(self, index: int, node: NODE):
    """Adopt a node and place it at `index` within this parent's children."""
    node.parent = self
    siblings = self.children
    siblings.insert(index, node)
Insert a node into a specific position in the current parent node's children.
def remove(self, node: NODE):
    """Remove the first child that compares equal to `node`.

    Raises ValueError (from `list.remove`) when there is no such child.
    """
    siblings = self.children
    siblings.remove(node)
Remove a specific node from the current parent node's children.
class PI(Node):
    """A processing instruction node. Mainly used for XML."""

    def __init__(self, tag: str, properties: dict, position: Optional[Position] = None) -> None:
        super().__init__(position)
        self.properties = properties
        self.tag = tag

    def stringify(self, indent: int = 0):  # pylint: disable=unused-argument
        """Render the instruction as `<?tag key="value" ...?>`."""
        pairs = [f'{key}="{value}"' for key, value in self.properties.items()]
        attributes = " ".join(pairs)
        return f"<?{self.tag} {attributes}?>"
A processing instruction node. Mainly used for XML.
def stringify(self, indent: int = 0):  # pylint: disable=unused-argument
    """Render the instruction as `<?tag key="value" ...?>`.

    The `indent` argument is accepted but not used.
    """
    pairs = [f'{key}="{value}"' for key, value in self.properties.items()]
    attributes = " ".join(pairs)
    return f"<?{self.tag} {attributes}?>"
Construct the string representation of the processing instruction node.
class Comment(Literal):
    """Comment (Literal) represents a Comment ([DOM]).

    Example:
    ```html
    <!--Charlie-->
    ```
    """

    def stringify(self, indent: int = 0) -> str:
        """Build indented html string of html comment.

        Blank lines are dropped from multi-line comments and every remaining
        line is prefixed with `indent` spaces. A comment with at most one
        non-blank line is emitted with its value untouched.

        Returns:
            str: Built html of comment
        """
        pad = ' ' * indent
        content = [line for line in self.value.split("\n") if line.strip() != ""]
        if len(content) <= 1:
            return pad + f"<!--{self.value}-->"

        first, *middle, last = content
        rendered = [f"{pad}<!--{first.rstrip()}"]
        rendered.extend(pad + line.strip() for line in middle)
        rendered.append(f"{pad}{last.lstrip()}-->")
        return "\n".join(rendered)

    def __repr__(self) -> str:
        value = self.value
        return f"literal.comment(value: {value})"
Comment (Literal) represents a Comment ([DOM]).
Example:
<!--Charlie-->
def stringify(self, indent: int = 0) -> str:
    """Build indented html string of html comment.

    Blank lines are dropped from multi-line comments and every remaining line
    is prefixed with `indent` spaces. A comment with at most one non-blank
    line is emitted with its value untouched.

    Returns:
        str: Built html of comment
    """
    pad = ' ' * indent
    content = [line for line in self.value.split("\n") if line.strip() != ""]
    if len(content) <= 1:
        return pad + f"<!--{self.value}-->"

    first, *middle, last = content
    rendered = [f"{pad}<!--{first.rstrip()}"]
    rendered.extend(pad + line.strip() for line in middle)
    rendered.append(f"{pad}{last.lstrip()}-->")
    return "\n".join(rendered)
Build indented html string of html comment.
Returns:
str: Built html of comment
Inherited Members
class Literal(Node):
    """Literal (UnistLiteral) represents a node in hast containing a value."""

    position: Optional[Position]
    """The location of a node in a source document.
    The value of the position field implements the Position interface.
    The position field must not be present if a node is generated.
    """

    value: str
    """The Literal nodes value. All literal values must be strings"""

    def __init__(
        self,
        value: str = "",
        parent: Optional[Element | Root] = None,
        position: Optional[Position] = None,
    ):
        """
        Args:
            value (str): The literal's value; it is converted with `str()`.
            parent (Element | Root | None): The node that owns this literal.
            position (Position | None): Location of the node in the source.
        """
        super().__init__(position)
        self.value = str(value)
        self.parent = parent

    def __eq__(self, obj) -> bool:
        # getattr keeps comparisons with non-node objects (e.g. plain strings)
        # from raising AttributeError; they simply compare unequal.
        return bool(
            obj is not None
            and getattr(obj, "type", None) == self.type
            and getattr(obj, "value", None) == self.value
        )

    def normalized(self, indent: int = 0) -> str:
        """Get the normalized indented value with leading and trailing blank lines stripped."""
        return normalize_indent(self.value, indent)

    def stringify(self, indent: int = 0) -> str:
        """Build the string form of the literal.

        The raw value is returned untouched inside a `pre` element; otherwise
        the value is indent-normalized and stripped.
        """
        if "pre" in self.get_ancestry():
            return self.value
        return self.normalized(indent).strip()

    def get_ancestry(self) -> list[str]:
        """Get the ancestry of the literal node.

        Used to validate whether there is a `pre` element in the ancestry.

        Returns:
            list[str]: Tag names of the ancestors, nearest first. Ancestors
                without a `tag` are skipped. Empty when the literal has no parent.
        """
        tags: list[str] = []
        current = self.parent
        # Walk upwards iteratively; a missing parent simply ends the walk
        # instead of raising AttributeError on None.
        while current is not None:
            if hasattr(current, "tag"):
                tags.append(current.tag)
            current = getattr(current, "parent", None)
        return tags
Literal (UnistLiteral) represents a node in hast containing a value.
The location of a node in a source document. The value of the position field implements the Position interface. The position field must not be present if a node is generated.
def normalized(self, indent: int = 0) -> str:
    """Return the value with its indent normalized.

    Delegates to `normalize_indent`, which strips leading and trailing blank
    lines and offsets the normalized lines by ``indent`` spaces.
    """
    raw_value = self.value
    return normalize_indent(raw_value, indent)
Get the normalized indented value with leading and trailing blank lines stripped.
def get_ancestry(self) -> list[str]:
    """Get the ancestry of the literal node.

    Used to validate whether there is a `pre` element in the ancestry.

    Returns:
        list[str]: Tags of the ancestors that have a `tag` attribute,
        nearest ancestor first. Empty when the literal has no parent.
    """
    tags: list[str] = []
    ancestor = self.parent
    # Walk upward until the chain ends. A literal without a parent simply
    # has no ancestry instead of failing on `None.parent`.
    while ancestor is not None:
        if hasattr(ancestor, "tag"):
            tags.append(ancestor.tag)
        ancestor = getattr(ancestor, "parent", None)
    return tags
Get the ancestry of the literal node.
Used to validate whether there is a `pre` element in the ancestry.
class Point:
    """Represents one place in a source file.

    The line field (1-indexed integer) represents a line in a source file. The column field
    (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer)
    represents a character in a source file.

    Note:
        Validation only rejects `None` and negative values for `line` and
        `column`, and negative values for `offset`.
    """

    def __init__(self, line: int, column: int, offset: Optional[int] = None):
        """
        Args:
            line (int): Line of the point in the source file.
            column (int): Column of the point in the source file.
            offset (int | None): Character offset of the point, if known.

        Raises:
            IndexError: If `line` or `column` is `None` or negative, or if
                `offset` is negative.
        """
        if line is None or line < 0:
            raise IndexError(f"Point.line must be >= 0 but was {line}")

        self.line = line

        if column is None or column < 0:
            raise IndexError(f"Point.column must be >= 0 but was {column}")

        self.column = column

        if offset is not None and offset < 0:
            # Report the offending offset itself, not the line.
            raise IndexError(f"Point.offset must be >= 0 or None but was {offset}")

        self.offset = offset

    def __eq__(self, obj) -> bool:
        # Equality compares line and column only; the offset is not considered.
        return bool(
            obj is not None
            and isinstance(obj, self.__class__)
            and self.line == obj.line
            and self.column == obj.column
        )

    def __repr__(self) -> str:
        return f"point(line: {self.line}, column: {self.column}, offset: {self.offset})"

    def __str__(self) -> str:
        return f"{self.line}:{self.column}"
Represents one place in a source file.
The line field (1-indexed integer) represents a line in a source file. The column field (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer) represents a character in a source file.
79 def __init__(self, line: int, column: int, offset: Optional[int] = None): 80 if line is None or line < 0: 81 raise IndexError(f"Point.line must be >= 0 but was {line}") 82 83 self.line = line 84 85 if column is None or column < 0: 86 raise IndexError(f"Point.column must be >= 0 but was {column}") 87 88 self.column = column 89 90 if offset is not None and offset < 0: 91 raise IndexError(f"Point.offset must be >= 0 or None but was {line}") 92 93 self.offset = offset
class Position:
    """Position represents the location of a node in a source file.

    The `start` field of `Position` represents the place of the first character
    of the parsed source region. The `end` field of Position represents the place
    of the first character after the parsed source region, whether it exists or not.
    The value of the `start` and `end` fields implement the `Point` interface.

    The `indent` field of `Position` represents the start column at each index
    (plus start line) in the source region, for elements that span multiple lines.

    If the syntactic unit represented by a node is not present in the source file at
    the time of parsing, the node is said to be `generated` and it must not have positional
    information.
    """

    @overload
    def __init__(
        self,
        start: tuple[int, int, int | None],
        end: tuple[int, int, int | None],
        indent: Optional[int] = None,
    ):
        """
        Args:
            start (tuple[int, int, int | None]): Tuple representing the line, column, and optional
            offset of the start point.
            end (tuple[int, int, int | None]): Tuple representing the line, column, and optional
            offset of the end point.
            indent (Optional[int], optional): The indent amount for the start of the position.
        """
        ...

    def __init__(self, start: Point, end: Point, indent: Optional[int] = None):
        """
        Args:
            start (Point): Starting point of the position.
            end (Point): End point of the position.
            indent (int | None): The indent amount for the start of the position.

        Raises:
            IndexError: If `indent` is given and is negative.
        """
        # Tuples are promoted to `Point`; a two item tuple has no offset.
        if isinstance(start, tuple):
            start_offset = start[2] if len(start) == 3 else None
            start = Point(start[0], start[1], start_offset)
        self.start = start

        if isinstance(end, tuple):
            end_offset = end[2] if len(end) == 3 else None
            end = Point(end[0], end[1], end_offset)
        self.end = end

        if indent is not None and indent < 0:
            raise IndexError(f"Position.indent value must be >= 0 or None but was {indent}")

        self.indent = indent

    def __eq__(self, obj) -> bool:
        # Only the two end points take part in equality; `indent` does not.
        if obj is None or not isinstance(obj, Position):
            return False
        return bool(self.start == obj.start and self.end == obj.end)

    def as_dict(self) -> dict:
        """Convert the position object to a dict."""
        converted = {
            edge: {"line": point.line, "column": point.column, "offset": point.offset}
            for edge, point in (("start", self.start), ("end", self.end))
        }
        converted["indent"] = self.indent
        return converted

    def __repr__(self) -> str:
        suffix = "" if self.indent is None else f" ~ {self.indent}"
        return f"<{self.start}-{self.end}{suffix}>"

    def __str__(self) -> str:
        return repr(self)
Position represents the location of a node in a source file.
The `start` field of `Position` represents the place of the first character of the parsed source region. The `end` field of `Position` represents the place of the first character after the parsed source region, whether it exists or not. The values of the `start` and `end` fields implement the `Point` interface.
The `indent` field of `Position` represents the start column at each index (plus start line) in the source region, for elements that span multiple lines.
If the syntactic unit represented by a node is not present in the source file at the time of parsing, the node is said to be `generated` and it must not have positional information.
def __init__(self, start: Point, end: Point, indent: Optional[int] = None):
    """
    Args:
        start (Point): Starting point of the position.
        end (Point): End point of the position.
        indent (int | None): The indent amount for the start of the position.

    Raises:
        IndexError: If `indent` is given and is negative.
    """
    # Tuples are promoted to `Point`; a two item tuple has no offset.
    if isinstance(start, tuple):
        start_offset = start[2] if len(start) == 3 else None
        start = Point(start[0], start[1], start_offset)
    self.start = start

    if isinstance(end, tuple):
        end_offset = end[2] if len(end) == 3 else None
        end = Point(end[0], end[1], end_offset)
    self.end = end

    if indent is not None and indent < 0:
        raise IndexError(f"Position.indent value must be >= 0 or None but was {indent}")

    self.indent = indent
Arguments:
- start (Point): Starting point of the position.
- end (Point): End point of the position.
- indent (int | None): The indent amount for the start of the position.
def as_dict(self) -> dict:
    """Convert the position object to a dict.

    Returns:
        dict: A mapping with `start` and `end` entries, each holding the
        point's `line`, `column` and `offset`, plus the `indent` value.
    """
    converted = {
        edge: {"line": point.line, "column": point.column, "offset": point.offset}
        for edge, point in (("start", self.start), ("end", self.end))
    }
    converted["indent"] = self.indent
    return converted
Convert the position object to a dict.
class Text(Literal):
    """Text (Literal) represents a Text ([DOM]).

    Example:

    ```html
    <span>Foxtrot</span>
    ```

    Yields:

    ```javascript
    {
        type: 'element',
        tagName: 'span',
        properties: {},
        children: [{type: 'text', value: 'Foxtrot'}]
    }
    ```
    """

    @cached_property
    def num_lines(self) -> int:
        """Count the lines of the text that are not blank.

        The result is computed once and then cached on the instance.
        """
        return sum(1 for row in str(self.value).split("\n") if row.strip())

    def __repr__(self) -> str:
        text = self.value
        return f"literal.text('{text}')"
Text (Literal) represents a Text ([DOM]).
Example:
<span>Foxtrot</span>
Yields:
{
type: 'element',
tagName: 'span',
properties: {},
children: [{type: 'text', value: 'Foxtrot'}]
}