phml.nodes
All things related to phml node data objects.
"""phml.nodes

All things related to phml node data objects.
"""

from .AST import AST
from .comment import Comment
from .doctype import DocType
from .element import Element
from .literal import Literal
from .node import Node
from .parent import Parent
from .point import Point
from .position import Position
from .root import Root
from .text import Text
from .types import Properties, PropertyName, PropertyValue

# Union of every node class re-exported by this package.
All_Nodes = Root | Element | Text | Comment | DocType | Parent | Node | Literal

# Public names of the package.
__all__ = [
    "AST",
    "Node",
    "Root",
    "DocType",
    "Parent",
    "Element",
    "Literal",
    "Comment",
    "Text",
    "Position",
    "Point",
    "Properties",
    "PropertyName",
    "PropertyValue",
    "All_Nodes",
]
class AST:
    """Thin wrapper around the top node of a phml tree.

    Exposes iteration, size and child access for the wrapped tree.
    """

    def __init__(self, tree):
        # Only objects whose `type` is "root" or "element" may head an AST.
        acceptable = (
            tree is not None and hasattr(tree, "type") and tree.type in ["root", "element"]
        )
        if not acceptable:
            raise TypeError("The given tree/root node for AST must be of type `Root` or `Element`")
        self.tree = tree

    def __iter__(self) -> Iterator:
        # Imported lazily, as in the original, to avoid importing phml at module load.
        from phml import walk  # pylint: disable=import-outside-toplevel

        return walk(self.tree)

    def __eq__(self, obj) -> bool:
        # Equal only to another AST (of this class) wrapping an equal tree.
        if not isinstance(obj, self.__class__):
            return False
        return bool(self.tree == obj.tree)

    @cached_property
    def size(self) -> int:
        """Number of nodes in the tree, as computed by `phml.size`.

        The value is cached after the first access.
        """
        from phml import size  # pylint: disable=import-outside-toplevel

        return size(self.tree)

    @property
    def children(self) -> list:
        """Children of the wrapped tree node, or None when `tree` is None."""
        if self.tree is None:
            return None
        return self.tree.children
PHML ast.
Contains utility functions that can manipulate the ast.
class Node:  # pylint: disable=too-few-public-methods
    """Base class of every phml node.

    All node values can be expressed in JSON as: string, number, object,
    array, true, false, or null. A syntax tree should therefore survive a
    round trip to and from JSON; in JavaScript, for example,
    JSON.parse(JSON.stringify(tree)) should result in the same tree.
    """

    position: Position
    """Location of the node in a source document.
    The value implements the Position interface; it must not be present
    if the node is generated.
    """

    def __init__(
        self,
        position: Optional[Position] = None,
    ):
        self.position = position

    @property
    def type(self) -> str:
        """Non-empty string naming the node variant.

        It is the lower-cased name of the instance's class.
        """
        class_name = self.__class__.__name__
        return class_name.lower()
All node values can be expressed in JSON as: string, number, object, array, true, false, or null. This means that the syntax tree should be able to be converted to and from JSON and produce the same tree. For example, in JavaScript, a tree can be passed through JSON.parse(JSON.stringify(tree)) and result in the same tree.
The location of a node in a source document. The value of the position field implements the Position interface. The position field must not be present if a node is generated.
class Root(Parent):
    """Root (Parent) represents a document.

    A Root is either the top of a tree or the value of the content field
    on a 'template' Element; it is never a child.
    """

    def __init__(
        self,
        position: Optional[Position] = None,
        children: Optional[list] = None,
    ):
        super().__init__(position, children)
        # A root has no parent.
        self.parent = None

    def __eq__(self, obj) -> bool:
        # Roots are equal when their children match pairwise and in number.
        if obj is None or not isinstance(obj, Root):
            return False
        if len(self.children) != len(obj.children):
            return False
        return all(mine == theirs for mine, theirs in zip(self.children, obj.children))

    def __repr__(self) -> str:
        child_count = len(self.children)
        return f"root [{child_count}]"
Root (Parent) represents a document.
Root can be used as the root of a tree, or as a value of the content field on a 'template' Element, never as a child.
class DocType(Node):
    """Doctype (Node) represents a DocumentType ([DOM]).

    Example:

    ```html
    <!doctype html>
    ```

    Yields:

    ```javascript
    {type: 'doctype'}
    ```
    """

    def __init__(
        self,
        lang: Optional[str] = None,
        parent: Optional[Element | Root] = None,
        position: Optional[Position] = None,
    ):
        super().__init__(position)
        self.parent = parent
        # A missing or empty language falls back to 'html'.
        self.lang = lang or 'html'

    def __eq__(self, obj) -> bool:
        # Equal to any object reporting the same `type` and the same `lang`.
        if obj is None:
            return False
        same_kind = hasattr(obj, "type") and obj.type == self.type
        return bool(same_kind and self.lang == obj.lang)

    def stringify(self, indent: int = 0) -> str:  # pylint: disable=unused-argument
        """Render the doctype as html.

        Args:
            indent: Accepted for signature parity with other nodes; not used.

        Returns:
            str: The `<!DOCTYPE ...>` declaration.
        """
        language = self.lang or 'html'
        return f"<!DOCTYPE {language}>"

    def __repr__(self) -> str:
        language = self.lang or 'html'
        return f"node.doctype({language})"
Doctype (Node) represents a DocumentType ([DOM]).
Example:
<!doctype html>
Yields:
{type: 'doctype'}
def stringify(self, indent: int = 0) -> str:  # pylint: disable=unused-argument
    """Render the doctype as html.

    Args:
        indent: Accepted for signature parity with other nodes; not used.

    Returns:
        str: The `<!DOCTYPE ...>` declaration, using 'html' when `lang` is falsy.
    """
    language = self.lang or 'html'
    return f"<!DOCTYPE {language}>"
Build indented html string of html doctype element.
Returns:
str: Built html of doctype element
class Parent(Node):  # pylint: disable=too-few-public-methods
    """Parent (UnistParent) represents a node in hast containing other nodes (its children).

    Its content is limited to only other hast content.
    """

    def __init__(self, position: Optional[Position] = None, children: Optional[list] = None):
        super().__init__(position)

        # Node kinds that get their `parent` pointed at this instance.
        adoptable = ("element", "text", "doctype", "root", "comment")
        if children is not None:
            for node in children:
                if getattr(node, "type", None) in adoptable:
                    node.parent = self

        # None (or any other falsy value) becomes a fresh empty list.
        self.children: list[Element | DocType | Comment | Text] = children or []
Parent (UnistParent) represents a node in hast containing other nodes (said to be children).
Its content is limited to only other hast content.
def __init__(self, position: Optional[Position] = None, children: Optional[list] = None):
    """Store the position and children, adopting recognised child nodes.

    Args:
        position: Forwarded to the base class initialiser.
        children: Initial children; a falsy value yields a new empty list.
    """
    super().__init__(position)

    # Node kinds that get their `parent` pointed at this instance.
    adoptable = ("element", "text", "doctype", "root", "comment")
    if children is not None:
        for node in children:
            if getattr(node, "type", None) in adoptable:
                node.parent = self

    self.children: list[Element | DocType | Comment | Text] = children or []
class Element(Parent):
    """Element (Parent) represents an Element ([DOM]).

    A tagName field must be present. It represents the element's local name ([DOM]).

    The properties field represents information associated with the element.
    The value of the properties field implements the Properties interface.

    If the tagName field is 'template', a content field can be present. The value
    of the content field implements the Root interface.

    If the tagName field is 'template', the element must be a leaf.

    If the tagName field is 'noscript', its children should be represented as if
    scripting is disabled ([HTML]).


    For example, the following HTML:

    ```html
    <a href="https://alpha.com" class="bravo" download></a>
    ```

    Yields:

    ```javascript
    {
        type: 'element',
        tagName: 'a',
        properties: {
            href: 'https://alpha.com',
            className: ['bravo'],
            download: true
        },
        children: []
    }
    ```
    """

    def __init__(
        self,
        tag: str = "element",
        properties: Optional[Properties] = None,
        parent: Optional[Element | Root] = None,
        startend: bool = False,
        **kwargs,
    ):
        # Remaining keyword arguments are forwarded to the base initialiser.
        super().__init__(**kwargs)
        self.properties = properties or {}
        self.tag = tag
        self.startend = startend
        self.parent = parent
        self.locals = {}

    def __getitem__(self, index: str) -> str:
        """Return the property stored under `index` (KeyError if absent)."""
        return self.properties[index]

    def __setitem__(self, index: str, value: str):
        """Set a property.

        Raises:
            TypeError: If `index` is not a str or `value` is neither str nor bool.
        """
        if not isinstance(index, str) or not isinstance(value, (str, bool)):
            raise TypeError("Index must be a str and value must be either str or bool.")

        self.properties[index] = value

    def __delitem__(self, index: str):
        """Remove a property; a missing property is silently ignored."""
        self.properties.pop(index, None)

    def __eq__(self, obj) -> bool:
        # Equal when tag, self-closing flag, properties and all children match.
        return bool(
            obj is not None
            and isinstance(obj, Element)
            and self.tag == obj.tag
            and self.startend == obj.startend
            and self.properties == obj.properties
            and len(self.children) == len(obj.children)
            and all(child == obj_child for child, obj_child in zip(self.children, obj.children))
        )

    def start_tag(self) -> str:
        """Builds the open/start tag for the element.

        Properties whose value is a bool or the string "yes"/"no" are treated
        as flags: True/"yes" emits the bare attribute name, False/"no" omits
        the attribute. Every other property is emitted as `name="value"`.

        Note:
            The tag is closed with ` />` instead of `>` if the element is
            self closing.

        Returns:
            str: Built element start tag.
        """
        attributes = []
        for prop in self.properties:
            value = self[prop]
            if isinstance(value, bool) or value in ("yes", "no"):
                # Compare explicitly: the string "no" is truthy, so a plain
                # truthiness test would wrongly emit the attribute.
                if value is True or value == "yes":
                    attributes.append(prop)
            else:
                attributes.append(f'{prop}="{value}"')

        rendered = " " + " ".join(attributes) if attributes else ""
        closing = f"{' /' if self.startend else ''}>"

        return f"<{self.tag}" + rendered + closing

    def end_tag(self) -> Optional[str]:
        """Build the elements end tag.

        Returns:
            Optional[str]: Built element end tag, or None when the element is
            self closing and therefore has no end tag.
        """
        return f"</{self.tag}>" if not self.startend else None

    def __repr__(self) -> str:
        return (
            f"{self.type}(tag: {self.tag}, properties: {self.properties}, "
            f"startend: {self.startend}, children: {len(self.children)})"
        )
Element (Parent) represents an Element ([DOM]).
A tagName field must be present. It represents the element's local name ([DOM]).
The properties field represents information associated with the element. The value of the properties field implements the Properties interface.
If the tagName field is 'template', a content field can be present. The value of the content field implements the Root interface.
If the tagName field is 'template', the element must be a leaf.
If the tagName field is 'noscript', its children should be represented as if scripting is disabled ([HTML]).
For example, the following HTML:
<a href="https://alpha.com" class="bravo" download></a>
Yields:
{
type: 'element',
tagName: 'a',
properties: {
href: 'https://alpha.com',
className: ['bravo'],
download: true
},
children: []
}
def __init__(
    self,
    tag: str = "element",
    properties: Optional[Properties] = None,
    parent: Optional[Element | Root] = None,
    startend: bool = False,
    **kwargs,
):
    """Create an element.

    Args:
        tag: Tag name of the element.
        properties: Attribute mapping; a falsy value yields a new empty dict.
        parent: Node this element is attached to, if any.
        startend: Whether the element is self closing.
        **kwargs: Forwarded unchanged to the base class initialiser.
    """
    # Base initialisation runs first, exactly as before.
    super().__init__(**kwargs)
    self.tag = tag
    self.startend = startend
    self.properties = properties or {}
    self.parent = parent
    self.locals = {}
def start_tag(self) -> str:
    """Builds the open/start tag for the element.

    Properties whose value is a bool or the string "yes"/"no" are treated as
    flags: True/"yes" emits the bare attribute name, False/"no" omits the
    attribute. Every other property is emitted as `name="value"`.

    Note:
        The tag is closed with ` />` instead of `>` if the element is
        self closing.

    Returns:
        str: Built element start tag.
    """
    attributes = []
    for prop, value in self.properties.items():
        if isinstance(value, bool) or value in ("yes", "no"):
            # Compare explicitly: the string "no" is truthy, so a plain
            # truthiness test would wrongly emit the attribute.
            if value is True or value == "yes":
                attributes.append(prop)
        else:
            attributes.append(f'{prop}="{value}"')

    rendered = " " + " ".join(attributes) if attributes else ""
    closing = f"{' /' if self.startend else ''}>"

    return f"<{self.tag}" + rendered + closing
Builds the open/start tag for the element.
Note:
The tag is closed with ` />` instead of `>` if the element is self closing.
Returns:
str: Built element start tag.
class Literal(Node):
    """Literal (UnistLiteral) represents a node in hast containing a value."""

    position: Position
    """The location of a node in a source document.
    The value of the position field implements the Position interface.
    The position field must not be present if a node is generated.
    """

    value: str
    """The Literal nodes value. All literal values must be strings"""

    def __init__(
        self,
        value: str = "",
        parent: Optional[Element | Root] = None,
        position: Optional[Position] = None,
    ):
        super().__init__(position)
        self.value = value
        self.parent = parent

    def __eq__(self, obj) -> bool:
        # Guard the attribute reads so comparing against an unrelated object
        # (e.g. a plain str) yields False instead of raising AttributeError.
        return bool(
            obj is not None
            and hasattr(obj, "type")
            and hasattr(obj, "value")
            and self.type == obj.type
            and self.value == obj.value
        )

    def get_ancestry(self) -> list[str]:
        """Get the ancestry of the literal node.

        Used to validate whether there is a `pre` element in the ancestry.

        Returns:
            list[str]: Tags of every ancestor that has a `tag` attribute,
            nearest ancestor first. Empty when the literal has no parent.
        """
        tags = []
        ancestor = self.parent
        # Walk upwards until a node without a parent is reached; a literal
        # with no parent yields an empty list rather than raising.
        while ancestor is not None:
            if hasattr(ancestor, "tag"):
                tags.append(ancestor.tag)
            ancestor = getattr(ancestor, "parent", None)
        return tags
Literal (UnistLiteral) represents a node in hast containing a value.
The location of a node in a source document. The value of the position field implements the Position interface. The position field must not be present if a node is generated.
def get_ancestry(self) -> list[str]:
    """Get the ancestry of the literal node.

    Used to validate whether there is a `pre` element in the ancestry.

    Returns:
        list[str]: Tags of every ancestor that has a `tag` attribute,
        nearest ancestor first. Empty when the literal has no parent.
    """
    tags = []
    ancestor = self.parent
    # Walk upwards until a node without a parent is reached; a literal with
    # no parent yields an empty list rather than raising AttributeError.
    while ancestor is not None:
        if hasattr(ancestor, "tag"):
            tags.append(ancestor.tag)
        ancestor = getattr(ancestor, "parent", None)
    return tags
Get the ancestry of the literal node.
Used to validate whether there is a `pre` element in the ancestry.
class Comment(Literal):
    """Comment (Literal) represents a Comment ([DOM]).

    Example:
    ```html
    <!--Charlie-->
    ```
    """

    def stringify(self, indent: int = 0) -> str:
        """Render the comment as html.

        A comment with more than one non-blank line is emitted one line per
        row, each prefixed with the indent; otherwise the raw value is
        wrapped as-is.

        Args:
            indent: Number of times the indent string is repeated per line.

        Returns:
            str: Built html of comment
        """
        pad = ' ' * indent
        rows = [row for row in self.value.split("\n") if row.strip() != ""]
        if len(rows) <= 1:
            return pad + f"<!--{self.value}-->"

        opening = f"{pad}<!--{rows[0].rstrip()}"
        closing = f"{pad}{rows[-1].lstrip()}-->"
        middle = [pad + row.strip() for row in rows[1:-1]]
        return "\n".join([opening, *middle, closing])

    def __repr__(self) -> str:
        return f"literal.comment(value: {self.value})"
Comment (Literal) represents a Comment ([DOM]).
Example:
<!--Charlie-->
def stringify(self, indent: int = 0) -> str:
    """Render the comment as html.

    A comment with more than one non-blank line is emitted one line per row,
    each prefixed with the indent; otherwise the raw value is wrapped as-is.

    Args:
        indent: Number of times the indent string is repeated per line.

    Returns:
        str: Built html of comment
    """
    pad = ' ' * indent
    rows = [row for row in self.value.split("\n") if row.strip() != ""]
    if len(rows) <= 1:
        return pad + f"<!--{self.value}-->"

    opening = f"{pad}<!--{rows[0].rstrip()}"
    closing = f"{pad}{rows[-1].lstrip()}-->"
    middle = [pad + row.strip() for row in rows[1:-1]]
    return "\n".join([opening, *middle, closing])
Build indented html string of html comment.
Returns:
str: Built html of comment
class Text(Literal):
    """Text (Literal) represents a Text ([DOM]).

    Example:

    ```html
    <span>Foxtrot</span>
    ```

    Yields:

    ```javascript
    {
        type: 'element',
        tagName: 'span',
        properties: {},
        children: [{type: 'text', value: 'Foxtrot'}]
    }
    ```
    """

    @cached_property
    def num_lines(self) -> int:
        """Determine the number of non-blank lines the text has.

        The result is cached after the first access.
        """
        return len([line for line in self.value.split("\n") if line.strip() != ""])

    def stringify(self, indent: int = 0) -> str:
        """Build indented html string of html text.

        Text inside a `pre`, `python`, `script` or `style` ancestor is
        returned untouched. Otherwise blank lines are dropped, leading
        whitespace is stripped and each line is prefixed with the indent.

        Args:
            indent: Number of times the indent string is repeated per line.

        Returns:
            str: Built html of text
        """
        if self.parent is not None:
            # Walk the ancestry once instead of once per protected tag.
            ancestry = self.get_ancestry()
            if any(tag in ancestry for tag in ("pre", "python", "script", "style")):
                return self.value

        pad = ' ' * indent
        return "\n".join(
            pad + line.lstrip() for line in self.value.split("\n") if line.strip() != ""
        )

    def __repr__(self) -> str:
        return f"literal.text('{self.value}')"
Text (Literal) represents a Text ([DOM]).
Example:
<span>Foxtrot</span>
Yields:
{
type: 'element',
tagName: 'span',
properties: {},
children: [{type: 'text', value: 'Foxtrot'}]
}
def stringify(self, indent: int = 0) -> str:
    """Build indented html string of html text.

    Text inside a `pre`, `python`, `script` or `style` ancestor is returned
    untouched. Otherwise blank lines are dropped, leading whitespace is
    stripped and each line is prefixed with the indent.

    Args:
        indent: Number of times the indent string is repeated per line.

    Returns:
        str: Built html of text
    """
    if self.parent is not None:
        # Walk the ancestry once instead of once per protected tag.
        ancestry = self.get_ancestry()
        if any(tag in ancestry for tag in ("pre", "python", "script", "style")):
            return self.value

    pad = ' ' * indent
    return "\n".join(
        pad + line.lstrip() for line in self.value.split("\n") if line.strip() != ""
    )
Build indented html string of html text.
Returns:
str: Built html of text
class Position:
    """Position represents the location of a node in a source file.

    `start` is the place of the first character of the parsed source region
    and `end` is the place of the first character after that region, whether
    it exists or not. Both implement the `Point` interface.

    `indent` represents the start column at each index (plus start line) in
    the source region, for elements that span multiple lines.

    A node whose syntactic unit is not present in the source file at the
    time of parsing is said to be `generated` and must not have positional
    information.
    """

    def __init__(self, start: Point, end: Point, indent: Optional[int] = None):
        self.start = start
        self.end = end

        # None means "no indent recorded"; anything else must be non-negative.
        negative_indent = indent is not None and indent < 0
        if negative_indent:
            raise IndexError(f"Position.indent value must be >= 0 or None but was {indent}")

        self.indent = indent

    def __eq__(self, obj) -> bool:
        # Only `start` and `end` take part in the comparison.
        if obj is None or not isinstance(obj, Position):
            return False
        return bool(self.start == obj.start and self.end == obj.end)

    def as_dict(self) -> dict:
        """Return the position as a nested dict with start, end and indent."""
        begin, finish = self.start, self.end
        return {
            "start": {"line": begin.line, "column": begin.column, "offset": begin.offset},
            "end": {"line": finish.line, "column": finish.column, "offset": finish.offset},
            "indent": self.indent,
        }

    def __repr__(self) -> str:
        suffix = "" if self.indent is None else f" ~ {self.indent}"
        return f"<{self.start}-{self.end}{suffix}>"

    def __str__(self) -> str:
        return repr(self)
Position represents the location of a node in a source file.
The `start` field of `Position` represents the place of the first character of the parsed source region. The `end` field of `Position` represents the place of the first character after the parsed source region, whether it exists or not. The value of the `start` and `end` fields implement the `Point` interface.
The `indent` field of `Position` represents the start column at each index (plus start line) in the source region, for elements that span multiple lines.
If the syntactic unit represented by a node is not present in the source file at the time of parsing, the node is said to be `generated` and it must not have positional information.
def as_dict(self) -> dict:
    """Return the position as a nested dict with start, end and indent."""
    begin, finish = self.start, self.end
    return {
        "start": {"line": begin.line, "column": begin.column, "offset": begin.offset},
        "end": {"line": finish.line, "column": finish.column, "offset": finish.offset},
        "indent": self.indent,
    }
Convert the position object to a dict.
class Point:
    """Represents one place in a source file.

    The line field (1-indexed integer) represents a line in a source file. The column field
    (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer)
    represents a character in a source file.
    """

    def __init__(self, line: int, column: int, offset: Optional[int] = None):
        """Validate and store the coordinates.

        Raises:
            IndexError: If `line` or `column` is negative, or if `offset` is
                given and negative.
        """
        if line < 0:
            raise IndexError(f"Point.line must be >= 0 but was {line}")

        self.line = line

        if column < 0:
            raise IndexError(f"Point.column must be >= 0 but was {column}")

        self.column = column

        if offset is not None and offset < 0:
            # Report the offending offset (the message used to show `line`).
            raise IndexError(f"Point.offset must be >= 0 or None but was {offset}")

        self.offset = offset

    def __eq__(self, obj) -> bool:
        # Only line and column are compared; offset is not part of equality.
        return bool(
            obj is not None
            and isinstance(obj, self.__class__)
            and self.line == obj.line
            and self.column == obj.column
        )

    def __repr__(self) -> str:
        return f"point(line: {self.line}, column: {self.column}, offset: {self.offset})"

    def __str__(self) -> str:
        return f"{self.line}:{self.column}"
Represents one place in a source file.
The line field (1-indexed integer) represents a line in a source file. The column field (1-indexed integer) represents a column in a source file. The offset field (0-indexed integer) represents a character in a source file.
def __init__(self, line: int, column: int, offset: Optional[int] = None):
    """Validate and store the coordinates.

    Raises:
        IndexError: If `line` or `column` is negative, or if `offset` is
            given and negative.
    """
    if line < 0:
        raise IndexError(f"Point.line must be >= 0 but was {line}")

    self.line = line

    if column < 0:
        raise IndexError(f"Point.column must be >= 0 but was {column}")

    self.column = column

    if offset is not None and offset < 0:
        # Report the offending offset (the message used to show `line`).
        raise IndexError(f"Point.offset must be >= 0 or None but was {offset}")

    self.offset = offset