phml.transform.extract
from phml.nodes import AST, All_Nodes, Comment, Element, Root, Text

__all__ = ["to_string"]


def to_string(node: AST | All_Nodes) -> str | None:
    """Get the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter, except that the
    text fragments gathered from a ``Root`` or ``Element`` are joined with a
    single space.

    Args:
        node (AST | All_Nodes): Node to get the text content from. An ``AST``
            is unwrapped to its ``tree`` before inspection.

    Returns:
        str | None: The node's ``value`` for ``Text`` and ``Comment`` nodes;
        the space-joined values of every descendant ``Text`` node for ``Root``
        and ``Element`` nodes; ``None`` for any other node type.
    """

    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    def concat_text(element: Element | Root) -> list[str]:
        """Collect the values of all ``Text`` descendants, depth-first."""
        result = []

        for child in element.children:
            # Only Text values are collected; nested comments and any other
            # child types contribute nothing to the result.
            if isinstance(child, (Element, Root)):
                result.extend(concat_text(child))
            elif isinstance(child, Text):
                result.append(child.value)
        return result

    if isinstance(node, (Root, Element)):
        return " ".join(concat_text(node))

    # Anything that is neither text-like nor a container has no text content.
    return None
def
to_string( node: phml.nodes.AST.AST | phml.nodes.root.Root | phml.nodes.element.Element | phml.nodes.text.Text | phml.nodes.comment.Comment | phml.nodes.doctype.DocType | phml.nodes.parent.Parent | phml.nodes.node.Node | phml.nodes.literal.Literal) -> str:
def to_string(node: AST | All_Nodes) -> str | None:
    """Get the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter, except that the
    text fragments gathered from a ``Root`` or ``Element`` are joined with a
    single space.

    Args:
        node (AST | All_Nodes): Node to get the text content from. An ``AST``
            is unwrapped to its ``tree`` before inspection.

    Returns:
        str | None: The node's ``value`` for ``Text`` and ``Comment`` nodes;
        the space-joined values of every descendant ``Text`` node for ``Root``
        and ``Element`` nodes; ``None`` for any other node type.
    """

    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    def concat_text(element: Element | Root) -> list[str]:
        """Collect the values of all ``Text`` descendants, depth-first."""
        result = []

        for child in element.children:
            # Only Text values are collected; nested comments and any other
            # child types contribute nothing to the result.
            if isinstance(child, (Element, Root)):
                result.extend(concat_text(child))
            elif isinstance(child, Text):
                result.append(child.value)
        return result

    if isinstance(node, (Root, Element)):
        return " ".join(concat_text(node))

    # Anything that is neither text-like nor a container has no text content.
    return None
Get the raw text content of the element. Works similarly to the DOM's Node#textContent getter.
Arguments:
- node (AST | Root | Element | Text | Comment): Node to get the text content from
Returns:
str: Raw inner text without formatting. None is returned for node types other than Root, Element, Text, and Comment.