phml.utilities.transform.extract

 1from phml.core.nodes import AST, NODE, Comment, Element, Root, Text
 2
 3__all__ = ["to_string"]
 4
 5
 6def to_string(node: AST | NODE) -> str:
 7    """Get the raw text content of the element. Works similar to
 8    the DOMs Node#textContent getter.
 9
10    Args:
11        node (Root | Element | Text): Node to get the text content from
12
13    Returns:
14        str: Raw inner text without formatting.
15    """
16
17    if isinstance(node, AST):
18        node = node.tree
19
20    if isinstance(node, Text | Comment):
21        return node.value
22
23    def concat_text(element: Element | Root) -> list[str]:
24        result = []
25
26        for child in element.children:
27            if isinstance(child, (Element, Root)):
28                result.extend(concat_text(child))
29            elif isinstance(child, Text):
30                result.append(child.value)
31        return result
32
33    if isinstance(node, Root | Element):
34        # Recursive concat
35        return " ".join(concat_text(node))
36
37    return None
 7def to_string(node: AST | NODE) -> str:
 8    """Get the raw text content of the element. Works similar to
 9    the DOMs Node#textContent getter.
10
11    Args:
12        node (Root | Element | Text): Node to get the text content from
13
14    Returns:
15        str: Raw inner text without formatting.
16    """
17
18    if isinstance(node, AST):
19        node = node.tree
20
21    if isinstance(node, Text | Comment):
22        return node.value
23
24    def concat_text(element: Element | Root) -> list[str]:
25        result = []
26
27        for child in element.children:
28            if isinstance(child, (Element, Root)):
29                result.extend(concat_text(child))
30            elif isinstance(child, Text):
31                result.append(child.value)
32        return result
33
34    if isinstance(node, Root | Element):
35        # Recursive concat
36        return " ".join(concat_text(node))
37
38    return None

Get the raw text content of the element. Works similarly to the DOM's Node#textContent getter.

Arguments:
  • node (AST | Root | Element | Text | Comment): Node to get the text content from
Returns:

str: Raw inner text without formatting, or None if the node is not a Root, Element, Text, or Comment.