phml.utilities.validate.validate

  1from re import match, split, sub
  2from typing import Any
  3
  4from phml.core.nodes import NODE, Comment, Element, Literal, Parent, Root, Text
  5
# Names exported by this module when it is star-imported
# (``from ... import *``); each entry is a function defined below.
__all__ = [
    "validate",
    "parent",
    "literal",
    "generated",
    "has_property",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
    "is_event_handler",
    "blank",
]
 23
 24
 25def validate(node: NODE) -> bool:
 26    """Validate a node based on attributes and type."""
 27
 28    if hasattr(node, "children"):
 29        if not hasattr(node, "type"):
 30            raise AssertionError("Node should have a type")
 31
 32        if node.type not in ["root", "element"]:
 33            raise AssertionError(
 34                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
 35            )
 36
 37        if not all(isinstance(child, NODE) for child in node.children):
 38            raise AssertionError("Children must be a node type")
 39
 40    if hasattr(node, "properties"):
 41        if hasattr(node, "type") and node.type != "element":
 42            raise AssertionError("Node must be of type 'element' to contain 'properties'")
 43
 44        if not all(isinstance(node[prop], (int, str)) for prop in node.properties):
 45            raise AssertionError("Node 'properties' must be of type 'int' or 'str'")
 46
 47    if hasattr(node, "value") and not isinstance(node.value, str):
 48        raise AssertionError("Node 'value' must be of type 'str'")
 49
 50    return True
 51
 52
 53def parent(node: Root | Element) -> bool:
 54    """Validate a parent node based on attributes and type."""
 55    if not issubclass(type(node), Parent):
 56        raise AssertionError(
 57            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
 58        )
 59
 60    if not hasattr(node, "children") or node.children is None:
 61        raise AssertionError("Parent nodes should have the 'children' attribute")
 62
 63    if node.type == "element" and (not hasattr(node, "properties") or node.properties is None):
 64        raise AssertionError("Parent element node shoudl have the 'properties' element.")
 65
 66
 67def literal(node: Text | Comment) -> bool:
 68    """Validate a literal node based on attributes."""
 69
 70    if not issubclass(type(node), Literal):
 71        raise AssertionError(
 72            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
 73        )
 74
 75    if not hasattr(node, "value") or not isinstance(node.value, str):
 76        raise AssertionError("Literal nodes 'value' type should be 'str'")
 77
 78
 79def generated(node: NODE) -> bool:
 80    """Checks if a node has been generated. A node is concidered
 81    generated if it does not have a position.
 82
 83    Args:
 84        node (NODE): Node to check for position with.
 85
 86    Returns:
 87        bool: Whether a node has a position or not.
 88    """
 89    return not hasattr(node, "position") or node.position is None
 90
 91
 92def is_heading(node) -> bool:
 93    """Check if an element is a heading."""
 94
 95    if node.type == "element":
 96        if match(r"h[1-6]", node.tag) is not None:
 97            return True
 98        return False
 99    raise TypeError("Node must be an element.")
100
101
def is_css_link(node) -> bool:
    """Check if an element is a `link` to a css file.

    Returns `True` if `node` is a `<link>` element with a `rel` list that
    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
    as its `type`.
    """
    # Must be a `link` element that carries a `rel` property.
    if not (is_element(node, "link") and has_property(node, "rel")):
        return False

    # `rel` is a whitespace separated token list. `str.split()` without an
    # argument splits on any run of whitespace (spaces, tabs, newlines) and
    # never yields empty tokens; `str()` keeps non-string values from raising.
    if "stylesheet" not in str(node["rel"]).split():
        return False

    # Can have a `type` of `text/css` or empty or no `type`.
    return not has_property(node, "type") or node["type"] in ("", "text/css")
122
123
def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    A `<style>` element qualifies when it has no `type` property, or when
    its `type` is the empty string or `'text/css'`. Any other node yields
    `False`.
    """
    if not is_element(node, "style"):
        return False

    if not has_property(node, "type"):
        return True

    return node["type"] in ("", "text/css")
135
136
def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `True` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    # An explicit `type` decides on its own; `language` is only consulted
    # when `type` is absent, as the contract above describes.
    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    # Neither `type` nor `language`: treated as JavaScript.
    return True
156
157
158def is_element(node, *conditions: str | list) -> bool:
159    """Checks if the given node is a certain element.
160
161    When providing a str it will check that the elements tag matches.
162    If a list is provided it checks that one of the conditions in the list
163    passes.
164    """
165
166    return bool(
167        node.type == "element"
168        and any(
169            bool(
170                (isinstance(condition, str) and node.tag == condition)
171                or (isinstance(condition, list) and any(node.tag == nested for nested in condition))
172            )
173            for condition in conditions
174        )
175    )
176
177
def is_event_handler(attribute: str) -> bool:
    """Return ``True`` when an attribute name looks like an event handler.

    That is the case when the name starts with `on` and is at least `5`
    characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5
183
184
def has_property(node, attribute: str) -> bool:
    """Tell whether ``attribute`` is present in an element's ``properties``.

    Raises:
        TypeError: If ``node.type`` is not ``'element'``.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")
    return attribute in node.properties
192
193
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
    return is_element(node, *embedded_tags)
228
229
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and not empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea

    Returns:
        True if element is interactive
    """
    if is_element(node, "a"):
        return has_property(node, "href")

    # `audio` follows the same rule as `video`.
    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        # Coerce to bool so the raw `usemap` value is never returned.
        return has_property(node, "usemap") and bool(node["usemap"])

    if is_element(node, "input"):
        # An input without a `type` is not in the Hidden state, so only an
        # explicit `hidden` type makes it non-interactive.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )
261
262
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * text nodes (always)
    * area (if its parent is a map element)
    * link (if it has an itemprop attribute, or every token of its rel attribute is allowed in
      the body)
    * meta (if the itemprop attribute is present)
    * a, abbr, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em,
      embed, i, iframe, img, input, ins, kbd, label, map, mark, math, meter, noscript, object,
      output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub,
      sup, svg, template, textarea, time, u, var, video, wbr (always)

    Returns:
        True if the node is phrasing text
    """
    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True
        if not has_property(node, "rel"):
            return False

        body_ok = {
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        }
        # `str.split()` without an argument splits on any whitespace run and
        # never yields empty tokens, so repeated or leading spaces no longer
        # cause a valid `rel` to be rejected. An empty `rel` stays False.
        tokens = str(node["rel"]).split()
        return bool(tokens) and all(token in body_ok for token in tokens)

    return is_element(
        node,
        "a", "abbr", "audio", "b", "bdi", "bdo", "br", "button", "canvas",
        "cite", "code", "data", "datalist", "del", "dfn", "em", "embed", "i",
        "iframe", "img", "input", "ins", "kbd", "label", "map", "mark", "math",
        "meter", "noscript", "object", "output", "picture", "progress", "q",
        "ruby", "s", "samp", "script", "select", "slot", "small", "span",
        "strong", "sub", "sup", "svg", "template", "textarea", "time", "u",
        "var", "video", "wbr",
    )
371
372
def blank(value: Any) -> bool:
    """Report whether a value is blank.

    ``None`` is blank. Strings are stripped first and are blank when nothing
    remains. Any other value that defines ``__len__`` is blank when its
    length is zero; values without ``__len__`` are never blank.

    Args:
        value (Any): The value to check.

    Returns:
        bool: True if value is blank
    """
    if value is None:
        return True

    candidate = value.strip() if isinstance(value, str) else value

    if not hasattr(candidate, "__len__"):
        return False

    return len(candidate) == 0
def validate(node: NODE) -> bool:
    """Check that the attributes present on a node agree with its type.

    Only attributes that exist on ``node`` are inspected:

    * ``children``: the node must have a ``type`` of ``'root'`` or
      ``'element'`` and every child must be a ``NODE`` instance.
    * ``properties``: if the node has a ``type`` it must be ``'element'``,
      and every property value (read via ``node[name]``) must be an ``int``
      or a ``str``.
    * ``value``: must be a ``str``.

    Returns:
        ``True`` when every applicable check passes.

    Raises:
        AssertionError: On the first check that fails.
    """
    if hasattr(node, "children"):
        if not hasattr(node, "type"):
            raise AssertionError("Node should have a type")

        if node.type not in ("root", "element"):
            raise AssertionError(
                "Node should have a type of 'root' or 'element' to contain the 'children' attribute"
            )

        for child in node.children:
            if not isinstance(child, NODE):
                raise AssertionError("Children must be a node type")

    if hasattr(node, "properties"):
        if hasattr(node, "type") and node.type != "element":
            raise AssertionError("Node must be of type 'element' to contain 'properties'")

        for name in node.properties:
            if not isinstance(node[name], (int, str)):
                raise AssertionError("Node 'properties' must be of type 'int' or 'str'")

    if hasattr(node, "value") and not isinstance(node.value, str):
        raise AssertionError("Node 'value' must be of type 'str'")

    return True

Validate a node based on attributes and type.

def parent(node: phml.core.nodes.nodes.Root | phml.core.nodes.nodes.Element) -> bool:
def parent(node: Root | Element) -> bool:
    """Validate a parent node based on attributes and type.

    Args:
        node: Node to check; its class must derive from ``Parent``.

    Returns:
        ``True`` when every check passes.

    Raises:
        AssertionError: If the node does not inherit from ``Parent``, if its
            ``children`` attribute is missing or ``None``, or if it is of
            type ``'element'`` and its ``properties`` attribute is missing
            or ``None``.
    """
    if not isinstance(node, Parent):
        raise AssertionError(
            "Node must inherit from 'Parent'. 'Root' and 'Element' are most common."
        )

    if getattr(node, "children", None) is None:
        raise AssertionError("Parent nodes should have the 'children' attribute")

    if node.type == "element" and getattr(node, "properties", None) is None:
        raise AssertionError("Parent element node should have the 'properties' element.")

    # Return an explicit bool so the result matches the ``-> bool`` annotation
    # instead of implicitly returning ``None``.
    return True

Validate a parent node based on attributes and type.

def literal(node: phml.core.nodes.nodes.Text | phml.core.nodes.nodes.Comment) -> bool:
def literal(node: Text | Comment) -> bool:
    """Validate a literal node based on attributes.

    Args:
        node: Node to check; its class must derive from ``Literal``.

    Returns:
        ``True`` when every check passes.

    Raises:
        AssertionError: If the node does not inherit from ``Literal`` or if
            its ``value`` attribute is missing or not a ``str``.
    """
    if not isinstance(node, Literal):
        raise AssertionError(
            "Node must inherit from 'Literal'. 'Text' and 'Comment' are most common."
        )

    if not isinstance(getattr(node, "value", None), str):
        raise AssertionError("Literal nodes 'value' type should be 'str'")

    # Return an explicit bool so the result matches the ``-> bool`` annotation
    # instead of implicitly returning ``None``.
    return True

Validate a literal node based on attributes.

def generated(node: NODE) -> bool:
    """Report whether a node counts as generated.

    A node is considered generated when it has no ``position`` attribute or
    when that attribute is ``None``.

    Args:
        node (NODE): Node whose position is inspected.

    Returns:
        bool: ``True`` if the node has no position, ``False`` otherwise.
    """
    return getattr(node, "position", None) is None

Checks if a node has been generated. A node is considered generated if it does not have a position.

Arguments:
  • node (NODE): Node to check for position with.
Returns:

bool: True if the node has no position (it is generated), False otherwise.

def has_property(node, attribute: str) -> bool:
def has_property(node, attribute: str) -> bool:
    """Tell whether ``attribute`` is present in an element's ``properties``.

    Raises:
        TypeError: If ``node.type`` is not ``'element'``.
    """
    if node.type != "element":
        raise TypeError("Node must be an element.")
    return attribute in node.properties

Check to see if an element has a certain property in properties.

def is_heading(node) -> bool:
 93def is_heading(node) -> bool:
 94    """Check if an element is a heading."""
 95
 96    if node.type == "element":
 97        if match(r"h[1-6]", node.tag) is not None:
 98            return True
 99        return False
100    raise TypeError("Node must be an element.")

Check if an element is a heading.

def is_css_style(node) -> bool:
def is_css_style(node) -> bool:
    """Check if an element is a css `style` element.

    A `<style>` element qualifies when it has no `type` property, or when
    its `type` is the empty string or `'text/css'`. Any other node yields
    `False`.
    """
    if not is_element(node, "style"):
        return False

    if not has_property(node, "type"):
        return True

    return node["type"] in ("", "text/css")

Check if an element is a css style element.

Returns true if node is a <style> element that has no type, an empty type, or 'text/css' as its type.

def is_javascript(node) -> bool:
def is_javascript(node) -> bool:
    """Check if an element is a javascript `script` element.

    Returns `True` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
    `type` and a valid JavaScript `language`, or has neither.
    """
    if not is_element(node, "script"):
        return False

    # An explicit `type` decides on its own; `language` is only consulted
    # when `type` is absent, as the contract above describes.
    if has_property(node, "type"):
        return node["type"] in ("text/ecmascript", "text/javascript")

    if has_property(node, "language"):
        return node["language"] in ("ecmascript", "javascript")

    # Neither `type` nor `language`: treated as JavaScript.
    return True

Check if an element is a javascript script element.

Returns true if node is a <script> element that has a valid JavaScript type, has no type and a valid JavaScript language, or has neither.

def is_element(node, *conditions: str | list) -> bool:
159def is_element(node, *conditions: str | list) -> bool:
160    """Checks if the given node is a certain element.
161
162    When providing a str it will check that the elements tag matches.
163    If a list is provided it checks that one of the conditions in the list
164    passes.
165    """
166
167    return bool(
168        node.type == "element"
169        and any(
170            bool(
171                (isinstance(condition, str) and node.tag == condition)
172                or (isinstance(condition, list) and any(node.tag == nested for nested in condition))
173            )
174            for condition in conditions
175        )
176    )

Checks if the given node is a certain element.

When providing a str it will check that the elements tag matches. If a list is provided it checks that one of the conditions in the list passes.

def is_embedded(node: phml.core.nodes.nodes.Element) -> bool:
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )
    return is_element(node, *embedded_tags)

Check to see if an element is an embedded element.

Embedded Elements:

  • audio
  • canvas
  • embed
  • iframe
  • img
  • MathML math
  • object
  • picture
  • SVG svg
  • video
Returns:

True if embedded

def is_interactive(node: phml.core.nodes.nodes.Element) -> bool:
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present and not empty)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea

    Returns:
        True if element is interactive
    """
    if is_element(node, "a"):
        return has_property(node, "href")

    # `audio` follows the same rule as `video`.
    if is_element(node, "audio", "video"):
        return has_property(node, "controls")

    if is_element(node, "img"):
        # Coerce to bool so the raw `usemap` value is never returned.
        return has_property(node, "usemap") and bool(node["usemap"])

    if is_element(node, "input"):
        # An input without a `type` is not in the Hidden state, so only an
        # explicit `hidden` type makes it non-interactive.
        return not has_property(node, "type") or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )

Check if the element is intended for user interaction.

Conditions:

  • a (if the href attribute is present)
  • audio, video (if the controls attribute is present)
  • img (if the usemap attribute is present)
  • input (if the type attribute is not in the Hidden state)
  • button, details, embed, iframe, label, select, textarea
Returns:

True if element is interactive

def is_phrasing(node: phml.core.nodes.nodes.Element) -> bool:
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * text nodes (always)
    * area (if its parent is a map element)
    * link (if it has an itemprop attribute, or every token of its rel attribute is allowed in
      the body)
    * meta (if the itemprop attribute is present)
    * a, abbr, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em,
      embed, i, iframe, img, input, ins, kbd, label, map, mark, math, meter, noscript, object,
      output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub,
      sup, svg, template, textarea, time, u, var, video, wbr (always)

    Returns:
        True if the node is phrasing text
    """
    if isinstance(node, Text):
        return True

    if is_element(node, "area"):
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return has_property(node, "itemprop")

    if is_element(node, "link"):
        if has_property(node, "itemprop"):
            return True
        if not has_property(node, "rel"):
            return False

        body_ok = {
            "dns-prefetch",
            "modulepreload",
            "pingback",
            "preconnect",
            "prefetch",
            "preload",
            "prerender",
            "stylesheet",
        }
        # `str.split()` without an argument splits on any whitespace run and
        # never yields empty tokens, so repeated or leading spaces no longer
        # cause a valid `rel` to be rejected. An empty `rel` stays False.
        tokens = str(node["rel"]).split()
        return bool(tokens) and all(token in body_ok for token in tokens)

    return is_element(
        node,
        "a", "abbr", "audio", "b", "bdi", "bdo", "br", "button", "canvas",
        "cite", "code", "data", "datalist", "del", "dfn", "em", "embed", "i",
        "iframe", "img", "input", "ins", "kbd", "label", "map", "mark", "math",
        "meter", "noscript", "object", "output", "picture", "progress", "q",
        "ruby", "s", "samp", "script", "select", "slot", "small", "span",
        "strong", "sub", "sup", "svg", "template", "textarea", "time", "u",
        "var", "video", "wbr",
    )

Check if a node is phrasing text according to https://html.spec.whatwg.org/#phrasing-content-2.

Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs.

  • area (if it is a descendant of a map element)
  • link (if it is allowed in the body)
  • meta (if the itemprop attribute is present)
  • map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, textarea, time, u, var, video, wbr, text (true)
Returns:

True if the element is phrasing text

def is_event_handler(attribute: str) -> bool:
def is_event_handler(attribute: str) -> bool:
    """Return ``True`` when an attribute name looks like an event handler.

    That is the case when the name starts with `on` and is at least `5`
    characters long.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5

Takes an attribute name and returns true if it starts with on and its length is 5 or more.

def blank(value: Any) -> bool:
def blank(value: Any) -> bool:
    """Report whether a value is blank.

    ``None`` is blank. Strings are stripped first and are blank when nothing
    remains. Any other value that defines ``__len__`` is blank when its
    length is zero; values without ``__len__`` are never blank.

    Args:
        value (Any): The value to check.

    Returns:
        bool: True if value is blank
    """
    if value is None:
        return True

    candidate = value.strip() if isinstance(value, str) else value

    if not hasattr(candidate, "__len__"):
        return False

    return len(candidate) == 0

Takes any value type and returns whether it is blank/None. For strings if the value is stripped and is equal to '' then it is blank. Otherwise if len > 0 and is not None then not blank.

Arguments:
  • value (Any): The value to check if it is blank.
Returns:

bool: True if value is blank