Source code for badsnakes.libs.parser

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
:Purpose:   This module provides the functionality for parsing a module
            into an abstract syntax tree for analysis.

            The primary parsing work is carried out by the standard
            library's :func:`ast.parse` function.

:Platform:  Linux/Windows | Python 3.10+
:Developer: J Berendt
:Email:     development@s3dev.uk

:Comments:  n/a

:Example:
    Example code use::

        >>> from badsnakes.libs.parser import Parser

        >>> p = Parser()
        >>> p.parse(path='hello.py')

        # Access the abstract syntax tree.
        >>> p.ast_
        <ast.Module at 0x123456789012>

        # Access the code's text stream.
        >>> p.code
        <_io.StringIO at 0x123456789000>

"""

import ast
import io
import logging
import os

logger = logging.getLogger(__name__)


class Parser:
    """Using the ``ast`` built-in, parse a module's code into its various
    elements.

    AST elements which are used for code analysis are:

        - **Arguments**: Arguments which are passed into function calls.

            - Generally used to detect base64 strings (or the like)
              being passed into functions.

        - **Assignments**: Generally used to detect unusually long
          strings.
        - **Attributes**: Used to detect access to modules which are
          generally used for suspicious activity.
        - **Function calls**: Used to detect calls to functions which
          may be suspicious.
        - **Function declarations**: Used to detect unusual (obfuscated)
          function names in the module.
        - **Imports**: Used for capturing a module's import statements
          (or the lack thereof).
        - **Strings**: Used to capture the strings used in a module.

    """

    __slots__ = ('_ast', '_code', '_path')

    def __init__(self):
        """Module parsing class initialiser."""
        self._ast = None    # The ast.Module object.
        self._code = None   # Module code as an io.StringIO object.
        self._path = None   # Path to the module being parsed.

    @property
    def ast_(self) -> ast.Module:
        """Public accessor to the module's abstract syntax tree.

        This is ``None`` until :meth:`parse` has successfully parsed a
        module.

        """
        return self._ast

    @property
    def code(self) -> io.StringIO:
        """Public accessor to the code as a text stream.

        This is ``None`` until :meth:`parse` has read a module.

        """
        return self._code

    @property
    def path(self) -> str:
        """Public accessor to the module's file path.

        This is ``None`` until :meth:`parse` has been called.

        """
        return self._path

    def display_syntax_tree(self):
        """Display the syntax tree as parsed by ``ast``."""
        print(ast.dump(self._ast, indent=4))

    def parse(self, path: str):
        """Parse a module into an abstract syntax tree.

        Additionally, the code itself is stored into the :attr:`_code`
        attribute for additional analysis as an ``_io.StringIO`` text
        stream object.

        If the file's content is not valid Python source, the
        ``SyntaxError`` raised by :func:`ast.parse` is logged and
        suppressed; the code stream is still populated, and
        :attr:`ast_` is left as ``None``.

        Args:
            path (str): Full path to the module.

        """
        self._path = os.path.realpath(path)
        # Discard the results of any earlier parse up front. Otherwise a
        # failure below would leave the previous module's tree (and/or
        # code stream) paired with the new path.
        self._ast = None
        self._code = None
        with open(path, 'r', encoding='utf-8') as f:
            data = f.read()
        self._code = io.StringIO(data)
        # Only the parse step can raise SyntaxError, so only it is
        # guarded.
        try:
            self._ast = ast.parse(data)
        except SyntaxError:
            # Use the module-level logger (not the root logger) so the
            # message honours this module's logging configuration.
            logger.info('Not a Python code module, cannot parse: %s', path)

    def rewind(self):
        """Rewind the code stream to the beginning.

        The code stream must already have been populated by
        :meth:`parse`.

        """
        self._code.seek(0)