#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
:App: badsnakes
:Purpose: The badsnakes project is designed to help detect malware in
Python projects.
The project accepts the following formats for analysis:
- Directories
- Python modules
- Python wheels
:Platform: Linux/Windows | Python 3.10+
:Developer: J Berendt
:Email: development@s3dev.uk
:Comments: n/a
:Examples:
Example for analysing a single module::
>>> from badsnakes.libs.module import Module
>>> from badsnakes.libs.reporter import ReporterModule
>>> path = '/path/to/project/module.py'
>>> # Analyse the module.
>>> m = Module(path=path)
>>> m.analyse()
>>> # Report the findings.
>>> r = ReporterModule(modules=[m])
>>> r.report()
Example for analysing multiple modules::
>>> import os
>>> from glob import glob
>>> from badsnakes.libs.module import Module
>>> from badsnakes.libs.reporter import ReporterModule
>>> modules = []
>>> paths = glob(os.path.join('/.../site-packages/pip/_internal/', '*.py'))
>>> # Call Module.analyse for each path and store each module object.
>>> for path in paths:
>>>     m = Module(path=path)
>>>     m.analyse()
>>>     modules.append(m)
>>> # Report all findings at once.
>>> r = ReporterModule(modules=modules)
>>> r.report()
Example for analysing a Python wheel::
>>> from badsnakes.libs.collector import Collector
>>> from badsnakes.libs.module import Module
>>> from badsnakes.libs.reporter import ReporterModule
>>> modules = []
>>> path = '../dist/badsnakes-0.1.0-py3-none-any.whl'
>>> # Collect all non-binary files from the wheel.
>>> c = Collector(paths=path)
>>> c.collect()
>>> for pkg in c.files:
>>>     # Call Module.analyse for each path and store each module object.
>>>     for path in pkg:
>>>         # Analyse the module.
>>>         m = Module(path=path)
>>>         m.analyse()
>>>         modules.append(m)
>>> # Report the findings.
>>> r = ReporterModule(modules=modules)
>>> r.report()
"""
# pylint: disable=wrong-import-position
import os
import sys
# Update sys.path for project/relative imports.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
# imports
import logging
import traceback
# locals
from badsnakes.libs.argparser import argparser as ap
from badsnakes.libs.collector import Collector
from badsnakes.libs.containers import Severity
from badsnakes.libs.enums import ExCode
from badsnakes.libs.module import Module
from badsnakes.libs.reporter import ReporterModule
from badsnakes.libs.logger import Logger
from badsnakes.libs.utilities import utilities
class BadSnakes:
    """Primary project entry-point and controller class."""

    def __init__(self) -> None:
        """BadSnakes class initialiser.

        The command line arguments are parsed (``ap.parse()``) as the
        final step of initialisation.

        :Attrs:
            - _clf: Maximum classification from all files analysed. This
              is reported at the end.
            - _files: List of files to be analysed. This same list is
              used for all analysis types and is populated by the
              :meth:`_collect_files` method.
            - _modules: List of modules analysed. If logging is invoked,
              this list of modules is given to the logger.
            - _collector: The collector instance created by
              :meth:`_collect_files`. It is stored on the instance to
              keep the wheel collector's temporary directory alive.

        """
        self._clf = Severity.UNKNOWN    # Maximum classification for all files.
        self._files = None              # Files to be analysed.
        self._modules = []              # Collection of modules analysed.
        self._collector = None          # Keep the wheel collector's tmpdir alive.
        ap.parse()

    def main(self) -> None:
        """Start a badsnakes analysis.

        :Tasks:
            - Collect files to be analysed.
            - Determine if specific or generic logging should be used.
            - Analyse each collected file.
            - Report the overall (worst) classification, per package.
            - Create a log file, if instructed by the CLI by the
              ``--log`` argument.

        Any exception raised during the run is logged as critical
        (traceback plus the file being processed at the time) and the
        process exits with ``ExCode.ERR_MAIN``.

        """
        # Tracks the file currently being analysed, for error reporting.
        file = None
        try:
            self._collect_files()
            # Each package will have its own log file.
            if len(ap.args.PATH) == len(self._files):
                logging.debug('Log files will be package specific.')
                for path, pkg in zip(ap.args.PATH, self._files):
                    self._clf = Severity.UNKNOWN    # --- Reinitialise
                    self._modules = []              # --/
                    for file in pkg:
                        self._analyse(path=file)
                    self._report_worst_classification()
                    self._create_log(path=path)
            # A single log will contain all results.
            else:
                logging.debug('Log files will be package generic.')
                for pkg in self._files:
                    for file in pkg:
                        self._analyse(path=file)
                # NOTE(review): reporting/logging once, after all packages,
                # is inferred from the "single log" intent - confirm.
                self._report_worst_classification()
                self._create_log()
        # General project error handler.
        except Exception:  # pragma: nocover
            print()
            logging.critical('The following error occurred:\n\n%s\n'
                             'Current file: %s\n\n'
                             'Processing aborted.\n',
                             traceback.format_exc(),
                             file or None)  # In the event of an empty package.
            sys.exit(ExCode.ERR_MAIN.value)

    def _analyse(self, path: str) -> None:
        """Analyse the provided module file.

        Args:
            path (str): Full path to the file to be analysed.

        :Tasks:
            - Create a :class:`~badsnakes.libs.module.Module` object and
              analyse.
            - Report the findings (verbose/non-verbose).
            - Set the maximum (worst) classification.
            - Store the analysed module in :attr:`_modules`.

        """
        logging.debug('Analysing file: %s', os.path.basename(path))
        m = Module(path=path)
        m.analyse()
        # NOTE(review): a single module (not a list) is passed here;
        # presumably ReporterModule accepts both - confirm.
        r = ReporterModule(modules=m)
        if ap.args.verbose:
            r.report()
        else:
            r.report_classification_only()
        # Assumes classifications are orderable so max() yields the worst.
        self._clf = max(self._clf, m.classification)
        self._modules.append(m)

    def _collect_files(self) -> None:
        """Collect all files to be analysed.

        This method is used to populate the :attr:`_files` attribute,
        which contains the files to be analysed.

        :Logic:
            Create an instance of the
            :class:`badsnakes.libs.collector.Collector` class and call
            the :meth:`~badsnakes.libs.collector.Collector.collect`
            method.

            The Collector class is designed to 1) identify the input
            type, and 2) return the associated file(s).

            The list of files returned by the collector is assigned to
            the :attr:`_files` attribute.

            Finally, any paths listed by the ``--exclude_dirs`` argument
            are removed from the :attr:`_files` list.

        This method must store the collector into a class attribute to
        preserve the life of the wheel collector's temporary directory
        object.

        """
        self._collector = Collector(paths=ap.args.PATH)
        self._collector.collect()
        self._files = self._collector.files
        self._exclude_directories()

    def _create_log(self, path: str = 'badsnakes') -> None:
        """Create a log file if instructed via the CLI.

        If the ``--log`` argument was passed to the CLI, this method will
        be triggered. Otherwise, nothing is done.

        Args:
            path (str, optional): Path from which the log's filename is
                to be derived. Defaults to 'badsnakes'.

        """
        if ap.args.log:
            path = utilities.derive_log_filename(path=path)
            logging.debug('Log path: %s', path)
            logger = Logger(path=path, modules=self._modules)
            logger.write()

    def _exclude_directories(self) -> None:
        """Remove any paths starting in an ``--exclude_dirs`` path.

        If the ``--exclude_dirs`` argument was not provided,
        :attr:`_files` is left untouched. Otherwise, each package's file
        list is filtered by ``utilities.exclude_dirs`` and the files
        which were dropped are listed in the debug log.

        """
        # pylint: disable=consider-using-f-string
        if ap.args.exclude_dirs:
            files = []
            for pkg in self._files:
                # This is intentionally verbose to enable debug logging.
                keep = utilities.exclude_dirs(source=pkg, exclude=ap.args.exclude_dirs)
                files.append(keep)
                # Sorted (by string form) so the debug listing is stable
                # between runs; a raw set has no guaranteed order.
                excluded = sorted(set(pkg) - set(keep), key=str)
                logging.debug('Files excluded:')
                logging.debug('%s', '\n\t '.join(map('- {}'.format, excluded)))
            self._files = files

    def _report_worst_classification(self) -> None:
        """Report the worst overall classification.

        The name of the classification held in :attr:`_clf` is printed
        to the terminal.

        """
        print(f'\nOverall (worst) classification: {self._clf.name}\n')
def main():  # pragma: nocover
    """Entry-point exposed for the executable.

    The ``"badsnakes.badsnakes:main"`` value is set in
    ``pyproject.toml``'s ``[project.scripts]`` table as the
    entry-point for the installed executable.

    A :class:`BadSnakes` controller is created and its
    :meth:`~BadSnakes.main` method is called to run the analysis.

    """
    bs = BadSnakes()
    bs.main()


# %% Prevent from running on module import.

# Enable running as either a script (dev/debugging) or as an executable.
if __name__ == '__main__':  # pragma: nocover
    main()