#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
:Purpose: This module provides file collection functionality to the
project.
Specifically, this module is called by
:class:`badsnakes.badsnakes.BadSnakes.main` to populate the
'files list' which holds all files to be analysed.
The CLI argument ``PATH`` is passed into this module, which
then traverses either the list of files, the directory or
extracts the wheel, in efforts to determine the files which
should be analysed. These files are passed back to the caller
via the :attr:`files` property.
:Platform: Linux/Windows | Python 3.10+
:Developer: J Berendt
:Email: development@s3dev.uk
:Comments: n/a
:Examples:
Collect plain-text files from a given *directory*::
>>> from badsnakes.libs.collector import Collector
>>> c = Collector(paths=['/path/to/files'])
>>> c.collect()
>>> c.files
[['/path/to/files/project.py',
'/path/to/files/script.sh']]
Collect plain-text files from a Python *wheel*::
>>> from badsnakes.libs.collector import Collector
>>> c = Collector(paths=['/path/to/project-0.7.3-py3-none-any.whl'])
>>> c.collect()
>>> c.files
[['/tmp/tmpqnm6yka2/project/module00.py',
'/tmp/tmpqnm6yka2/project/module01.py',
'/tmp/tmpqnm6yka2/project/module02.py',
...,
'/tmp/tmpqnm6yka2/project/script.sh',
'/tmp/tmpqnm6yka2/project/file.txt',
...,
'/tmp/tmpqnm6yka2/project/module08.py',
'/tmp/tmpqnm6yka2/project/module09.py',
'/tmp/tmpqnm6yka2/project/module10.py']]
"""
# pylint: disable=import-error
import logging
import os
import re
import tempfile
import zipfile
from glob import glob
# locals
from badsnakes.libs.config import systemcfg
from badsnakes.libs.utilities import utilities
class MixedTypesError(Exception):
    """Custom error class raised for mixed ``PATH`` type errors.

    Raised by :meth:`Collector.collect` when the provided paths cannot
    be identified as all directories, all wheels or all plain-text
    files.

    """
[docs]
class _CollectorBase:
"""Private base class providing file collection functionality.
Args:
path (str): Full path to the module, directory or wheel for
collection.
"""
def __init__(self, path: str):
"""Collector base class initialiser."""
self._path = path
self._files = []
@property
def files(self) -> list:
"""Accessor to the list of collected files."""
return self._files
[docs]
def collect(self):
"""Collect all files for this class-type."""
# Dummy method to be overridden by the specialising class.
class _CollectorDirectory(_CollectorBase):
    """Collect all files for analysis from the given directory.

    This *private* class is not part of the public interface. Please call the
    :class:`Collector` class instead.

    """

    def collect(self, path: str=None):
        """Collect all files for this class-type.

        Args:
            path (str, optional): Directory path. This argument was
                originally implemented for use by :class:`_CollectorWheel`
                to enable directory traversal using existing logic.
                If not provided (or empty), the path given on
                instantiation is used. Defaults to None.

        :Logic:

            1) Using ``glob.glob`` recursively, all files (including
               hidden files) are collected.
            2) Next, remove any path which matches the exclusion
               pattern *or* is not plain-text. See *Tip* below.
            3) Map ``os.path.realpath`` to all remaining files to expand
               the filepaths.

        .. tip::

            The excluded directories are maintained by the list in
            ``config.toml`` under the ``system.exclude_dirs`` key. The
            entries are OR-ed together into a single regular expression
            which is searched for anywhere in each globbed path.

        """
        path = path if path else self._path
        exclude = systemcfg['exclude_dirs']
        # An empty exclusion list must exclude nothing. Joining an empty
        # list yields the pattern '()', which matches *every* path and
        # would silently discard all collected files.
        rexp = re.compile(f"({'|'.join(exclude)})") if exclude else None
        # NOTE: glob's ``include_hidden`` argument requires Python 3.11+.
        candidates = glob(os.path.join(path, '**'), include_hidden=True, recursive=True)
        self._files = [os.path.realpath(f)
                       for f in candidates
                       if (rexp is None or not rexp.search(f)) and utilities.istext(f)]
class _CollectorWheel(_CollectorBase):
    """Collect all files for analysis from a Python wheel.

    This *private* class is not part of the public interface. Please call the
    :class:`Collector` class instead.

    Args:
        path (str): Full path to the wheel file.

    """

    def __init__(self, path: str):
        """Wheel collector class initialiser."""
        super().__init__(path=path)
        self._tmpdir = None  # Used to keep the tmpdir object alive.

    @property
    def tmpdir(self) -> tempfile.TemporaryDirectory:
        """Accessor to the temporary directory object.

        This is ``None`` until :meth:`collect` has been called.

        """
        return self._tmpdir

    def collect(self):
        """Unzip a wheel file and collect files.

        :Logic:

            1) Create a temporary directory object (using ``tempfile``).
            2) Using ``zipfile``, unzip the wheel into the temporary
               directory.
            3) Create an instance of the :class:`_CollectorDirectory`
               class and pass the path to the temp directory into the
               class for file collection.
            4) Store the list of collected files into the :attr:`_files`
               attribute.

        :Temp Directory:

            The :class:`tempfile.TemporaryDirectory` object created by
            this method is *not* explicitly closed, as the directory must
            exist for analysing the files. Therefore, the temp directory
            is removed when the ``tmpdir`` object has been destroyed,
            generally on program completion.

            For this reason, the object must be kept 'alive' in the class
            instance, and therefore *cannot* be a local variable. It is
            exposed through the :attr:`tmpdir` property so the caller
            (:meth:`Collector._collect_from_wheel`) can hold a reference
            to it as well.

        """
        # pylint: disable=consider-using-with
        self._tmpdir = tempfile.TemporaryDirectory()  # No with to keep the object alive.
        tmppath = self._tmpdir.name
        # NOTE(review): the archive is unpacked in full with no size limit;
        # consider a cap if wheels from untrusted sources are analysed.
        with zipfile.ZipFile(file=self._path, mode='r') as zf:
            zf.extractall(path=tmppath)
        logging.debug('Unpacked wheel files (from %s):', tmppath)
        # The listing needs a full recursive directory walk; only pay for
        # it when the message will actually be emitted.
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            unpacked = glob(os.path.join(tmppath, '**'), recursive=True, include_hidden=True)
            logging.debug('%s', '\n\t '.join(f'- {f}' for f in unpacked))
        # Re-use the directory traversal logic on the unpacked content.
        _c = _CollectorDirectory(path=None)
        _c.collect(path=tmppath)
        self._files = _c.files
[docs]
class Collector:
"""Primary file collection interface class.
Args:
paths (list): A list of file paths or directories from the
argument parser.
Note:
On instantiation, all elements in the ``paths`` list argument are
expanded to their realpath and tested to ensure they exist.
"""
def __init__(self, paths: list):
"""File collector class initialiser."""
self._paths = paths if isinstance(paths, (list, tuple)) else [paths]
self._paths = list(map(os.path.realpath, self._paths))
self._files = [] # Files to be processed.
self._tmpdirs = [] # Used to keep the wheel's temp dir object alive.
self._checks()
@property
def files(self) -> list:
"""Accessor to the list of Python files to be analysed.
Note:
This property is a *list of lists*.
Each outer list represents a wheel or a directory, with each
inner list representing the files contained therein.
"""
return self._files
[docs]
def collect(self):
"""Collect files for analysis from the provided paths.
:Criteria:
Using the private :meth:`_identify` method, the file
collection is routed to the appropriate file collector based
on the type of path provided to the ``paths`` argument on
instantiation.
- **Directory**: All paths in the :attr:`_paths` attribute
must be directories.
- **Module**: All paths in the :attr:`_paths` attribute must
be plain-text files.
- **Wheel**: All paths in the :attr:`_paths` attributes must
be Python wheels, or zip files.
Only files of the same type (directory, module or wheel) can
be collected at the same time, otherwise a ``ValueError`` is
raised.
Raises:
MixedTypesError: Raised if the :attr:`_paths` attribute contains
a mix of the types listed above.
"""
match self._identify():
case 'dir':
self._collect_from_directory()
case 'modules':
self._collect_from_files()
case 'wheel':
self._collect_from_wheel()
case _:
msg = ('Invalid file types (or mix of file types) provided. '
'All files provided must be of the same type (module, wheel or directory).')
# raise ValueError(msg)
raise MixedTypesError(msg)
[docs]
def _checks(self):
"""Perform pre-collection checks.
:Checks:
- All files exist.
Raises:
FileNotFoundError: Raised if any file in ``paths`` does not
exist.
"""
for p in self._paths:
if not os.path.exists(p):
raise FileNotFoundError(f'File not found: {p}')
[docs]
def _collect_from_directory(self):
"""Collect all plain-text files from a directory.
Before this method is called, all paths are tested to ensure
they are directories.
"""
logging.debug('Collecting files from a directory ...')
for dir_ in self._paths:
c = _CollectorDirectory(path=dir_)
c.collect()
self._files.append(c.files)
[docs]
def _collect_from_files(self):
"""Collect all plain-text files.
As the realpath conversion and file exists check have already
been performed, this method can simply append the :attr:`_paths`
argument to :attr:`_files`, for the caller's use.
"""
logging.debug('Collecting files from modules ...')
self._files.append(self._paths)
[docs]
def _collect_from_wheel(self):
"""Collect all plain-text files from wheels.
Before this method is called, all paths are tested to ensure
they are wheels (or .zip files).
"""
logging.debug('Collecting files from wheels ...')
for wheel in self._paths:
c = _CollectorWheel(path=wheel)
c.collect()
self._files.append(c.files)
self._tmpdirs.append(c.tmpdir) # Keep the tmpdir object alive.
[docs]
def _identify(self) -> str:
"""Identify the type of collection to take place.
Returns:
str: One of the following strings are returned, based on the
content of the ``paths`` argument:
- Directory: 'dir'
- Python modules: 'modules'
- Wheel: 'wheel'
- Anything else: 'invalid'
"""
# These tests are ordered by fastest to slowest.
if self._isdir():
return 'dir'
if self._iswheel():
return 'wheel'
if self._istext():
return 'modules'
return 'invalid'
[docs]
def _isdir(self) -> bool:
"""Test if *all* elements of ``paths`` are directories.
Returns:
bool: True if *all* paths are directories, otherwise False.
"""
return all(map(os.path.isdir, self._paths))
[docs]
def _istext(self) -> bool:
"""Test if *all* elements of ``paths`` are plain-text files.
Returns:
bool: True if *all* elements of ``paths`` are plain-text
files, otherwise False.
"""
# return not any(map(utilities.isbinary, self._paths))
return all(map(utilities.istext, self._paths))
[docs]
def _iswheel(self) -> bool:
"""Test if *all* elements of ``paths`` are Python wheels.
Note:
A file is tested as a wheel by checking the first four bytes
of the file itself, *not* using the file extension. As such
a ``.zip`` file will pass this test as well.
Returns:
bool: True if *all* elements of ``paths`` are Python wheels
(or ZIP archives), otherwise False.
"""
if all(map(os.path.isfile, self._paths)): # Required to raise mixed-types error.
return all(map(utilities.iszip, self._paths))
return False