Source code for pycollect.pycollect

import inspect
import os
import re
from typing import Iterable, Optional, Set


class PythonFileCollector:
    """
    PythonFileCollector provides method "collect" to collect files while
    applying exclusion patterns to files and directories.

    Exclusion patterns are in respect to file and directory names only, NOT
    taking into account the absolute nor the relative file or directory path.

    When not using regex patterns, a single wildcard, "*", can be used
    anywhere in a pattern to filter names "starting with" and/or "ending
    with". A pattern without a wildcard must match the name exactly. Also, a
    single exclamation mark, "!", can be used at the beginning of the pattern
    to negate it. These only apply when :param use_regex_patterns: is False.

    Notice that using regex patterns may be slower.

    :param use_regex_patterns: (default: False) flag to indicate whether or
        not to use regex to match patterns. When this flag is set to False the
        PythonFileCollector._WILDCARD (default: "*") character is interpreted
        as wildcard and patterns starting with the
        PythonFileCollector._NEGATION (default: "!") character are negated.
    :param additional_file_exclusion_patterns: (default: None) additional
        patterns to filter out of collection files. In addition to the
        PythonFileCollector._DEFAULT_FILE_EXCLUSION_PATTERNS or
        PythonFileCollector._DEFAULT_FILE_EXCLUSION_REGEX_PATTERNS any file
        that matches these patterns will be excluded from collection.
    :param additional_dir_exclusion_patterns: (default: None) additional
        patterns to filter out of collection directories. In addition to the
        PythonFileCollector._DEFAULT_DIR_EXCLUSION_PATTERNS or
        PythonFileCollector._DEFAULT_DIR_EXCLUSION_REGEX_PATTERNS any
        directory that matches these patterns will be excluded from
        collection.
    """

    _WILDCARD = "*"
    _NEGATION = "!"
    _INIT_FILE = "__init__.py"

    _DEFAULT_FILE_EXCLUSION_PATTERNS = {"!*.py", ".*", "~*"}
    _DEFAULT_DIR_EXCLUSION_PATTERNS = {
        "__pycache__",
        "tmp",
        "build",
        "dist",
        "sdist",
        "wheelhouse",
        "develop-eggs",
        "parts",
        "eggs",
        "var",
        "htmlcov",
        "bin",
        "venv*",
        "pyvenv*",
        ".*",
        "*~",
    }

    _DEFAULT_FILE_EXCLUSION_REGEX_PATTERNS = {r"(?!(.*\.py(?!.)))", r"^\..*", r"^\~.*"}
    _DEFAULT_DIR_EXCLUSION_REGEX_PATTERNS = {
        r"^__pycache__(?!.)",
        r"^tmp(?!.)",
        r"^build(?!.)",
        r"^dist(?!.)",
        r"^sdist(?!.)",
        r"^wheelhouse(?!.)",
        r"^develop-eggs(?!.)",
        r"^parts(?!.)",
        r"^eggs(?!.)",
        r"^var(?!.)",
        r"^htmlcov(?!.)",
        r"^bin(?!.)",
        r"^venv.*",
        r"^pyvenv.*",
        r"^\..*",
        r".*~(?!.)",
    }

    def __init__(
        self,
        use_regex_patterns: bool = False,
        additional_file_exclusion_patterns: Optional[Iterable[str]] = None,
        additional_dir_exclusion_patterns: Optional[Iterable[str]] = None,
    ):
        self.enable_regex_patterns = use_regex_patterns

        # Copy the class-level defaults so per-instance additions never leak
        # into other collectors.
        if self.enable_regex_patterns:
            self.file_exclusion_patterns = (
                self._DEFAULT_FILE_EXCLUSION_REGEX_PATTERNS.copy()
            )
            self.dir_exclusion_patterns = (
                self._DEFAULT_DIR_EXCLUSION_REGEX_PATTERNS.copy()
            )
        else:
            self.file_exclusion_patterns = self._DEFAULT_FILE_EXCLUSION_PATTERNS.copy()
            self.dir_exclusion_patterns = self._DEFAULT_DIR_EXCLUSION_PATTERNS.copy()

        if additional_file_exclusion_patterns:
            self.file_exclusion_patterns.update(additional_file_exclusion_patterns)
        if additional_dir_exclusion_patterns:
            self.dir_exclusion_patterns.update(additional_dir_exclusion_patterns)

    @staticmethod
    def _get_caller_path() -> str:
        """
        Return the directory of the file that called the method which called
        this helper (i.e. the caller of "collect").

        :return: Absolute path of the directory containing the caller's file.
        """
        # context=0: only the file names are needed, so skip reading source
        # lines for every frame.
        frames = inspect.stack(0)
        # Frame 0 is this helper, frame 1 is the method using it ("collect"),
        # frame 2 is that method's caller. Fall back to the outermost frame
        # if the stack happens to be shallower than that.
        caller_frame = frames[min(2, len(frames) - 1)]
        caller_abs_path = os.path.abspath(caller_frame.filename)
        return os.path.dirname(caller_abs_path)

    def _has_init_file(self, dir_path: str, follow_symlinks: bool) -> bool:
        """Tell whether the directory directly contains an "__init__.py" file."""
        # The context manager guarantees the directory handle is released
        # even though "any" stops iterating at the first hit.
        with os.scandir(dir_path) as entries:
            return any(
                entry.name == self._INIT_FILE
                and entry.is_file(follow_symlinks=follow_symlinks)
                for entry in entries
            )

    def collect(
        self,
        search_path: Optional[str] = None,
        recursion_limit: Optional[int] = None,
        follow_symlinks: bool = True,
    ) -> Set[os.DirEntry]:
        """
        Method to perform Python files collection in the specified search
        path, respecting exclusion patterns set to the class object.

        Sub-directories are only descended into when they are not excluded
        and directly contain an "__init__.py" file.

        :param search_path: (default: uses the caller's path) absolute or
            relative path from which to search for when collecting Python
            files. This is expected to be a directory.
        :param recursion_limit: (default: None) directory recursion limit.
            The directory indicated by the :param search_path: is considered
            level 0 of recursion.
        :param follow_symlinks: (default: True) boolean indicating whether or
            not to follow symbolic links when collecting Python files.
        :return: A set of DirEntry instances referring to each collected file
            is returned.
        """
        if search_path is None:
            search_path = self._get_caller_path()

        collected_files = set()  # type: Set[os.DirEntry]
        collected_dirs = set()  # type: Set[os.DirEntry]

        with os.scandir(search_path) as entries:
            for entry in entries:
                if entry.is_file(follow_symlinks=follow_symlinks):
                    if not self._should_exclude_file(entry):
                        collected_files.add(entry)
                elif entry.is_dir(follow_symlinks=follow_symlinks):
                    if not self._should_exclude_dir(entry):
                        collected_dirs.add(entry)

        # A limit of 0 (or less) means "do not descend any further".
        if recursion_limit is not None and recursion_limit <= 0:
            return collected_files

        next_limit = None if recursion_limit is None else recursion_limit - 1
        for subdir in collected_dirs:
            if not self._has_init_file(subdir.path, follow_symlinks):
                continue
            collected_files.update(
                self.collect(
                    search_path=subdir.path,
                    recursion_limit=next_limit,
                    follow_symlinks=follow_symlinks,
                )
            )

        return collected_files

    def _should_exclude_file(self, entry: os.DirEntry) -> bool:
        """Tell whether the file's name matches any file exclusion pattern."""
        filename = entry.name  # type: str
        return any(
            self._matches_pattern(pattern, filename)
            for pattern in self.file_exclusion_patterns
        )

    def _should_exclude_dir(self, entry: os.DirEntry) -> bool:
        """Tell whether the directory's name matches any dir exclusion pattern."""
        dirname = entry.name  # type: str
        return any(
            self._matches_pattern(pattern, dirname)
            for pattern in self.dir_exclusion_patterns
        )

    def _matches_pattern(self, pattern: str, name: str) -> bool:
        """
        Check a single name against a single exclusion pattern.

        In regex mode the pattern is applied with "re.match" (anchored at the
        start of the name). Otherwise a leading "!" negates the pattern, a
        "*" splits it into a required prefix and a required suffix, and a
        pattern without "*" must equal the name exactly.

        :param pattern: exclusion pattern (regex or wildcard syntax, depending
            on the mode chosen at construction time).
        :param name: file or directory name to test.
        :return: True when the name matches the pattern.
        """
        if self.enable_regex_patterns:
            return bool(re.match(pattern, name))

        negate = pattern.startswith(self._NEGATION)
        if negate:
            pattern = pattern[len(self._NEGATION):]

        if self._WILDCARD not in pattern:
            # No wildcard: exact match, consistent with the "^name(?!.)"
            # regex defaults (otherwise "bin" would also match "binbin").
            matches_pattern = name == pattern
        else:
            pieces = pattern.split(self._WILDCARD)
            prefix, suffix = pieces[0], pieces[-1]
            # The length guard stops prefix and suffix from overlapping,
            # e.g. "a*a" must not match the single-character name "a".
            matches_pattern = (
                len(name) >= len(prefix) + len(suffix)
                and name.startswith(prefix)
                and name.endswith(suffix)
            )

        return not matches_pattern if negate else matches_pattern