Source code for pycollect.pycollect

import inspect
import os
import re
from typing import Iterable, Optional, List


class PythonFileCollector:
    """
    Collect Python files from a directory tree using name-based filters.

    Files and directories are filtered by their base name against inclusion
    and exclusion patterns. An entry is kept when it matches at least one
    inclusion pattern and no exclusion pattern. Patterns are either simple
    globs, where ``*`` is the only wildcard and the whole name must match, or
    regular expressions applied with ``re.match`` when
    ``enable_regex_patterns`` is true.
    """

    _WILDCARD = "*"
    _INIT_FILE = "__init__.py"

    # Defaults used in glob mode.
    _DEFAULT_FILE_INCLUSION_PATTERNS = ["*.py"]
    _DEFAULT_FILE_EXCLUSION_PATTERNS = [".*", "~*"]
    _DEFAULT_DIR_INCLUSION_PATTERNS = ["*"]
    _DEFAULT_DIR_EXCLUSION_PATTERNS = [
        "__pycache__",
        "tmp",
        "build",
        "dist",
        "sdist",
        "wheelhouse",
        "develop-eggs",
        "parts",
        "eggs",
        "var",
        "htmlcov",
        "bin",
        "venv*",
        "pyvenv*",
        ".*",
        "*~",
    ]

    # Defaults used in regex mode (``(?!.)`` anchors the end of the name).
    _DEFAULT_FILE_INCLUSION_REGEX_PATTERNS = [r".*\.py(?!.)"]
    _DEFAULT_FILE_EXCLUSION_REGEX_PATTERNS = [r"^\..*", r"^\~.*"]
    _DEFAULT_DIR_INCLUSION_REGEX_PATTERNS = [r".*"]
    _DEFAULT_DIR_EXCLUSION_REGEX_PATTERNS = [
        r"^__pycache__(?!.)",
        r"^tmp(?!.)",
        r"^build(?!.)",
        r"^dist(?!.)",
        r"^sdist(?!.)",
        r"^wheelhouse(?!.)",
        r"^develop-eggs(?!.)",
        r"^parts(?!.)",
        r"^eggs(?!.)",
        r"^var(?!.)",
        r"^htmlcov(?!.)",
        r"^bin(?!.)",
        r"^venv.*",
        r"^pyvenv.*",
        r"^\..*",
        r".*~(?!.)",
    ]

    def __init__(
        self,
        enable_regex_patterns: bool = False,
        file_inclusion_patterns: Optional[Iterable[str]] = None,
        file_exclusion_patterns: Optional[Iterable[str]] = None,
        dir_inclusion_patterns: Optional[Iterable[str]] = None,
        dir_exclusion_patterns: Optional[Iterable[str]] = None,
    ):
        """
        Configure the collector.

        :param enable_regex_patterns: Interpret every pattern as a regular
            expression instead of a ``*`` glob. Also selects which set of
            default patterns is used.
        :param file_inclusion_patterns: Patterns a file name must match (at
            least one). ``None`` selects the defaults.
        :param file_exclusion_patterns: Patterns that reject a file name.
            ``None`` selects the defaults.
        :param dir_inclusion_patterns: Patterns a directory name must match
            (at least one). ``None`` selects the defaults.
        :param dir_exclusion_patterns: Patterns that reject a directory name.
            ``None`` selects the defaults.
        """
        self._enable_regex_patterns = enable_regex_patterns
        self._file_inclusion_patterns = self._resolve_patterns(
            file_inclusion_patterns,
            self._DEFAULT_FILE_INCLUSION_PATTERNS,
            self._DEFAULT_FILE_INCLUSION_REGEX_PATTERNS,
        )
        self._file_exclusion_patterns = self._resolve_patterns(
            file_exclusion_patterns,
            self._DEFAULT_FILE_EXCLUSION_PATTERNS,
            self._DEFAULT_FILE_EXCLUSION_REGEX_PATTERNS,
        )
        self._dir_inclusion_patterns = self._resolve_patterns(
            dir_inclusion_patterns,
            self._DEFAULT_DIR_INCLUSION_PATTERNS,
            self._DEFAULT_DIR_INCLUSION_REGEX_PATTERNS,
        )
        self._dir_exclusion_patterns = self._resolve_patterns(
            dir_exclusion_patterns,
            self._DEFAULT_DIR_EXCLUSION_PATTERNS,
            self._DEFAULT_DIR_EXCLUSION_REGEX_PATTERNS,
        )

    def _resolve_patterns(
        self,
        custom: Optional[Iterable[str]],
        glob_defaults: List[str],
        regex_defaults: List[str],
    ) -> List[str]:
        """Return ``custom`` if given, else the defaults for the active mode."""
        if custom is None:
            custom = regex_defaults if self._enable_regex_patterns else glob_defaults
        # Copy into a list: the patterns are iterated once per directory
        # entry, so a one-shot iterable (e.g. a generator) would be exhausted
        # after the first check, and the class-level defaults must not be
        # aliased by instances.
        return list(custom)

    @staticmethod
    def _get_caller_path() -> str:
        """
        Return the directory of the file that called the public method.

        Must be invoked directly from the public method: the stack is
        ``[0]`` this helper, ``[1]`` the public method, ``[2]`` its caller.
        """
        # context=0 skips reading source lines for every frame.
        caller_filename = inspect.stack(0)[2].filename
        return os.path.dirname(os.path.abspath(caller_filename))

    def collect_python_files(
        self,
        search_path: Optional[str] = None,
        recursion_limit: Optional[int] = None,
        follow_symlinks: bool = True,
    ) -> List[os.DirEntry]:
        """
        Collect the files under ``search_path`` that pass the filters.

        Sub-directories are descended into only when they pass the directory
        filters and directly contain an ``__init__.py`` file.

        :param search_path: Directory to scan. ``None`` means the directory
            of the file containing the calling code.
        :param recursion_limit: Maximum depth of sub-directories to descend
            into; ``0`` scans only ``search_path``, ``None`` is unlimited.
        :param follow_symlinks: Passed to ``DirEntry.is_file`` and
            ``DirEntry.is_dir`` when classifying entries.
        :return: The collected entries, files of ``search_path`` first and
            then those of each sub-directory.
        """
        if search_path is None:
            # Keep this call here (not in a helper): _get_caller_path relies
            # on being exactly one frame below this method.
            search_path = self._get_caller_path()

        collected_files = []  # type: List[os.DirEntry]
        candidate_dirs = []  # type: List[os.DirEntry]

        # The context manager closes the directory handle deterministically.
        with os.scandir(search_path) as entries:
            for entry in entries:
                if entry.is_file(follow_symlinks=follow_symlinks):
                    if not self._should_exclude_file(entry):
                        collected_files.append(entry)
                elif entry.is_dir(follow_symlinks=follow_symlinks):
                    if not self._should_exclude_dir(entry):
                        candidate_dirs.append(entry)

        if recursion_limit is not None and recursion_limit <= 0:
            return collected_files

        remaining_depth = None if recursion_limit is None else recursion_limit - 1
        for subdir in candidate_dirs:
            if not self._is_package(subdir.path, follow_symlinks):
                continue
            collected_files.extend(
                self.collect_python_files(
                    search_path=subdir.path,
                    recursion_limit=remaining_depth,
                    follow_symlinks=follow_symlinks,
                )
            )
        return collected_files

    def _is_package(self, dir_path: str, follow_symlinks: bool) -> bool:
        """Return whether ``dir_path`` directly contains an ``__init__.py`` file."""
        with os.scandir(dir_path) as entries:
            return any(
                entry.name == self._INIT_FILE
                and entry.is_file(follow_symlinks=follow_symlinks)
                for entry in entries
            )

    def _should_exclude_file(self, entry: os.DirEntry) -> bool:
        """Return whether the file's name is excluded or not included."""
        filename = entry.name  # type: str
        if any(
            self._matches_pattern(pattern, filename)
            for pattern in self._file_exclusion_patterns
        ):
            return True
        return not any(
            self._matches_pattern(pattern, filename)
            for pattern in self._file_inclusion_patterns
        )

    def _should_exclude_dir(self, entry: os.DirEntry) -> bool:
        """Return whether the directory's name is excluded or not included."""
        dirname = entry.name  # type: str
        if any(
            self._matches_pattern(pattern, dirname)
            for pattern in self._dir_exclusion_patterns
        ):
            return True
        return not any(
            self._matches_pattern(pattern, dirname)
            for pattern in self._dir_inclusion_patterns
        )

    def _matches_pattern(self, pattern: str, name: str) -> bool:
        """
        Return whether ``name`` matches ``pattern``.

        In regex mode the pattern is applied with ``re.match`` (anchored at
        the start only). In glob mode ``*`` matches any run of characters,
        every other character is literal, and the whole name must match.
        """
        if self._enable_regex_patterns:
            return bool(re.match(pattern, name))
        # Escape the literal pieces and join them with ".*" so that every
        # piece, not only the first and last, has to appear in order.
        literal_parts = pattern.split(self._WILDCARD)
        glob_regex = ".*".join(re.escape(part) for part in literal_parts)
        return re.fullmatch(glob_regex, name, flags=re.DOTALL) is not None