Module src.jsonid.jsonid
jsonid entry-point.
Functions
async def create_manifest(path: str) ‑> list[str]
-
Expand source code
async def create_manifest(path: str) -> list[str]:
    """Collect the path of every file found beneath ``path``.

    The directory tree is traversed with ``os.walk`` and each file name is
    joined to the directory that contains it. Every resulting path is
    emitted at debug level as it is discovered.

    Args:
        path: directory at which the walk starts.

    Returns:
        The joined file paths, in the order ``os.walk`` yields them.
    """
    manifest: list[str] = []
    for directory, _subdirs, filenames in os.walk(path):
        # Build this directory's entries first, then log and record them in
        # the same order they were listed.
        entries = [os.path.join(directory, name) for name in filenames]
        for entry in entries:
            logger.debug(entry)
        manifest.extend(entries)
    return manifest
Get a list of paths to process.
async def identify_json(paths: list[str], binary: bool)
-
Expand source code
async def identify_json(paths: list[str], binary: bool):
    """Identify each object in ``paths`` and print the matches to stdout.

    Output is a sequence of records delimited by ``---`` lines. Each record
    names the file and then lists every item returned by the registry
    matcher for that file's data.

    Args:
        paths: file paths to inspect, processed in order.
        binary: when true, a warning is logged for every object that is not
            recognised as plaintext; such objects are skipped either way.
    """
    separator = "---"
    print(separator)
    for candidate in paths:
        is_plaintext, contents = await identify_plaintext_bytestream(candidate)
        if is_plaintext:
            # Only the debug message depends on the data being non-empty; the
            # matcher is consulted regardless.
            if contents != "":
                logger.debug("processing: %s", candidate)
            matches = registry.matcher(contents)
            print(f"file: {candidate}")
            print("identifiers:")
            for match in matches:
                print(" ", match)
            print(separator)
        else:
            logger.debug("%s: is not plaintext", candidate)
            if binary:
                logger.warning("report on binary object...")
Identify objects
def main() ‑> None
-
Expand source code
def main() -> None:
    """Primary entry point for this script.

    Parses the command line, configures the level of this module's logger
    and hands the requested path to ``process_data`` via ``asyncio.run``.

    Command line options:
        --debug: enable debug logging.
        --path: file path to process (required).
        --binary: report on binary formats as well as plaintext.
        --registry, --pronom, --language: accepted by the parser but not
            implemented yet.

    Raises:
        NotImplementedError: if ``--registry``, ``--pronom`` or
            ``--language`` is supplied.
    """
    parser = argparse.ArgumentParser(
        prog="json-id",
        description="proof-of-concept identifier for JSON objects on disk based on identifying valid objects and their key-values",
        epilog="for more information visit https://github.com/ffdev-info/json-id",
    )
    parser.add_argument(
        "--debug",
        help="use debug logging",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--path",
        help="file path to process",
        required=True,
    )
    parser.add_argument(
        "--binary",
        help="report on binary formats as well as plaintext",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--registry",
        help="path to a custom registry to load into memory replacing the default",
        required=False,
    )
    parser.add_argument(
        "--pronom",
        help="return a PRONOM-centric view of the results",
        required=False,
    )
    parser.add_argument(
        "--language",
        help="return results in different languages",
        required=False,
    )
    args = parser.parse_args()
    logging.getLogger(__name__).setLevel(logging.DEBUG if args.debug else logging.INFO)
    logger.debug("debug logging is configured")
    # These options are placeholders: fail loudly rather than silently
    # ignoring a value the user supplied.
    if args.registry:
        raise NotImplementedError("custom registry is not yet available")
    if args.pronom:
        raise NotImplementedError("pronom view is not yet implemented")
    if args.language:
        raise NotImplementedError("multiple languages are not yet implemented")
    asyncio.run(
        process_data(
            path=args.path,
            binary=args.binary,
        )
    )
Primary entry point for this script.
async def process_data(path: str, binary: bool)
-
Expand source code
async def process_data(path: str, binary: bool):
    """Identify the object, or every object, found at ``path``.

    A single file is identified directly, after which the interpreter exits
    with status 0. A directory is expanded into a manifest of its files
    first and each entry is then identified.

    Args:
        path: file or directory to process.
        binary: forwarded unchanged to ``identify_json``.

    Raises:
        SystemExit: with status 1 when ``path`` does not exist or the
            directory yields no files; with status 0 once a single file has
            been processed.
    """
    logger.debug("processing: %s", path)
    if not os.path.exists(path):
        logger.error("path: '%s' does not exist", path)
        sys.exit(1)
    elif os.path.isfile(path):
        await identify_json([path], binary)
        sys.exit(0)
    # Anything that reaches this point exists and is not a regular file, so
    # it is walked as a directory.
    manifest = await create_manifest(path)
    if not manifest:
        logger.info("no files in directory: %s", path)
        sys.exit(1)
    await identify_json(manifest, binary)
Process all objects at a given path