Module src.jsonid.jsonid
jsonid entry-point.
Functions
async def create_manifest(path: str) ‑> list[str]
-
Expand source code
async def create_manifest(path: str) -> list[str]:
    """Collect the path of every file found beneath ``path``.

    The directory tree is traversed with ``os.walk``; each file path is
    logged at debug level and added to the result in traversal order.

    Args:
        path: directory to walk.

    Returns:
        The joined ``root``/``file`` path of every file encountered; an
        empty list when the walk yields no files.
    """
    manifest: list[str] = []
    for directory, _, filenames in os.walk(path):
        found = [os.path.join(directory, name) for name in filenames]
        for entry in found:
            logger.debug(entry)
        manifest.extend(found)
    return manifest
Get a list of paths to process.
def decode(content: str)
-
Expand source code
def decode(content: str):
    """Attempt to parse ``content`` as JSON.

    Args:
        content: the text to parse.

    Returns:
        A ``(valid, data)`` tuple: ``(True, <parsed value>)`` when the
        content parses as JSON, otherwise ``(False, None)``. The decode
        error is logged at debug level in the failure case.
    """
    try:
        parsed = json.loads(content)
    except json.decoder.JSONDecodeError as err:
        logger.debug("can't process: %s", err)
        return False, None
    else:
        return True, parsed
Decode the given content stream.
def get_date_time() ‑> str
-
Expand source code
def get_date_time() -> str:
    """Return the current UTC time formatted with ``version.UTC_TIME_FORMAT``."""
    now_utc = datetime.datetime.now(timezone.utc)
    return now_utc.strftime(version.UTC_TIME_FORMAT)
Return a datetime string for now().
async def identify_json(paths: list[str], binary: bool)
-
Expand source code
async def identify_json(paths: list[str], binary: bool): """Pass each path to identify_plaintext_bytestream and print the identifiers returned by registry.matcher. NOTE(review): the version header is only printed when idx == 0 - confirm it still appears if the first path is skipped as not plaintext.""" for idx, path in enumerate(paths): valid, data = await identify_plaintext_bytestream(path) if not valid: logger.debug("%s: is not plaintext", path) if binary: logger.warning("report on binary object...") continue if data != "": logger.debug("processing: %s", path) if idx == 0: print("---") print(version_header()) print("---") res = registry.matcher(data) print(f"file: {path}") print("identifiers:") for item in res: print(" ", item) print("---")
Identify objects
def main() ‑> None
-
Expand source code
def main() -> None:
    """Parse the command line and run identification on the requested path.

    Builds the ``json-id`` argument parser, sets this module's logger to
    DEBUG when ``--debug`` is given (INFO otherwise), rejects options that
    are declared but not yet implemented, and then runs ``process_data``
    under ``asyncio.run`` with the supplied ``--path`` and ``--binary``.

    Raises:
        NotImplementedError: if ``--registry``, ``--pronom``, ``--language``
            or ``--export`` is supplied; they are checked in that order.
    """
    parser = argparse.ArgumentParser(
        prog="json-id",
        description="proof-of-concept identifier for JSON objects on disk based on identifying valid objects and their key-values",
        epilog="for more information visit https://github.com/ffdev-info/json-id",
    )
    parser.add_argument(
        "--debug",
        help="use debug logging",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--path",
        help="file path to process",
        required=True,
    )
    parser.add_argument(
        "--binary",
        help="report on binary formats as well as plaintext",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--registry",
        help="path to a custom registry to load into memory replacing the default",
        required=False,
    )
    parser.add_argument(
        "--pronom",
        help="return a PRONOM-centric view of the results",
        required=False,
    )
    parser.add_argument(
        "--export",
        help="export the embedded registry",
        required=False,
    )
    parser.add_argument(
        "--language",
        help="return results in different languages",
        required=False,
    )
    args = parser.parse_args()
    logging.getLogger(__name__).setLevel(logging.DEBUG if args.debug else logging.INFO)
    logger.debug("debug logging is configured")
    # These options are accepted by the parser so they show up in --help,
    # but none of them has an implementation yet; fail loudly instead of
    # silently ignoring them.
    if args.registry:
        raise NotImplementedError("custom registry is not yet available")
    if args.pronom:
        raise NotImplementedError("pronom view is not yet implemented")
    if args.language:
        raise NotImplementedError("multiple languages are not yet implemented")
    if args.export:
        raise NotImplementedError("registry export is not yet implemented")
    asyncio.run(
        process_data(
            path=args.path,
            binary=args.binary,
        )
    )
Primary entry point for this script.
async def process_data(path: str, binary: bool)
-
Expand source code
async def process_data(path: str, binary: bool):
    """Identify the object(s) located at ``path``.

    A single file is identified on its own, after which the process exits
    with status 0. Any other existing path is walked with
    ``create_manifest`` and every file found is identified.

    Args:
        path: file or directory to process.
        binary: forwarded unchanged to ``identify_json``.

    Exits with status 1 when ``path`` does not exist or when the manifest
    built for it is empty.
    """
    logger.debug("processing: %s", path)

    if not os.path.exists(path):
        logger.error("path: '%s' does not exist", path)
        sys.exit(1)

    is_single_file = os.path.isfile(path)
    if is_single_file:
        await identify_json([path], binary)
        sys.exit(0)

    manifest = await create_manifest(path)
    if manifest:
        await identify_json(manifest, binary)
        return

    logger.info("no files in directory: %s", path)
    sys.exit(1)
Process all objects at a given path
def version_header() ‑> str
-
Expand source code
def version_header() -> str: """Return the stripped header text containing the jsonid version and the scan date.""" return f"""jsonid: {version.get_version()} scandate: {get_date_time()}""".strip()
Output a formatted version header.