csvpath.managers.paths.paths_registrar

  1import os
  2import json
  3from csvpath.util.exceptions import InputException
  4from csvpath.util.file_readers import DataFileReader
  5from csvpath.util.file_writers import DataFileWriter
  6from csvpath.util.nos import Nos
  7from .paths_metadata import PathsMetadata
  8from ..listener import Listener
  9from ..metadata import Metadata
 10from ..registrar import Registrar
 11
 12
 13class PathsRegistrar(Registrar, Listener):
 14    """@private"""
 15    def __init__(self, csvpaths):
 16        # super().__init__(csvpaths)
 17        Registrar.__init__(self, csvpaths)
 18        Listener.__init__(self, csvpaths.config)
 19        self._manager = None
 20        self.type_name = "paths"
 21
 22    @property
 23    def manager(self):
 24        if self._manager is None:
 25            self._manager = self.csvpaths.paths_manager
 26        return self._manager
 27
 28    def get_manifest(self, mpath) -> list:
 29        with DataFileReader(mpath) as file:
 30            j = json.load(file.source)
 31            return j
 32
 33    def register_complete(self, mdata: Metadata) -> None:
 34        mdata.manifest_path = self.manifest_path(name=mdata.named_paths_name)
 35        mdata.fingerprint = self._fingerprint(name=mdata.named_paths_name)
 36        self.distribute_update(mdata)
 37
 38    def update_manifest_if(self, *, group_file_path, name, paths=None):
 39        #
 40        # if we find that the current group file does not have the same
 41        # fingerprint as the most recent on file, we register a new version.
 42        # this is not the expected way things work, but if someone makes an
 43        # update in place, without re-adding the named-paths, this is what
 44        # happens.
 45        #
 46        f = self._fingerprint(group_file_path=group_file_path)
 47        mpath = self.manifest_path(name)
 48        cf = self._most_recent_fingerprint(mpath)
 49        if f != cf:
 50            mdata = PathsMetadata()
 51            mdata.archive_name = self.csvpaths.config.archive_name
 52            mdata.named_paths_name = name
 53            #
 54            # why two of these? :/
 55            #
 56            # mdata.named_paths_file = group_file_path
 57            mdata.group_file_path = group_file_path
 58            mdata.named_paths = paths
 59            mdata.named_paths_identities = [
 60                t[0] for t in self.manager.get_identified_paths_in(name)
 61            ]
 62            if paths:
 63                mdata.named_paths_count = len(paths)
 64            mdata.manifest_path = mpath
 65            mdata.fingerprint = f
 66            self.distribute_update(mdata)
 67        else:
 68            #
 69            # leave as info so nobody has to dig to see why no update
 70            #
 71            self.csvpaths.logger.info(
 72                "Fingerprints of named-paths %s match, as expected; no need to fire update event",
 73                name,
 74            )
 75
 76    def metadata_update(self, mdata: Metadata) -> None:
 77        jdata = self.get_manifest(mdata.manifest_path)
 78        if len(jdata) == 0 or jdata[len(jdata) - 1]["fingerprint"] != mdata.fingerprint:
 79            m = {}
 80            #
 81            # the inputs dir may be outside the archive dir, as by default, or
 82            # inside. regardless, the point is that archive is the namespace.
 83            # the inputs dirs are intended to stage assets for the archive
 84            # regardless of if they are located in the archive or not.
 85            #
 86            m["archive_name"] = mdata.archive_name
 87            m["named_paths_name"] = mdata.named_paths_name
 88            m["named_paths_home"] = mdata.named_paths_home
 89            m["group_file_path"] = mdata.group_file_path
 90            if mdata.source_path is not None:
 91                m["source_path"] = mdata.source_path
 92            m["named_paths"] = mdata.named_paths
 93            m["named_paths_identities"] = mdata.named_paths_identities
 94            m["named_paths_count"] = mdata.named_paths_count
 95            m["fingerprint"] = mdata.fingerprint
 96            m["time"] = mdata.time_string
 97            if mdata.time_started is not None:
 98                m["time_started"] = mdata.time_started_string
 99            if mdata.time_completed is not None:
100                m["time_completed"] = mdata.time_completed_string
101            m["uuid"] = mdata.uuid_string
102            m["manifest_path"] = mdata.manifest_path
103            jdata.append(m)
104            with DataFileWriter(path=mdata.manifest_path) as file:
105                json.dump(jdata, file.sink, indent=2)
106        else:
107            #
108            # leave as info so nobody has to dig to see why no update
109            #
110            self.csvpaths.logger.info(
111                "Fingerprint of named-paths file for %s matches the manifest; no need to update",
112                mdata.named_paths_name,
113            )
114
115    def manifest_path(self, name: str) -> None:
116        nhome = self.manager.named_paths_home(name)
117        mf = os.path.join(nhome, "manifest.json")
118        if not Nos(mf).exists():
119            with DataFileWriter(path=mf) as file:
120                file.append("[]")
121        return mf
122
123    def _most_recent_fingerprint(self, manifest_path: str) -> str:
124        jdata = self.get_manifest(manifest_path)
125        if len(jdata) == 0:
126            return None
127        return jdata[len(jdata) - 1]["fingerprint"]
128
129    def _simple_name(self, path) -> str:
130        i = path.rfind(Nos(path).sep)
131        sname = None
132        if i == -1:
133            sname = path
134        else:
135            sname = path[i + 1 :]
136        return sname
137
138    def _fingerprint(self, *, name=None, group_file_path=None) -> str:
139        if group_file_path is None and name is not None:
140            home = self.manager.named_paths_home(name)
141            group_file_path = os.path.join(home, "group.csvpaths")
142        elif group_file_path is None and name is None:
143            raise InputException(
144                "Either the named-paths name or the path to the group file must be provided"
145            )
146        if Nos(group_file_path).exists():
147            with DataFileReader(group_file_path) as reader:
148                h = reader.fingerprint()
149                return h
150        return None