csvpath.managers.results.result_registrar

  1import os
  2import json
  3from datetime import datetime
  4from csvpath.util.file_readers import DataFileReader
  5from csvpath.util.file_writers import DataFileWriter
  6from csvpath.util.nos import Nos
  7from ..listener import Listener
  8from ..metadata import Metadata
  9from ..registrar import Registrar
 10from .result_metadata import ResultMetadata
 11
 12
 13class ResultRegistrar(Registrar, Listener):
 14    """@private"""
 15    def __init__(self, *, csvpaths, result, result_serializer=None):
 16        # super().__init__(csvpaths, result)
 17        Registrar.__init__(self, csvpaths, result)
 18        Listener.__init__(self, csvpaths.config)
 19        self.result_serializer = result_serializer
 20        self.type_name = "result"
 21
 22    def register_start(self, mdata: Metadata) -> None:
 23        p = self.named_paths_manifest
 24        mdata.by_line = self.result.by_line
 25        mdata.manifest_path = self.manifest_path
 26        mdata.instance_index = self.result.run_index
 27        mdata.actual_data_file = self.result.actual_data_file
 28        mdata.origin_data_file = self.result.origin_data_file
 29        ri = int(self.result.run_index) if self.result.run_index else 0
 30        if ri >= 1:
 31            rs = self.result.csvpath.csvpaths.results_manager.get_named_results(
 32                self.result.paths_name
 33            )
 34            r = rs[ri - 1]
 35            mdata.preceding_instance_identity = r.identity_or_index
 36        if p is None:
 37            self.result.csvpath.csvpaths.logger.debug(
 38                "No named-paths manifest available at %s so not setting named_paths_uuid_string",
 39                self.named_paths_manifest_path,
 40            )
 41        else:
 42            mdata.named_paths_uuid_string = p["uuid"]
 43        self.distribute_update(mdata)
 44
 45    def register_complete(self, mdata: Metadata = None) -> None:
 46        #
 47        # results manager delegates the bits to the
 48        # serializer and the metadata assembly to this
 49        # registrar, so we expect it to hand us nothing
 50        # but the result object and serializer.
 51        #
 52        m = self.manifest
 53        if mdata is None:
 54            mdata = ResultMetadata(config=self.csvpaths.config)
 55        mdata.from_manifest(m)
 56        mdata.archive_name = self.archive_name
 57        mdata.named_results_name = self.result.paths_name
 58        mdata.run = self.result_serializer.get_run_dir_name_from_datetime(
 59            self.result.run_time
 60        )
 61        mdata.by_line = self.result.by_line
 62        mdata.source_mode_preceding = self.result.source_mode_preceding
 63        mdata.run_home = self.result.run_dir
 64        mdata.instance_home = self.result.instance_dir
 65        mdata.instance_identity = self.result.identity_or_index
 66        mdata.instance_index = self.result.run_index
 67        mdata.named_file_name = self.result.file_name
 68        mdata.input_data_file = self.result.file_name
 69        mdata.file_fingerprints = self.file_fingerprints
 70        mdata.file_count = len(mdata.file_fingerprints)
 71        mdata.error_count = self.result.errors_count
 72        mdata.valid = self.result.csvpath.is_valid
 73        mdata.completed = self.completed
 74        mdata.files_expected = self.all_expected_files
 75        if self.result.csvpath.transfers:
 76            tpaths = self.result.csvpath.csvpaths.results_manager.transfer_paths(
 77                self.result
 78            )
 79            mdata.transfers = tpaths
 80        mdata.actual_data_file = self.result.actual_data_file
 81        mdata.origin_data_file = self.result.origin_data_file
 82        ri = int(self.result.run_index) if self.result.run_index else 0
 83        if ri >= 1:
 84            rs = self.result.csvpath.csvpaths.results_manager.get_named_results(
 85                self.result.paths_name
 86            )
 87            r = rs[ri - 1]
 88            mdata.preceding_instance_identity = r.identity_or_index
 89        self.distribute_update(mdata)
 90
 91    def metadata_update(self, mdata: Metadata) -> None:
 92        m = {}
 93        if mdata.time is None:
 94            raise ValueError("Time cannot be None")
 95        m["time"] = mdata.time_string
 96        m["uuid"] = mdata.uuid_string
 97        m["serial"] = mdata.by_line is False
 98        m["archive_name"] = mdata.archive_name
 99        m["named_results_name"] = mdata.named_results_name
100        m["named_paths_uuid"] = mdata.named_paths_uuid_string
101        m["run"] = mdata.run
102        m["run_home"] = mdata.run_home
103        m["instance_identity"] = mdata.instance_identity
104        m["instance_index"] = mdata.instance_index
105        m["instance_home"] = mdata.instance_home
106        m["file_fingerprints"] = mdata.file_fingerprints
107        m["files_expected"] = mdata.files_expected
108        m["file_count"] = mdata.file_count
109        m["valid"] = mdata.valid
110        m["completed"] = mdata.completed
111        m["source_mode_preceding"] = mdata.source_mode_preceding
112        if mdata.source_mode_preceding:
113            m["preceding_instance_identity"] = mdata.preceding_instance_identity
114        m["actual_data_file"] = mdata.actual_data_file
115        m["origin_data_file"] = mdata.origin_data_file
116        m["named_file_name"] = mdata.named_file_name
117        if mdata.transfers:
118            m["transfers"] = mdata.transfers
119        mp = self.manifest_path
120        m["manifest_path"] = mp
121        with DataFileWriter(path=mp) as file:
122            json.dump(m, file.sink, indent=2)
123
124    @property
125    def archive_name(self) -> str:
126        ap = self.result.csvpath.config.archive_path
127        sep = Nos(ap).sep
128        i = ap.rfind(sep)
129        if i > 0:
130            return ap[i + 1 :]
131        return ap
132
133    # gets the manifest for the named_paths as a whole
134    @property
135    def named_paths_manifest(self) -> dict | None:
136        if Nos(self.named_paths_manifest_path).exists():
137            with DataFileReader(self.named_paths_manifest_path) as file:
138                d = json.load(file.source)
139                return d
140        return None
141
142    # gets the manifest for the named_paths as a whole from the run dir
143    @property
144    def named_paths_manifest_path(self) -> str:
145        return os.path.join(self.result.run_dir, "manifest.json")
146
147    #
148    # switch to use ResultManifestReader.manifest
149    #
150    @property
151    def manifest(self) -> dict | None:
152        mp = self.manifest_path
153        if not Nos(mp).exists():
154            with DataFileWriter(path=self.manifest_path) as file:
155                json.dump({}, file.sink, indent=2)
156                return {}
157        with DataFileReader(self.manifest_path) as file:
158            d = json.load(file.source)
159            return d
160        return None
161
162    @property
163    def manifest_path(self) -> str:
164        h = os.path.join(self.result_path, "manifest.json")
165        return h
166
167    @property
168    def result_path(self) -> str:
169        rdir = self.result_serializer.get_instance_dir(
170            run_dir=self.result.run_dir, identity=self.result.identity_or_index
171        )
172        if not Nos(rdir).exists():
173            Nos(rdir).makedir()
174        return rdir
175
176    @property
177    def completed(self) -> bool:
178        return self.result.csvpath.completed
179
180    @property
181    def all_expected_files(self) -> bool:
182        #
183        # we can not have any/all of data.csv, unmatched.csv, and printouts.txt without
184        # it necessarily being a failure mode. but we can require them as a matter of
185        # content validation.
186        #
187        if (
188            self.result.csvpath.all_expected_files is None
189            or len(self.result.csvpath.all_expected_files) == 0
190        ):
191            if not self.has_file("meta.json"):
192                return False
193            if not self.has_file("errors.json"):
194                return False
195            if not self.has_file("vars.json"):
196                return False
197            return True
198        for t in self.result.csvpath.all_expected_files:
199            t = t.strip()
200            if t.startswith("no-data"):
201                if self.has_file("data.csv"):
202                    return False
203            if t.startswith("data") or t.startswith("all"):
204                if not self.has_file("data.csv"):
205                    return False
206            if t.startswith("no-unmatched"):
207                if self.has_file("unmatched.csv"):
208                    return False
209            if t.startswith("unmatched") or t.startswith("all"):
210                if not self.has_file("unmatched.csv"):
211                    return False
212            if t.startswith("no-printouts"):
213                if self.has_file("printouts.txt"):
214                    return False
215            if t.startswith("printouts") or t.startswith("all"):
216                if not self.has_file("printouts.txt"):
217                    return False
218            if not self.has_file("meta.json"):
219                return False
220            if not self.has_file("errors.json"):
221                return False
222            if not self.has_file("vars.json"):
223                return False
224        return True
225
226    def has_file(self, t: str) -> bool:
227        r = self.result_path
228        return Nos(os.path.join(r, t)).exists()
229
230    @property
231    def file_fingerprints(self) -> dict[str]:
232        r = self.result_path
233        fps = {}
234        for t in [
235            "data.csv",
236            "meta.json",
237            "unmatched.csv",
238            "printouts.txt",
239            "errors.json",
240            "vars.json",
241        ]:
242            f = self._fingerprint(os.path.join(r, t))
243            if f is None:
244                continue
245            fps[t] = f
246        return fps
247
248    def _fingerprint(self, path) -> str:
249        if path.find("://") == -1 and not path.startswith("/"):
250            path = f"{os.getcwd()}/{path}"
251        if Nos(path).exists():
252            with DataFileReader(path) as f:
253                h = f.fingerprint()
254                return h
255        return None