csvpath.managers.results.results_registrar

  1import os
  2from datetime import datetime, timezone
  3import json
  4import time
  5from csvpath.util.nos import Nos
  6from csvpath.util.file_info import FileInfo
  7from csvpath.util.exceptions import FileException
  8from csvpath.util.file_readers import DataFileReader
  9from csvpath.util.file_writers import DataFileWriter
 10from .result import Result
 11from .result_serializer import ResultSerializer
 12from .result_registrar import ResultRegistrar
 13from .results_metadata import ResultsMetadata
 14from ..run.run_metadata import RunMetadata
 15from ..registrar import Registrar
 16from ..listener import Listener
 17from ..metadata import Metadata
 18
 19
 20class ResultsRegistrar(Registrar, Listener):
 21    """@private"""
 22    COMPLETE = "complete"
 23
 24    def __init__(
 25        self, *, csvpaths, run_dir: str, pathsname: str, results: list[Result] = None
 26    ) -> None:
 27        # super().__init__(csvpaths=csvpaths)
 28        Registrar.__init__(self, csvpaths)
 29        Listener.__init__(self, csvpaths.config)
 30        self.pathsname = pathsname
 31        self.run_dir = run_dir
 32        self.results = results
 33        self.type_name = "results"
 34
 35    def register_start(self, mdata: ResultsMetadata) -> None:
 36        mdata.status = "start"
 37        mdata.manifest_path = self.manifest_path
 38        filename = mdata.named_file_name
 39        fingerprint = self.csvpaths.file_manager.get_fingerprint_for_name(filename)
 40        filepath = self.csvpaths.file_manager.get_named_file(filename)
 41        ffingerprint = self._fingerprint_file(filepath)
 42        mdata.named_file_fingerprint = ffingerprint
 43        if self.results and len(self.results) > 0:
 44            mdata.by_line = self.results[0].by_line
 45        mdata.named_file_fingerprint_on_file = fingerprint
 46        mdata.named_file_path = filepath
 47        mdata.named_file_size = self._size(filepath)
 48        mdata.named_file_last_change = self._last_change(filepath)
 49        self.distribute_update(mdata)
 50        # after we distribute the update
 51        # if we see a fingerprint mismatch we need to log it
 52        # and maybe blow up
 53        if mdata.named_file_fingerprint and mdata.named_file_fingerprint_on_file:
 54            if mdata.named_file_fingerprint != mdata.named_file_fingerprint_on_file:
 55                self.csvpaths.logger.warning(
 56                    "fingerprints of input file %s do not agree: orig:%s != current:%s",
 57                    mdata.named_file_path,
 58                    mdata.named_file_fingerprint,
 59                    mdata.named_file_fingerprint_on_file,
 60                )
 61            houf = self.csvpaths.config.halt_on_unmatched_file_fingerprints()
 62            if (
 63                houf is True
 64                and mdata.named_file_fingerprint != mdata.named_file_fingerprint_on_file
 65            ):
 66                raise FileException(
 67                    f"""File was modified since being registered.
 68                    New {mdata.named_file_fingerprint} does not equal
 69                    on-file {mdata.named_file_fingerprint_on_file}.
 70                    See manifest for {mdata.named_file_path} at {mdata.time}.
 71                    Processing halted."""
 72                )
 73
 74    def register_complete(self, mdata) -> None:
 75        #
 76        # load what's already in the manifest
 77        #
 78        m = self.manifest
 79        mdata.from_manifest(m)
 80        if self.results and len(self.results) > 0:
 81            mdata.by_line = self.results[0].by_line
 82        mdata.set_time_completed()
 83        mdata.status = ResultsRegistrar.COMPLETE
 84        mdata.all_completed = self.all_completed()
 85        mdata.all_valid = self.all_valid()
 86        mdata.error_count = self.error_count()
 87        mdata.all_expected_files = self.all_expected_files()
 88        mdata.manifest_path = self.manifest_path
 89        self.distribute_update(mdata)
 90
 91    def metadata_update(self, mdata: Metadata) -> None:
 92        m = {}
 93        m["time"] = mdata.time_string
 94        m["uuid"] = mdata.uuid_string
 95        m["serial"] = mdata.by_line is False
 96        if mdata.time_completed:
 97            m["time_completed"] = mdata.time_completed_string
 98            m["all_completed"] = mdata.all_completed
 99            m["all_valid"] = mdata.all_valid
100            m["error_count"] = mdata.error_count
101            m["all_expected_files"] = mdata.all_expected_files
102        m["status"] = mdata.status
103        m["run_home"] = mdata.run_home
104        m["named_results_name"] = mdata.named_results_name
105        m["named_paths_name"] = mdata.named_paths_name
106        m["named_file_name"] = mdata.named_file_name
107        m["named_file_path"] = mdata.named_file_path
108        m["named_file_size"] = mdata.named_file_size
109        m["named_file_last_change"] = mdata.named_file_last_change
110        m["named_file_fingerprint"] = mdata.named_file_fingerprint
111        m["named_file_fingerprint_on_file"] = mdata.named_file_fingerprint_on_file
112        m["hostname"] = mdata.hostname
113        m["username"] = mdata.username
114        m["ip_address"] = mdata.ip_address
115        mp = mdata.manifest_path
116        m["manifest_path"] = mp
117        with DataFileWriter(path=mp) as file:
118            json.dump(m, file.sink, indent=2)
119
120    def _fingerprint_file(self, path) -> str:
121        with DataFileReader(path) as f:
122            h = f.fingerprint()
123        return h
124
125    def _size(self, path) -> str:
126        try:
127            fi = FileInfo.info(path)
128            return fi["bytes"]
129        except FileNotFoundError:
130            return 0
131
132    def _last_change(self, path) -> str:
133        try:
134            fi = FileInfo.info(path)
135            return fi["last_mod"]
136        except FileNotFoundError:
137            return -1
138
139    def all_valid(self) -> bool:
140        for r in self.results:
141            if not r.csvpath.is_valid:
142                return False
143        return True
144
145    def all_completed(self) -> bool:
146        for r in self.results:
147            if not r.csvpath.completed:
148                return False
149        return True
150
151    def error_count(self) -> bool:
152        ec = 0
153        for r in self.results:
154            ec += r.errors_count
155        return ec
156
157    def all_expected_files(self) -> bool:
158        rs = ResultSerializer(self.csvpaths.config.archive_path)
159        for r in self.results:
160            rr = ResultRegistrar(csvpaths=self.csvpaths, result=r, result_serializer=rs)
161            if not rr.all_expected_files:
162                return False
163        return True
164
165    @property
166    def manifest(self) -> dict[str, str | bool]:
167        mp = self.manifest_path
168        with DataFileReader(mp) as file:
169            d = json.load(file.source)
170            return d
171        return None
172
173    @property
174    def manifest_path(self) -> str:
175        if not Nos(self.run_dir).exists():
176            Nos(self.run_dir).makedir()
177        mp = os.path.join(self.run_dir, "manifest.json")
178        return mp