1import os
2from datetime import datetime, timezone
3import json
4import time
5from csvpath.util.nos import Nos
6from csvpath.util.file_info import FileInfo
7from csvpath.util.exceptions import FileException
8from csvpath.util.file_readers import DataFileReader
9from csvpath.util.file_writers import DataFileWriter
10from .result import Result
11from .result_serializer import ResultSerializer
12from .result_registrar import ResultRegistrar
13from .results_metadata import ResultsMetadata
14from ..run.run_metadata import RunMetadata
15from ..registrar import Registrar
16from ..listener import Listener
17from ..metadata import Metadata
18
19
class ResultsRegistrar(Registrar, Listener):
    """@private

    Registrar and listener for the run-level ``manifest.json`` of a
    named-paths run.

    ``register_start`` and ``register_complete`` fill in a metadata object
    and hand it to ``distribute_update``; ``metadata_update`` serialises a
    metadata object to the manifest file inside ``run_dir``.
    """

    COMPLETE = "complete"

    def __init__(
        self,
        *,
        csvpaths,
        run_dir: str,
        pathsname: str,
        results: list[Result] | None = None,
    ) -> None:
        """Set up the registrar for one run.

        Args:
            csvpaths: the owning CsvPaths-like object; its ``config``,
                ``logger`` and ``file_manager`` attributes are used here.
            run_dir: directory that holds this run's ``manifest.json``.
            pathsname: name of the named-paths group being run.
            results: the per-csvpath results of the run, if already known.
        """
        # The two bases take different constructor arguments (csvpaths vs.
        # csvpaths.config), so each is initialised explicitly rather than
        # through a single super().__init__() call.
        Registrar.__init__(self, csvpaths)
        Listener.__init__(self, csvpaths.config)
        self.pathsname = pathsname
        self.run_dir = run_dir
        self.results = results
        self.type_name = "results"

    def register_start(self, mdata: ResultsMetadata) -> None:
        """Record the start of a run and check the input file's fingerprint.

        Populates ``mdata`` with the named file's path, size, last-change
        value and both fingerprints (freshly computed and the one the file
        manager has on record), then distributes the update.

        Raises:
            FileException: if both fingerprints are known, they differ, and
                the config's ``halt_on_unmatched_file_fingerprints()``
                returns ``True``.
        """
        mdata.status = "start"
        mdata.manifest_path = self.manifest_path
        file_manager = self.csvpaths.file_manager
        filename = mdata.named_file_name
        registered = file_manager.get_fingerprint_for_name(filename)
        filepath = file_manager.get_named_file(filename)
        mdata.named_file_fingerprint = self._fingerprint_file(filepath)
        if self.results:
            mdata.by_line = self.results[0].by_line
        mdata.named_file_fingerprint_on_file = registered
        mdata.named_file_path = filepath
        mdata.named_file_size = self._size(filepath)
        mdata.named_file_last_change = self._last_change(filepath)
        self.distribute_update(mdata)
        #
        # only after the update has been distributed do we look for a
        # fingerprint mismatch, log it, and possibly halt.
        #
        current = mdata.named_file_fingerprint
        on_file = mdata.named_file_fingerprint_on_file
        if not current or not on_file or current == on_file:
            # nothing to compare, or the file is unchanged
            return
        # "orig" is the fingerprint on record; "current" is the one we just
        # computed. The arguments must follow the order of the format string.
        self.csvpaths.logger.warning(
            "fingerprints of input file %s do not agree: orig:%s != current:%s",
            mdata.named_file_path,
            on_file,
            current,
        )
        if self.csvpaths.config.halt_on_unmatched_file_fingerprints() is True:
            raise FileException(
                "File was modified since being registered.\n"
                f" New {mdata.named_file_fingerprint} does not equal\n"
                f" on-file {mdata.named_file_fingerprint_on_file}.\n"
                f" See manifest for {mdata.named_file_path} at {mdata.time}.\n"
                " Processing halted."
            )

    def register_complete(self, mdata: ResultsMetadata) -> None:
        """Record the completion of a run.

        Loads what is already in the manifest into ``mdata``, adds the
        completion time, status and the aggregate flags computed from
        ``self.results``, then distributes the update.
        """
        #
        # load what's already in the manifest
        #
        mdata.from_manifest(self.manifest)
        if self.results:
            mdata.by_line = self.results[0].by_line
        mdata.set_time_completed()
        mdata.status = ResultsRegistrar.COMPLETE
        mdata.all_completed = self.all_completed()
        mdata.all_valid = self.all_valid()
        mdata.error_count = self.error_count()
        mdata.all_expected_files = self.all_expected_files()
        mdata.manifest_path = self.manifest_path
        self.distribute_update(mdata)

    def metadata_update(self, mdata: Metadata) -> None:
        """Write ``mdata`` to the manifest file at ``mdata.manifest_path``.

        The completion fields are only written once ``mdata.time_completed``
        is set. The file is rewritten in full as indented JSON.
        """
        m = {
            "time": mdata.time_string,
            "uuid": mdata.uuid_string,
            "serial": mdata.by_line is False,
        }
        if mdata.time_completed:
            m["time_completed"] = mdata.time_completed_string
            m["all_completed"] = mdata.all_completed
            m["all_valid"] = mdata.all_valid
            m["error_count"] = mdata.error_count
            m["all_expected_files"] = mdata.all_expected_files
        m["status"] = mdata.status
        m["run_home"] = mdata.run_home
        m["named_results_name"] = mdata.named_results_name
        m["named_paths_name"] = mdata.named_paths_name
        m["named_file_name"] = mdata.named_file_name
        m["named_file_path"] = mdata.named_file_path
        m["named_file_size"] = mdata.named_file_size
        m["named_file_last_change"] = mdata.named_file_last_change
        m["named_file_fingerprint"] = mdata.named_file_fingerprint
        m["named_file_fingerprint_on_file"] = mdata.named_file_fingerprint_on_file
        m["hostname"] = mdata.hostname
        m["username"] = mdata.username
        m["ip_address"] = mdata.ip_address
        mp = mdata.manifest_path
        m["manifest_path"] = mp
        with DataFileWriter(path=mp) as file:
            json.dump(m, file.sink, indent=2)

    def _fingerprint_file(self, path) -> str:
        """Return the fingerprint ``DataFileReader`` computes for ``path``."""
        with DataFileReader(path) as f:
            return f.fingerprint()

    def _size(self, path) -> int:
        """Return the ``"bytes"`` entry of ``FileInfo.info(path)``, or 0 if
        the file is not found.

        NOTE(review): the ``int`` annotation assumes ``"bytes"`` is a
        numeric byte count, as the 0 fallback suggests -- confirm.
        """
        try:
            return FileInfo.info(path)["bytes"]
        except FileNotFoundError:
            return 0

    def _last_change(self, path):
        """Return the ``"last_mod"`` entry of ``FileInfo.info(path)``, or -1
        if the file is not found.

        No return annotation is given because the type of ``"last_mod"`` is
        decided by ``FileInfo``; only the -1 fallback is fixed here.
        """
        try:
            return FileInfo.info(path)["last_mod"]
        except FileNotFoundError:
            return -1

    def all_valid(self) -> bool:
        """Return True if every result's csvpath is valid.

        With no results (``None`` or empty) this is True.
        """
        return all(r.csvpath.is_valid for r in self.results or [])

    def all_completed(self) -> bool:
        """Return True if every result's csvpath completed.

        With no results (``None`` or empty) this is True.
        """
        return all(r.csvpath.completed for r in self.results or [])

    def error_count(self) -> int:
        """Return the sum of ``errors_count`` over all results (0 if none)."""
        return sum(r.errors_count for r in self.results or [])

    def all_expected_files(self) -> bool:
        """Return True if every result reports all of its expected files.

        Each result is checked through a ``ResultRegistrar`` sharing one
        ``ResultSerializer`` rooted at the configured archive path. With no
        results (``None`` or empty) this is True.
        """
        rs = ResultSerializer(self.csvpaths.config.archive_path)
        for r in self.results or []:
            rr = ResultRegistrar(csvpaths=self.csvpaths, result=r, result_serializer=rs)
            # NOTE(review): read as an attribute, so this presumably is a
            # property on ResultRegistrar -- confirm it is not a plain method.
            if not rr.all_expected_files:
                return False
        return True

    @property
    def manifest(self) -> dict[str, str | bool]:
        """The current contents of the manifest file, parsed from JSON."""
        with DataFileReader(self.manifest_path) as file:
            return json.load(file.source)

    @property
    def manifest_path(self) -> str:
        """Path of ``manifest.json`` inside ``run_dir``.

        Reading this property creates ``run_dir`` if it does not exist yet.
        """
        run_dir = Nos(self.run_dir)
        if not run_dir.exists():
            run_dir.makedir()
        return os.path.join(self.run_dir, "manifest.json")