1import os
2import json
3from datetime import datetime
4from csvpath.util.file_readers import DataFileReader
5from csvpath.util.file_writers import DataFileWriter
6from csvpath.util.nos import Nos
7from ..listener import Listener
8from ..metadata import Metadata
9from ..registrar import Registrar
10from .result_metadata import ResultMetadata
11
12
class ResultRegistrar(Registrar, Listener):
    """@private

    Builds and persists the ``manifest.json`` of one result instance.

    ``register_start`` and ``register_complete`` fill a metadata object from
    the result and its csvpath and pass it to ``distribute_update``;
    ``metadata_update`` serialises a metadata object to the instance
    manifest file.
    """

    # Files that are fingerprinted when present. The order is the order of
    # the keys in the resulting fingerprints dict.
    _FINGERPRINTED_FILES = (
        "data.csv",
        "meta.json",
        "unmatched.csv",
        "printouts.txt",
        "errors.json",
        "vars.json",
    )

    # Files that must exist for every instance, whatever else is expected.
    _REQUIRED_FILES = ("meta.json", "errors.json", "vars.json")

    # (expectation keyword, file name) pairs for the optional outputs. An
    # expectation "<keyword>" or "all" requires the file; "no-<keyword>"
    # forbids it.
    _OPTIONAL_FILES = (
        ("data", "data.csv"),
        ("unmatched", "unmatched.csv"),
        ("printouts", "printouts.txt"),
    )

    def __init__(self, *, csvpaths, result, result_serializer=None):
        """Initialise both bases and remember the serializer.

        Args:
            csvpaths: passed to ``Registrar``; its ``config`` is passed to
                ``Listener``.
            result: the result this registrar describes; passed to
                ``Registrar``.
            result_serializer: used to resolve the run-dir name and the
                instance directory. Those lookups fail if it is ``None``.
        """
        # The bases take different constructor arguments, so each one is
        # initialised explicitly rather than through super().
        Registrar.__init__(self, csvpaths, result)
        Listener.__init__(self, csvpaths.config)
        self.result_serializer = result_serializer
        self.type_name = "result"

    def _set_preceding_instance_identity(self, mdata: Metadata) -> None:
        """Record the identity of the instance that ran just before this one.

        Does nothing when the result's run index is unset or below 1.
        Otherwise looks up the named results for ``self.result.paths_name``
        and copies the ``identity_or_index`` of the entry at
        ``run_index - 1`` onto ``mdata``.
        """
        run_index = int(self.result.run_index) if self.result.run_index else 0
        if run_index < 1:
            return
        manager = self.result.csvpath.csvpaths.results_manager
        named_results = manager.get_named_results(self.result.paths_name)
        preceding = named_results[run_index - 1]
        mdata.preceding_instance_identity = preceding.identity_or_index

    def register_start(self, mdata: Metadata) -> None:
        """Fill the start-of-run fields of ``mdata`` and distribute it.

        Args:
            mdata: metadata object to populate in place.
        """
        # Read first, as the original did, so any storage error surfaces
        # before mdata is touched.
        named_paths = self.named_paths_manifest
        mdata.by_line = self.result.by_line
        mdata.manifest_path = self.manifest_path
        mdata.instance_index = self.result.run_index
        mdata.actual_data_file = self.result.actual_data_file
        mdata.origin_data_file = self.result.origin_data_file
        self._set_preceding_instance_identity(mdata)
        if named_paths is None:
            self.result.csvpath.csvpaths.logger.debug(
                "No named-paths manifest available at %s so not setting named_paths_uuid_string",
                self.named_paths_manifest_path,
            )
        else:
            mdata.named_paths_uuid_string = named_paths["uuid"]
        self.distribute_update(mdata)

    def register_complete(self, mdata: Metadata | None = None) -> None:
        """Fill the end-of-run fields of the metadata and distribute it.

        The results manager delegates the bits to the serializer and the
        metadata assembly to this registrar, so we expect it to hand us
        nothing but the result object and serializer.

        Args:
            mdata: metadata object to populate. When ``None`` a new
                ``ResultMetadata`` is created from ``self.csvpaths.config``.
        """
        # Reading the manifest creates an empty one if none exists yet.
        current = self.manifest
        if mdata is None:
            mdata = ResultMetadata(config=self.csvpaths.config)
        mdata.from_manifest(current)

        result = self.result
        mdata.archive_name = self.archive_name
        mdata.named_results_name = result.paths_name
        mdata.run = self.result_serializer.get_run_dir_name_from_datetime(
            result.run_time
        )
        mdata.by_line = result.by_line
        mdata.source_mode_preceding = result.source_mode_preceding
        mdata.run_home = result.run_dir
        mdata.instance_home = result.instance_dir
        mdata.instance_identity = result.identity_or_index
        mdata.instance_index = result.run_index
        mdata.named_file_name = result.file_name
        mdata.input_data_file = result.file_name
        mdata.file_fingerprints = self.file_fingerprints
        mdata.file_count = len(mdata.file_fingerprints)
        mdata.error_count = result.errors_count
        mdata.valid = result.csvpath.is_valid
        mdata.completed = self.completed
        mdata.files_expected = self.all_expected_files
        if result.csvpath.transfers:
            mdata.transfers = result.csvpath.csvpaths.results_manager.transfer_paths(
                result
            )
        mdata.actual_data_file = result.actual_data_file
        mdata.origin_data_file = result.origin_data_file
        self._set_preceding_instance_identity(mdata)
        self.distribute_update(mdata)

    def metadata_update(self, mdata: Metadata) -> None:
        """Write ``mdata`` to this instance's ``manifest.json``.

        NOTE(review): presumably invoked by the listener machinery behind
        ``distribute_update`` -- confirm against ``Listener``.

        Args:
            mdata: the metadata to serialise.

        Raises:
            ValueError: if ``mdata.time`` is ``None``.
        """
        if mdata.time is None:
            raise ValueError("Time cannot be None")
        # Insertion order is the key order of the written JSON, so the
        # assignments below are kept in their original sequence.
        m = {}
        m["time"] = mdata.time_string
        m["uuid"] = mdata.uuid_string
        m["serial"] = mdata.by_line is False
        m["archive_name"] = mdata.archive_name
        m["named_results_name"] = mdata.named_results_name
        m["named_paths_uuid"] = mdata.named_paths_uuid_string
        m["run"] = mdata.run
        m["run_home"] = mdata.run_home
        m["instance_identity"] = mdata.instance_identity
        m["instance_index"] = mdata.instance_index
        m["instance_home"] = mdata.instance_home
        m["file_fingerprints"] = mdata.file_fingerprints
        m["files_expected"] = mdata.files_expected
        m["file_count"] = mdata.file_count
        m["valid"] = mdata.valid
        m["completed"] = mdata.completed
        m["source_mode_preceding"] = mdata.source_mode_preceding
        if mdata.source_mode_preceding:
            m["preceding_instance_identity"] = mdata.preceding_instance_identity
        m["actual_data_file"] = mdata.actual_data_file
        m["origin_data_file"] = mdata.origin_data_file
        m["named_file_name"] = mdata.named_file_name
        if mdata.transfers:
            m["transfers"] = mdata.transfers
        mp = self.manifest_path
        m["manifest_path"] = mp
        with DataFileWriter(path=mp) as file:
            json.dump(m, file.sink, indent=2)

    @property
    def archive_name(self) -> str:
        """Return the last segment of the configured archive path.

        The path is split on the separator reported by ``Nos`` for that
        path. A path without any separator is returned unchanged.
        """
        ap = self.result.csvpath.config.archive_path
        sep = Nos(ap).sep
        i = ap.rfind(sep)
        # rfind returns -1 when there is no separator. Index 0 is a real
        # separator (e.g. "/archive") and must be stripped like any other.
        if i >= 0:
            return ap[i + 1 :]
        return ap

    @property
    def named_paths_manifest(self) -> dict | None:
        """Load the manifest for the named-paths as a whole.

        Returns:
            The parsed JSON, or ``None`` when no file exists at
            ``named_paths_manifest_path``.
        """
        path = self.named_paths_manifest_path
        if not Nos(path).exists():
            return None
        with DataFileReader(path) as file:
            return json.load(file.source)

    @property
    def named_paths_manifest_path(self) -> str:
        """Return the path of ``manifest.json`` directly under the run dir."""
        return os.path.join(self.result.run_dir, "manifest.json")

    #
    # switch to use ResultManifestReader.manifest
    #
    @property
    def manifest(self) -> dict:
        """Load this instance's manifest, creating an empty one if missing.

        Returns:
            The parsed manifest. When the file does not exist yet, ``{}`` is
            written to ``manifest_path`` and ``{}`` is returned.
        """
        # Resolve once: manifest_path goes through result_path, which checks
        # (and may create) the instance directory on every access.
        mp = self.manifest_path
        if not Nos(mp).exists():
            with DataFileWriter(path=mp) as file:
                json.dump({}, file.sink, indent=2)
            return {}
        with DataFileReader(mp) as file:
            return json.load(file.source)

    @property
    def manifest_path(self) -> str:
        """Return the path of ``manifest.json`` inside the instance dir."""
        return os.path.join(self.result_path, "manifest.json")

    @property
    def result_path(self) -> str:
        """Return the instance directory, creating it when it is missing.

        The directory is resolved by the result serializer from the
        result's run dir and its ``identity_or_index``.
        """
        rdir = self.result_serializer.get_instance_dir(
            run_dir=self.result.run_dir, identity=self.result.identity_or_index
        )
        if not Nos(rdir).exists():
            Nos(rdir).makedir()
        return rdir

    @property
    def completed(self) -> bool:
        """Return the ``completed`` flag of the result's csvpath."""
        return self.result.csvpath.completed

    @property
    def all_expected_files(self) -> bool:
        """Return True when the instance dir matches the file expectations.

        ``meta.json``, ``errors.json`` and ``vars.json`` must always exist.
        We can be missing any or all of ``data.csv``, ``unmatched.csv`` and
        ``printouts.txt`` without it necessarily being a failure mode, but
        the csvpath can require or forbid them as a matter of content
        validation through its ``all_expected_files`` entries:

        * ``data`` / ``unmatched`` / ``printouts`` -- the file must exist
        * ``no-data`` / ``no-unmatched`` / ``no-printouts`` -- the file must
          not exist
        * ``all`` -- all three optional files must exist

        Entries are stripped and matched by prefix.
        """
        expectations = self.result.csvpath.all_expected_files
        # None and empty both mean "no optional-file expectations".
        if expectations:
            for token in expectations:
                token = token.strip()
                for keyword, filename in self._OPTIONAL_FILES:
                    if token.startswith(f"no-{keyword}") and self.has_file(filename):
                        return False
                    if token.startswith((keyword, "all")) and not self.has_file(
                        filename
                    ):
                        return False
        return all(self.has_file(name) for name in self._REQUIRED_FILES)

    def has_file(self, t: str) -> bool:
        """Return True when a file named ``t`` exists in the instance dir."""
        return Nos(os.path.join(self.result_path, t)).exists()

    @property
    def file_fingerprints(self) -> dict[str, str]:
        """Return fingerprints of the instance's files, keyed by file name.

        Only the known result files are considered; files that do not exist
        are left out of the dict.
        """
        instance_dir = self.result_path
        fingerprints = {}
        for name in self._FINGERPRINTED_FILES:
            fingerprint = self._fingerprint(os.path.join(instance_dir, name))
            if fingerprint is not None:
                fingerprints[name] = fingerprint
        return fingerprints

    def _fingerprint(self, path) -> str | None:
        """Return the fingerprint of the file at ``path``, or None if absent.

        A path that is neither a URL (contains ``://``) nor absolute is
        resolved against the current working directory before use.
        """
        is_url = "://" in path
        # startswith("/") is kept alongside isabs so every path the original
        # treated as absolute still is; isabs additionally recognises
        # platform-specific absolute forms such as Windows drive paths.
        is_absolute = path.startswith("/") or os.path.isabs(path)
        if not is_url and not is_absolute:
            path = os.path.join(os.getcwd(), path)
        if not Nos(path).exists():
            return None
        with DataFileReader(path) as f:
            return f.fingerprint()