1import os
2import json
3import csv
4from typing import NewType, List, Dict, Optional, Union
5from datetime import datetime
6from csvpath import CsvPath
7from csvpath.matching.util.runtime_data_collector import RuntimeDataCollector
8from csvpath.util.line_spooler import LineSpooler
9from csvpath.util.file_writers import DataFileWriter
10from csvpath.util.nos import Nos
11
# Type aliases describing the JSON-serializable shapes ResultSerializer
# writes: scalar values, flat lists of scalars, CSV line data, and
# string-keyed metadata dicts.
Simpledata = NewType("Simpledata", Union[None, str, int, float, bool])
"""@private"""
Listdata = NewType("Listdata", List[Union[None, str, int, float, bool]])
"""@private"""
Csvdata = NewType("Csvdata", List[List[str]])
"""@private"""
Metadata = NewType("Metadata", Dict[str, Simpledata])
"""@private"""
20
21
class ResultSerializer:
    """@private

    Persists a single run Result's artifacts to the archive directory
    tree: base_dir/paths_name/run_time/identity. Artifacts written are
    meta.json, errors.json, vars.json, data.csv, unmatched.csv, and
    printouts.txt.
    """

    def __init__(self, base_dir: str):
        # base_dir is the archive dir from config.ini
        self.base_dir = base_dir
        # the Result currently being saved; set only for the duration of
        # save_result() so _save() can reach its logger.
        self.result = None

    def save_result(self, result) -> None:
        """Collect runtime data from the result's CsvPath and persist all
        of the result's artifacts under its run directory."""
        self.result = result
        runtime_data = {}
        result.csvpath.csvpaths.logger.debug(
            "Saving result of %s.%s", result.paths_name, result.identity_or_index
        )
        RuntimeDataCollector.collect(result.csvpath, runtime_data, local=True)
        runtime_data["run_index"] = result.run_index
        # result cannot be None here -- it was already dereferenced above --
        # so only the errors check is needed.
        es = [e.to_json() for e in result.errors] if result.errors else []
        self._save(
            metadata=result.csvpath.metadata,
            errors=es,
            variables=result.variables,
            lines=result.lines,
            printouts=result.printouts,
            runtime_data=runtime_data,
            paths_name=result.paths_name,
            file_name=result.file_name,
            identity=result.identity_or_index,
            run_time=result.run_time,
            run_dir=result.run_dir,
            run_index=result.run_index,
            unmatched=result.unmatched,
        )
        self.result = None

    def _save(
        self,
        *,
        metadata: Metadata,
        runtime_data: Metadata,
        errors: List[Metadata],
        variables: dict[str, Simpledata | Listdata | Metadata],
        lines: Csvdata,
        printouts: dict[str, list[str]],
        paths_name: str,
        file_name: str,
        identity: str,
        run_time: datetime,
        run_dir: str,
        run_index: int,
        unmatched: list[Listdata],
    ) -> None:
        """Save a single Result object to basedir/paths_name/run_time/identity_or_index."""
        meta = {
            "paths_name": paths_name,
            "file_name": file_name,
            "run_time": f"{run_time}",
            "run_index": run_index,
            "identity": identity,
            "metadata": metadata,
            "runtime_data": runtime_data,
        }
        run_dir = self.get_instance_dir(run_dir=run_dir, identity=identity)
        # the three JSON artifacts are always written
        with DataFileWriter(path=os.path.join(run_dir, "meta.json")) as f:
            json.dump(meta, f.sink, indent=2)
        with DataFileWriter(path=os.path.join(run_dir, "errors.json")) as f:
            json.dump(errors, f.sink, indent=2)
        with DataFileWriter(path=os.path.join(run_dir, "vars.json")) as f:
            json.dump(variables, f.sink, indent=2)
        # Save lines returned as a CSV file. LineSpoolers stream their lines
        # to disk themselves; writing here would mean reading and writing the
        # same file at the same time, so spooled lines are skipped whether or
        # not the spooler has closed.
        if lines is not None:
            if isinstance(lines, LineSpooler) and lines.closed is True:
                self.result.csvpath.logger.debug(
                    "line spooler has already written its data"
                )
            elif isinstance(lines, LineSpooler):
                self.result.csvpath.logger.debug(
                    "not writing data in/from line spooler even though lines.closed is not True"
                )
            elif len(lines) > 0:
                # only write data.csv when there is data. that matches the
                # possible spooler behavior and fast_forward runs; a missing
                # data file is a poor indicator of the run method anyway, so
                # the method used should be captured elsewhere.
                with DataFileWriter(path=os.path.join(run_dir, "data.csv")) as f:
                    writer = csv.writer(f.sink)
                    writer.writerows(lines)
        # unmatched lines follow the same rules: never write for a
        # LineSpooler, and only write when there is something to write.
        if (
            unmatched is not None
            and not isinstance(unmatched, LineSpooler)
            and len(unmatched) > 0
        ):
            with DataFileWriter(path=os.path.join(run_dir, "unmatched.csv")) as f:
                writer = csv.writer(f.sink)
                writer.writerows(unmatched)
        # save the printout lines, one "---- PRINTOUT:" section per name
        if self._has_printouts(printouts):
            with DataFileWriter(path=os.path.join(run_dir, "printouts.txt")) as f:
                for k, v in printouts.items():
                    f.sink.write(f"---- PRINTOUT: {k}\n")
                    for line in v:
                        f.sink.write(f"{line}\n")

    def _has_printouts(self, pos: Optional[dict]) -> bool:
        """True when at least one printout name has at least one line."""
        if not pos:
            return False
        # a value may be None or an empty list; both count as no printout
        return any(pos.values())

    def _deref_paths_name(self, paths_name: str) -> str:
        """Reduce a reference like $name.datatype#fragment to just the
        named-paths name, so the archive path has only that name at the
        top -- not the $, datatype, etc."""
        paths_name = paths_name.lstrip("$")
        i = paths_name.find(".")
        if i > -1:
            paths_name = paths_name[0:i]
        i = paths_name.find("#")
        if i > -1:
            paths_name = paths_name[0:i]
        return paths_name

    def get_run_dir_name_from_datetime(self, dt) -> Optional[str]:
        """Format dt as the run directory name; None passes through.

        NOTE(review): %I is the 12-hour clock, so e.g. 01:04 and 13:04 on
        the same day produce the same name. get_run_dir's .N suffixing
        absorbs the collision, but %H may have been intended -- confirm
        before changing the on-disk naming scheme.
        """
        if dt is None:
            return None
        return dt.strftime("%Y-%m-%d_%I-%M-%S")

    def get_run_dir(self, *, paths_name, run_time):
        """Return a unique run directory path for a run of paths_name at
        run_time, creating the named-paths parent directory if needed."""
        paths_name = self._deref_paths_name(paths_name)
        run_dir = os.path.join(self.base_dir, paths_name)
        if not Nos(run_dir).dir_exists():
            Nos(run_dir).makedirs()
        if not isinstance(run_time, str):
            run_time = self.get_run_dir_name_from_datetime(run_time)
        run_dir = os.path.join(run_dir, f"{run_time}")
        # the path existing for a different named-paths run in progress
        # or having completed less than 1000ms ago is expected to be
        # uncommon in real world usage. CsvPaths are single user instances
        # atm. a server process would namespace each CsvPaths instance
        # to prevent conflicts. if there is a conflict the two runs would
        # overwrite each other. suffixing .0, .1, ... prevents that.
        if Nos(run_dir).dir_exists():
            i = 0
            adir = f"{run_dir}.{i}"
            while Nos(adir).dir_exists():
                i += 1
                adir = f"{run_dir}.{i}"
            run_dir = adir
        return run_dir

    def get_instance_dir(self, run_dir, identity) -> str:
        """Return run_dir/identity, creating the directory if needed."""
        # NOTE(review): this checks Nos.exists() while get_run_dir checks
        # dir_exists() -- confirm the two are interchangeable here.
        run_dir = os.path.join(run_dir, identity)
        if not Nos(run_dir).exists():
            Nos(run_dir).makedirs()
        return run_dir