csvpath.managers.results.result_serializer

  1import os
  2import json
  3import csv
  4from typing import NewType, List, Dict, Optional, Union
  5from datetime import datetime
  6from csvpath import CsvPath
  7from csvpath.matching.util.runtime_data_collector import RuntimeDataCollector
  8from csvpath.util.line_spooler import LineSpooler
  9from csvpath.util.file_writers import DataFileWriter
 10from csvpath.util.nos import Nos
 11
# Names used in the type annotations of ResultSerializer._save().
# NOTE(review): these are declared with NewType but appear in this module only
# as annotation names — confirm nothing relies on calling them.

# A single scalar value: None, str, int, float or bool.
Simpledata = NewType("Simpledata", Union[None | str | int | float | bool])
"""@private"""
# A flat list of scalar values (None, str, int, float or bool).
Listdata = NewType("Listdata", list[None | str | int | float | bool])
"""@private"""
# A list of rows, each row being a list of strings.
Csvdata = NewType("Csvdata", list[List[str]])
"""@private"""
# A mapping from string keys to Simpledata scalar values.
Metadata = NewType("Metadata", Dict[str, Simpledata])
"""@private"""
 20
 21
 22class ResultSerializer:
 23    """@private"""
 24    def __init__(self, base_dir: str):
 25        # base is the archive dir from config.ini
 26        self.base_dir = base_dir
 27        self.result = None
 28
 29    def save_result(self, result) -> None:
 30        self.result = result
 31        runtime_data = {}
 32        result.csvpath.csvpaths.logger.debug(
 33            "Saving result of %s.%s", result.paths_name, result.identity_or_index
 34        )
 35        RuntimeDataCollector.collect(result.csvpath, runtime_data, local=True)
 36        runtime_data["run_index"] = result.run_index
 37        es = []
 38        if result is not None and result.errors:
 39            es = [e.to_json() for e in result.errors]
 40        self._save(
 41            metadata=result.csvpath.metadata,
 42            errors=es,
 43            variables=result.variables,
 44            lines=result.lines,
 45            printouts=result.printouts,
 46            runtime_data=runtime_data,
 47            paths_name=result.paths_name,
 48            file_name=result.file_name,
 49            identity=result.identity_or_index,
 50            run_time=result.run_time,
 51            run_dir=result.run_dir,
 52            run_index=result.run_index,
 53            unmatched=result.unmatched,
 54        )
 55        self.result = None
 56
 57    def _save(
 58        self,
 59        *,
 60        metadata: Metadata,
 61        runtime_data: Metadata,
 62        errors: List[Metadata],
 63        variables: dict[str, Simpledata | Listdata | Metadata],
 64        lines: Csvdata,
 65        printouts: dict[str, list[str]],
 66        paths_name: str,
 67        file_name: str,
 68        identity: str,
 69        run_time: datetime,
 70        run_dir: str,
 71        run_index: int,
 72        unmatched: list[Listdata],
 73    ) -> None:
 74        """Save a single Result object to basedir/paths_name/run_time/identity_or_index."""
 75        meta = {
 76            "paths_name": paths_name,
 77            "file_name": file_name,
 78            "run_time": f"{run_time}",
 79            "run_index": run_index,
 80            "identity": identity,
 81            "metadata": metadata,
 82            "runtime_data": runtime_data,
 83        }
 84        run_dir = self.get_instance_dir(run_dir=run_dir, identity=identity)
 85        # Save the JSON files
 86        with DataFileWriter(path=os.path.join(run_dir, "meta.json")) as f:
 87            json.dump(meta, f.sink, indent=2)
 88        with DataFileWriter(path=os.path.join(run_dir, "errors.json")) as f:
 89            json.dump(errors, f.sink, indent=2)
 90        with DataFileWriter(path=os.path.join(run_dir, "vars.json")) as f:
 91            json.dump(variables, f.sink, indent=2)
 92        # Save lines returned as a CSV file. note that they may have already
 93        # spooled and the spooler been discarded.
 94        if lines is not None:
 95            if isinstance(lines, LineSpooler) and lines.closed is True:
 96                self.result.csvpath.logger.debug(
 97                    "line spooler has already written its data"
 98                )
 99            elif isinstance(lines, LineSpooler):
100                self.result.csvpath.logger.debug(
101                    "not writing data in/from line spooler even though lines.closed is not True"
102                )
103            else:
104                #
105                # this may not be right, but I think we can/maybe should not write data unless
106                # we have some. that would match the possible spooler behavior. it would also
107                # match fast_forward, which might be confusing, but if we capture the what method
108                # a run used that's not a worry. and if we don't, not having a data file is a
109                # poor indicator of the method anyway.
110                #
111                if lines is not None and len(lines) > 0:
112                    with DataFileWriter(path=os.path.join(run_dir, "data.csv")) as f:
113                        writer = csv.writer(f.sink)
114                        writer.writerows(lines)
115        #
116        # writing is not needed. LineSpoolers are intended to stream their
117        # lines to disk. if we write here we'll be reading and writing the
118        # same file at the same time.
119        #
120        if (
121            unmatched is not None
122            and not isinstance(unmatched, LineSpooler)
123            and len(unmatched) > 0
124        ):
125            with DataFileWriter(path=os.path.join(run_dir, "unmatched.csv")) as f:
126                writer = csv.writer(f.sink)
127                writer.writerows(unmatched)
128
129        # Save the printout lines
130        if self._has_printouts(printouts):
131            with DataFileWriter(path=os.path.join(run_dir, "printouts.txt")) as f:
132                for k, v in printouts.items():
133                    f.sink.write(f"---- PRINTOUT: {k}\n")
134                    for _ in v:
135                        f.sink.write(f"{_}\n")
136
137    def _has_printouts(self, pos) -> bool:
138        if pos is None:
139            return False
140        if len(pos) == 0:
141            return False
142        for k, v in pos.items():
143            if v is not None and len(v) > 0:
144                return True
145        return False
146
147    def _deref_paths_name(self, paths_name) -> str:
148        #
149        # if we have a reference we need to de-ref so that our path has only
150        # the named-paths name at the top, not the $, datatype, etc.
151        #
152        paths_name = paths_name.lstrip("$")
153        i = paths_name.find(".")
154        if i > -1:
155            paths_name = paths_name[0:i]
156        i = paths_name.find("#")
157        if i > -1:
158            paths_name = paths_name[0:i]
159        return paths_name
160
161    def get_run_dir_name_from_datetime(self, dt) -> str:
162        if dt is None:
163            return None
164        t = dt.strftime("%Y-%m-%d_%I-%M-%S")
165        return t
166
167    def get_run_dir(self, *, paths_name, run_time):
168        paths_name = self._deref_paths_name(paths_name)
169        run_dir = os.path.join(self.base_dir, paths_name)
170        if not Nos(run_dir).dir_exists():
171            Nos(run_dir).makedirs()
172        if not isinstance(run_time, str):
173            run_time = self.get_run_dir_name_from_datetime(run_time)
174        run_dir = os.path.join(run_dir, f"{run_time}")
175        # the path existing for a different named-paths run in progress
176        # or having completed less than 1000ms ago is expected to be
177        # uncommon in real world usage. CsvPaths are single user instances
178        # atm. a server process would namespace each CsvPaths instance
179        # to prevent conflicts. if there is a conflict the two runs would
180        # overwrite each other. this prevents that.
181        if Nos(run_dir).dir_exists():
182            i = 0
183            adir = f"{run_dir}.{i}"
184            while Nos(adir).dir_exists():
185                i += 1
186                adir = f"{run_dir}.{i}"
187            run_dir = adir
188        return run_dir
189
190    def get_instance_dir(self, run_dir, identity) -> str:
191        run_dir = os.path.join(run_dir, identity)
192        if not Nos(run_dir).exists():
193            Nos(run_dir).makedirs()
194        return run_dir