csvpath.managers.files.file_manager

  1import os
  2import json
  3import csv
  4from json import JSONDecodeError
  5from csvpath.util.file_readers import DataFileReader
  6from csvpath.util.file_writers import DataFileWriter
  7from csvpath.util.reference_parser import ReferenceParser
  8from csvpath.util.exceptions import InputException, FileException
  9from csvpath.util.nos import Nos
 10from .file_registrar import FileRegistrar
 11from .file_cacher import FileCacher
 12from .file_metadata import FileMetadata
 13
 14
 15class FileManager:
 16    def __init__(self, *, named_files: dict[str, str] = None, csvpaths=None):
 17        """@private"""
 18        if named_files is None:
 19            named_files = {}
 20        self._csvpaths = csvpaths
 21        self.registrar = FileRegistrar(csvpaths)
 22        """@private"""
 23        self.cacher = FileCacher(csvpaths)
 24        """@private"""
 25
 26    @property
 27    def csvpaths(self):
 28        """@private"""
 29        return self._csvpaths
 30
 31    #
 32    # named file dir is like: inputs/named_files
 33    #
 34    @property
 35    def named_files_dir(self) -> str:
 36        """@private"""
 37        return self._csvpaths.config.inputs_files_path
 38
 39    #
 40    # the root manifest file tracking all name-file stagings. note that
 41    # this is created by an optional listener. it is possible to run without
 42    # creating the root manifest or capturing the data with another listener.
 43    #
 44    @property
 45    def files_root_manifest(self) -> dict:
 46        """@private"""
 47        p = self.files_root_manifest_path
 48        if Nos(p).exists():
 49            with DataFileReader(p) as reader:
 50                return json.load(reader.source)
 51        return None
 52
 53    @property
 54    def files_root_manifest_path(self) -> dict:
 55        """@private"""
 56        return os.path.join(self.named_files_dir, "manifest.json")
 57
 58    #
 59    # named-file homes are a dir like: inputs/named_files/March-2024/March-2024.csv
 60    #
 61    def named_file_home(self, name: str) -> str:
 62        """@private"""
 63        #
 64        # not a named-file name
 65        #
 66        if name.find("/") > -1:
 67            #
 68            # this is definitely not what we should be returning. but it is what
 69            # works in the new world of remote and fully-qualified local paths.
 70            # for now, going with it. the previous implementation was wonky too,
 71            # in a different and not visible way, but not good, so this is a step
 72            # up in multiple ways.
 73            #
 74            return ""
 75        #
 76        # added
 77        #
 78        home = None
 79        if name.startswith("/"):
 80            home = name
 81        else:
 82            #
 83            # done add
 84            #
 85            home = os.path.join(self.named_files_dir, name)
 86        #
 87        # added
 88        #
 89        if Nos(home).isfile():
 90            home = home[0 : home.rfind(Nos(home).sep)]
 91        #
 92        # done add
 93        #
 94        return home
 95
 96    def assure_named_file_home(self, name: str) -> str:
 97        """@private"""
 98        home = self.named_file_home(name)
 99        if not os.path.exists(home):
100            Nos(home).makedirs()
101        return home
102
103    #
104    # file homes are paths to files like:
105    #   inputs/named_files/March-2024/March-2024.csv/March-2024.csv
106    # which become paths to fingerprint-named file versions like:
107    #   inputs/named_files/March-2024/March-2024.csv/12467d811d1589ede586e3a42c41046641bedc1c73941f4c21e2fd2966f188b4.csv
108    # once the files have been fingerprinted
109    #
110    def assure_file_home(self, name: str, path: str) -> str:
111        """@private"""
112        if path.find("#") > -1:
113            path = path[0 : path.find("#")]
114        sep = Nos(path).sep
115        fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
116        home = self.named_file_home(name)
117        home = os.path.join(home, fname)
118        if not Nos(home).exists():
119            Nos(home).makedirs()
120        return home
121
122    @property
123    def named_files_count(self) -> int:
124        """@private"""
125        return len(self.named_file_names)
126
127    @property
128    def named_file_names(self) -> list:
129        """@private"""
130        b = self.named_files_dir
131        ns = [n for n in Nos(b).listdir() if not Nos(os.path.join(b, n)).isfile()]
132        return ns
133
134    def name_exists(self, name: str) -> bool:
135        """@private"""
136        p = self.named_file_home(name)
137        b = Nos(p).dir_exists()
138        return b
139
140    def remove_named_file(self, name: str) -> None:
141        """@private"""
142        p = os.path.join(self.named_files_dir, name)
143        Nos(p).remove()
144
145    def remove_all_named_files(self) -> None:
146        """@private"""
147        names = self.named_file_names
148        for name in names:
149            self.remove_named_file(name)
150
151    def set_named_files(self, nf: dict[str, str]) -> None:
152        """@private"""
153        for k, v in nf.items():
154            self.add_named_file(name=k, path=v)
155
156    def set_named_files_from_json(self, filename: str) -> None:
157        """named-files from json files are always local"""
158        try:
159            #
160            # TODO: named-files json files are always local. they should
161            # be able to be on s3 so that we are completely independent of
162            # the local disk w/re file manager
163            #
164            with open(filename, "r", encoding="utf-8") as f:
165                j = json.load(f)
166                self.set_named_files(j)
167        except (OSError, ValueError, TypeError, JSONDecodeError) as ex:
168            self.csvpaths.error_manager.handle_error(source=self, msg=f"{ex}")
169            if self.csvpaths.ecoms.do_i_raise():
170                raise
171
172    def add_named_files_from_dir(self, dirname: str):
173        dlist = Nos(dirname).listdir()
174        base = dirname
175        for p in dlist:
176            _ = p.lower()
177            ext = p[p.rfind(".") + 1 :].strip().lower()
178            if ext in self._csvpaths.config.csv_file_extensions:
179                name = p if p.rfind(".") == -1 else p[0 : p.rfind(".")]
180                path = os.path.join(base, p)
181                self.add_named_file(name=name, path=path)
182            else:
183                self._csvpaths.logger.debug(
184                    "%s is not in accept list", os.path.join(base, p)
185                )
186
187    #
188    # -------------------------------------
189    #
190    def add_named_file(self, *, name: str, path: str) -> None:
191        #
192        # create folder tree in inputs/named_files/name/filename
193        #
194        home = self.assure_file_home(name, path)
195        file_home = home
196        mark = None
197        #
198        # find mark if there. mark indicates a sheet. it is found
199        # as the trailing word after a # at the end of the path e.g.
200        # my-xlsx.xlsx#sheet2
201        #
202        hm = home.find("#")
203        if hm > -1:
204            mark = home[hm + 1 :]
205            home = home[0:hm]
206        pm = path.find("#")
207        if pm > -1:
208            mark = path[pm + 1 :]
209            path = path[0:pm]
210        #
211        # copy file to its home location
212        #
213        self._copy_in(path, home)
214        name_home = self.named_file_home(name)
215        rpath, h = self._fingerprint(home)
216        mdata = FileMetadata(self.csvpaths.config)
217        mdata.named_file_name = name
218        #
219        # we need the declared path, incl. any extra path info, in order
220        # to know if we are being pointed at a sub-portion of the data, e.g.
221        # an excel worksheet.
222        #
223        path = f"{path}#{mark}" if mark else path
224        mdata.origin_path = path
225        mdata.archive_name = self._csvpaths.config.archive_name
226        mdata.fingerprint = h
227        mdata.file_path = rpath
228        mdata.file_home = file_home
229        mdata.file_name = file_home[file_home.rfind(Nos(file_home).sep) + 1 :]
230        mdata.name_home = name_home
231        mdata.mark = mark
232        self.registrar.register_complete(mdata)
233
234    def _copy_in(self, path, home) -> None:
235        """@private"""
236        sep = Nos(path).sep
237        fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
238        # creates
239        #   a/file.csv -> named_files/name/file.csv/file.csv
240        # the dir name matching the resulting file name is correct
241        # once the file is landed and fingerprinted, the file
242        # name is changed.
243        temp = os.path.join(home, fname)
244        #
245        # this is another place that is too s3 vs. local. we'll have
246        # other source/sinks to support.
247        #
248        if path.startswith("s3:") and not home.startswith("s3"):
249            self._copy_down(path, temp, mode="wb")
250        elif path.startswith("s3:") and home.startswith("s3"):
251            Nos(path).copy(temp)
252        elif not path.startswith("s3:") and not home.startswith("s3"):
253            self._copy_down(path, temp, mode="wb")
254        elif not path.startswith("s3:") and home.startswith("s3"):
255            self._copy_down(path, temp, mode="wb")
256        else:
257            ...  # not possible. just being explicit for the moment.
258        return temp
259
260    def _copy_down(self, path, temp, mode="wb") -> None:
261        """@private"""
262        with DataFileReader(path) as reader:
263            with DataFileWriter(path=temp, mode=mode) as writer:
264                for line in reader.next_raw():
265                    writer.append(line)
266
267    #
268    # can take a reference. the ref would only be expected to point
269    # to the results of a csvpath in a named-paths group. it would be
270    # in this form: $group.results.2024-01-01_10-15-20.mypath
 271    # where this gets interesting is the datestamp identifying the
272    # run. we need to allow for var sub and/or other shortcuts
273    #
274    def get_named_file(self, name: str) -> str:
275        ret = None
276        if name.startswith("$"):
277            ref = ReferenceParser(name)
278            if ref.datatype != ReferenceParser.RESULTS:
279                raise InputException(
280                    f"Reference datatype must be {ReferenceParser.RESULTS}"
281                )
282            reman = self._csvpaths.results_manager
283            ret = reman.data_file_for_reference(name)
284        else:
285            if not self.name_exists(name):
286                return None
287            n = self.named_file_home(name)
288            ret = self.registrar.registered_file(n)
289        return ret
290
291    def get_fingerprint_for_name(self, name) -> str:
292        """@private"""
293        if name.startswith("$"):
294            # atm, we don't give fingerprints for references doing rewind/replay
295            return ""
296        #
297        # note: this is not creating fingerprints, just getting existing ones.
298        #
299        return self.registrar.get_fingerprint(self.named_file_home(name))
300
301    #
302    # -------------------------------------
303    #
304    def get_named_file_reader(self, name: str) -> DataFileReader:
305        """@private"""
306        path = self.get_named_file(name)
307        t = self.registrar.type_of_file(self.named_file_home(name))
308        return FileManager.get_reader(path, filetype=t)
309
310    @classmethod
311    def get_reader(
312        cls, path: str, *, filetype: str = None, delimiter=None, quotechar=None
313    ) -> DataFileReader:
314        """@private"""
315        return DataFileReader(
316            path, filetype=filetype, delimiter=delimiter, quotechar=quotechar
317        )
318
319    def _fingerprint(self, path) -> str:
320        """@private"""
321        sep = Nos(path).sep
322        fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
323        t = None
324        i = fname.find(".")
325        if i > -1:
326            t = fname[i + 1 :]
327        else:
328            t = fname
329        i = t.find("#")
330        if i > -1:
331            t = t[0:i]
332        #
333        # creating the initial file name, where the file starts
334        #
335        fpath = os.path.join(path, fname)
336        h = None
337        #
338        # this version should work local and minimize traffic when in S3
339        #
340        hpath = None
341        remove_fpath = False
342        with DataFileReader(fpath) as f:
343            h = f.fingerprint()
344            #
345            # creating the new path using the fingerprint as filename
346            #
347            hpath = os.path.join(path, h)
348            if t is not None:
349                hpath = f"{hpath}.{t}"
350            #
351            # if we're re-adding the file we don't need to make
352            # another copy of it. re-adds are fine.
353            #
354            # need an s3 way to do this
355            remove_fpath = Nos(hpath).exists()
356            #
357            # if a first add, rename the file to the fingerprint + ext
358            #
359        if remove_fpath:
360            Nos(fpath).remove()
361            return hpath, h
362        if hpath:
363            Nos(fpath).rename(hpath)
364        return hpath, h
class FileManager:
 16class FileManager:
 17    def __init__(self, *, named_files: dict[str, str] = None, csvpaths=None):
 18        """@private"""
 19        if named_files is None:
 20            named_files = {}
 21        self._csvpaths = csvpaths
 22        self.registrar = FileRegistrar(csvpaths)
 23        """@private"""
 24        self.cacher = FileCacher(csvpaths)
 25        """@private"""
 26
 27    @property
 28    def csvpaths(self):
 29        """@private"""
 30        return self._csvpaths
 31
 32    #
 33    # named file dir is like: inputs/named_files
 34    #
 35    @property
 36    def named_files_dir(self) -> str:
 37        """@private"""
 38        return self._csvpaths.config.inputs_files_path
 39
 40    #
 41    # the root manifest file tracking all name-file stagings. note that
 42    # this is created by an optional listener. it is possible to run without
 43    # creating the root manifest or capturing the data with another listener.
 44    #
 45    @property
 46    def files_root_manifest(self) -> dict:
 47        """@private"""
 48        p = self.files_root_manifest_path
 49        if Nos(p).exists():
 50            with DataFileReader(p) as reader:
 51                return json.load(reader.source)
 52        return None
 53
 54    @property
 55    def files_root_manifest_path(self) -> dict:
 56        """@private"""
 57        return os.path.join(self.named_files_dir, "manifest.json")
 58
 59    #
 60    # named-file homes are a dir like: inputs/named_files/March-2024/March-2024.csv
 61    #
 62    def named_file_home(self, name: str) -> str:
 63        """@private"""
 64        #
 65        # not a named-file name
 66        #
 67        if name.find("/") > -1:
 68            #
 69            # this is definitely not what we should be returning. but it is what
 70            # works in the new world of remote and fully-qualified local paths.
 71            # for now, going with it. the previous implementation was wonky too,
 72            # in a different and not visible way, but not good, so this is a step
 73            # up in multiple ways.
 74            #
 75            return ""
 76        #
 77        # added
 78        #
 79        home = None
 80        if name.startswith("/"):
 81            home = name
 82        else:
 83            #
 84            # done add
 85            #
 86            home = os.path.join(self.named_files_dir, name)
 87        #
 88        # added
 89        #
 90        if Nos(home).isfile():
 91            home = home[0 : home.rfind(Nos(home).sep)]
 92        #
 93        # done add
 94        #
 95        return home
 96
 97    def assure_named_file_home(self, name: str) -> str:
 98        """@private"""
 99        home = self.named_file_home(name)
100        if not os.path.exists(home):
101            Nos(home).makedirs()
102        return home
103
104    #
105    # file homes are paths to files like:
106    #   inputs/named_files/March-2024/March-2024.csv/March-2024.csv
107    # which become paths to fingerprint-named file versions like:
108    #   inputs/named_files/March-2024/March-2024.csv/12467d811d1589ede586e3a42c41046641bedc1c73941f4c21e2fd2966f188b4.csv
109    # once the files have been fingerprinted
110    #
111    def assure_file_home(self, name: str, path: str) -> str:
112        """@private"""
113        if path.find("#") > -1:
114            path = path[0 : path.find("#")]
115        sep = Nos(path).sep
116        fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
117        home = self.named_file_home(name)
118        home = os.path.join(home, fname)
119        if not Nos(home).exists():
120            Nos(home).makedirs()
121        return home
122
123    @property
124    def named_files_count(self) -> int:
125        """@private"""
126        return len(self.named_file_names)
127
128    @property
129    def named_file_names(self) -> list:
130        """@private"""
131        b = self.named_files_dir
132        ns = [n for n in Nos(b).listdir() if not Nos(os.path.join(b, n)).isfile()]
133        return ns
134
135    def name_exists(self, name: str) -> bool:
136        """@private"""
137        p = self.named_file_home(name)
138        b = Nos(p).dir_exists()
139        return b
140
141    def remove_named_file(self, name: str) -> None:
142        """@private"""
143        p = os.path.join(self.named_files_dir, name)
144        Nos(p).remove()
145
146    def remove_all_named_files(self) -> None:
147        """@private"""
148        names = self.named_file_names
149        for name in names:
150            self.remove_named_file(name)
151
152    def set_named_files(self, nf: dict[str, str]) -> None:
153        """@private"""
154        for k, v in nf.items():
155            self.add_named_file(name=k, path=v)
156
157    def set_named_files_from_json(self, filename: str) -> None:
158        """named-files from json files are always local"""
159        try:
160            #
161            # TODO: named-files json files are always local. they should
162            # be able to be on s3 so that we are completely independent of
163            # the local disk w/re file manager
164            #
165            with open(filename, "r", encoding="utf-8") as f:
166                j = json.load(f)
167                self.set_named_files(j)
168        except (OSError, ValueError, TypeError, JSONDecodeError) as ex:
169            self.csvpaths.error_manager.handle_error(source=self, msg=f"{ex}")
170            if self.csvpaths.ecoms.do_i_raise():
171                raise
172
173    def add_named_files_from_dir(self, dirname: str):
174        dlist = Nos(dirname).listdir()
175        base = dirname
176        for p in dlist:
177            _ = p.lower()
178            ext = p[p.rfind(".") + 1 :].strip().lower()
179            if ext in self._csvpaths.config.csv_file_extensions:
180                name = p if p.rfind(".") == -1 else p[0 : p.rfind(".")]
181                path = os.path.join(base, p)
182                self.add_named_file(name=name, path=path)
183            else:
184                self._csvpaths.logger.debug(
185                    "%s is not in accept list", os.path.join(base, p)
186                )
187
188    #
189    # -------------------------------------
190    #
191    def add_named_file(self, *, name: str, path: str) -> None:
192        #
193        # create folder tree in inputs/named_files/name/filename
194        #
195        home = self.assure_file_home(name, path)
196        file_home = home
197        mark = None
198        #
199        # find mark if there. mark indicates a sheet. it is found
200        # as the trailing word after a # at the end of the path e.g.
201        # my-xlsx.xlsx#sheet2
202        #
203        hm = home.find("#")
204        if hm > -1:
205            mark = home[hm + 1 :]
206            home = home[0:hm]
207        pm = path.find("#")
208        if pm > -1:
209            mark = path[pm + 1 :]
210            path = path[0:pm]
211        #
212        # copy file to its home location
213        #
214        self._copy_in(path, home)
215        name_home = self.named_file_home(name)
216        rpath, h = self._fingerprint(home)
217        mdata = FileMetadata(self.csvpaths.config)
218        mdata.named_file_name = name
219        #
220        # we need the declared path, incl. any extra path info, in order
221        # to know if we are being pointed at a sub-portion of the data, e.g.
222        # an excel worksheet.
223        #
224        path = f"{path}#{mark}" if mark else path
225        mdata.origin_path = path
226        mdata.archive_name = self._csvpaths.config.archive_name
227        mdata.fingerprint = h
228        mdata.file_path = rpath
229        mdata.file_home = file_home
230        mdata.file_name = file_home[file_home.rfind(Nos(file_home).sep) + 1 :]
231        mdata.name_home = name_home
232        mdata.mark = mark
233        self.registrar.register_complete(mdata)
234
235    def _copy_in(self, path, home) -> None:
236        """@private"""
237        sep = Nos(path).sep
238        fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
239        # creates
240        #   a/file.csv -> named_files/name/file.csv/file.csv
241        # the dir name matching the resulting file name is correct
242        # once the file is landed and fingerprinted, the file
243        # name is changed.
244        temp = os.path.join(home, fname)
245        #
246        # this is another place that is too s3 vs. local. we'll have
247        # other source/sinks to support.
248        #
249        if path.startswith("s3:") and not home.startswith("s3"):
250            self._copy_down(path, temp, mode="wb")
251        elif path.startswith("s3:") and home.startswith("s3"):
252            Nos(path).copy(temp)
253        elif not path.startswith("s3:") and not home.startswith("s3"):
254            self._copy_down(path, temp, mode="wb")
255        elif not path.startswith("s3:") and home.startswith("s3"):
256            self._copy_down(path, temp, mode="wb")
257        else:
258            ...  # not possible. just being explicit for the moment.
259        return temp
260
261    def _copy_down(self, path, temp, mode="wb") -> None:
262        """@private"""
263        with DataFileReader(path) as reader:
264            with DataFileWriter(path=temp, mode=mode) as writer:
265                for line in reader.next_raw():
266                    writer.append(line)
267
268    #
269    # can take a reference. the ref would only be expected to point
270    # to the results of a csvpath in a named-paths group. it would be
271    # in this form: $group.results.2024-01-01_10-15-20.mypath
272    # where this gets interesting is the datestamp identifing the
273    # run. we need to allow for var sub and/or other shortcuts
274    #
275    def get_named_file(self, name: str) -> str:
276        ret = None
277        if name.startswith("$"):
278            ref = ReferenceParser(name)
279            if ref.datatype != ReferenceParser.RESULTS:
280                raise InputException(
281                    f"Reference datatype must be {ReferenceParser.RESULTS}"
282                )
283            reman = self._csvpaths.results_manager
284            ret = reman.data_file_for_reference(name)
285        else:
286            if not self.name_exists(name):
287                return None
288            n = self.named_file_home(name)
289            ret = self.registrar.registered_file(n)
290        return ret
291
292    def get_fingerprint_for_name(self, name) -> str:
293        """@private"""
294        if name.startswith("$"):
295            # atm, we don't give fingerprints for references doing rewind/replay
296            return ""
297        #
298        # note: this is not creating fingerprints, just getting existing ones.
299        #
300        return self.registrar.get_fingerprint(self.named_file_home(name))
301
302    #
303    # -------------------------------------
304    #
305    def get_named_file_reader(self, name: str) -> DataFileReader:
306        """@private"""
307        path = self.get_named_file(name)
308        t = self.registrar.type_of_file(self.named_file_home(name))
309        return FileManager.get_reader(path, filetype=t)
310
311    @classmethod
312    def get_reader(
313        cls, path: str, *, filetype: str = None, delimiter=None, quotechar=None
314    ) -> DataFileReader:
315        """@private"""
316        return DataFileReader(
317            path, filetype=filetype, delimiter=delimiter, quotechar=quotechar
318        )
319
320    def _fingerprint(self, path) -> str:
321        """@private"""
322        sep = Nos(path).sep
323        fname = path if path.rfind(sep) == -1 else path[path.rfind(sep) + 1 :]
324        t = None
325        i = fname.find(".")
326        if i > -1:
327            t = fname[i + 1 :]
328        else:
329            t = fname
330        i = t.find("#")
331        if i > -1:
332            t = t[0:i]
333        #
334        # creating the initial file name, where the file starts
335        #
336        fpath = os.path.join(path, fname)
337        h = None
338        #
339        # this version should work local and minimize traffic when in S3
340        #
341        hpath = None
342        remove_fpath = False
343        with DataFileReader(fpath) as f:
344            h = f.fingerprint()
345            #
346            # creating the new path using the fingerprint as filename
347            #
348            hpath = os.path.join(path, h)
349            if t is not None:
350                hpath = f"{hpath}.{t}"
351            #
352            # if we're re-adding the file we don't need to make
353            # another copy of it. re-adds are fine.
354            #
355            # need an s3 way to do this
356            remove_fpath = Nos(hpath).exists()
357            #
358            # if a first add, rename the file to the fingerprint + ext
359            #
360        if remove_fpath:
361            Nos(fpath).remove()
362            return hpath, h
363        if hpath:
364            Nos(fpath).rename(hpath)
365        return hpath, h
def set_named_files_from_json(self, filename: str) -> None:
157    def set_named_files_from_json(self, filename: str) -> None:
158        """named-files from json files are always local"""
159        try:
160            #
161            # TODO: named-files json files are always local. they should
162            # be able to be on s3 so that we are completely independent of
163            # the local disk w/re file manager
164            #
165            with open(filename, "r", encoding="utf-8") as f:
166                j = json.load(f)
167                self.set_named_files(j)
168        except (OSError, ValueError, TypeError, JSONDecodeError) as ex:
169            self.csvpaths.error_manager.handle_error(source=self, msg=f"{ex}")
170            if self.csvpaths.ecoms.do_i_raise():
171                raise

Named-files JSON files are always read from the local filesystem.

def add_named_files_from_dir(self, dirname: str):
173    def add_named_files_from_dir(self, dirname: str):
174        dlist = Nos(dirname).listdir()
175        base = dirname
176        for p in dlist:
177            _ = p.lower()
178            ext = p[p.rfind(".") + 1 :].strip().lower()
179            if ext in self._csvpaths.config.csv_file_extensions:
180                name = p if p.rfind(".") == -1 else p[0 : p.rfind(".")]
181                path = os.path.join(base, p)
182                self.add_named_file(name=name, path=path)
183            else:
184                self._csvpaths.logger.debug(
185                    "%s is not in accept list", os.path.join(base, p)
186                )
def add_named_file(self, *, name: str, path: str) -> None:
191    def add_named_file(self, *, name: str, path: str) -> None:
192        #
193        # create folder tree in inputs/named_files/name/filename
194        #
195        home = self.assure_file_home(name, path)
196        file_home = home
197        mark = None
198        #
199        # find mark if there. mark indicates a sheet. it is found
200        # as the trailing word after a # at the end of the path e.g.
201        # my-xlsx.xlsx#sheet2
202        #
203        hm = home.find("#")
204        if hm > -1:
205            mark = home[hm + 1 :]
206            home = home[0:hm]
207        pm = path.find("#")
208        if pm > -1:
209            mark = path[pm + 1 :]
210            path = path[0:pm]
211        #
212        # copy file to its home location
213        #
214        self._copy_in(path, home)
215        name_home = self.named_file_home(name)
216        rpath, h = self._fingerprint(home)
217        mdata = FileMetadata(self.csvpaths.config)
218        mdata.named_file_name = name
219        #
220        # we need the declared path, incl. any extra path info, in order
221        # to know if we are being pointed at a sub-portion of the data, e.g.
222        # an excel worksheet.
223        #
224        path = f"{path}#{mark}" if mark else path
225        mdata.origin_path = path
226        mdata.archive_name = self._csvpaths.config.archive_name
227        mdata.fingerprint = h
228        mdata.file_path = rpath
229        mdata.file_home = file_home
230        mdata.file_name = file_home[file_home.rfind(Nos(file_home).sep) + 1 :]
231        mdata.name_home = name_home
232        mdata.mark = mark
233        self.registrar.register_complete(mdata)
def get_named_file(self, name: str) -> str:
275    def get_named_file(self, name: str) -> str:
276        ret = None
277        if name.startswith("$"):
278            ref = ReferenceParser(name)
279            if ref.datatype != ReferenceParser.RESULTS:
280                raise InputException(
281                    f"Reference datatype must be {ReferenceParser.RESULTS}"
282                )
283            reman = self._csvpaths.results_manager
284            ret = reman.data_file_for_reference(name)
285        else:
286            if not self.name_exists(name):
287                return None
288            n = self.named_file_home(name)
289            ret = self.registrar.registered_file(n)
290        return ret