csvpath.managers.files.file_manager
import os
import json
import csv
from json import JSONDecodeError
from csvpath.util.file_readers import DataFileReader
from csvpath.util.file_writers import DataFileWriter
from csvpath.util.reference_parser import ReferenceParser
from csvpath.util.exceptions import InputException, FileException
from csvpath.util.nos import Nos
from .file_registrar import FileRegistrar
from .file_cacher import FileCacher
from .file_metadata import FileMetadata


class FileManager:
    """Stage data files under names and resolve those names to staged files.

    Staged files live under ``named_files_dir`` in a tree like
    ``inputs/named_files/March-2024/March-2024.csv/<fingerprint>.csv``: one
    directory per name, one directory per original file name, and one
    fingerprint-named file per distinct content.
    """

    def __init__(self, *, named_files: dict[str, str] = None, csvpaths=None):
        """@private"""
        # named_files is accepted but not used by the constructor; nothing is
        # registered until set_named_files() or add_named_file() is called.
        self._csvpaths = csvpaths
        self.registrar = FileRegistrar(csvpaths)
        """@private"""
        self.cacher = FileCacher(csvpaths)
        """@private"""

    @property
    def csvpaths(self):
        """@private"""
        return self._csvpaths

    @property
    def named_files_dir(self) -> str:
        """@private

        Return the configured root of all named files, a dir like
        ``inputs/named_files``.
        """
        return self._csvpaths.config.inputs_files_path

    @property
    def files_root_manifest(self) -> dict:
        """@private

        Return the parsed root manifest, or None when no manifest file exists.
        """
        # the root manifest tracks all named-file stagings. it is created by an
        # optional listener, so it is possible to run without one.
        p = self.files_root_manifest_path
        if Nos(p).exists():
            with DataFileReader(p) as reader:
                return json.load(reader.source)
        return None

    @property
    def files_root_manifest_path(self) -> str:
        """@private

        Return the path of ``manifest.json`` directly under ``named_files_dir``.
        """
        return os.path.join(self.named_files_dir, "manifest.json")

    @staticmethod
    def _file_name_of(path: str) -> str:
        """Return the last segment of ``path``, split on the separator Nos
        reports for that path. A path without a separator is returned whole."""
        return path[path.rfind(Nos(path).sep) + 1 :]

    def named_file_home(self, name: str) -> str:
        """@private

        Return the home dir of a named-file, a dir like
        ``inputs/named_files/March-2024``. Returns "" when ``name`` contains a
        "/" and so is a path rather than a named-file name.
        """
        if "/" in name:
            #
            # this is definitely not what we should be returning. but it is
            # what works in the new world of remote and fully-qualified local
            # paths. for now, going with it.
            #
            return ""
        home = os.path.join(self.named_files_dir, name)
        if Nos(home).isfile():
            # the name resolved to a file; its home is the containing dir
            home = home[0 : home.rfind(Nos(home).sep)]
        return home

    def assure_named_file_home(self, name: str) -> str:
        """@private

        Return the named-file home for ``name``, creating it if it is missing.
        """
        home = self.named_file_home(name)
        # checked through Nos, like assure_file_home, rather than os.path
        if not Nos(home).exists():
            Nos(home).makedirs()
        return home

    def assure_file_home(self, name: str, path: str) -> str:
        """@private

        Return the dir that holds the versions of one file of a named-file,
        creating it if it is missing.

        File homes are paths like
        ``inputs/named_files/March-2024/March-2024.csv``. Files land in them
        under their own name and are renamed to ``<fingerprint>.<ext>`` once
        they have been fingerprinted. Anything from the first "#" in ``path``
        on is ignored.
        """
        path = path.partition("#")[0]
        home = os.path.join(self.named_file_home(name), self._file_name_of(path))
        if not Nos(home).exists():
            Nos(home).makedirs()
        return home

    @property
    def named_files_count(self) -> int:
        """@private"""
        return len(self.named_file_names)

    @property
    def named_file_names(self) -> list:
        """@private

        Return the entries of ``named_files_dir`` that are not plain files.
        """
        b = self.named_files_dir
        return [n for n in Nos(b).listdir() if not Nos(os.path.join(b, n)).isfile()]

    def name_exists(self, name: str) -> bool:
        """@private"""
        return Nos(self.named_file_home(name)).dir_exists()

    def remove_named_file(self, name: str) -> None:
        """@private"""
        Nos(os.path.join(self.named_files_dir, name)).remove()

    def remove_all_named_files(self) -> None:
        """@private"""
        for name in self.named_file_names:
            self.remove_named_file(name)

    def set_named_files(self, nf: dict[str, str]) -> None:
        """@private

        Add every ``name -> path`` pair of ``nf`` as a named-file.
        """
        for k, v in nf.items():
            self.add_named_file(name=k, path=v)

    def set_named_files_from_json(self, filename: str) -> None:
        """Add the named-files listed in a JSON file of ``{name: path}`` pairs.

        named-files from json files are always local.

        Read, parse and registration errors are passed to the error manager
        and are re-raised only when the error policy asks for it.
        """
        try:
            #
            # TODO: named-files json files are always local. they should
            # be able to be on s3 so that we are completely independent of
            # the local disk w/re file manager
            #
            with open(filename, "r", encoding="utf-8") as f:
                j = json.load(f)
            if not isinstance(j, dict):
                # without this check a JSON array or scalar would fail in
                # set_named_files() with an AttributeError that bypasses the
                # error handling below.
                raise ValueError(
                    f"Named-files JSON must be an object of name to path, not {type(j).__name__}"
                )
            self.set_named_files(j)
        except (OSError, ValueError, TypeError, JSONDecodeError) as ex:
            self.csvpaths.error_manager.handle_error(source=self, msg=f"{ex}")
            if self.csvpaths.ecoms.do_i_raise():
                raise

    def add_named_files_from_dir(self, dirname: str):
        """Add each file in ``dirname`` that has a configured CSV extension.

        A file is registered under its file name minus the final extension.
        Entries with no extension, no name before the extension, or an
        extension that is not in ``config.csv_file_extensions`` are skipped
        and logged at debug level.
        """
        accepted = self._csvpaths.config.csv_file_extensions
        for p in Nos(dirname).listdir():
            path = os.path.join(dirname, p)
            stem, dot, ext = p.rpartition(".")
            if dot and stem and ext.strip().lower() in accepted:
                self.add_named_file(name=stem, path=path)
            else:
                self._csvpaths.logger.debug("%s is not in accept list", path)

    #
    # -------------------------------------
    #
    def add_named_file(self, *, name: str, path: str) -> None:
        """Copy the file at ``path`` into the home of ``name`` and register it.

        ``path`` may end in a mark, the word after a "#", e.g.
        ``my-xlsx.xlsx#sheet2``. A mark indicates a sheet; it is not part of
        the file location but is kept in the registered metadata.
        """
        #
        # create folder tree in inputs/named_files/name/filename
        #
        home = self.assure_file_home(name, path)
        file_home = home
        mark = None
        hm = home.find("#")
        if hm > -1:
            mark = home[hm + 1 :]
            home = home[0:hm]
        pm = path.find("#")
        if pm > -1:
            mark = path[pm + 1 :]
            path = path[0:pm]
        #
        # copy file to its home location, then rename it to its fingerprint
        #
        self._copy_in(path, home)
        name_home = self.named_file_home(name)
        rpath, h = self._fingerprint(home)
        mdata = FileMetadata(self.csvpaths.config)
        mdata.named_file_name = name
        #
        # we need the declared path, incl. any extra path info, in order
        # to know if we are being pointed at a sub-portion of the data, e.g.
        # an excel worksheet.
        #
        path = f"{path}#{mark}" if mark else path
        mdata.origin_path = path
        mdata.archive_name = self._csvpaths.config.archive_name
        mdata.fingerprint = h
        mdata.file_path = rpath
        mdata.file_home = file_home
        mdata.file_name = self._file_name_of(file_home)
        mdata.name_home = name_home
        mdata.mark = mark
        self.registrar.register_complete(mdata)

    def _copy_in(self, path, home) -> str:
        """@private

        Copy the file at ``path`` into the dir ``home`` under its own file
        name and return the landing path.
        """
        # creates
        #    a/file.csv -> named_files/name/file.csv/file.csv
        # the dir name matching the resulting file name is correct.
        # once the file is landed and fingerprinted, the file
        # name is changed.
        temp = os.path.join(home, self._file_name_of(path))
        #
        # this is another place that is too s3 vs. local. we'll have
        # other source/sinks to support.
        #
        if path.startswith("s3:") and home.startswith("s3"):
            # both ends are in s3, so the copy is delegated to Nos
            Nos(path).copy(temp)
        else:
            self._copy_down(path, temp, mode="wb")
        return temp

    def _copy_down(self, path, temp, mode="wb") -> None:
        """@private

        Stream the content of ``path`` to ``temp`` through a reader and writer.
        """
        with DataFileReader(path) as reader:
            with DataFileWriter(path=temp, mode=mode) as writer:
                for line in reader.next_raw():
                    writer.append(line)

    def get_named_file(self, name: str) -> str:
        """Return the path of the data file registered for ``name``.

        ``name`` can be a reference. the ref would only be expected to point
        to the results of a csvpath in a named-paths group. it would be
        in this form: ``$group.results.2024-01-01_10-15-20.mypath``

        Returns None when ``name`` is not a reference and no named-file home
        exists for it. Raises InputException when a reference does not have
        the results datatype.
        """
        # where this gets interesting is the datestamp identifying the
        # run. we need to allow for var sub and/or other shortcuts
        ret = None
        if name.startswith("$"):
            ref = ReferenceParser(name)
            if ref.datatype != ReferenceParser.RESULTS:
                raise InputException(
                    f"Reference datatype must be {ReferenceParser.RESULTS}"
                )
            reman = self._csvpaths.results_manager
            ret = reman.data_file_for_reference(name)
        else:
            if not self.name_exists(name):
                return None
            n = self.named_file_home(name)
            ret = self.registrar.registered_file(n)
        return ret

    def get_fingerprint_for_name(self, name) -> str:
        """@private

        Return the registered fingerprint for ``name``, or "" for references.
        """
        if name.startswith("$"):
            # atm, we don't give fingerprints for references doing rewind/replay
            return ""
        #
        # note: this is not creating fingerprints, just getting existing ones.
        #
        return self.registrar.get_fingerprint(self.named_file_home(name))

    #
    # -------------------------------------
    #
    def get_named_file_reader(self, name: str) -> DataFileReader:
        """@private"""
        path = self.get_named_file(name)
        t = self.registrar.type_of_file(self.named_file_home(name))
        return FileManager.get_reader(path, filetype=t)

    @classmethod
    def get_reader(
        cls, path: str, *, filetype: str = None, delimiter=None, quotechar=None
    ) -> DataFileReader:
        """@private"""
        return DataFileReader(
            path, filetype=filetype, delimiter=delimiter, quotechar=quotechar
        )

    def _fingerprint(self, path) -> tuple[str, str]:
        """@private

        Rename the file landed in the file home ``path`` to its fingerprint.

        Returns ``(fingerprint_path, fingerprint)``. If a file with that
        fingerprint is already present the newly landed copy is removed
        instead; re-adds are fine.
        """
        fname = self._file_name_of(path)
        # the extension is everything after the FIRST dot, minus any mark.
        # a name with no dot is used whole.
        i = fname.find(".")
        t = fname[i + 1 :] if i > -1 else fname
        i = t.find("#")
        if i > -1:
            t = t[0:i]
        #
        # the initial file name, where the file starts
        #
        fpath = os.path.join(path, fname)
        #
        # this version should work local and minimize traffic when in S3
        #
        with DataFileReader(fpath) as f:
            h = f.fingerprint()
            #
            # the new path uses the fingerprint as filename
            #
            hpath = f"{os.path.join(path, h)}.{t}"
            # need an s3 way to do this
            already_staged = Nos(hpath).exists()
        if already_staged:
            Nos(fpath).remove()
        else:
            #
            # a first add: rename the file to the fingerprint + ext
            #
            Nos(fpath).rename(hpath)
        return hpath, h
class
FileManager:
class FileManager:
    """Stage data files under names and resolve those names to staged files.

    Staged files live under ``named_files_dir`` in a tree like
    ``inputs/named_files/March-2024/March-2024.csv/<fingerprint>.csv``: one
    directory per name, one directory per original file name, and one
    fingerprint-named file per distinct content.
    """

    def __init__(self, *, named_files: dict[str, str] = None, csvpaths=None):
        """@private"""
        # named_files is accepted but not used by the constructor; nothing is
        # registered until set_named_files() or add_named_file() is called.
        self._csvpaths = csvpaths
        self.registrar = FileRegistrar(csvpaths)
        """@private"""
        self.cacher = FileCacher(csvpaths)
        """@private"""

    @property
    def csvpaths(self):
        """@private"""
        return self._csvpaths

    @property
    def named_files_dir(self) -> str:
        """@private

        Return the configured root of all named files, a dir like
        ``inputs/named_files``.
        """
        return self._csvpaths.config.inputs_files_path

    @property
    def files_root_manifest(self) -> dict:
        """@private

        Return the parsed root manifest, or None when no manifest file exists.
        """
        # the root manifest tracks all named-file stagings. it is created by an
        # optional listener, so it is possible to run without one.
        p = self.files_root_manifest_path
        if Nos(p).exists():
            with DataFileReader(p) as reader:
                return json.load(reader.source)
        return None

    @property
    def files_root_manifest_path(self) -> str:
        """@private

        Return the path of ``manifest.json`` directly under ``named_files_dir``.
        """
        return os.path.join(self.named_files_dir, "manifest.json")

    @staticmethod
    def _file_name_of(path: str) -> str:
        """Return the last segment of ``path``, split on the separator Nos
        reports for that path. A path without a separator is returned whole."""
        return path[path.rfind(Nos(path).sep) + 1 :]

    def named_file_home(self, name: str) -> str:
        """@private

        Return the home dir of a named-file, a dir like
        ``inputs/named_files/March-2024``. Returns "" when ``name`` contains a
        "/" and so is a path rather than a named-file name.
        """
        if "/" in name:
            #
            # this is definitely not what we should be returning. but it is
            # what works in the new world of remote and fully-qualified local
            # paths. for now, going with it.
            #
            return ""
        home = os.path.join(self.named_files_dir, name)
        if Nos(home).isfile():
            # the name resolved to a file; its home is the containing dir
            home = home[0 : home.rfind(Nos(home).sep)]
        return home

    def assure_named_file_home(self, name: str) -> str:
        """@private

        Return the named-file home for ``name``, creating it if it is missing.
        """
        home = self.named_file_home(name)
        # checked through Nos, like assure_file_home, rather than os.path
        if not Nos(home).exists():
            Nos(home).makedirs()
        return home

    def assure_file_home(self, name: str, path: str) -> str:
        """@private

        Return the dir that holds the versions of one file of a named-file,
        creating it if it is missing.

        File homes are paths like
        ``inputs/named_files/March-2024/March-2024.csv``. Files land in them
        under their own name and are renamed to ``<fingerprint>.<ext>`` once
        they have been fingerprinted. Anything from the first "#" in ``path``
        on is ignored.
        """
        path = path.partition("#")[0]
        home = os.path.join(self.named_file_home(name), self._file_name_of(path))
        if not Nos(home).exists():
            Nos(home).makedirs()
        return home

    @property
    def named_files_count(self) -> int:
        """@private"""
        return len(self.named_file_names)

    @property
    def named_file_names(self) -> list:
        """@private

        Return the entries of ``named_files_dir`` that are not plain files.
        """
        b = self.named_files_dir
        return [n for n in Nos(b).listdir() if not Nos(os.path.join(b, n)).isfile()]

    def name_exists(self, name: str) -> bool:
        """@private"""
        return Nos(self.named_file_home(name)).dir_exists()

    def remove_named_file(self, name: str) -> None:
        """@private"""
        Nos(os.path.join(self.named_files_dir, name)).remove()

    def remove_all_named_files(self) -> None:
        """@private"""
        for name in self.named_file_names:
            self.remove_named_file(name)

    def set_named_files(self, nf: dict[str, str]) -> None:
        """@private

        Add every ``name -> path`` pair of ``nf`` as a named-file.
        """
        for k, v in nf.items():
            self.add_named_file(name=k, path=v)

    def set_named_files_from_json(self, filename: str) -> None:
        """Add the named-files listed in a JSON file of ``{name: path}`` pairs.

        named-files from json files are always local.

        Read, parse and registration errors are passed to the error manager
        and are re-raised only when the error policy asks for it.
        """
        try:
            #
            # TODO: named-files json files are always local. they should
            # be able to be on s3 so that we are completely independent of
            # the local disk w/re file manager
            #
            with open(filename, "r", encoding="utf-8") as f:
                j = json.load(f)
            if not isinstance(j, dict):
                # without this check a JSON array or scalar would fail in
                # set_named_files() with an AttributeError that bypasses the
                # error handling below.
                raise ValueError(
                    f"Named-files JSON must be an object of name to path, not {type(j).__name__}"
                )
            self.set_named_files(j)
        except (OSError, ValueError, TypeError, JSONDecodeError) as ex:
            self.csvpaths.error_manager.handle_error(source=self, msg=f"{ex}")
            if self.csvpaths.ecoms.do_i_raise():
                raise

    def add_named_files_from_dir(self, dirname: str):
        """Add each file in ``dirname`` that has a configured CSV extension.

        A file is registered under its file name minus the final extension.
        Entries with no extension, no name before the extension, or an
        extension that is not in ``config.csv_file_extensions`` are skipped
        and logged at debug level.
        """
        accepted = self._csvpaths.config.csv_file_extensions
        for p in Nos(dirname).listdir():
            path = os.path.join(dirname, p)
            stem, dot, ext = p.rpartition(".")
            if dot and stem and ext.strip().lower() in accepted:
                self.add_named_file(name=stem, path=path)
            else:
                self._csvpaths.logger.debug("%s is not in accept list", path)

    #
    # -------------------------------------
    #
    def add_named_file(self, *, name: str, path: str) -> None:
        """Copy the file at ``path`` into the home of ``name`` and register it.

        ``path`` may end in a mark, the word after a "#", e.g.
        ``my-xlsx.xlsx#sheet2``. A mark indicates a sheet; it is not part of
        the file location but is kept in the registered metadata.
        """
        #
        # create folder tree in inputs/named_files/name/filename
        #
        home = self.assure_file_home(name, path)
        file_home = home
        mark = None
        hm = home.find("#")
        if hm > -1:
            mark = home[hm + 1 :]
            home = home[0:hm]
        pm = path.find("#")
        if pm > -1:
            mark = path[pm + 1 :]
            path = path[0:pm]
        #
        # copy file to its home location, then rename it to its fingerprint
        #
        self._copy_in(path, home)
        name_home = self.named_file_home(name)
        rpath, h = self._fingerprint(home)
        mdata = FileMetadata(self.csvpaths.config)
        mdata.named_file_name = name
        #
        # we need the declared path, incl. any extra path info, in order
        # to know if we are being pointed at a sub-portion of the data, e.g.
        # an excel worksheet.
        #
        path = f"{path}#{mark}" if mark else path
        mdata.origin_path = path
        mdata.archive_name = self._csvpaths.config.archive_name
        mdata.fingerprint = h
        mdata.file_path = rpath
        mdata.file_home = file_home
        mdata.file_name = self._file_name_of(file_home)
        mdata.name_home = name_home
        mdata.mark = mark
        self.registrar.register_complete(mdata)

    def _copy_in(self, path, home) -> str:
        """@private

        Copy the file at ``path`` into the dir ``home`` under its own file
        name and return the landing path.
        """
        # creates
        #    a/file.csv -> named_files/name/file.csv/file.csv
        # the dir name matching the resulting file name is correct.
        # once the file is landed and fingerprinted, the file
        # name is changed.
        temp = os.path.join(home, self._file_name_of(path))
        #
        # this is another place that is too s3 vs. local. we'll have
        # other source/sinks to support.
        #
        if path.startswith("s3:") and home.startswith("s3"):
            # both ends are in s3, so the copy is delegated to Nos
            Nos(path).copy(temp)
        else:
            self._copy_down(path, temp, mode="wb")
        return temp

    def _copy_down(self, path, temp, mode="wb") -> None:
        """@private

        Stream the content of ``path`` to ``temp`` through a reader and writer.
        """
        with DataFileReader(path) as reader:
            with DataFileWriter(path=temp, mode=mode) as writer:
                for line in reader.next_raw():
                    writer.append(line)

    def get_named_file(self, name: str) -> str:
        """Return the path of the data file registered for ``name``.

        ``name`` can be a reference. the ref would only be expected to point
        to the results of a csvpath in a named-paths group. it would be
        in this form: ``$group.results.2024-01-01_10-15-20.mypath``

        Returns None when ``name`` is not a reference and no named-file home
        exists for it. Raises InputException when a reference does not have
        the results datatype.
        """
        # where this gets interesting is the datestamp identifying the
        # run. we need to allow for var sub and/or other shortcuts
        ret = None
        if name.startswith("$"):
            ref = ReferenceParser(name)
            if ref.datatype != ReferenceParser.RESULTS:
                raise InputException(
                    f"Reference datatype must be {ReferenceParser.RESULTS}"
                )
            reman = self._csvpaths.results_manager
            ret = reman.data_file_for_reference(name)
        else:
            if not self.name_exists(name):
                return None
            n = self.named_file_home(name)
            ret = self.registrar.registered_file(n)
        return ret

    def get_fingerprint_for_name(self, name) -> str:
        """@private

        Return the registered fingerprint for ``name``, or "" for references.
        """
        if name.startswith("$"):
            # atm, we don't give fingerprints for references doing rewind/replay
            return ""
        #
        # note: this is not creating fingerprints, just getting existing ones.
        #
        return self.registrar.get_fingerprint(self.named_file_home(name))

    #
    # -------------------------------------
    #
    def get_named_file_reader(self, name: str) -> DataFileReader:
        """@private"""
        path = self.get_named_file(name)
        t = self.registrar.type_of_file(self.named_file_home(name))
        return FileManager.get_reader(path, filetype=t)

    @classmethod
    def get_reader(
        cls, path: str, *, filetype: str = None, delimiter=None, quotechar=None
    ) -> DataFileReader:
        """@private"""
        return DataFileReader(
            path, filetype=filetype, delimiter=delimiter, quotechar=quotechar
        )

    def _fingerprint(self, path) -> tuple[str, str]:
        """@private

        Rename the file landed in the file home ``path`` to its fingerprint.

        Returns ``(fingerprint_path, fingerprint)``. If a file with that
        fingerprint is already present the newly landed copy is removed
        instead; re-adds are fine.
        """
        fname = self._file_name_of(path)
        # the extension is everything after the FIRST dot, minus any mark.
        # a name with no dot is used whole.
        i = fname.find(".")
        t = fname[i + 1 :] if i > -1 else fname
        i = t.find("#")
        if i > -1:
            t = t[0:i]
        #
        # the initial file name, where the file starts
        #
        fpath = os.path.join(path, fname)
        #
        # this version should work local and minimize traffic when in S3
        #
        with DataFileReader(fpath) as f:
            h = f.fingerprint()
            #
            # the new path uses the fingerprint as filename
            #
            hpath = f"{os.path.join(path, h)}.{t}"
            # need an s3 way to do this
            already_staged = Nos(hpath).exists()
        if already_staged:
            Nos(fpath).remove()
        else:
            #
            # a first add: rename the file to the fingerprint + ext
            #
            Nos(fpath).rename(hpath)
        return hpath, h
def
set_named_files_from_json(self, filename: str) -> None:
def set_named_files_from_json(self, filename: str) -> None:
    """Add the named-files listed in a JSON file of ``{name: path}`` pairs.

    named-files from json files are always local.

    Read, parse and registration errors are passed to the error manager and
    are re-raised only when the error policy asks for it.
    """
    try:
        #
        # TODO: named-files json files are always local. they should
        # be able to be on s3 so that we are completely independent of
        # the local disk w/re file manager
        #
        with open(filename, "r", encoding="utf-8") as f:
            j = json.load(f)
        if not isinstance(j, dict):
            # without this check a JSON array or scalar would fail in
            # set_named_files() with an AttributeError that bypasses the
            # error handling below.
            raise ValueError(
                f"Named-files JSON must be an object of name to path, not {type(j).__name__}"
            )
        self.set_named_files(j)
    except (OSError, ValueError, TypeError, JSONDecodeError) as ex:
        self.csvpaths.error_manager.handle_error(source=self, msg=f"{ex}")
        if self.csvpaths.ecoms.do_i_raise():
            raise
named-files from json files are always local
def
add_named_files_from_dir(self, dirname: str):
def add_named_files_from_dir(self, dirname: str):
    """Add each file in ``dirname`` that has a configured CSV extension.

    A file is registered under its file name minus the final extension.
    Entries with no extension, no name before the extension, or an extension
    that is not in ``config.csv_file_extensions`` are skipped and logged at
    debug level.
    """
    accepted = self._csvpaths.config.csv_file_extensions
    for p in Nos(dirname).listdir():
        path = os.path.join(dirname, p)
        # rpartition leaves dot empty when there is no "." at all, so a file
        # literally named e.g. "csv" is not mistaken for one with that
        # extension; an empty stem (".csv") would register the empty name.
        stem, dot, ext = p.rpartition(".")
        if dot and stem and ext.strip().lower() in accepted:
            self.add_named_file(name=stem, path=path)
        else:
            self._csvpaths.logger.debug("%s is not in accept list", path)
def
add_named_file(self, *, name: str, path: str) -> None:
def add_named_file(self, *, name: str, path: str) -> None:
    """Copy the file at ``path`` into the home of ``name`` and register it.

    ``path`` may end in a mark, the word after a "#", e.g.
    ``my-xlsx.xlsx#sheet2``. A mark indicates a sheet; it is not part of the
    file location but is kept in the registered metadata.
    """
    # make sure the folder tree inputs/named_files/name/filename exists
    file_home = self.assure_file_home(name, path)
    mark = None
    # a mark found on the home is overridden by one found on the path
    home, home_hash, home_mark = file_home.partition("#")
    if home_hash:
        mark = home_mark
    source, path_hash, path_mark = path.partition("#")
    if path_hash:
        mark = path_mark
    # land the file in its home, then rename it to its fingerprint
    self._copy_in(source, home)
    name_home = self.named_file_home(name)
    staged_path, fingerprint = self._fingerprint(home)
    mdata = FileMetadata(self.csvpaths.config)
    mdata.named_file_name = name
    # the declared path, incl. any mark, is recorded so that we know if we
    # are being pointed at a sub-portion of the data, e.g. an excel worksheet.
    if mark:
        mdata.origin_path = f"{source}#{mark}"
    else:
        mdata.origin_path = source
    mdata.archive_name = self._csvpaths.config.archive_name
    mdata.fingerprint = fingerprint
    mdata.file_path = staged_path
    mdata.file_home = file_home
    last_sep = file_home.rfind(Nos(file_home).sep)
    mdata.file_name = file_home[last_sep + 1 :]
    mdata.name_home = name_home
    mdata.mark = mark
    self.registrar.register_complete(mdata)
def
get_named_file(self, name: str) -> str:
def get_named_file(self, name: str) -> str:
    """Return the path of the data file registered for ``name``.

    ``name`` is either a named-file name or a reference starting with "$",
    in a form like ``$group.results.2024-01-01_10-15-20.mypath``. Only
    references with the results datatype are accepted; any other reference
    raises InputException. Returns None when a plain name has no named-file
    home.
    """
    if not name.startswith("$"):
        # plain named-file name: look it up with the registrar
        if not self.name_exists(name):
            return None
        home = self.named_file_home(name)
        return self.registrar.registered_file(home)
    # a reference: resolved by the results manager
    reference = ReferenceParser(name)
    if reference.datatype != ReferenceParser.RESULTS:
        raise InputException(
            f"Reference datatype must be {ReferenceParser.RESULTS}"
        )
    return self._csvpaths.results_manager.data_file_for_reference(name)