Package psicons :: Package core :: Module utils
[hide private]
[frames] | no frames]

Source Code for Module psicons.core.utils

  1  """
 
  2  Miscellaneous utilities for use in writing and using commands.
 
  3  
 
  4  Often we will want to name or create a file based on pre-existing file
 
  5  paths:
 
  6  
 
  7  * create an output file in the same directory as an input file
 
  8  * clean up some data and save it in a file of the same name with "-clean"
 
  9    appended, but before the extension.
 
 10  * use the same extension for an output file as an input file.
 
 11  * what is the extension or name of this file anyway?
 
 12  
 
 13  Thus, here we provide a set of functions for extracting components from a file
 
 14  path and extrapolating new names from the same. For consistency, here is the
 
 15  terminology used:
 
 16  
 
 17  dir
 
 18          the path of the directory a file is in
 
 19  base
 
 20          the local name of a file, e.g. "foo.txt", "schedule.doc"
 
 21  ext
 
 22          the extension of a file, e.g. ".txt", ".doc". Note this includes the
 
  23          separating period, so that the extensions of files "foo.txt", "foo." and
 
  24          "foo" are ".txt", "." and "" respectively.
 
 25  stem
 
 26          the base file name, without the extension, e.g. "foo" and "schedule" for
 
 27          "foo.txt", "schedule.doc" respectively.
 
 28  substem
 
  29          the stem of a file name, less the trailing modifier, e.g. "foo" and
 
  30          "schedule_abc" for "foo-bar.txt" and "schedule_abc.2011.doc" respectively.
 
 31  
 
 32  """ 
 33  
 
 34  ### IMPORTS
 
 35  
 
 36  import re 
 37  from os import path 
 38  from datetime import datetime 
 39  
 
 40  
 
 41  ### CONSTANTS & DEFINES
 
 42  
 
# Matches one brace-delimited placeholder such as "{stem}"; group 1 captures
# the (non-empty, brace-free) text between the braces.
INTERP_RE = re.compile (r'\{([^}]+)\}')
# Matches a trailing modifier at the end of a string: any one character,
# followed by group 1 = a "." or "-" and then one or more non-whitespace
# characters running to the end. The leading "." means a separator in the
# very first position is never treated as the start of a modifier.
SUBSTEM_RE = re.compile (r'.([\.\-]\S+)$')
 45  
 
 46  
 
 47  ### IMPLEMENTATION
 
 48  
 
def dir_base(p):
    """
    Return the directory and file name of a path.

    :Parameters:
        p
            a file name or path

    :Returns:
        the directory and base name of the file path, as a 2-tuple. If the
        directory has any component beyond a drive specifier, it always
        ends with a path separator.

    For example (POSIX paths shown)::

        >>> dir_base ('bar.foo')
        ('', 'bar.foo')
        >>> dir_base ('/bar.foo')
        ('/', 'bar.foo')
        >>> dir_base ('baz/bar.foo')
        ('baz/', 'bar.foo')
        >>> dir_base ('/quux/baz/bar.foo')
        ('/quux/baz/', 'bar.foo')

    We use here the Python convention of referring to the file name (as opposed
    to path) as the "base". Also, there's an apparent oddity in the native
    splitting of paths, to do with the parent directory::

        >>> path.split ('bar.foo')
        ('', 'bar.foo')
        >>> path.split ('/bar.foo')
        ('/', 'bar.foo')
        >>> path.split ('baz/bar.foo')
        ('baz', 'bar.foo')

    That is, the dividing filesep only appears if there is nothing else in the
    directory component. This makes things difficult if you are trying to pick
    apart a file path and reassemble it - do I need a file separator? was there
    one there before? Fortunately path.join is reasonably clever about fusing
    paths, but this is still an inconsistency and so is fixed here.

    """
    d, b = path.split(p)
    # Only the part after any drive specifier decides whether a separator is
    # missing: a bare drive ("C:") must stay bare, because appending a
    # separator would turn a drive-relative path into an absolute one.
    _, tail = path.splitdrive(d)
    # Accept the alternative separator too (e.g. "/" on Windows), so a
    # directory that already ends in it does not get a second separator.
    seps = (path.sep, path.altsep) if path.altsep else (path.sep,)
    if tail and not tail.endswith(seps):
        d += path.sep
    return d, b


# Alternative, more descriptive name for ``dir_base``.
dirbase_from_path = dir_base

# TODO: rename as split?
def dir_stem_ext(p):
    """
    Split a path into its directory, file name stem and extension.

    :Parameters:
        p
            a file name or path

    :Returns:
        a 3-tuple of the directory, the stem of the base name, and the
        extension (including its leading period, if any)

    For example::

        >>> dir_stem_ext ('bar.foo')
        ('', 'bar', '.foo')
        >>> dir_stem_ext ('bar.')
        ('', 'bar', '.')
        >>> dir_stem_ext ('bar')
        ('', 'bar', '')
        >>> dir_stem_ext ('baz/bar.foo')
        ('baz/', 'bar', '.foo')
        >>> dir_stem_ext ('/quux/baz/bar.foo')
        ('/quux/baz/', 'bar', '.foo')

    The directory component follows the trailing-separator convention of
    ``dir_base``.

    """
    folder, filename = dir_base(p)
    # splitext yields the (stem, ext) pair; prepend the directory to it.
    return (folder,) + path.splitext(filename)


# Alternative, more descriptive name for ``dir_stem_ext``.
dirstemext_from_path = dir_stem_ext
def substem_mod(s):
    """
    Divide a file name stem into a core name and a trailing modifier.

    :Parameters:
        s
            a file name stem (i.e. a base name without its extension)

    :Returns:
        a 2-tuple of the substem and the modifier. The modifier keeps its
        leading separator; it is the empty string when no modifier is found.

    File names are often built as "foo-mod.ext" or "foo.mod.ext", with "mod"
    being a qualifier of some kind, e.g. "report-2.doc" or
    "programme.20101201.txt". The stem is searched with ``SUBSTEM_RE`` and,
    if that matches, cut where the separator ("." or "-") starts.

    For example::

        >>> substem_mod ('report-2')
        ('report', '-2')
        >>> substem_mod ('programme.20101201')
        ('programme', '.20101201')
        >>> substem_mod ('report 2')
        ('report 2', '')
        >>> substem_mod ('programme_20101201')
        ('programme_20101201', '')

    """
    # TODO: what do Macs do with file copies?
    # TODO: what about the windows 'foo (2).txt' pattern? Need multi regexes?
    # XXX: do we need the word separator?
    # XXX: actually would it just be more consistent to split into words and
    # return [:-1], [-1]
    # TODO: what if there's no end word, only one word?
    # XXX: substem and mod are horrible names. Use words?
    found = SUBSTEM_RE.search(s)
    if found is None:
        return s, ''
    # Group 1 begins at the separator, so that is where the stem is cut.
    cut = found.start(1)
    return s[:cut], s[cut:]
172 173
def interpolate(str, sub_hash):
    """
    Replace each brace-delimited key in a string with its substitution.

    :Parameters:
        str
            a string containing brace-delimited locations for substitution,
            e.g. ``'abc{foo}def'``
        sub_hash : dict
            words and the substitutions to be used for them

    :Returns:
        the string with substitutions made

    Every span matched by ``INTERP_RE`` is looked up in ``sub_hash``, using the
    text between the braces with surrounding whitespace stripped as the key.
    The lookup is a plain ``sub_hash[key]``, so with a dict a key that is
    absent raises ``KeyError``.

    This is meant as a very simple templating or substitution system for use
    in configurations and the like.

    For example::

        >>> d = {'foo': '123', 'bar': '456'}
        >>> interpolate ('abcdef', d)
        'abcdef'
        >>> interpolate ('abc{foo}def', d)
        'abc123def'
        >>> interpolate ('ab{foo}cd{bar}ef', d)
        'ab123cd456ef'

    """
    # XXX: actually, do we need to do this? are python keywords all that bad?
    # maybe we should just use a proper templating system?
    # TODO: what does this do with proper python substitutions?
    def _replacement(found):
        # Drop the enclosing braces, then any padding around the key.
        token = found.group(0)
        key = token[1:-1].strip()
        return sub_hash[key]

    return INTERP_RE.sub(_replacement, str)
205 206
def interpolate_from_path(p, tmpl, subs=None):
    """
    Interpolate using qualities of a file path.

    :Parameters:
        p : str
            a file name or path
        tmpl : str
            the template for the output, containing brace-delimited locations
            for substitution, e.g. ``'{stem}.new{ext}'``
        subs : dict
            additional words and substitutions to be used on the string;
            these take precedence over the path-derived keywords below.
            Defaults to no additional substitutions.

    :Returns:
        the template string with substitutions made

    This allows a new file name or path (or actually any string) to be generated
    by interpolation from a file path. This allows construction of paths to files
    in the same directory, files with the same name but different extension,
    files with the same name except for a suffix, etc.

    The substitution keywords are:

    ext
        input path file extension, e.g. ".txt"
    base
        input path file base (name), e.g. "foo-bar.txt"
    stem
        input path file name stem, e.g. "foo-bar"
    dir
        input path directory, e.g. "quux/"
    dirstem
        input path directory and stem, e.g. "quux/foo-bar"
    substem
        input path substem, e.g. "foo"
    mod
        input path modifier, e.g. "-bar"
    date
        current date, formatted as YYYYMMDD
    time
        current time, formatted as HHMMSS
    datetime
        current date and time, formatted as YYYYMMDDTHHMMSS


    For example::

        >>> pth = '/foo/bar.baz'
        >>> interpolate_from_path (pth, '{stem}.new{ext}')
        'bar.new.baz'
        >>> d = {'prefix': 'PRFX', 'ext': '.txt'}
        >>> interpolate_from_path (pth, '{prefix}{stem}.new{ext}', d)
        'PRFXbar.new.txt'

    """
    # ???: order args as they are used
    # TODO: some sort of number up (tested or untested) or counter
    # TODO: a default output template (so you only need to def subs?)
    d, s, e = dir_stem_ext(p)
    ss, m = substem_mod(s)
    # Take a single timestamp so that date, time and datetime always agree.
    now = datetime.now()
    default_subs = {
        "ext": e,
        # The stem and extension are the two halves of the base name, so the
        # base is recovered by rejoining them rather than re-splitting the path.
        "base": s + e,
        "stem": s,
        "dir": d,
        "dirstem": d + s,
        "substem": ss,
        "mod": m,
        "date": now.strftime("%Y%m%d"),
        "time": now.strftime("%H%M%S"),
        "datetime": now.strftime("%Y%m%dT%H%M%S"),
    }
    # Caller-supplied substitutions override the path-derived defaults.
    if subs is not None:
        default_subs.update(subs)
    return interpolate(tmpl, default_subs)
### MAIN

if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    import doctest
    doctest.testmod()

### END