"""
Miscellaneous utilities for use in writing and using commands.

Often we will want to name or create a file based on pre-existing file
paths:

* create an output file in the same directory as an input file
* clean up some data and save it in a file of the same name with "-clean"
  appended, but before the extension.
* use the same extension for an output file as an input file.
* what is the extension or name of this file anyway?

Thus, here we provide a set of functions for extracting components from a file
path and extrapolating new names from the same. For consistency, here is the
terminology used:

dir
    the path of the directory a file is in
base
    the local name of a file, e.g. "foo.txt", "schedule.doc"
ext
    the extension of a file, e.g. ".txt", ".doc". Note this includes the
    separating period, so that the extensions of files "foo.txt", "foo." and
    "foo" are ".txt", "." and "" respectively.
stem
    the base file name, without the extension, e.g. "foo" and "schedule" for
    "foo.txt", "schedule.doc" respectively.
substem
    the stem of a file name, less the final "word", e.g. "foo" and
    "schedule_abc" for "foo-bar.txt" and "schedule_abc.2011.doc"
    respectively. Only "." and "-" separate words; an underscore does not.

"""
33
34
35
import re
from os import path
from datetime import datetime


# Matches one "{key}" placeholder. Group 1 is the text between the braces,
# which must contain at least one character other than "}", so "{}" is not
# treated as a placeholder.
INTERP_RE = re.compile(r'\{([^}]+)\}')

# Matches a trailing modifier on a file name stem: any single character,
# then (group 1) a "." or "-" followed by non-whitespace characters running
# to the end of the string. Because searching returns the leftmost match,
# group 1 starts at the first separator that satisfies this.
SUBSTEM_RE = re.compile(r'.([\.\-]\S+)$')
45
46
47
48
def dir_base(p):
    """
    Return the directory and file name of a path.

    :Parameters:
        p
            a file name or path

    :Returns:
        the directory and base name of the file path, as a 2-tuple. The
        directory is either empty or ends with the path separator.

    For example::

        >>> dir_base('bar.foo')
        ('', 'bar.foo')
        >>> dir_base('/bar.foo')
        ('/', 'bar.foo')
        >>> dir_base('baz/bar.foo')
        ('baz/', 'bar.foo')
        >>> dir_base('/quux/baz/bar.foo')
        ('/quux/baz/', 'bar.foo')

    We use here the Python convention of referring to the file name (as
    opposed to path) as the "base". Also, there's an apparent oddity in the
    native splitting of paths, to do with the parent directory::

        >>> path.split('bar.foo')
        ('', 'bar.foo')
        >>> path.split('/bar.foo')
        ('/', 'bar.foo')
        >>> path.split('baz/bar.foo')
        ('baz', 'bar.foo')

    That is, the dividing separator only appears if there is nothing else in
    the directory component. This makes things difficult if you are trying to
    pick apart a file path and reassemble it - do I need a file separator?
    Was there one there before? ``path.join`` is reasonably clever about
    fusing paths, but this is still an inconsistency and so is fixed here.

    """
    directory, base = path.split(p)
    # Normalise the directory part: a non-empty directory always carries its
    # trailing separator, so that ``directory + base`` rebuilds a usable path.
    if directory and not directory.endswith(path.sep):
        directory += path.sep
    return directory, base


# Alternative name for the same function.
dirbase_from_path = dir_base
96
97
def dir_stem_ext(p):
    """
    Return the directory, stem of the file name and extension from a path.

    :Parameters:
        p
            a file name or path

    :Returns:
        the directory, base name stem and extension of the file path, as a
        3-tuple. The extension includes its leading period.

    For example::

        >>> dir_stem_ext('bar.foo')
        ('', 'bar', '.foo')
        >>> dir_stem_ext('bar.')
        ('', 'bar', '.')
        >>> dir_stem_ext('bar')
        ('', 'bar', '')
        >>> dir_stem_ext('baz/bar.foo')
        ('baz/', 'bar', '.foo')
        >>> dir_stem_ext('/quux/baz/bar.foo')
        ('/quux/baz/', 'bar', '.foo')

    Note this uses the file separator convention of ``dir_base``: a non-empty
    directory always ends with the path separator.

    """
    # "directory" rather than "dir", so the builtin is not shadowed.
    directory, base = dir_base(p)
    stem, ext = path.splitext(base)
    return directory, stem, ext


# Alternative name for the same function.
dirstemext_from_path = dir_stem_ext
130
131
def substem_mod(s):
    """
    Split the 'stem' of a file name into a core name and a trailing modifier.

    :Parameters:
        s
            a file name stem, i.e. a base name without its extension

    :Returns:
        the substem and modifier of the name, as a 2-tuple. The modifier
        includes its leading separator and is empty if none was found.

    Many file names are structured as "foo-mod.ext" or "foo.mod.ext" where
    "mod" is some qualifier, e.g. "report-2.doc", "programme.20101201.txt".
    This sniffs out the usual forms of these endings and if found, splits and
    returns the name there. Only "." and "-" count as separators, and the
    modifier may not contain whitespace.

    For example::

        >>> substem_mod('report-2')
        ('report', '-2')
        >>> substem_mod('programme.20101201')
        ('programme', '.20101201')
        >>> substem_mod('report 2')
        ('report 2', '')
        >>> substem_mod('programme_20101201')
        ('programme_20101201', '')

    Note that when several separators are present the split is made at the
    first one that is followed only by non-whitespace, so the modifier may
    hold more than one "word"::

        >>> substem_mod('report-final-2')
        ('report', '-final-2')

    """
    match = SUBSTEM_RE.search(s)
    if match is None:
        return s, ''
    # Group 1 begins at the separator, so the separator stays with the
    # modifier and the two parts concatenate back to the original string.
    cut = match.start(1)
    return s[:cut], s[cut:]
172
173
def interpolate(str, sub_hash):
    """
    Interpolate the bracketed sections of the passed string as keyed substrings.

    :Parameters:
        str
            a string containing brace-delimited locations for substitution,
            e.g. ``"{key}"``
        sub_hash : dict
            words and substitutions to be used on the string

    :Returns:
        the string with substitutions made

    :Raises KeyError:
        if a placeholder names a key that is not in ``sub_hash``

    The intent for this is as a very simple templating or substitution system
    to be used in configurations and the like. Whitespace around a key inside
    the braces is ignored, and values that are not strings are converted to
    their string form.

    For example::

        >>> d = {'foo': '123', 'bar': '456'}
        >>> interpolate('abcdef', d)
        'abcdef'
        >>> interpolate('abc{foo}def', d)
        'abc123def'
        >>> interpolate('ab{foo}cd{bar}ef', d)
        'ab123cd456ef'
        >>> interpolate('abc{ foo }def', d)
        'abc123def'
        >>> interpolate('page{n}', {'n': 2})
        'page2'

    """
    def _replacement(match):
        # Group 1 is the text between the braces.
        value = sub_hash[match.group(1).strip()]
        # re.sub needs a string back. The builtin ``str`` is shadowed by the
        # parameter of the same name, hence %-formatting.
        return '%s' % (value,)

    return INTERP_RE.sub(_replacement, str)
205
206
def interpolate_from_path(p, tmpl, subs=None):
    """
    Interpolate using qualities of a file path.

    :Parameters:
        p : str
            a file name or path
        tmpl : str
            the template for the output, containing brace-delimited
            locations for substitution
        subs : dict
            additional words and substitutions to be used on the string;
            these take precedence over the keywords derived from the path

    :Returns:
        the template string with substitutions made

    This allows a new file name or path (or actually any string) to be
    generated by interpolation from a file path. This allows construction of
    paths to files in the same directory, files with the same name but
    different extension, files with the same name except for a suffix, etc.

    The substitution keywords are:

    ext
        input path file extension, e.g. ".txt"
    base
        input path file base (name), e.g. "foo-bar.txt"
    stem
        input path file name stem, e.g. "foo-bar"
    dir
        input path directory, e.g. "quux/"
    dirstem
        input path directory and stem, e.g. "quux/foo-bar"
    substem
        input path substem, e.g. "foo"
    mod
        input path modifier, e.g. "-bar"
    date
        current date, formatted as YYYYMMDD
    time
        current time, formatted as HHMMSS
    datetime
        current date and time, formatted as YYYYMMDDTHHMMSS

    For example::

        >>> pth = '/foo/bar.baz'
        >>> interpolate_from_path(pth, '{stem}.new{ext}')
        'bar.new.baz'
        >>> d = {'prefix': 'PRFX', 'ext': '.txt'}
        >>> interpolate_from_path(pth, '{prefix}{stem}.new{ext}', d)
        'PRFXbar.new.txt'

    """
    directory, stem, ext = dir_stem_ext(p)
    substem, mod = substem_mod(stem)
    # One timestamp for all three keywords, so they cannot disagree.
    now = datetime.now()
    default_subs = {
        "ext": ext,
        # The base name is exactly the stem plus the extension, so no second
        # split of the path is needed.
        "base": stem + ext,
        "stem": stem,
        "dir": directory,
        "dirstem": directory + stem,
        "substem": substem,
        "mod": mod,
        "date": now.strftime("%Y%m%d"),
        "time": now.strftime("%H%M%S"),
        "datetime": now.strftime("%Y%m%dT%H%M%S"),
    }
    # Caller-supplied entries are applied last so they override the defaults.
    if subs is not None:
        default_subs.update(subs)
    return interpolate(tmpl, default_subs)
281
282
283
284
285
286
# When run as a script, execute the doctest examples embedded in this
# module's docstrings.
if __name__ == "__main__":
    import doctest
    doctest.testmod()
290
291
292