1
2
3 """
4 Rename files according to the ISBN embedded in their original name.
5
6 """
7
8
9
10
11 __docformat__ = 'restructuredtext en'
12
13
14
15
16 import logging
17 import sys, re
18 from os import path, rename
19 from optparse import OptionParser
20 from exceptions import BaseException
21
22 from biblio.webquery import errors
23 from config import *
24 from common import *
25
26
27
28
# Bare ISBN shapes, each capturing the ISBN itself. The ISBN-10 check
# character is a digit or 'X'. Inside a character class '|' is a literal
# pipe rather than alternation, so it must not appear in that class.
ISBN10_PAT = r'(\d{9}[\dX])'
ISBN13_PAT = r'(\d{13})'

# Characters accepted between an ISBN and the rest of a file name. The pipe
# is listed on purpose, as a literal, so names delimited by it still match.
# '\b' is left out: inside a class it means backspace, not a word boundary.
_ISBN_SEP = r'[|_.\-\s]'

# Searched in order against a file name; the first pattern that matches
# wins, so the more specific forms come before the looser fallbacks.
# Group 1 of every pattern is the ISBN.
ISBN_PATS = [
    r'\(ISBN([^\)]+)\)',
    '^' + ISBN13_PAT + '$',
    '^' + ISBN13_PAT + _ISBN_SEP,
    _ISBN_SEP + ISBN13_PAT + '$',
    # Both sides take the full delimiter set, as the ISBN-10 form does.
    _ISBN_SEP + ISBN13_PAT + _ISBN_SEP,
    '^' + ISBN10_PAT + '$',
    '^' + ISBN10_PAT + _ISBN_SEP,
    _ISBN_SEP + ISBN10_PAT + '$',
    _ISBN_SEP + ISBN10_PAT + _ISBN_SEP,
    r'ISBN\s*' + ISBN13_PAT,
    r'ISBN\s*' + ISBN10_PAT,
    r'[\[\(]' + ISBN10_PAT + r'[\]\)]',
    r'\D' + ISBN13_PAT + '$',
    r'\D' + ISBN10_PAT + '$',
]

# Compiled case-insensitively, so 'isbn' and a lowercase 'x' check character
# are recognised as well.
ISBN_RE = [re.compile(p, re.IGNORECASE) for p in ISBN_PATS]
51
52 _DEV_MODE = True
53
54 DEF_NAME_FMT = '%(auth)s%(year)s_%(short_title)s_(isbn%(isbn)s)'
55 DEF_STRIP_CHARS = ''':!,'".?()'''
56 DEF_BLANK_CHARS = ''
57 STRIP_CHARS_RE = re.compile ('[\'\":\,!\.\?\(\)]')
58
59 COLLAPSE_SPACE_RE = re.compile (r'\s+')
60
61
62 CASE_CHOICES = [
63 'orig',
64 'upper',
65 'lower',
66 ]
67
68
69
70
72
def parse_args():
    """
    Define the command-line interface and parse the program arguments.

    Options common to related scripts are added by ``add_shared_options``
    and validated by ``check_shared_options``. Both names, like
    ``script_version``, arrive through the star imports at the top of the
    file (presumably from ``common`` and ``config`` - TODO confirm).

    :Returns:
        A pair ``(fpaths, options)``: the positional arguments (the files
        to rename) and the parsed option values.

    The parser reports a usage error, which ends the program, when no file
    is given.
    """
    usage = '%prog [options] FILES ...'
    version = "version %s" % script_version
    description = (
        'Extract an ISBN from a file name, look up the associated '
        'bibliographic information in a webservice and rename the file '
        'appropriately.'
    )
    epilog = (
        'ISBNs are extracted from filenames by pure heuristics - obviously '
        'not all forms will be found. '
        'The new name is generated first before the various processing '
        'options are applied. In order, characters are stripped from the '
        'name, excess whitespace is collapsed and then the case conversion '
        'is applied. The file extension, if any, is removed before renaming '
        'and re-applied afterwards. '
        'We suggest you try a dryrun before renaming any files.'
    )
    optparser = OptionParser(usage=usage, version=version, epilog=epilog,
        description=description)
    add_shared_options(optparser)

    optparser.add_option('--case', '-c',
        dest='case',
        help="Case conversion of the new file name. Choices are %s. "
            "The default is %s. " % (', '.join(CASE_CHOICES), CASE_CHOICES[0]),
        choices=CASE_CHOICES,
        default=CASE_CHOICES[0],
    )

    optparser.add_option('--leave_whitespace',
        action='store_true',
        dest='leave_whitespace',
        help="Leave excess whitespace. By default, consecutive spaces in "
            "names are compacted",
        default=False,
    )

    optparser.add_option('--replace_whitespace',
        dest='replace_whitespace',
        help="Replace whitespace in the new name with this string.",
        default='',
    )

    optparser.add_option('--strip_chars',
        dest='strip_chars',
        help="Remove these characters from the new name. By default "
            "these are '%s'." % DEF_STRIP_CHARS,
        default=DEF_STRIP_CHARS,
    )

    optparser.add_option('--overwrite',
        action='store_true',
        dest='overwrite',
        help="Overwrite existing files.",
        default=False,
    )

    optparser.add_option('--dryrun',
        action='store_true',
        dest='dryrun',
        help="Check function without renaming files.",
        default=False,
    )

    optparser.add_option('--template',
        dest='template',
        help="The form to use for renaming the file. The fields recognised are "
            "auth (primary authors family name), "
            "title (full title of the book), "
            "short_title (abbreviated title), "
            "isbn, "
            "year (year of publication). The default is '%s'." % DEF_NAME_FMT,
        default=DEF_NAME_FMT,
    )

    optparser.add_option('--unknown',
        dest='unknown',
        help="Use this string if value is undefined.",
        default='unknown',
    )

    options, fpaths = optparser.parse_args()

    # At least one file is required; error() prints usage and exits.
    if not fpaths:
        optparser.error('No files specified')
    check_shared_options(options, optparser)

    return fpaths, options
159
160
def dir_base_ext_from_path(fpath):
    """
    Split a file path into its directory, base name and extension.

    :Parameters:
        fpath
            Path of a file.

    :Returns:
        A tuple ``(fdir, base, ext)``. ``fdir`` is everything before the
        final path component, ``base`` is that component without its
        extension and ``ext`` is the extension including the leading dot
        (empty when there is none).
    """
    fdir, fname = path.split(fpath)
    base, ext = path.splitext(fname)
    return fdir, base, ext
168
169
171 """
172 Rename a file, while keeping it in the same location.
173 """
174 fdir, fname = path.split (oldpath)
175 newpath = path.join (fdir, newname)
176 rename (oldpath, newpath)
177
178
def extract_isbn_from_filename(fname):
    """
    Find an ISBN in a file name using the ``ISBN_RE`` heuristics.

    The compiled patterns are searched in order and the first usable match
    wins.

    :Parameters:
        fname
            File name to search, normally without directory or extension.

    :Returns:
        The text captured by group 1 of the first matching pattern, with
        surrounding whitespace removed, or ``None`` if nothing matches.
    """
    for isbn_re in ISBN_RE:
        match = isbn_re.search(fname)
        if match:
            # The '(ISBN...)' form captures everything after the prefix, so
            # trim padding and pass over a capture that is blank.
            isbn = match.group(1).strip()
            if isbn:
                return isbn
    return None
185
186
def generate_new_name(bibrec, options):
    """
    Build a new file name (without extension) from a bibliographic record.

    :Parameters:
        bibrec
            Record read for ``authors``, ``title``, ``short_title``,
            ``year`` and ``id``. The first author, if any, is read for
            ``family`` and ``given``.
        options
            Parsed options. ``template`` is the %-format string to fill
            and ``unknown`` is substituted for any empty field.

    :Returns:
        ``options.template`` filled with the fields ``auth``, ``year``,
        ``short_title``, ``title`` and ``isbn``.
    """
    if bibrec.authors:
        primary_auth = bibrec.authors[0]
        # Prefer the family name, then the given name. An author with
        # neither falls back to the placeholder, so 'None' or an empty
        # string never ends up in the file name.
        auth_str = (primary_auth.family or primary_auth.given
            or options.unknown)
    else:
        auth_str = options.unknown
    logging.info('~ found %s - %s' % (auth_str, bibrec.title))
    return options.template % {
        'auth': auth_str,
        'year': bibrec.year or options.unknown,
        'short_title': bibrec.short_title or options.unknown,
        'title': bibrec.title or options.unknown,
        'isbn': bibrec.id or options.unknown,
    }
201
202
def postprocess_name(name, options):
    """
    Apply the clean-up options to a freshly generated file name.

    The steps run in a fixed order: characters in ``options.strip_chars``
    are removed; unless ``options.leave_whitespace`` is set the name is
    trimmed and each run of whitespace collapsed to one space; whitespace
    is replaced by ``options.replace_whitespace`` when that is non-empty;
    finally ``options.case`` ('lower' or 'upper') is applied.

    :Parameters:
        name
            The generated name; must not be empty.
        options
            Parsed options supplying ``strip_chars``, ``leave_whitespace``,
            ``replace_whitespace`` and ``case``.

    :Returns:
        The processed name.
    """
    assert (name)

    # Remove every occurrence of each unwanted character.
    for c in options.strip_chars:
        name = name.replace(c, '')

    if not options.leave_whitespace:
        name = COLLAPSE_SPACE_RE.sub(' ', name.strip())
    if options.replace_whitespace:
        # Replace each whitespace character, not only literal spaces, so
        # tabs and newlines kept by --leave_whitespace are covered too.
        name = ''.join(
            options.replace_whitespace if ch.isspace() else ch
            for ch in name)

    # Any other case value, such as 'orig', leaves the name untouched.
    if options.case == 'lower':
        name = name.lower()
    elif options.case == 'upper':
        name = name.upper()

    return name
222
223
def main():
    """
    Command-line entry point: rename each given file after its ISBN.

    For every path on the command line an ISBN is extracted from the base
    name and looked up through the web query object. The first record
    returned is turned into a new name, and the file is renamed next to
    the original, keeping its extension. Nothing is renamed on a dry run,
    and an existing target is only replaced when ``--overwrite`` is given.
    Progress is logged to standard output.

    Files with no ISBN, a failed query or no records are reported and
    skipped. Any other error is re-raised when ``_DEV_MODE`` or
    ``options.debug`` is set; otherwise the program exits with the error
    message.
    """
    fpath_list, options = parse_args()
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
        format="%(message)s")
    try:
        # construct_webquery is expected to come from the star imports at
        # the top of the file - TODO confirm.
        webqry = construct_webquery(options.webservice, options.service_key)
        for fpath in fpath_list:
            logging.info('Original %s ...' % fpath)
            fdir, base, ext = dir_base_ext_from_path(fpath)
            isbn = extract_isbn_from_filename(base)
            logging.info('~ extracted ISBN %s ...' % isbn)
            if not isbn:
                logging.info('- no isbn extracted')
                continue

            try:
                bibrec_list = webqry.query_bibdata_by_isbn(isbn,
                    format='bibrecord')
            except errors.QueryError as err:
                # A failed lookup only skips this file.
                logging.info('- query failed: %s.' % err)
                continue
            if not bibrec_list:
                logging.info('- no records returned')
                continue

            # Only the first record is used.
            new_name = generate_new_name(bibrec_list[0], options)
            new_name = postprocess_name(new_name, options)
            logging.info('~ new name %s.' % new_name)
            newpath = path.join(fdir, new_name + ext)
            logging.info('~ new path %s.' % newpath)

            do_rename = not options.dryrun
            if path.exists(newpath):
                logging.info('~ path already exists')
                if not options.overwrite:
                    do_rename = False
            if do_rename:
                logging.info('~ renaming file')
                rename(fpath, newpath)

    except Exception as err:
        # Top-level boundary. Interrupts and explicit exits are not caught
        # here and propagate unchanged.
        if _DEV_MODE or options.debug:
            raise
        sys.exit(err)
271
272
273
274
275
276
277 if __name__ == '__main__':
278 main()
279
280
281
282