Package biblio :: Package webquery :: Package scripts :: Module renamebyisbn
[hide private]
[frames | no frames]

Source Code for Module biblio.webquery.scripts.renamebyisbn

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  """ 
  4  Rename files according to the ISBN embedded in their original name.
  5   
  6  """ 
  7  # TODO: throttle parameter? 
  8  # TODO: Amazon query? 
  9  # TODO: output in other formats? 
 10   
 11  __docformat__ = 'restructuredtext en' 
 12   
 13   
 14  ### IMPORTS ### 
 15   
 16  import logging 
 17  import sys, re 
 18  from os import path, rename 
 19  from optparse import OptionParser 
 20  from exceptions import BaseException 
 21   
 22  from biblio.webquery import errors 
 23  from config import * 
 24  from common import * 
 25   
 26   
 27  ### CONSTANTS & DEFINES ### 
 28   
 29  ISBN10_PAT = r'(\d{9}[\d|X])' 
 30  ISBN13_PAT = r'(\d{13})' 
 31   
 32  ISBN_PATS = [ 
 33     r'\(ISBN([^\)]+)\)', 
 34     r'^(\d{13})$', 
 35     r'^(\d{13})[\b|_|\.|\-|\s]', 
 36     r'[\b|_|\.|\-|\s](\d{13})$', 
 37     r'[\b|_|\.|\-|\s](\d{13})[\b|_|\.]', 
 38     r'^(\d{9}[\d|X])$', 
 39     r'^(\d{9}[\d|X])[\b|_|\.|\s|\-]', 
 40     r'[\b|_|\.|\-|\s](\d{9}[\d|X])$', 
 41     r'[\b|_|\.|\-|\s](\d{9}[\d|X])[\b|_|\.|\-|\s]', 
 42     r'ISBN\s*(\d{13})', 
 43     r'ISBN\s*(\d{9}[\d|X])', 
 44     r'[\[\(](\d{9}[\d|X])[\]\)]', 
 45     r'\D(\d{13})$', 
 46     r'\D(\d{9}[\d|X])$', 
 47   
 48  ] 
 49   
 50  ISBN_RE = [re.compile (p, re.IGNORECASE) for p in ISBN_PATS] 
 51   
 52  _DEV_MODE = True 
 53   
 54  DEF_NAME_FMT = '%(auth)s%(year)s_%(short_title)s_(isbn%(isbn)s)' 
 55  DEF_STRIP_CHARS = ''':!,'".?()''' 
 56  DEF_BLANK_CHARS = '' 
 57  STRIP_CHARS_RE = re.compile ('[\'\":\,!\.\?\(\)]') 
 58   
 59  COLLAPSE_SPACE_RE = re.compile (r'\s+') 
 60   
 61   
 62  CASE_CHOICES = [ 
 63     'orig', 
 64     'upper', 
 65     'lower', 
 66  ] 
 67   
 68   
 69  ### IMPLEMENTATION ### 
 70   
def parse_args():
    """
    Build the command-line parser, parse the arguments and validate them.

    The options shared between scripts are added by ``add_shared_options``
    and validated by ``check_shared_options`` (both defined outside this
    function); the options specific to renaming are added here.

    :Returns:
        A pair ``(fpaths, options)``: the list of positional arguments (the
        files to rename) and the parsed option values.

    If no files are given, ``optparser.error`` is called, which reports the
    problem and terminates the script.
    """
    # Construct the option parser.
    usage = '%prog [options] FILES ...'
    version = "version %s" % script_version
    description = (
        'Extract an ISBN from a file name, look up the associated '
        'bibliographic information in a webservice and rename the file '
        'appropriately.'
    )
    epilog = (
        'ISBNs are extracted from filenames by pure heuristics - obviously '
        'not all forms will be found. '
        'The new name is generated first before the various processing '
        'options are applied. In order, characters are stripped from the '
        'name, excess whitespace is collapsed and then the case conversion '
        'is applied. The file extension, if any, is removed before renaming '
        'and re-applied afterwards. '
        'We suggest you try a dryrun before renaming any files.'
    )
    optparser = OptionParser(usage=usage, version=version, epilog=epilog,
        description=description)
    add_shared_options(optparser)

    # Supplying 'choices' makes optparse reject any other value.
    optparser.add_option('--case', '-c',
        dest='case',
        help=("Case conversion of the new file name. Choices are %s. "
              "The default is %s. ") % (', '.join(CASE_CHOICES), CASE_CHOICES[0]),
        choices=CASE_CHOICES,
        default=CASE_CHOICES[0],
    )

    optparser.add_option('--leave_whitespace',
        action='store_true',
        dest='leave_whitespace',
        help=("Leave excess whitespace. By default, consecutive spaces in "
              "names are compacted"),
        default=False,
    )

    optparser.add_option('--replace_whitespace',
        dest='replace_whitespace',
        help="Replace whitespace in the new name with this string.",
        default='',
    )

    optparser.add_option('--strip_chars',
        dest='strip_chars',
        help=("Remove these characters from the new name. By default "
              "this are '%s'.") % DEF_STRIP_CHARS,
        default=DEF_STRIP_CHARS,
    )

    optparser.add_option('--overwrite',
        action='store_true',
        dest='overwrite',
        help="Overwrite existing files.",
        default=False,
    )

    optparser.add_option('--dryrun',
        action='store_true',
        dest='dryrun',
        help="Check function and without renaming files.",
        default=False,
    )

    optparser.add_option('--template',
        dest='template',
        help=("The form to use for renaming the file. The fields recognised are "
              "auth (primary authors family name), "
              "title (full title of the book), "
              "short_title (abbreviated title), "
              "isbn, "
              "year (year of publication). The default is '%s'.") % DEF_NAME_FMT,
        default=DEF_NAME_FMT,
    )

    optparser.add_option('--unknown',
        dest='unknown',
        help="Use this string if value is undefined.",
        default='unknown',
    )

    # parse and check args
    options, fpaths = optparser.parse_args()

    if not fpaths:
        optparser.error('No files specified')
    check_shared_options(options, optparser)

    ## Postconditions & return:
    return fpaths, options
159 160
def dir_base_ext_from_path(fpath):
    """
    Break a path into directory, extension-less file name and extension.

    :Parameters:
        fpath
            The path to take apart.

    :Returns:
        A triple ``(directory, base, ext)``. ``ext`` keeps its leading dot
        and is an empty string when the file name has no extension.
    """
    stem, suffix = path.splitext(path.basename(fpath))
    return path.dirname(fpath), stem, suffix
168 169
def rename_file(oldpath, newname):
    """
    Give a file a new name without moving it out of its directory.

    :Parameters:
        oldpath
            Current path of the file.
        newname
            The new file name; it is joined onto the directory part of
            ``oldpath``.
    """
    parent_dir = path.dirname(oldpath)
    rename(oldpath, path.join(parent_dir, newname))
177 178
def extract_isbn_from_filename(fname):
    """
    Find an ISBN in a file name using the compiled ``ISBN_RE`` patterns.

    The patterns are tried in list order and the first one that matches
    anywhere in the name wins.

    :Parameters:
        fname
            The name to search.

    :Returns:
        The text captured by group 1 of the first matching pattern, or
        ``None`` when no pattern matches.
    """
    for pattern in ISBN_RE:
        hit = pattern.search(fname)
        if hit is None:
            continue
        return hit.group(1)
    return None
185 186
def generate_new_name(bibrec, options):
    """
    Build the new file name for a bibliographic record from the template.

    :Parameters:
        bibrec
            The record to name the file after. Its ``authors``, ``title``,
            ``short_title``, ``year`` and ``id`` attributes are read; each
            author is expected to offer ``family`` and ``given`` attributes.
        options
            Parsed options; ``template`` (a ``%``-style format using the
            keys auth, year, short_title, title and isbn) and ``unknown``
            (the placeholder for missing values) are read.

    :Returns:
        The template filled in with the record's details. Any empty field
        is replaced by ``options.unknown``.
    """
    auth_str = None
    if bibrec.authors:
        # Only the first listed author is used; prefer the family name.
        primary_auth = bibrec.authors[0]
        auth_str = primary_auth.family or primary_auth.given
    # Covers both "no authors at all" and "first author has no usable name",
    # so that 'None' or an empty string never ends up in the file name.
    auth_str = auth_str or options.unknown
    logging.info('~ found %s - %s' % (auth_str, bibrec.title))
    fields = {
        'auth': auth_str,
        'year': bibrec.year or options.unknown,
        'short_title': bibrec.short_title or options.unknown,
        'title': bibrec.title or options.unknown,
        'isbn': bibrec.id or options.unknown,
    }
    return options.template % fields
201 202
def postprocess_name(name, options):
    """
    Tidy a freshly generated name according to the command-line options.

    The steps run in a fixed order: every character listed in
    ``options.strip_chars`` is removed; unless ``options.leave_whitespace``
    is set, the name is trimmed and each run of whitespace becomes a single
    space; if ``options.replace_whitespace`` is non-empty, each space is
    replaced by it; finally ``options.case`` ('lower' or 'upper') is
    applied, any other value leaving the case alone.

    :Parameters:
        name
            The name to clean up; must be non-empty.
        options
            Parsed options supplying the attributes described above.

    :Returns:
        The cleaned-up name.
    """
    ## Preconditions:
    assert name
    ## Main:
    cleaned = name
    # drop unwanted characters one at a time
    for unwanted in options.strip_chars:
        cleaned = cleaned.replace(unwanted, '')
    # squeeze whitespace unless asked to keep it
    if not options.leave_whitespace:
        cleaned = COLLAPSE_SPACE_RE.sub(' ', cleaned.strip())
    filler = options.replace_whitespace
    if filler:
        cleaned = cleaned.replace(' ', filler)
    # harmonise case
    wanted_case = options.case
    if wanted_case == 'lower':
        return cleaned.lower()
    if wanted_case == 'upper':
        return cleaned.upper()
    ## Return:
    return cleaned
222 223
def main():
    """
    Script entry point: rename every file named on the command line.

    For each path an ISBN is extracted from the file's base name and looked
    up through the configured webservice. The first record returned is
    turned into a new name via ``generate_new_name`` and
    ``postprocess_name``; the file keeps its directory and extension.
    Progress is logged to stdout.

    A file is not renamed when ``--dryrun`` is set, or when the target path
    already exists and ``--overwrite`` is not set. Files with no
    recognisable ISBN, no returned records or a failed query are reported
    and skipped.

    Any other error is re-raised while ``_DEV_MODE`` or ``options.debug``
    is true; otherwise the script exits with the error as its message.
    """
    fpath_list, options = parse_args()
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
        format="%(message)s")
    try:
        webqry = construct_webquery(options.webservice, options.service_key)
        for fpath in fpath_list:
            logging.info('Original %s ...' % fpath)
            fdir, base, ext = dir_base_ext_from_path(fpath)
            isbn = extract_isbn_from_filename(base)
            logging.info('~ extracted ISBN %s ...' % isbn)
            if not isbn:
                logging.info('- no isbn extracted')
                continue
            try:
                bibrec_list = webqry.query_bibdata_by_isbn(isbn,
                    format='bibrecord')
                if not bibrec_list:
                    logging.info('- no records returned')
                    continue
                # Only the first record returned is used.
                bibinfo = bibrec_list[0]
                new_name = generate_new_name(bibinfo, options)
                new_name = postprocess_name(new_name, options)
                logging.info('~ new name %s.' % new_name)
                newpath = path.join(fdir, new_name + ext)
                logging.info('~ new path %s.' % newpath)
                # Named so as not to shadow the module-level rename_file().
                do_rename = not options.dryrun
                if path.exists(newpath):
                    logging.info('~ path already exists')
                    if not options.overwrite:
                        do_rename = False
                if do_rename:
                    logging.info('~ renaming file')
                    rename(fpath, newpath)
            except errors.QueryError as err:
                # A failed lookup only skips this file; carry on with the rest.
                logging.info('- query failed: %s.' % err)

    except BaseException as err:
        if (_DEV_MODE or options.debug):
            raise
        else:
            sys.exit(err)
    except:
        # Kept for exceptions that do not derive from BaseException
        # (possible on Python 2 only).
        if (_DEV_MODE or options.debug):
            raise
        else:
            sys.exit("An unknown error occurred.")


### TEST & DEBUG ###

### MAIN ###

# Run the renamer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()


### END ######################################################################