Package checkm :: Module checkm
[hide private]
[frames | no frames]

Source Code for Module checkm.checkm

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """Checkm class library docs TODO 
  5   
  6   
  7                  [@]SourceFileOrURL  Alg     Digest  Length   ModTime   TargetFileOrURL 
  8  TOKEN NUMBER:    1                  2       3       4        5         6 
  9   
 10  """ 
 11   
 12  import os, sys 
 13  from stat import * 
 14   
 15  from collections import defaultdict 
 16   
 17  import hashlib 
 18   
 19  import codecs 
 20   
 21  import logging 
 22   
  # Import-time side effect: logging.basicConfig configures the root logger at
  # INFO level (it is a no-op if the root logger already has handlers).
  # NOTE(review): this runs whenever the module is imported, so it also affects
  # applications that merely use this module as a library.
  23  logging.basicConfig(level=logging.INFO) 
  24   
  # Module-level logger named 'checkm', used by the classes defined below.
  25  logger = logging.getLogger('checkm') 
 26   
class DirectoryNotFound(Exception):
    """The directory was not found, or is not accessible.

    Whatever positional and keyword arguments the exception is raised with
    are kept on ``context`` as an ``(args, kwargs)`` tuple so handlers can
    inspect them (for example ``exc.context[1]['directory_path']``).
    """

    def __init__(self, *arg, **kw):
        """Store the positional and keyword arguments on ``self.context``."""
        self.context = (arg, kw)

    def __repr__(self):
        """Return the string form of the stored ``(args, kwargs)`` context."""
        return self.context.__str__()

    def __str__(self):
        """Return a message that always includes the keyword context.

        ``Exception.__str__`` only looks at positional arguments, so an
        instance raised with keywords only would otherwise render as an
        empty string in tracebacks and log output.  Instances built from
        positional arguments alone keep the standard ``Exception`` text.
        """
        if self.context[1]:
            return self.__repr__()
        return super(DirectoryNotFound, self).__str__()
33
class CheckmReporter(object):
    """Write the result of a directory scan to a checkm manifest file.

    A manifest is a header line naming the columns followed by one line per
    scanned item, with every column padded so that the cells line up.
    """

    # Header cell for each supported column, in output order.
    COLUMN_NAMES = [u'# [@]SourceFileOrURL', u'Alg', u'Digest', u'Length', u'ModTime']

    def __init__(self):
        """Create the scanner used to collect the report rows."""
        self.scanner = CheckmScanner()

    def _get_max_len(self, report):
        """Return a mapping of column index -> widest cell in that column.

        :param report: iterable of rows, each row a sequence of strings.
        :returns: a ``defaultdict`` that yields 0 for columns never seen.
        """
        cols = defaultdict(int)
        for line in report:
            for index, cell in enumerate(line):
                if len(cell) > cols[index]:
                    cols[index] = len(cell)
        return cols

    def _space_line(self, line, col_maxes):
        """Join the cells of ``line``, padding each to its column width + 4.

        :param line: sequence of strings making up one row.
        :param col_maxes: mapping of column index -> column width.
        :returns: the padded row as a single string (no trailing newline).
        """
        spaced_line = []
        for index, cell in enumerate(line):
            spaced_line.append(cell)
            spaced_line.append(u" " * (col_maxes[index] - len(cell) + 4))
        return u"".join(spaced_line)

    def _format_report(self, report, columns=3):
        """Return the manifest text as a list of newline-terminated lines.

        The header takes part in the column-width calculation; otherwise a
        header cell wider than its data column would receive no padding and
        run into the next header cell.

        :param report: iterable of rows as produced by the scanner.
        :param columns: number of header columns; anything that is not an
            integer of at least 3 is treated as 3, which is the same rule
            the scanner applies to the rows.
        :returns: header line, one line per row, then a blank line.
        """
        if not isinstance(columns, int) or columns < 3:
            columns = 3
        rows = list(report)
        header = CheckmReporter.COLUMN_NAMES[:columns]
        col_maxes = self._get_max_len([header] + rows)
        lines = [u"%s \n" % self._space_line(header, col_maxes)]
        lines.extend(u"%s\n" % self._space_line(row, col_maxes) for row in rows)
        lines.append(u"\n")
        return lines

    def create_checkm_file(self, scan_directory, algorithm, checkm_filename, recursive=False, columns=3):
        """Scan ``scan_directory`` and write the manifest to ``checkm_filename``.

        :param scan_directory: directory whose contents are scanned.
        :param algorithm: name of the hash algorithm handed to the scanner.
        :param checkm_filename: path of the UTF-8 manifest file to (over)write.
        :param recursive: passed through to the scanner's ``scan_directory``.
        :param columns: number of columns to emit (see ``COLUMN_NAMES``).
        :raises: whatever the scanner raises while scanning; the output file
            is only opened once the scan has completed.
        """
        logger.info("Creating checkm file(%s) for dir(%s) with Alg:%s and columns: %s",
                    checkm_filename, scan_directory, algorithm, columns)
        report = self.scanner.scan_directory(scan_directory, algorithm,
                                             recursive=recursive, columns=columns)
        with codecs.open(checkm_filename, encoding='utf-8', mode="w") as output:
            output.writelines(self._format_report(report, columns))
66
class CheckmScanner(object):
    """Collect checkm report rows for the entries of a directory.

    Every row is a list of text cells: path, algorithm name, digest (or
    ``u'd'`` for a directory) and, when requested, size and modification
    time.
    """

    HASHTYPES = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']

    # Number of bytes read from a file per hashing step.
    _CHUNK_SIZE = 1024 * 8

    @staticmethod
    def _to_text(value):
        """Return ``value`` as text on both Python 2 and Python 3.

        Byte strings (which includes every plain ``str`` on Python 2) are
        decoded with the filesystem encoding, so non-ASCII paths no longer
        fail with ``UnicodeDecodeError``; undecodable bytes are replaced.
        """
        if isinstance(value, bytes):
            return value.decode(sys.getfilesystemencoding() or 'utf-8', 'replace')
        return u"%s" % (value,)

    @classmethod
    def _file_digest(cls, item_path, algorithm):
        """Return the hex digest of the file at ``item_path``.

        :raises ValueError: if ``hashlib`` has no usable constructor named
            ``algorithm``.
        """
        try:
            hash_gen = getattr(hashlib, algorithm)()
        except (AttributeError, TypeError):
            # AttributeError: unknown name.  TypeError: the name is not a
            # string, or it resolves to something that is not a
            # zero-argument hash constructor.
            raise ValueError("This tool cannot perform hashtype %s" % algorithm)
        with open(item_path, 'rb') as fh:
            for chunk in iter(lambda: fh.read(cls._CHUNK_SIZE), b''):
                hash_gen.update(chunk)
        return hash_gen.hexdigest()

    def scan_local(self, directory_path, algorithm, columns=3):
        """Return one row per direct child of ``directory_path``."""
        return [self.scan_path(os.path.join(directory_path, item), algorithm, columns)
                for item in os.listdir(directory_path)]

    def scan_tree(self, directory_path, algorithm, columns):
        """Return one row per directory and file found by walking the tree.

        :raises DirectoryNotFound: if ``directory_path`` does not exist.
        """
        if not os.path.exists(directory_path):
            # scan_tree is the recursive scan, hence recursive=True (the
            # original referenced a name that does not exist in this scope).
            raise DirectoryNotFound(directory_path=directory_path, recursive=True)
        report = []
        for dirpath, dirnames, filenames in os.walk(directory_path):
            for name in dirnames + filenames:
                report.append(self.scan_path(os.path.join(dirpath, name), algorithm, columns))
        return report

    def scan_path(self, item_path, algorithm, columns):
        """Return the report row for a single file or directory.

        :param item_path: path of the item to describe.
        :param algorithm: name of a ``hashlib`` constructor, e.g. ``'md5'``.
        :param columns: number of cells wanted; anything that is not an
            integer of at least 3 is treated as 3.  Values above 3 add the
            size, values above 4 also add the modification time.
        :returns: list of text cells.
        :raises ValueError: if ``algorithm`` is not available in ``hashlib``.
        :raises DirectoryNotFound: if the item cannot be read or stat'ed.
        """
        # Type check first: comparing a non-integer with 3 raises TypeError
        # on Python 3.
        if not isinstance(columns, int) or columns < 3:
            columns = 3
        try:
            # col 1 and col 2
            line = [self._to_text(item_path), self._to_text(algorithm)]
            # col 3
            if os.path.isdir(item_path):
                line.append(u'd')
            else:
                logger.info("Checking %s with algorithm %s", item_path, algorithm)
                line.append(self._to_text(self._file_digest(item_path, algorithm)))
            if columns > 3:
                # One stat call serves both optional columns.
                info = os.stat(item_path)
                # col 4 - Length
                line.append(self._to_text(info[ST_SIZE]))
                if columns > 4:
                    # col 5 - ModTime
                    line.append(self._to_text(info[ST_MTIME]))
            return line
        except (OSError, IOError):
            # IOError is a separate class on Python 2 and is what open()
            # raises there.
            raise DirectoryNotFound(item_path=item_path)

    def scan_directory(self, directory_path, algorithm, recursive=False, columns=3):
        """Scan ``directory_path`` and return its report rows.

        :param recursive: walk the whole tree when true, otherwise only the
            direct children are reported.
        :raises DirectoryNotFound: if ``directory_path`` does not exist.
        """
        if not os.path.exists(directory_path):
            raise DirectoryNotFound(directory_path=directory_path, recursive=recursive)
        if recursive:
            return self.scan_tree(directory_path, algorithm, columns)
        return self.scan_local(directory_path, algorithm, columns)
127