Module polib
[hide private]
[frames] | no frames]

Source Code for Module polib

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # License: MIT (see LICENSE file provided) 
   5  # vim600: fdm=marker tabstop=4 shiftwidth=4 expandtab ai 
   6   
   7  # Description {{{ 
   8  """ 
   9  **polib** allows you to manipulate, create, modify gettext files (pot, po 
  10  and mo files).  You can load existing files, iterate through their entries, 
  11  add, modify entries, comments or metadata, etc... or create new po files 
  12  from scratch. 
  13   
  14  **polib** provides a simple and pythonic API, exporting only three 
  15  convenience functions (*pofile*, *mofile* and *detect_encoding*), and the 
  16  four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating 
  17  new files/entries. 
  18   
  19  **Basic example**: 
  20   
  21  >>> import polib 
  22  >>> # load an existing po file 
  23  >>> po = polib.pofile('tests/test_utf8.po') 
  24  >>> for entry in po: 
  25  ...     # do something with entry... 
  26  ...     pass 
  27  >>> # add an entry 
  28  >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue') 
  29  >>> entry.occurences = [('welcome.py', '12'), ('anotherfile.py', '34')] 
  30  >>> po.append(entry) 
  31  >>> # to save our modified po file: 
  32  >>> # po.save() 
  33  >>> # or you may want to compile the po file 
  34  >>> # po.save_as_mofile('tests/test_utf8.mo') 
  35  """ 
  36  # }}} 
  37   
  38  __author__    = 'David JEAN LOUIS <izimobil@gmail.com>' 
  39  __version__   = '0.3.0' 
  40   
  41   
  42  # dependencies {{{ 
  43  try: 
  44      import struct 
  45      import textwrap 
  46      import warnings 
  47  except ImportError, exc: 
  48      raise ImportError('polib requires python 2.3 or later with the standard' \ 
  49          ' modules "struct", "textwrap" and "warnings" (details: %s)' % exc) 
  50  # }}} 
  51   
  52  __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', 
  53             'detect_encoding'] 
  54   
  55  # shortcuts for performance improvement {{{ 
  56  # yes, yes, this is quite ugly but *very* efficient 
  57  _dictget    = dict.get 
  58  _listappend = list.append 
  59  _listpop    = list.pop 
  60  _strjoin    = str.join 
  61  _strsplit   = str.split 
  62  _strstrip   = str.strip 
  63  _strreplace = str.replace 
  64  _textwrap   = textwrap.wrap 
  65  # }}} 
  66   
  67  encoding = 'utf-8' 
  68   
def pofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Parse the po/pot file *fpath* and hand back the resulting POFile
    instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext, default to 78 (optional)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_utf8.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    """
    # pofile {{{
    global encoding
    if autodetect_encoding == True:
        # side effect: the detected charset replaces the module-level
        # ``encoding`` used when entries are serialized
        encoding = detect_encoding(fpath)
    po = _POFileParser(fpath).parse()
    po.wrapwidth = wrapwidth
    return po
    # }}}


def mofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Parse the mo file *fpath* and hand back the resulting MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file, default to 78 (optional)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    """
    # mofile {{{
    global encoding
    if autodetect_encoding == True:
        # side effect: the detected charset replaces the module-level
        # ``encoding`` used when entries are serialized
        encoding = detect_encoding(fpath)
    mo = _MOFileParser(fpath).parse()
    mo.wrapwidth = wrapwidth
    return mo
    # }}}


def detect_encoding(fpath):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The file is scanned line by line for a ``Content-Type: ... charset=XXX``
    header; the first match wins and is returned stripped of surrounding
    whitespace.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the po or mo file to
        inspect.

    **Examples**:

    >>> print detect_encoding('tests/test_noencoding.po')
    utf-8
    >>> print detect_encoding('tests/test_utf8.po')
    UTF-8
    >>> print detect_encoding('tests/test_utf8.mo')
    UTF-8
    >>> print detect_encoding('tests/test_iso-8859-15.po')
    ISO_8859-15
    >>> print detect_encoding('tests/test_iso-8859-15.mo')
    ISO_8859-15
    """
    # detect_encoding {{{
    import re
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    f = open(fpath)
    try:
        for line in f:
            match = rx.search(line)
            if match:
                return match.group(1).strip()
    finally:
        # always release the handle, even if reading the file fails
        f.close()
    # nothing found: fall back on the module-level default
    return encoding
    # }}}


class _BaseFile(list):
    """
    Common parent class for POFile and MOFile classes.
    This class must **not** be instantiated directly.

    It is a list of entries carrying a file path, a wrap width, a header
    string and a metadata dict.
    """
    # class _BaseFile {{{

    def __init__(self, fpath=None, wrapwidth=78):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to po or mo file
          - *wrapwidth*: integer, the wrap width, only useful when -w option
            was passed to xgettext to generate the po file that was used to
            format the mo file, default to 78 (optional).
        """
        list.__init__(self)
        # path of the file this object was loaded from / is saved to
        self.fpath = fpath
        # the width at which lines should be wrapped
        self.wrapwidth = wrapwidth
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __str__(self):
        """String representation of the file (metadata entry first)."""
        entries = [self.metadata_as_entry()] + self
        return '\n'.join([entry.__str__(self.wrapwidth) for entry in entries])

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %d>' % (self.__class__.__name__, id(self))

    def metadata_as_entry(self):
        """
        Return the metadata as a POEntry whose msgid is the empty string
        and whose msgstr holds one "Name: value" line per metadata item.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            # leading '' makes the joined msgstr start with a newline
            strs = ['']
            for name, value in mdata:
                # handle multiline metadata: only the first line is prefixed
                # with the metadata name
                for i, chunk in enumerate(value.split('\n')):
                    if i == 0:
                        strs.append('%s: %s' % (name, chunk.strip()))
                    else:
                        strs.append('%s' % chunk.strip())
            e.msgstr = '\n'.join(strs)
        return e

    def save(self, fpath=None, repr_method='__str__'):
        """
        Write the output of *repr_method* to *fpath*, or to the path the
        object was created with when *fpath* is not given.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
          - *repr_method*: string, the method to use for output.

        Raises IOError when neither *fpath* nor ``self.fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        # the binary mo representation must not go through newline
        # translation, so it is written in binary mode
        if repr_method == 'to_binary':
            mode = 'wb'
        else:
            mode = 'w'
        fhandle = open(fpath, mode)
        try:
            fhandle.write(contents)
        finally:
            fhandle.close()

    def ordered_metadata(self):
        """
        Convenience method that return the metadata ordered. The return
        value is list of tuples (metadata name, metadata_value).

        The well-known headers come first, in a fixed order; every other
        key follows in sorted order.
        """
        # work on a copy so self.metadata is left untouched
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding'
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
            except KeyError:
                continue
            ordered_data.append((data, value))
        # the rest of the metadata won't be ordered there are no specs for this
        keys = list(metadata.keys())
        keys.sort()
        for data in keys:
            ordered_data.append((data, metadata[data]))
        return ordered_data

    def charset(self):
        """
        Return the file encoding charset.
        If the charset cannot be found in metadata, the function returns
        None.
        """
        try:
            charset = self.metadata['Content-Type'].split('charset=')[1]
            charset = charset.strip()
        except LookupError:
            # no Content-Type header, or no "charset=" part in it
            charset = None
        if charset == '':
            charset = None
        return charset

    def to_binary(self):
        """
        Return the mofile binary representation.

        Relies on ``self.translated_entries()``, which this base class does
        not define itself, and on the ``_decode`` method of the entries.
        """
        import struct
        # the keys are sorted in the .mo file; decorate with the original
        # position so equal msgids keep their relative order and entries
        # themselves are never compared
        decorated = []
        for position, entry in enumerate(self.translated_entries()):
            decorated.append((entry.msgid, position, entry))
        decorated.sort()
        entries = [item[2] for item in decorated]
        # add metadata entry
        mentry = self.metadata_as_entry()
        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
        entries = [mentry] + entries
        entries_len = len(entries)
        offsets = []
        ids = strs = ''
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.  Sizes
            # are taken from the encoded strings, since those are what is
            # actually written.
            msgid = e._decode(e.msgid)
            msgstr = e._decode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + '\0'
            strs += msgstr + '\0'
        # The header is 7 32-bit unsigned integers.
        keystart = 7*4+16*entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1+keystart]
            voffsets += [l2, o2+valuestart]
        offsets = koffsets + voffsets
        output = struct.pack("Iiiiiii",
                             0x950412de,        # Magic number
                             0,                 # Version
                             entries_len,       # # of entries
                             7*4,               # start of key index
                             7*4+entries_len*8, # start of value index
                             0, 0)              # size and offset of hash table
        # same native C int layout as array.array("i", ...) produces
        output += struct.pack('%di' % len(offsets), *offsets)
        output += ids
        output += strs
        return output
    # }}}


class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry2.occurences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.header = "Some Header"
    >>> print po
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    '''
    # class POFile {{{

    def __str__(self):
        """
        Return the string representation of the po file: the header lines
        rendered as comments, followed by the metadata and the entries.
        """
        ret = ''
        for header in self.header.split('\n'):
            # header lines starting with ',' or ':' are glued to the '#'
            # (giving '#,' / '#:'), every other line gets a '# ' prefix
            if header[:1] in [',', ':']:
                ret += '#%s\n' % header
            else:
                ret += '# %s\n' % header
        return ret + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that return the percentage of translated
        messages, as an integer (obsolete entries are not counted).

        When the file holds no non-obsolete entry there is nothing left to
        translate, so 100 is returned instead of dividing by zero.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        return int((100.00 / float(total)) * translated)

    def translated_entries(self):
        """
        Convenience method that return a list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        5
        """
        return [e for e in self if e.translated() and not e.obsolete]

    def untranslated_entries(self):
        """
        Convenience method that return a list of untranslated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        5
        """
        return [e for e in self if not e.translated() and not e.obsolete]

    def fuzzy_entries(self):
        """
        Convenience method that return the list of 'fuzzy' entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that return the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [e for e in self if e.obsolete]
    # }}}


class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> print mo
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    '''
    # class MOFile {{{

    def __init__(self, fpath=None, wrapwidth=78):
        """
        MOFile constructor.
        See _BaseFile.__init__ for the meaning of the arguments.
        """
        _BaseFile.__init__(self, fpath, wrapwidth)
        self.magic_number = None
        self.version = 0

    def translated_entries(self):
        """
        Return a new list holding every entry of the file.

        _BaseFile.to_binary() asks the file object for its translated
        entries; a mo file only stores translations, so all of them
        qualify.  A copy is returned so callers may sort it freely.
        """
        return list(self)

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')
    # }}}


class _BaseEntry:
    """
    Base class for POEntry or MOEntry objects.
    This class must *not* be instantiated directly.
    """
    # class _BaseEntry {{{

    def __init__(self, *args, **kwargs):
        """
        Base Entry constructor.

        **Keyword arguments**:
          - *msgid*: string, the entry msgid (default '')
          - *msgstr*: string, the entry msgstr (default '')
          - *msgid_plural*: string, the plural msgid (default '')
          - *msgstr_plural*: dict mapping a plural index to its msgstr
            (default: a new empty dict)
          - *obsolete*: boolean, whether the entry is obsolete (default
            False)

        For compatibility with older versions of polib, msgid and msgstr
        may still be given as the first and second positional arguments;
        doing so emits a DeprecationWarning.
        """
        # compat with older versions of polib
        if args:
            self.msgid = args[0]
            warnings.warn('passing msgid as non keyword argument is ' \
                'deprecated and will raise an error in version 0.4, pass ' \
                'it as a keyword argument instead.', DeprecationWarning, 2)
        else:
            self.msgid = kwargs.get('msgid', '')
        if len(args) > 1:
            self.msgstr = args[1]
            warnings.warn('passing msgstr as non keyword argument is ' \
                'deprecated and will raise an error in version 0.4, pass ' \
                'it as a keyword argument instead.', DeprecationWarning, 2)
        else:
            self.msgstr = kwargs.get('msgstr', '')
        self.msgid_plural = kwargs.get('msgid_plural', '')
        self.msgstr_plural = kwargs.get('msgstr_plural', {})
        self.obsolete = kwargs.get('obsolete', False)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %d>' % (self.__class__.__name__, id(self))

    def __str__(self, wrapwidth=78):
        """
        Common string representation of the POEntry and MOEntry
        objects.

        *wrapwidth* is accepted for interface compatibility with the
        subclasses; this method does not use it.
        """
        if self.obsolete:
            delflag = '#~ '
        else:
            delflag = ''
        # write the msgid
        ret = []
        ret += self._str_field("msgid", delflag, "", self.msgid)
        # write the msgid_plural if any
        if self.msgid_plural:
            ret += self._str_field("msgid_plural", delflag, "",
                                   self.msgid_plural)
        if self.msgstr_plural:
            msgstrs = self.msgstr_plural
        else:
            msgstrs = {0: self.msgstr}
        keys = list(msgstrs.keys())
        keys.sort()
        for index in keys:
            msgstr = msgstrs[index]
            plural_index = ''
            if self.msgstr_plural:
                plural_index = '[%s]' % index
            ret += self._str_field("msgstr", delflag, plural_index, msgstr)
        # trailing '' gives the blank line that separates entries
        ret.append('')
        return '\n'.join(ret)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """
        Return the list of output lines for one field: the first line
        carries the field name, each following line is a quoted
        continuation.  Every line is prefixed with *delflag*.
        """
        lines = self._decode(field).split('\n')
        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                lines.pop(0))]
        for mstr in lines:
            ret.append('%s"%s"' % (delflag, mstr))
        return ret

    def _decode(self, st):
        """
        Return *st* as a byte string: unicode objects are encoded with the
        module-level *encoding*, anything else is returned unchanged.
        """
        if isinstance(st, unicode):
            return st.encode(encoding)
        return st

    # }}}


class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print entry
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurences = [('src/spam.c', 32), ('src/eggs.c', 45)]
    >>> entry.tcomment = 'A plural translation'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print entry
    # A plural translation
    #: src/spam.c:32 src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """
    # class POEntry {{{

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        On top of the _BaseEntry keyword arguments it accepts:
          - *comment*: string, the generated comment ('#.' lines)
          - *tcomment*: string, the translator comment ('# ' lines)
          - *occurences*: list of (file path, line number) tuples
          - *flags*: list of flag strings
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurences = kwargs.get('occurences', [])
        self.flags = kwargs.get('flags', [])

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        *wrapwidth* is the column at which comment and occurence lines are
        wrapped; a value of 0 or less disables wrapping.
        """
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            ret.extend(self._comment_lines(self.comment, '#. ', wrapwidth))
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            ret.extend(self._comment_lines(self.tcomment, '# ', wrapwidth))
        # occurences (with text wrapping as xgettext does)
        if self.occurences:
            filelist = ['%s:%s' % (self._decode(fpath), lineno)
                        for fpath, lineno in self.occurences]
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # XXX textwrap split words that contain hyphen, this is not
                # what we want for filenames, so the dirty hack is to
                # temporarily replace hyphens with a char that a file cannot
                # contain, like "*"
                lines = _textwrap(filestr.replace('-', '*'), wrapwidth,
                                  initial_indent='#: ',
                                  subsequent_indent='#: ',
                                  break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: ' + filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self, wrapwidth))
        return '\n'.join(ret)

    def _comment_lines(self, text, prefix, wrapwidth):
        """
        Return *text* as a list of comment lines starting with *prefix*,
        wrapping each source line that does not fit in *wrapwidth*.
        """
        lines = []
        for chunk in self._decode(text).split('\n'):
            if wrapwidth > 0 and len(chunk) > wrapwidth - len(prefix):
                # textwrap returns a list of lines: add them one by one
                lines.extend(_textwrap(chunk, wrapwidth,
                                       initial_indent=prefix,
                                       subsequent_indent=prefix,
                                       break_long_words=False))
            else:
                lines.append(prefix + chunk)
        return lines

    def translated(self):
        """
        Return True if the entry has been translated or False.

        An entry counts as translated when it has a msgstr (or plural
        msgstrs) and is neither obsolete nor flagged fuzzy.
        """
        has_translation = self.msgstr != '' or bool(self.msgstr_plural)
        return bool(has_translation and not self.obsolete
                    and 'fuzzy' not in self.flags)
    # }}}


class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print entry
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """
    # class MOEntry {{{

    def __str__(self, wrapwidth=78):
        """
        Render the entry as text by delegating to the base class
        implementation, forwarding *wrapwidth* untouched.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
    # }}}


class _POFileParser:
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """
    # class _POFileParser {{{

    def __init__(self, fpath):
        """
        Constructor.

        **Keyword argument**:
          - *fpath*: string, path to the po file
        """
        # parsing only reads the file, so no write access is requested
        self.fhandle = open(fpath, 'r')
        self.instance = POFile(fpath=fpath)
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Signification of symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurence
        #     * FL: a flags line
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'],                                     'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_,                                             'GC')
        self.add('OC', all_,                                             'OC')
        self.add('FL', all_,                                             'FL')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'MI'],                               'MP')
        self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'],                         'MX')
        self.add('MC', ['MI', 'MP', 'MS', 'MX'],                         'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Return the POFile instance being built.  The file handle is closed
        before returning, whether parsing succeeded or not.
        """
        try:
            linenum = 0
            for line in self.fhandle:
                linenum += 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    # obsolete entry: drop the marker, remember the fact
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                symbol = self._symbol_for(line)
                if symbol is not None:
                    self.process(symbol, linenum)

            if self.current_entry:
                # since entries are added when another entry is found, we
                # must add the last entry here (only if there are lines)
                self.instance.append(self.current_entry)
            # before returning the instance, check if there's metadata and
            # if so extract it in a dict
            firstentry = self.instance[0]
            if firstentry.msgid == '': # metadata found
                # remove the entry
                firstentry = self.instance.pop(0)
                self.instance.metadata_is_fuzzy = firstentry.flags
                self._load_metadata(firstentry.msgstr)
        finally:
            # close opened file
            self.fhandle.close()
        return self.instance

    def _symbol_for(self, line):
        """Return the two chars symbol matching *line*, or None."""
        if line[:2] == '#:':
            # we are on a occurences line
            return 'OC'
        elif line[:7] == 'msgid "':
            # we are on a msgid
            return 'MI'
        elif line[:8] == 'msgstr "':
            # we are on a msgstr
            return 'MS'
        elif line[:1] == '"':
            # we are on a continuation line or some metadata
            return 'MC'
        elif line[:14] == 'msgid_plural "':
            # we are on a msgid plural
            return 'MP'
        elif line[:7] == 'msgstr[':
            # we are on a msgstr plural
            return 'MX'
        elif line[:3] == '#, ':
            # we are on a flags line
            return 'FL'
        elif line[:2] == '# ' or line == '#':
            # we are on a translator comment line
            return 'TC'
        elif line[:2] == '#.':
            # we are on a generated comment line
            return 'GC'
        return None

    def _load_metadata(self, msgstr):
        """Fill the instance metadata dict from the metadata *msgstr*."""
        key = None
        multiline_metadata = 0
        for msg in msgstr.split('\n'):
            if msg == '':
                continue
            if multiline_metadata:
                # continuation of the previous metadata value
                if key is not None:
                    self.instance.metadata[key] += '\n' + msg
            else:
                try:
                    key, val = msg.split(':', 1)
                    self.instance.metadata[key] = val
                except ValueError:
                    # not a "Name: value" line, ignore it
                    pass
            # a value goes on until a line ending with a literal "\n"
            multiline_metadata = not msg.endswith('\\n')

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action
        """
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        Raises IOError, carrying the line number, when no transition exists
        or when the handler fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            import sys
            err = sys.exc_info()[1]
            raise IOError('Syntax error in po file (line %s): %s' % \
                (linenum, err))

    # state handlers

    def _start_entry_if_needed(self):
        """
        Store the current entry and start a new one when the previous line
        ended an entry (i.e. the parser is in a msgstr state).
        """
        if self.current_state in ['MC', 'MS', 'MX']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return 1

    def handle_tc(self):
        """Handle a translator comment."""
        self._start_entry_if_needed()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._start_entry_if_needed()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """Handle a file:num occurence."""
        self._start_entry_if_needed()
        for occurence in self.current_token[3:].split():
            # split on the last colon only, the path itself may contain some
            sep = occurence.rfind(':')
            if sep < 0:
                raise ValueError('invalid occurence "%s"' % occurence)
            fil, line = occurence[:sep], occurence[sep+1:]
            self.current_entry.occurences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._start_entry_if_needed()
        self.current_entry.flags += self.current_token[3:].split(', ')
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._start_entry_if_needed()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = self.current_token[7:-1]
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = self.current_token[14:-1]
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = self.current_token[8:-1]
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        token = self.current_token
        # the index may have more than one digit: locate the closing
        # bracket and the opening quote instead of using fixed offsets
        close = token.index(']')
        index = token[7:close]
        value = token[token.index('"', close) + 1:-1]
        self.current_entry.msgstr_plural[index] = value
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        if self.current_state == 'MI':
            self.current_entry.msgid += '\n' + self.current_token[1:-1]
        elif self.current_state == 'MP':
            self.current_entry.msgid_plural += '\n' + self.current_token[1:-1]
        elif self.current_state == 'MS':
            self.current_entry.msgstr += '\n' + self.current_token[1:-1]
        elif self.current_state == 'MX':
            msgstr = self.current_entry.msgstr_plural[self.msgstr_index] +\
                '\n' + self.current_token[1:-1]
            self.current_entry.msgstr_plural[self.msgstr_index] = msgstr
        # don't change the current state
        return False
    # }}}


class _MOFileParser:
    """
    A class to parse binary mo files.
    """
    # class _MOFileParser {{{

    def __init__(self, fpath):
        """
        _MOFileParser constructor.

        **Keyword argument**:
          - *fpath*: string, path to the mo file
        """
        # parsing only reads the file, so no write access is requested
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(fpath)
        # struct byte order prefix, set by parse_magicnumber()
        self._byteorder = '<'

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        The magic number also tells the byte order the file was written
        with; it is remembered for reading the rest of the file.
        """
        magic_number = self._readbinary(fmt='4s')
        # magic number must be 0xde120495 or 0x950412de
        value = struct.unpack('<I', magic_number)[0]
        if value == 0x950412de:
            self._byteorder = '<'
        elif value == 0xde120495:
            self._byteorder = '>'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Return the MOFile instance.  The file handle is closed before
        returning, whether parsing succeeded or not.
        """
        try:
            self.parse_magicnumber()
            # header fields are fixed size 32-bit integers stored in the
            # byte order announced by the magic number
            uint = self._byteorder + 'I'
            pair = self._byteorder + 'II'
            # parse the version number
            self.instance.version = self._readbinary(uint)
            # parse the number of strings
            numofstrings = self._readbinary(uint)
            # offset of the original strings table
            msgids_table_offset = self._readbinary(uint)
            # offset of the translation strings table
            msgstrs_table_offset = self._readbinary(uint)
            # move to msgid table and read length and offset of msgids
            self.fhandle.seek(msgids_table_offset)
            msgids_index = []
            for i in range(numofstrings):
                msgids_index.append(self._readbinary(pair))
            # move to msgstr table and read length and offset of msgstrs
            self.fhandle.seek(msgstrs_table_offset)
            msgstrs_index = []
            for i in range(numofstrings):
                msgstrs_index.append(self._readbinary(pair))
            # build entries
            for i in range(numofstrings):
                self.fhandle.seek(msgids_index[i][1])
                msgid = self.fhandle.read(msgids_index[i][0])
                self.fhandle.seek(msgstrs_index[i][1])
                msgstr = self.fhandle.read(msgstrs_index[i][0])
                if i == 0 and not msgid: # metadata
                    metadata = {}
                    for line in msgstr.split('\n'):
                        tokens = line.split(':', 1)
                        if tokens[0] != '':
                            try:
                                metadata[tokens[0]] = tokens[1].strip()
                            except IndexError:
                                metadata[tokens[0]] = ''
                    self.instance.metadata = metadata
                    continue
                entry = MOEntry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            # close opened file
            self.fhandle.close()
        return self.instance

    def _readbinary(self, fmt='c'):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        numbytes = struct.calcsize(fmt)
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1068 # }}} 1069 1070 1071 if __name__ == '__main__': 1072 """ 1073 **Main function**:: 1074 - to **test** the module just run: *python polib.py [-v]* 1075 - to **profile** the module: *python polib.py -p <some_pofile.po>* 1076 """ 1077 # main function {{{ 1078 import sys 1079 if len(sys.argv) > 2 and sys.argv[1] == '-p':
1080 - def test(f):
1081 p = pofile(f) 1082 s = str(p)
1083 import profile 1084 profile.run('test("'+sys.argv[2]+'")') 1085 else: 1086 import doctest 1087 doctest.testmod() 1088 # }}} 1089