1
2
3
4
5
6
7
8 """
9 **polib** allows you to manipulate, create, modify gettext files (pot, po
and mo files). You can load existing files, iterate through its entries,
11 add, modify entries, comments or metadata, etc... or create new po files
12 from scratch.
13
14 **polib** provides a simple and pythonic API, exporting only three
15 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
16 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
17 new files/entries.
18
19 **Basic example**:
20
21 >>> import polib
22 >>> # load an existing po file
23 >>> po = polib.pofile('tests/test_utf8.po')
24 >>> for entry in po:
25 ... # do something with entry...
26 ... pass
27 >>> # add an entry
28 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
29 >>> entry.occurences = [('welcome.py', '12'), ('anotherfile.py', '34')]
30 >>> po.append(entry)
31 >>> # to save our modified po file:
32 >>> # po.save()
33 >>> # or you may want to compile the po file
34 >>> # po.save_as_mofile('tests/test_utf8.mo')
35 """
36
37
38 __author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
39 __version__ = '0.3.0'
40
41
42
43 try:
44 import struct
45 import textwrap
46 import warnings
47 except ImportError, exc:
48 raise ImportError('polib requires python 2.3 or later with the standard' \
49 ' modules "struct", "textwrap" and "warnings" (details: %s)' % exc)
50
51
52 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
53 'detect_encoding']
54
55
56
# Shortcuts to frequently used builtin methods, bound once at import time
# (presumably to avoid repeated attribute lookups in the parsing and
# formatting loops -- TODO confirm this is still worth keeping).
_dictget = dict.get
_listappend = list.append
_listpop = list.pop
_strjoin = str.join
_strsplit = str.split
_strstrip = str.strip
_strreplace = str.replace
_textwrap = textwrap.wrap


# Module-wide encoding: starts as 'utf-8'; pofile() and mofile() rebind it to
# the detected charset when their autodetect_encoding argument is True.
encoding = 'utf-8'
68
def pofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Parse the po/pot file *fpath* and hand back the resulting POFile.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer stored on the returned instance as its wrap
        width, only useful when -w option was passed to xgettext,
        default to 78 (optional)
      - *autodetect_encoding*: boolean, when True the module-level
        *encoding* is rebound to the value returned by detect_encoding()
        for *fpath* before the file is parsed

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_utf8.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    """
    global encoding
    if autodetect_encoding == True:
        encoding = detect_encoding(fpath)
    po = _POFileParser(fpath).parse()
    po.wrapwidth = wrapwidth
    return po
96
97
98
def mofile(fpath, wrapwidth=78, autodetect_encoding=True):
    """
    Parse the mo file *fpath* and hand back the resulting MOFile.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer stored on the returned instance as its wrap
        width, only useful when -w option was passed to xgettext to
        generate the po file that was used to format the mo file,
        default to 78 (optional)
      - *autodetect_encoding*: boolean, when True the module-level
        *encoding* is rebound to the value returned by detect_encoding()
        for *fpath* before the file is parsed

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    """
    global encoding
    if autodetect_encoding == True:
        encoding = detect_encoding(fpath)
    mo = _MOFileParser(fpath).parse()
    mo.wrapwidth = wrapwidth
    return mo
127
128
129
131 """
132 Try to detect the encoding used by the file *fpath*. The function will
133 return polib default *encoding* if it's unable to detect it.
134
135 **Keyword argument**:
136 - *fpath*: string, full or relative path to the mo file to parse.
137
138 **Examples**:
139
140 >>> print detect_encoding('tests/test_noencoding.po')
141 utf-8
142 >>> print detect_encoding('tests/test_utf8.po')
143 UTF-8
144 >>> print detect_encoding('tests/test_utf8.mo')
145 UTF-8
146 >>> print detect_encoding('tests/test_iso-8859-15.po')
147 ISO_8859-15
148 >>> print detect_encoding('tests/test_iso-8859-15.mo')
149 ISO_8859-15
150 """
151
152 import re
153 global encoding
154 e = encoding
155 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
156 f = open(fpath)
157 for l in f:
158 match = rx.search(l)
159 if match:
160 e = _strstrip(match.group(1))
161 break
162 f.close()
163 return e
164
165
166
168 """
169 Common parent class for POFile and MOFile classes.
170 This class must **not** be instanciated directly.
171 """
172
173
174
175 - def __init__(self, fpath=None, wrapwidth=78):
176 """
177 Constructor.
178
179 **Keyword arguments**:
180 - *fpath*: string, path to po or mo file
181 - *wrapwidth*: integer, the wrap width, only useful when -w option
182 was passed to xgettext to generate the po file that was used to
183 format the mo file, default to 78 (optional).
184 """
185 list.__init__(self)
186
187 self.fpath = fpath
188
189 self.wrapwidth = wrapwidth
190
191 self.header = ''
192
193 self.metadata = {}
194 self.metadata_is_fuzzy = 0
195
197 """String representation of the file."""
198 ret = []
199 entries = [self.metadata_as_entry()] + self
200 for entry in entries:
201 _listappend(ret, entry.__str__(self.wrapwidth))
202 return _strjoin('\n', ret)
203
205 """Return the official string representation of the object."""
206 return '<%s instance at %d>' % (self.__class__.__name__, id(self))
207
209 """Return the metadata as an entry"""
210 e = POEntry(msgid='')
211 mdata = self.ordered_metadata()
212 if mdata:
213 strs = ['']
214 for name, value in mdata:
215 values = _strsplit(value, '\n')
216 for i, value in enumerate(values):
217 if i == 0:
218 _listappend(strs, '%s: %s' % (name, _strstrip(value)))
219 else:
220 _listappend(strs, '%s' % _strstrip(value))
221 e.msgstr = _strjoin('\n', strs)
222 return e
223
224 - def save(self, fpath=None, repr_method='__str__'):
225 """
226 Save the po file to file *fpath* if no file handle exists for
227 the object. If there's already an open file and no fpath is
228 provided, then the existing file is rewritten with the modified
229 data.
230
231 **Keyword arguments**:
232 - *fpath*: string, full or relative path to the file.
233 - *repr_method*: string, the method to use for output.
234 """
235 if self.fpath is None and fpath is None:
236 raise IOError('You must provide a file path to save() method')
237 contents = getattr(self, repr_method)()
238 if fpath is None:
239 fpath = self.fpath
240 fhandle = open(fpath, 'w')
241 fhandle.write(contents)
242 fhandle.close()
243
276
278 """
279 Return the file encoding charset.
280 If the charset cannot be found in metadata, the function returns
281 None.
282 """
283 try:
284 charset = _strsplit(self.metadata['Content-Type'], 'charset=')[1]
285 charset = _strstrip(charset)
286 except LookupError:
287 charset = None
288 if charset == '':
289 charset = None
290 return charset
291
293 """Return the mofile binary representation."""
294 import struct
295 import array
296 output = ''
297 offsets = []
298 ids = strs = ''
299 entries = self.translated_entries()
300
301 def cmp(_self, other):
302 if _self.msgid > other.msgid:
303 return 1
304 elif _self.msgid < other.msgid:
305 return -1
306 else:
307 return 0
308 entries.sort(cmp)
309
310 mentry = self.metadata_as_entry()
311 mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
312 entries = [mentry] + entries
313 entries_len = len(entries)
314 for e in entries:
315
316
317 offsets.append((len(ids), len(e.msgid), len(strs), len(e.msgstr)))
318 ids += e._decode(e.msgid) + '\0'
319 strs += e._decode(e.msgstr) + '\0'
320
321 keystart = 7*4+16*entries_len
322
323 valuestart = keystart + len(ids)
324 koffsets = []
325 voffsets = []
326
327
328 for o1, l1, o2, l2 in offsets:
329 koffsets += [l1, o1+keystart]
330 voffsets += [l2, o2+valuestart]
331 offsets = koffsets + voffsets
332 output = struct.pack("Iiiiiii",
333 0x950412de,
334 0,
335 entries_len,
336 7*4,
337 7*4+entries_len*8,
338 0, 0)
339 output += array.array("i", offsets).tostring()
340 output += ids
341 output += strs
342 return output
343
344
345
347 '''
348 Po (or Pot) file reader/writer.
349 POFile objects inherit the list objects methods.
350
351 **Example**:
352
353 >>> po = POFile()
354 >>> entry1 = POEntry(
355 ... msgid="Some english text",
356 ... msgstr="Un texte en anglais"
357 ... )
358 >>> entry1.occurences = [('testfile', 12),('another_file', 1)]
359 >>> entry1.comment = "Some useful comment"
360 >>> entry2 = POEntry(
361 ... msgid="I need my dirty cheese",
362 ... msgstr="Je veux mon sale fromage"
363 ... )
364 >>> entry2.occurences = [('testfile', 15),('another_file', 5)]
365 >>> entry2.comment = "Another useful comment"
366 >>> po.append(entry1)
367 >>> po.append(entry2)
368 >>> po.header = "Some Header"
369 >>> print po
370 # Some Header
371 msgid ""
372 msgstr ""
373 <BLANKLINE>
374 #. Some useful comment
375 #: testfile:12 another_file:1
376 msgid "Some english text"
377 msgstr "Un texte en anglais"
378 <BLANKLINE>
379 #. Another useful comment
380 #: testfile:15 another_file:5
381 msgid "I need my dirty cheese"
382 msgstr "Je veux mon sale fromage"
383 <BLANKLINE>
384 '''
385
386
388 """Return the string representation of the po file"""
389 ret, headers = '', _strsplit(self.header, '\n')
390 for header in headers:
391 if header[:1] in [',', ':']:
392 ret += '#%s\n' % header
393 else:
394 ret += '# %s\n' % header
395 return ret + _BaseFile.__str__(self)
396
398 """
399 Save the binary representation of the file to *fpath*.
400
401 **Keyword arguments**:
402 - *fpath*: string, full or relative path to the file.
403 """
404 _BaseFile.save(self, fpath, 'to_binary')
405
407 """
408 Convenience method that return the percentage of translated
409 messages.
410
411 **Example**:
412
413 >>> import polib
414 >>> po = polib.pofile('tests/test_pofile_helpers.po')
415 >>> po.percent_translated()
416 50
417 """
418 total = len([e for e in self if not e.obsolete])
419 translated = len(self.translated_entries())
420 return int((100.00 / float(total)) * translated)
421
423 """
424 Convenience method that return a list of translated entries.
425
426 **Example**:
427
428 >>> import polib
429 >>> po = polib.pofile('tests/test_pofile_helpers.po')
430 >>> len(po.translated_entries())
431 5
432 """
433 return [e for e in self if e.translated() and not e.obsolete]
434
436 """
437 Convenience method that return a list of untranslated entries.
438
439 **Example**:
440
441 >>> import polib
442 >>> po = polib.pofile('tests/test_pofile_helpers.po')
443 >>> len(po.untranslated_entries())
444 5
445 """
446 return [e for e in self if not e.translated() and not e.obsolete]
447
449 """
450 Convenience method that return the list of 'fuzzy' entries.
451
452 **Example**:
453
454 >>> import polib
455 >>> po = polib.pofile('tests/test_pofile_helpers.po')
456 >>> len(po.fuzzy_entries())
457 2
458 """
459 return [e for e in self if 'fuzzy' in e.flags]
460
462 """
463 Convenience method that return the list of obsolete entries.
464
465 **Example**:
466
467 >>> import polib
468 >>> po = polib.pofile('tests/test_pofile_helpers.po')
469 >>> len(po.obsolete_entries())
470 4
471 """
472 return [e for e in self if e.obsolete]
473
474
475
477 '''
478 Mo file reader/writer.
479 MOFile objects inherit the list objects methods.
480
481 **Example**:
482
483 >>> mo = MOFile()
484 >>> entry1 = POEntry(
485 ... msgid="Some english text",
486 ... msgstr="Un texte en anglais"
487 ... )
488 >>> entry2 = POEntry(
489 ... msgid="I need my dirty cheese",
490 ... msgstr="Je veux mon sale fromage"
491 ... )
492 >>> mo.append(entry1)
493 >>> mo.append(entry2)
494 >>> print mo
495 msgid ""
496 msgstr ""
497 <BLANKLINE>
498 msgid "Some english text"
499 msgstr "Un texte en anglais"
500 <BLANKLINE>
501 msgid "I need my dirty cheese"
502 msgstr "Je veux mon sale fromage"
503 <BLANKLINE>
504 '''
505
506
507 - def __init__(self, fpath=None, wrapwidth=78):
508 """
509 MOFile constructor.
510 See _BaseFile.__construct.
511 """
512 _BaseFile.__init__(self, fpath, wrapwidth)
513 self.magic_number = None
514 self.version = 0
515
517 """
518 Save the string representation of the file to *fpath*.
519
520 **Keyword argument**:
521 - *fpath*: string, full or relative path to the file.
522 """
523 _BaseFile.save(self, fpath)
524
525 - def save(self, fpath):
526 """
527 Save the binary representation of the file to *fpath*.
528
529 **Keyword argument**:
530 - *fpath*: string, full or relative path to the file.
531 """
532 _BaseFile.save(self, fpath, 'to_binary')
533
534
535
537 """
538 Base class for POEntry or MOEntry objects.
539 This class must *not* be instanciated directly.
540 """
541
542
543 - def __init__(self, *args, **kwargs):
544 """Base Entry constructor."""
545
546 try:
547 self.msgid = args[0]
548 warnings.warn('passing msgid as non keyword argument is ' \
549 'deprecated and will raise an error in version 0.4, pass ' \
550 'it as a keyword argument instead.', DeprecationWarning, 2)
551 except:
552 self.msgid = _dictget(kwargs, 'msgid', '')
553 try:
554 self.msgstr = args[1]
555 warnings.warn('passing msgstr as non keyword argument is ' \
556 'deprecated and will raise an error in version 0.4, pass ' \
557 'it as a keyword argument instead.', DeprecationWarning, 2)
558 except:
559 self.msgstr = _dictget(kwargs, 'msgstr', '')
560 self.msgid_plural = _dictget(kwargs, 'msgid_plural', '')
561 self.msgstr_plural = _dictget(kwargs, 'msgstr_plural', {})
562 self.obsolete = _dictget(kwargs, 'obsolete', False)
563
564 - def __repr__(self):
565 """Return the official string representation of the object."""
566 return '<%s instance at %d>' % (self.__class__.__name__, id(self))
567
568 - def __str__(self, wrapwidth=78):
569 """
570 Common string representation of the POEntry and MOEntry
571 objects.
572 """
573 if self.obsolete:
574 delflag = '#~ '
575 else:
576 delflag = ''
577
578 ret = []
579 ret += self._str_field("msgid", delflag, "", self.msgid)
580
581 if self.msgid_plural:
582 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
583 if self.msgstr_plural:
584 msgstrs = self.msgstr_plural
585 else:
586 msgstrs = {0:self.msgstr}
587 keys = msgstrs.keys()
588 keys.sort()
589 for index in keys:
590 msgstr = msgstrs[index]
591 plural_index = ''
592 if self.msgstr_plural:
593 plural_index = '[%s]' % index
594 ret += self._str_field("msgstr", delflag, plural_index, msgstr)
595 _listappend(ret, '')
596 return _strjoin('\n', ret)
597
598 - def _str_field(self, fieldname, delflag, plural_index, field):
599 lines = _strsplit(self._decode(field), '\n')
600 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,\
601 _listpop(lines, 0))]
602 if lines:
603 for mstr in lines:
604 _listappend(ret, '%s"%s"' % (delflag, mstr))
605 return ret
606
607 - def _decode(self, st):
608 if isinstance(st, unicode):
609 return st.encode(encoding)
610 return st
611
612
613
614
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print entry
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurences = [('src/spam.c', 32), ('src/eggs.c', 45)]
    >>> entry.tcomment = 'A plural translation'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print entry
    # A plural translation
    #: src/spam.c:32 src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        Accepts everything _BaseEntry.__init__ accepts, plus the keyword
        arguments *comment* (default ''), *tcomment* (default ''),
        *occurences* (list of (path, line) pairs, default []) and *flags*
        (list of strings, default []).
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = _dictget(kwargs, 'comment', '')
        self.tcomment = _dictget(kwargs, 'tcomment', '')
        self.occurences = _dictget(kwargs, 'occurences', [])
        self.flags = _dictget(kwargs, 'flags', [])

    def _comment_lines(self, text, prefix, wrapwidth):
        """Return *text* as a list of lines that all start with *prefix*."""
        ret = []
        for line in _strsplit(self._decode(text), '\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                # textwrap returns a list: extend, so that ret stays a flat
                # list of strings that can be joined at the end.
                ret += _textwrap(line, wrapwidth,
                                 initial_indent=prefix,
                                 subsequent_indent=prefix,
                                 break_long_words=False)
            else:
                _listappend(ret, prefix + line)
        return ret

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry.

        The output is made of, in this order: the '#. ' comment lines, the
        '# ' translator comment lines, the '#: ' occurrences, the '#, ' flags
        and finally the msgid/msgstr part rendered by _BaseEntry.__str__.
        Comment and occurrence lines are wrapped at *wrapwidth* columns when
        *wrapwidth* is greater than 0.
        """
        ret = []
        if self.comment != '':
            ret += self._comment_lines(self.comment, '#. ', wrapwidth)
        if self.tcomment != '':
            ret += self._comment_lines(self.tcomment, '# ', wrapwidth)

        if self.occurences:
            filelist = []
            for fpath, lineno in self.occurences:
                _listappend(filelist, '%s:%s' % (self._decode(fpath), lineno))
            filestr = _strjoin(' ', filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # textwrap may break a word after a hyphen, which must not
                # happen inside a file name: wrap a copy in which hyphens
                # are replaced by '*' and put them back afterwards.
                # NOTE: a literal '*' in a file name is turned into '-'.
                protected = _strreplace(filestr, '-', '*')
                lines = _textwrap(protected, wrapwidth,
                                  initial_indent='#: ',
                                  subsequent_indent='#: ',
                                  break_long_words=False)
                for line in lines:
                    _listappend(ret, _strreplace(line, '*', '-'))
            else:
                _listappend(ret, '#: '+filestr)

        if self.flags:
            _listappend(ret, '#, %s' % _strjoin(', ', self.flags))
        _listappend(ret, _BaseEntry.__str__(self, wrapwidth))
        return _strjoin('\n', ret)

    def translated(self):
        """
        Return True if the entry has been translated or False.

        An entry counts as translated when it has a non empty msgstr or
        msgstr_plural and is neither obsolete nor flagged 'fuzzy'.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        return bool(self.msgstr != '' or self.msgstr_plural)
719
720
721
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print entry
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, exactly as rendered
        by _BaseEntry.__str__ for the given *wrapwidth*.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
743
744
745
747 """
748 A finite state machine to parse efficiently and correctly po
749 file format.
750 """
751
753 """
754 Constructor.
755
756 **Keyword argument**:
757 - *fpath*: string, path to the po file
758 """
759 self.fhandle = open(fpath, 'r+')
760 self.instance = POFile(fpath=fpath)
761 self.transitions = {}
762 self.current_entry = POEntry()
763 self.current_state = 'ST'
764 self.current_token = None
765
766 self.msgstr_index = 0
767 self.entry_obsolete = 0
768
769
770
771
772
773
774
775
776
777
778
779
780
781 all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']
782
783 self.add('TC', ['ST', 'HE'], 'HE')
784 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
785 self.add('GC', all_, 'GC')
786 self.add('OC', all_, 'OC')
787 self.add('FL', all_, 'FL')
788 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
789 self.add('MP', ['TC', 'GC', 'MI'], 'MP')
790 self.add('MS', ['MI', 'MP', 'TC'], 'MS')
791 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
792 self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')
793
795 """
796 Run the state machine, parse the file line by line and call process()
797 with the current matched symbol.
798 """
799 i, lastlen = 1, 0
800 for line in self.fhandle:
801 line = _strstrip(line)
802 if line == '':
803 i = i+1
804 continue
805 if line[:3] == '#~ ':
806 line = line[3:]
807 self.entry_obsolete = 1
808 else:
809 self.entry_obsolete = 0
810 self.current_token = line
811 if line[:2] == '#:':
812
813 self.process('OC', i)
814 elif line[:7] == 'msgid "':
815
816 self.process('MI', i)
817 elif line[:8] == 'msgstr "':
818
819 self.process('MS', i)
820 elif line[:1] == '"':
821
822 self.process('MC', i)
823 elif line[:14] == 'msgid_plural "':
824
825 self.process('MP', i)
826 elif line[:7] == 'msgstr[':
827
828 self.process('MX', i)
829 elif line[:3] == '#, ':
830
831 self.process('FL', i)
832 elif line[:2] == '# ' or line == '#':
833 if line == '#': line = line + ' '
834
835 self.process('TC', i)
836 elif line[:2] == '#.':
837
838 self.process('GC', i)
839 i = i+1
840
841 if self.current_entry:
842
843
844 _listappend(self.instance, self.current_entry)
845
846
847 firstentry = self.instance[0]
848 if firstentry.msgid == '':
849
850 firstentry = _listpop(self.instance, 0)
851 self.instance.metadata_is_fuzzy = firstentry.flags
852 multiline_metadata = 0
853 for msg in _strsplit(firstentry.msgstr, '\n'):
854 if msg != '':
855 if multiline_metadata:
856 self.instance.metadata[key] += '\n' + msg
857 else:
858 try:
859 key, val = _strsplit(msg, ':', 1)
860 self.instance.metadata[key] = val
861 except:
862 pass
863 multiline_metadata = not msg.endswith('\\n')
864
865 self.fhandle.close()
866 return self.instance
867
868 - def add(self, symbol, states, next_state):
869 """
870 Add a transition to the state machine.
871 Keywords arguments:
872
873 symbol -- string, the matched token (two chars symbol)
874 states -- list, a list of states (two chars symbols)
875 next_state -- the next state the fsm will have after the action
876 """
877 for state in states:
878 action = getattr(self, 'handle_%s' % next_state.lower())
879 self.transitions[(symbol, state)] = (action, next_state)
880
881 - def process(self, symbol, linenum):
882 """
883 Process the transition corresponding to the current state and the
884 symbol provided.
885
886 Keywords arguments:
887 symbol -- string, the matched token (two chars symbol)
888 linenum -- integer, the current line number of the parsed file
889 """
890 try:
891 (action, state) = self.transitions[(symbol, self.current_state)]
892 if action():
893 self.current_state = state
894 except Exception, e:
895 raise IOError('Syntax error in po file (line %s): %s' % \
896 (linenum, exc))
897
898
899
901 """Handle a header comment."""
902 if self.instance.header != '':
903 self.instance.header += '\n'
904 self.instance.header += self.current_token[2:]
905 return 1
906
908 """Handle a translator comment."""
909 if self.current_state in ['MC', 'MS', 'MX']:
910 _listappend(self.instance, self.current_entry)
911 self.current_entry = POEntry()
912 if self.current_entry.tcomment != '':
913 self.current_entry.tcomment += '\n'
914 self.current_entry.tcomment += self.current_token[2:]
915 return True
916
918 """Handle a generated comment."""
919 if self.current_state in ['MC', 'MS', 'MX']:
920 _listappend(self.instance, self.current_entry)
921 self.current_entry = POEntry()
922 if self.current_entry.comment != '':
923 self.current_entry.comment += '\n'
924 self.current_entry.comment += self.current_token[3:]
925 return True
926
928 """Handle a file:num occurence."""
929 if self.current_state in ['MC', 'MS', 'MX']:
930 _listappend(self.instance, self.current_entry)
931 self.current_entry = POEntry()
932 occurences = _strsplit(self.current_token[3:])
933 for occurence in occurences:
934 if occurence != '':
935 fil, line = _strsplit(occurence, ':')
936 _listappend(self.current_entry.occurences, (fil, line))
937 return True
938
940 """Handle a flags line."""
941 if self.current_state in ['MC', 'MS', 'MX']:
942 _listappend(self.instance, self.current_entry)
943 self.current_entry = POEntry()
944 self.current_entry.flags += _strsplit(self.current_token[3:], ', ')
945 return True
946
948 """Handle a msgid."""
949 if self.current_state in ['MC', 'MS', 'MX']:
950 _listappend(self.instance, self.current_entry)
951 self.current_entry = POEntry()
952 self.current_entry.obsolete = self.entry_obsolete
953 self.current_entry.msgid = self.current_token[7:-1]
954 return True
955
957 """Handle a msgid plural."""
958 self.current_entry.msgid_plural = self.current_token[14:-1]
959 return True
960
962 """Handle a msgstr."""
963 self.current_entry.msgstr = self.current_token[8:-1]
964 return True
965
967 """Handle a msgstr plural."""
968 index, value = self.current_token[7], self.current_token[11:-1]
969 self.current_entry.msgstr_plural[index] = value
970 self.msgstr_index = index
971 return True
972
974 """Handle a msgid or msgstr continuation line."""
975 if self.current_state == 'MI':
976 self.current_entry.msgid += '\n' + self.current_token[1:-1]
977 elif self.current_state == 'MP':
978 self.current_entry.msgid_plural += '\n' + self.current_token[1:-1]
979 elif self.current_state == 'MS':
980 self.current_entry.msgstr += '\n' + self.current_token[1:-1]
981 elif self.current_state == 'MX':
982 msgstr = self.current_entry.msgstr_plural[self.msgstr_index] +\
983 '\n' + self.current_token[1:-1]
984 self.current_entry.msgstr_plural[self.msgstr_index] = msgstr
985
986 return False
987
988
989
991 """
992 A class to parse binary mo files.
993 """
994
996 """_MOFileParser constructor."""
997 self.fhandle = open(fpath, 'r+b')
998 self.instance = MOFile(fpath)
999
1001 """
1002 Parse the magic number and raise an exception if not valid.
1003 """
1004 magic_number = self._readbinary(fmt='4s')
1005
1006 if magic_number not in ['\xde\x12\x04\x95', '\x95\x04\x12\xde']:
1007 raise IOError('Invalid mo file, magic number is incorrect !')
1008 self.instance.magic_number = magic_number
1009
1011 """
1012 Build the instance with the file handle provided in the
1013 constructor.
1014 """
1015 self.parse_magicnumber()
1016
1017 self.instance.version = self._readbinary('L')
1018
1019 numofstrings = self._readbinary('L')
1020
1021 msgids_hash_offset = self._readbinary('L')
1022
1023 msgstrs_hash_offset = self._readbinary('P')
1024
1025 self.fhandle.seek(msgids_hash_offset)
1026 msgids_index = []
1027 for i in range(numofstrings):
1028 _listappend(msgids_index, self._readbinary('LL'))
1029
1030 self.fhandle.seek(msgstrs_hash_offset)
1031 msgstrs_index = []
1032 for i in range(numofstrings):
1033 _listappend(msgstrs_index, self._readbinary('LL'))
1034
1035 for i in range(numofstrings):
1036 self.fhandle.seek(msgids_index[i][1])
1037 msgid = self.fhandle.read(msgids_index[i][0])
1038 self.fhandle.seek(msgstrs_index[i][1])
1039 msgstr = self.fhandle.read(msgstrs_index[i][0])
1040 if i == 0:
1041 raw_metadata, metadata = _strsplit(msgstr, '\n'), {}
1042 for line in raw_metadata:
1043 tokens = _strsplit(line, ':', 1)
1044 if tokens[0] != '':
1045 try:
1046 metadata[tokens[0]] = _strstrip(tokens[1])
1047 except IndexError:
1048 metadata[tokens[0]] = ''
1049 self.instance.metadata = metadata
1050 continue
1051 entry = MOEntry(msgid=msgid, msgstr=msgstr)
1052 _listappend(self.instance, entry)
1053
1054 self.fhandle.close()
1055 return self.instance
1056
1058 """
1059 Private method that unpack n bytes of data using format <fmt>.
1060 It returns a tuple or a mixed value if the tuple length is 1.
1061 """
1062 numbytes = struct.calcsize(fmt)
1063 bytes = self.fhandle.read(numbytes)
1064 tup = struct.unpack(fmt, bytes)
1065 if len(tup) == 1:
1066 return tup[0]
1067 return tup
1068
1069
1070
if __name__ == '__main__':
    # **Main function**::
    #   - to **test** the module just run: *python polib.py [-v]*
    #   - to **profile** the module: *python polib.py -p <some_pofile.po>*
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            """Parse the po file *f* and render it back to a string."""
            p = pofile(f)
            s = str(p)
        import profile
        # %r yields a properly quoted literal, so a path containing quotes
        # or backslashes cannot break the profiled statement.
        profile.run('test(%r)' % sys.argv[2])
    else:
        import doctest
        doctest.testmod()
1088
1089