Module polib
[hide private]
[frames] | no frames]

Source Code for Module polib

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # License: MIT (see LICENSE file provided) 
   5  # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: 
   6   
   7  """ 
   8  **polib** allows you to manipulate, create, modify gettext files (pot, po 
    9  and mo files).  You can load existing files, iterate through its entries, 
  10  add, modify entries, comments or metadata, etc... or create new po files 
  11  from scratch. 
  12   
  13  **polib** provides a simple and pythonic API, exporting only three 
  14  convenience functions (*pofile*, *mofile* and *detect_encoding*), and the 
  15  four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating 
  16  new files/entries. 
  17   
  18  **Basic example**: 
  19   
  20  >>> import polib 
  21  >>> # load an existing po file 
  22  >>> po = polib.pofile('tests/test_utf8.po') 
  23  >>> for entry in po: 
  24  ...     # do something with entry... 
  25  ...     pass 
  26  >>> # add an entry 
  27  >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue') 
  28  >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')] 
  29  >>> po.append(entry) 
  30  >>> # to save our modified po file: 
  31  >>> # po.save() 
  32  >>> # or you may want to compile the po file 
  33  >>> # po.save_as_mofile('tests/test_utf8.mo') 
  34  """ 
  35   
  36  __author__    = 'David JEAN LOUIS <izimobil@gmail.com>' 
  37  __version__   = '0.3.1' 
  38  __all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', 
  39                   'detect_encoding', 'escape', 'unescape'] 
  40   
  41  import struct 
  42  import textwrap 
  43  import warnings 
  44   
  45  default_encoding = 'utf-8' 
  46   
  47  # shortcuts for performance improvement {{{ 
  48   
  49  # this is quite ugly but *very* efficient 
  50  _dictget    = dict.get 
  51  _listappend = list.append 
  52  _listpop    = list.pop 
  53  _strjoin    = str.join 
  54  _strsplit   = str.split 
  55  _strstrip   = str.strip 
  56  _strreplace = str.replace 
  57  _textwrap   = textwrap.wrap 
  58   
  59  # }}} 
  60  # function pofile() {{{ 
  61   
def pofile(fpath, **kwargs):
    """
    Parse the po/pot file *fpath* and return the resulting POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width stored on the returned
        instance (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if
        autodetect_encoding is set to False

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_utf8.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgid
    ...                 new.msgid
    ...     finally:
    ...         os.unlink(tmpf)
    """
    autodetect = kwargs.get('autodetect_encoding', True)
    # The explicit comparison is kept on purpose: only values equal to True
    # (True, 1, 1.0) enable detection, any other value selects the fallback.
    if autodetect == True:
        chosen_encoding = detect_encoding(fpath)
    else:
        chosen_encoding = kwargs.get('encoding', default_encoding)
    po = _POFileParser(fpath).parse()
    po.wrapwidth = kwargs.get('wrapwidth', 78)
    po.encoding = chosen_encoding
    return po
108 109 # }}} 110 # function mofile() {{{ 111
def mofile(fpath, **kwargs):
    """
    Parse the mo file *fpath* and return the resulting MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width stored on the returned
        instance (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if
        autodetect_encoding is set to False

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    autodetect = kwargs.get('autodetect_encoding', True)
    # The explicit comparison is kept on purpose: only values equal to True
    # (True, 1, 1.0) enable detection, any other value selects the fallback.
    if autodetect == True:
        chosen_encoding = detect_encoding(fpath)
    else:
        chosen_encoding = kwargs.get('encoding', default_encoding)
    mo = _MOFileParser(fpath).parse()
    mo.wrapwidth = kwargs.get('wrapwidth', 78)
    mo.encoding = chosen_encoding
    return mo
156 157 # }}} 158 # function detect_encoding() {{{ 159
def detect_encoding(fpath):
    """
    Try to detect the encoding used by the file *fpath*.

    The file is scanned line by line for a ``Content-Type: ...
    charset=<name>`` declaration and the first charset name found is
    returned (stripped of surrounding whitespace).  If no line matches,
    the module level *default_encoding* is returned.

    **Keyword argument**:
      - *fpath*: string, full or relative path to the po or mo file.

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo'))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo'))
    ISO_8859-15
    """
    import re
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    f = open(fpath)
    # try/finally guarantees the handle is released even when reading the
    # file raises (the previous code only closed it on the normal paths).
    try:
        for l in f:
            match = rx.search(l)
            if match:
                return match.group(1).strip()
    finally:
        f.close()
    return default_encoding
191 192 # }}} 193 # function escape() {{{ 194
def escape(st):
    """
    Return *st* with backslash, tab, carriage return, newline and double
    quote characters replaced by their backslash escape sequences.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # The backslash must be handled first, otherwise the backslashes
    # introduced by the following replacements would be doubled.
    substitutions = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for plain, escaped in substitutions:
        st = str.replace(st, plain, escaped)
    return st
210 211 # }}} 212 # function unescape() {{{ 213
def unescape(st):
    """
    Return *st* with the escape sequences ``\\\\"``, ``\\\\n``, ``\\\\r``,
    ``\\\\t`` and ``\\\\\\\\`` turned back into the characters they stand
    for.  Any other backslash sequence is left untouched.

    The string is decoded in a single left-to-right pass, so an escaped
    backslash followed by ``n``, ``t``, ``r`` or ``"`` is decoded as a
    backslash plus that letter and not as a control character (chained
    replacements got this wrong).

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    """
    import re
    decoded = {'n': '\n', 't': '\t', 'r': '\r'}

    def _replace(match):
        char = match.group(1)
        # a backslash or a double quote stands for itself once unescaped
        return decoded.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', _replace, st)
229 230 # }}} 231 # class _BaseFile {{{ 232
class _BaseFile(list):
    """
    Common parent class for the POFile and MOFile classes.

    A file is a list of entries plus a header, a metadata dict, a wrap
    width and an encoding.  This class must **not** be instantiated
    directly: ``__str__`` and ``to_binary`` call ``obsolete_entries()``
    and ``translated_entries()``, which only the subclasses define.
    """

    def __init__(self, fpath=None, wrapwidth=78, encoding=default_encoding):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to po or mo file
          - *wrapwidth*: integer, the width at which lines should be
            wrapped, default to 78 (optional).
          - *encoding*: string, the file encoding, default to the module
            level *default_encoding* (optional).
        """
        list.__init__(self)
        # path of the file this object was loaded from / is saved to
        self.fpath = fpath
        # the width at which lines should be wrapped
        self.wrapwidth = wrapwidth
        # the file encoding
        self.encoding = encoding
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __str__(self):
        """
        String representation of the file: the metadata entry, then the
        non obsolete entries, then the obsolete ones.
        """
        ret = [self.metadata_as_entry().__str__(self.wrapwidth)]
        for entry in self:
            if not entry.obsolete:
                ret.append(entry.__str__(self.wrapwidth))
        for entry in self.obsolete_entries():
            ret.append(entry.__str__(self.wrapwidth))
        return '\n'.join(ret)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def metadata_as_entry(self):
        """
        Return the metadata as a POEntry whose msgid is empty and whose
        msgstr holds one ``name: value`` line per metadata item.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = []
            for name, value in mdata:
                # Strip whitespace off each line in a multi-line entry
                value = '\n'.join([v.strip() for v in value.split('\n')])
                strs.append('%s: %s' % (name, value))
            e.msgstr = '\n'.join(strs) + '\n'
        return e

    def save(self, fpath=None, repr_method='__str__'):
        """
        Write the file to *fpath*, or to the path the object was created
        with when *fpath* is not given.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
          - *repr_method*: string, name of the method producing the
            output; ``'to_binary'`` makes the file be opened in binary
            mode.

        Raises IOError when neither *fpath* nor ``self.fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        mode = 'w'
        if repr_method == 'to_binary':
            mode += 'b'
        fhandle = open(fpath, mode)
        # make sure the handle is released even if the write fails
        try:
            fhandle.write(contents)
        finally:
            fhandle.close()

    def find(self, st, by='msgid'):
        """
        Return the first entry whose msgid (or the attribute named by
        *by*) equals the string *st*, or None when there is no such entry.

        **Keyword arguments**:
          - *st*: string, the string to search for
          - *by*: string, the comparison attribute

        **Examples**:

        >>> po = pofile('tests/test_utf8.po')
        >>> entry = po.find('Thursday')
        >>> entry.msgstr
        'Jueves'
        >>> entry = po.find('Some unexistant msgid')
        >>> entry is None
        True
        >>> entry = po.find('Jueves', 'msgstr')
        >>> entry.msgid
        'Thursday'
        """
        # stop at the first match instead of scanning the whole file
        for e in self:
            if getattr(e, by) == st:
                return e
        return None

    def ordered_metadata(self):
        """
        Return the metadata as a list of (name, value) tuples.

        The well known gettext headers come first, in their conventional
        order; the remaining items follow sorted by name.
        """
        # work on a copy so that self.metadata is left untouched
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding'
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
            except KeyError:
                continue
            ordered_data.append((data, value))
        # There are no specs for the order of the remaining items, so they
        # are emitted sorted by name.  sorted() is required here: sorting
        # a temporary copy of the keys would leave the order unchanged.
        for data in sorted(metadata):
            ordered_data.append((data, metadata[data]))
        return ordered_data

    def to_binary(self):
        """
        Return the mofile binary representation.

        The output is made of the 7 integer header, the key and value
        index tables, then the NUL terminated msgids and msgstrs of the
        metadata entry followed by the translated entries sorted by msgid.
        """
        import struct
        offsets = []
        ids = strs = ''
        entries = self.translated_entries()
        # The keys are sorted in the .mo file.  The sort is done in place
        # on whatever translated_entries() returned (MOFile returns the
        # file itself, so the file gets reordered as well).
        entries.sort(key=lambda entry: entry.msgid)
        # add metadata entry
        mentry = self.metadata_as_entry()
        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
        entries = [mentry] + entries
        entries_len = len(entries)
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = e._decode(e.msgid)
            msgstr = e._decode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + '\0'
            strs += msgstr + '\0'
        # The header is 7 32-bit unsigned integers.
        keystart = 7*4+16*entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1+keystart]
            voffsets += [l2, o2+valuestart]
        offsets = koffsets + voffsets
        output = struct.pack("IIIIIII",
                             0x950412de,         # Magic number
                             0,                  # Version
                             entries_len,        # number of entries
                             7*4,                # start of key index
                             7*4+entries_len*8,  # start of value index
                             0, 0)               # size and offset of hash table
        # same native unsigned int layout as array.array("I", ...) produced
        output += struct.pack('%dI' % len(offsets), *offsets)
        output += ids
        output += strs
        return output
427 428 # }}} 429 # class POFile {{{ 430
class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """
        Return the string representation of the po file: the header
        written as comment lines, followed by the entries.
        """
        ret = ''
        for header in self.header.split('\n'):
            # header lines starting with ',' or ':' are glued to the '#'
            if header[:1] in [',', ':']:
                ret += '#%s\n' % header
            else:
                ret += '# %s\n' % header
        return ret + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that return the percentage of translated
        messages, rounded down to an integer.  Obsolete entries are not
        counted; a file without any countable entry is 100% translated.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        # Integer arithmetic gives the exact floor; going through floats
        # may land just below a whole number and lose one percent.
        return translated * 100 // total

    def translated_entries(self):
        """
        Convenience method that return a list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        return [e for e in self if e.translated() and not e.obsolete]

    def untranslated_entries(self):
        """
        Convenience method that return a list of untranslated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        6
        """
        return [e for e in self if not e.translated() and not e.obsolete]

    def fuzzy_entries(self):
        """
        Convenience method that return the list of 'fuzzy' entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that return the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [e for e in self if e.obsolete]

    def merge(self, refpot):
        """
        XXX this could not work if encodings are different, needs thinking
        and general refactoring of how polib handles encoding...

        Convenience method that merge the current pofile with the pot file
        provided, in the spirit of the gettext msgmerge utility:

          - entries of this file that also exist in *refpot* keep their
            translation, but their extracted comment and occurrences are
            replaced by the ones of the reference entry
          - entries that only exist in *refpot* are added untranslated,
            with the reference msgid, occurrences and extracted comment
          - entries of this file that are not in *refpot* anymore are
            marked obsolete

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> str(po) == str(expected_po)
        True
        """
        # Index this file by msgid once instead of scanning it for every
        # reference entry.  setdefault() keeps the first entry seen for a
        # msgid, which is the entry find() would return.
        by_msgid = {}
        for e in self:
            by_msgid.setdefault(e.msgid, e)
        for entry in refpot:
            e = by_msgid.get(entry.msgid)
            if e is None:
                # entry is not in the po file, we must add it
                # entry is created with msgid, occurrences and comment
                e = POEntry(
                    msgid=entry.msgid,
                    occurrences=entry.occurrences,
                    comment=entry.comment
                )
                self.append(e)
                by_msgid[entry.msgid] = e
            else:
                # entry found, we update it...
                e.occurrences = entry.occurrences
                e.comment = entry.comment
        # ok, now we must "obsolete" entries that are not in the refpot
        # anymore
        reference_ids = set([entry.msgid for entry in refpot])
        for entry in self:
            if entry.msgid not in reference_ids:
                entry.obsolete = True
618 619 # }}} 620 # class MOFile {{{ 621
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, fpath=None, wrapwidth=78):
        """
        MOFile constructor.
        See _BaseFile.__init__ for the meaning of *fpath* and *wrapwidth*.
        """
        _BaseFile.__init__(self, fpath, wrapwidth)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.  When it
            is omitted the path the object was created with is used, as
            _BaseFile.save does; IOError is raised if neither is set.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Returns the file itself (not a copy).
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []
716 717 # }}} 718 # class _BaseEntry {{{ 719
class _BaseEntry(object):
    """
    Base class for POEntry or MOEntry objects.
    This class must *not* be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Base Entry constructor.

        Recognised keyword arguments: *msgid*, *msgstr*, *msgid_plural*
        (strings, default to ''), *msgstr_plural* (dict mapping a plural
        index to a string, default to {}), *obsolete* (default to False)
        and *encoding* (default to the module level *default_encoding*).
        """
        self.msgid = kwargs.get('msgid', '')
        self.msgstr = kwargs.get('msgstr', '')
        self.msgid_plural = kwargs.get('msgid_plural', '')
        self.msgstr_plural = kwargs.get('msgstr_plural', {})
        self.obsolete = kwargs.get('obsolete', False)
        self.encoding = kwargs.get('encoding', default_encoding)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def __str__(self, wrapwidth=78):
        """
        Common string representation of the POEntry and MOEntry
        objects: msgid, optional msgid_plural, then either the plural
        msgstr[N] fields or the single msgstr, and a trailing empty line.
        Every line of an obsolete entry is prefixed with '#~ '.

        *wrapwidth* is accepted for interface compatibility; the fields
        are not wrapped here.
        """
        if self.obsolete:
            delflag = '#~ '
        else:
            delflag = ''
        # write the msgid
        ret = []
        ret += self._str_field("msgid", delflag, "", self.msgid)
        # write the msgid_plural if any
        if self.msgid_plural:
            ret += self._str_field("msgid_plural", delflag, "",
                                   self.msgid_plural)
        if self.msgstr_plural:
            # Write the msgstr_plural if any, in ascending index order.
            # sorted() is required: sorting a temporary copy of the keys
            # would leave the forms in arbitrary dict order.
            msgstrs = self.msgstr_plural
            for index in sorted(msgstrs):
                plural_index = '[%s]' % index
                ret += self._str_field("msgstr", delflag, plural_index,
                                       msgstrs[index])
        else:
            # otherwise write the msgstr
            ret += self._str_field("msgstr", delflag, "", self.msgstr)
        ret.append('')
        return '\n'.join(ret)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """
        Return the list of output lines for one field.

        A value spanning several lines is written as an empty first
        string followed by one quoted, escaped string per line.
        """
        field = self._decode(field)
        lines = field.splitlines(True)  # keep line breaks in strings
        # potentially, we could do line-wrapping here, but textwrap.wrap
        # treats whitespace too carelessly for us to use it.
        if len(lines) > 1:
            lines = [''] + lines  # start with initial empty line
        else:
            lines = [field]  # needed for the empty string case
        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                escape(lines.pop(0)))]
        for mstr in lines:
            ret.append('%s"%s"' % (delflag, escape(mstr)))
        return ret

    def _decode(self, st):
        """
        Return *st* encoded with ``self.encoding`` when it is a
        ``unicode`` object, and *st* unchanged otherwise.

        This is a best-effort conversion: if it fails for any reason
        (including the ``unicode`` type not existing) *st* is returned
        as is.
        """
        try:
            if isinstance(st, unicode):
                st = st.encode(self.encoding)
            return st
        except Exception:
            # deliberate best effort, but let KeyboardInterrupt and
            # SystemExit through (a bare except would swallow them)
            return st
791 792 # }}} 793 # class POEntry {{{ 794
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/spam.c', 32), ('src/eggs.c', 45)]
    >>> entry.tcomment = 'A plural translation'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    # A plural translation
    #: src/spam.c:32 src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        On top of the _BaseEntry keyword arguments it accepts *comment*
        (extracted comment), *tcomment* (translator comment), both
        defaulting to '', *occurrences* (list of (path, line number)
        tuples) and *flags* (list of strings), both defaulting to [].
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])

    def _comment_lines(self, text, prefix, wrapwidth):
        """
        Return *text* as a list of comment lines starting with *prefix*,
        wrapping the lines that do not fit in *wrapwidth* columns.
        """
        ret = []
        for line in self._decode(text).split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                # textwrap.wrap returns a list of lines: extend, so that
                # the result stays a flat list of strings
                ret.extend(textwrap.wrap(line, wrapwidth,
                                         initial_indent=prefix,
                                         subsequent_indent=prefix,
                                         break_long_words=False))
            else:
                ret.append('%s%s' % (prefix, line))
        return ret

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry: extracted
        comment, translator comment, occurrences and flags lines, then
        the fields written by _BaseEntry.__str__.  Obsolete entries are
        written with their fields only.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            ret.extend(self._comment_lines(self.comment, '#. ', wrapwidth))
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            ret.extend(self._comment_lines(self.tcomment, '# ', wrapwidth))
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                filelist.append('%s:%s' % (self._decode(fpath), lineno))
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # XXX textwrap split words that contain hyphen, this is not
                # what we want for filenames, so the dirty hack is to
                # temporally replace hyphens with "*" while wrapping (a
                # "*" already present in a path comes back as a hyphen)
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Obsolete entries sort before the other ones; entries are then
        ordered by their sorted occurrences (a list that is a prefix of
        the other one sorts first) and finally by msgid.  Returns -1, 0
        or 1.

        **Tests**:
        >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po)
        # 
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        # First: Obsolete test
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1
        # Work on sorted copies to protect the originals; the natural
        # ordering of the occurrences is by path, then by line number.
        occ1 = sorted(self.occurrences)
        occ2 = sorted(other.occurrences)
        # Comparing sorted occurrences
        for entry1, entry2 in zip(occ1, occ2):
            if entry1[0] != entry2[0]:
                if entry1[0] > entry2[0]:
                    return 1
                else:
                    return -1
            if entry1[1] != entry2[1]:
                if entry1[1] > entry2[1]:
                    return 1
                else:
                    return -1
        # One list is a prefix of the other: the shorter one comes first.
        # Both directions are handled so that the ordering is symmetric.
        if len(occ1) != len(occ2):
            if len(occ1) > len(occ2):
                return 1
            else:
                return -1
        # Finally: Compare message ID
        if self.msgid > other.msgid:
            return 1
        if self.msgid < other.msgid:
            return -1
        return 0

    def __lt__(self, other):
        """
        Ordering used by sort(); defined in terms of __cmp__ so that
        sorting also works on interpreters that ignore __cmp__.
        """
        return self.__cmp__(other) < 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated; a
        plural entry is translated when none of its plural forms is
        empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False
989 990 # }}} 991 # class MOEntry {{{ 992
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, as built by
        _BaseEntry.__str__.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
1013 1014 # }}} 1015 # class _POFileParser {{{ 1016
1017 -class _POFileParser(object):
1018 """ 1019 A finite state machine to parse efficiently and correctly po 1020 file format. 1021 """ 1022
def __init__(self, fpath):
    """
    Constructor: open the po file and set up the state machine.

    **Keyword argument**:
      - *fpath*: string, path to the po file
    """
    self.fhandle = open(fpath, 'r')
    self.instance = POFile(fpath=fpath)
    self.transitions = {}
    self.current_entry = POEntry()
    self.current_state = 'ST'
    self.current_token = None
    # two memo flags used in handlers
    self.msgstr_index = 0
    self.entry_obsolete = 0
    # Meaning of the two-letter symbols used below:
    #     * ST: Beginning of the file (start)
    #     * HE: Header
    #     * TC: a translation comment
    #     * GC: a generated comment
    #     * OC: a file/line occurrence
    #     * FL: a flags line
    #     * MI: a msgid
    #     * MP: a msgid plural
    #     * MS: a msgstr
    #     * MX: a msgstr plural
    #     * MC: a msgid or msgstr continuation line
    every_state = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']
    # (matched symbol, states it is accepted in, resulting state)
    transition_table = (
        ('TC', ['ST', 'HE'], 'HE'),
        ('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC'),
        ('GC', every_state, 'GC'),
        ('OC', every_state, 'OC'),
        ('FL', every_state, 'FL'),
        ('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI'),
        ('MP', ['TC', 'GC', 'MI'], 'MP'),
        ('MS', ['MI', 'MP', 'TC'], 'MS'),
        ('MX', ['MI', 'MX', 'MP', 'TC'], 'MX'),
        ('MC', ['MI', 'MP', 'MS', 'MX'], 'MC'),
    )
    for symbol, accepted_in, resulting_state in transition_table:
        self.add(symbol, accepted_in, resulting_state)
1064
def parse(self):
    """
    Run the state machine: read the file line by line, call process()
    with the symbol matching each line, then move the metadata entry
    (the first entry, when its msgid is empty) into the instance
    metadata dict.

    Blank lines are skipped and lines matching no known pattern are
    ignored; both still count for the line number given to process().
    The file is closed once it has been read, even if process() raises.

    Returns the populated instance.
    """
    # (line prefix, symbol) pairs, tried in this order
    prefixes = (
        ('#:', 'OC'),               # occurrences line
        ('msgid "', 'MI'),          # msgid
        ('msgstr "', 'MS'),         # msgstr
        ('"', 'MC'),                # continuation line or some metadata
        ('msgid_plural "', 'MP'),   # msgid plural
        ('msgstr[', 'MX'),          # msgstr plural
        ('#, ', 'FL'),              # flags line
        ('# ', 'TC'),               # translator comment line
        ('#.', 'GC'),               # generated comment line
    )
    try:
        linenum = 0
        for line in self.fhandle:
            linenum += 1
            line = line.strip()
            if line == '':
                continue
            if line[:3] == '#~ ':
                line = line[3:]
                self.entry_obsolete = 1
            else:
                self.entry_obsolete = 0
            self.current_token = line
            if line == '#':
                # an empty translator comment
                self.process('TC', linenum)
                continue
            for prefix, symbol in prefixes:
                if line.startswith(prefix):
                    self.process(symbol, linenum)
                    break
    finally:
        # release the handle even when process() reports a syntax error
        self.fhandle.close()

    if self.current_entry:
        # since entries are added when another entry is found, we must add
        # the last entry here (only if there are lines)
        self.instance.append(self.current_entry)
    # before returning the instance, check if there's metadata and if
    # so extract it in a dict
    if self.instance and self.instance[0].msgid == '':  # metadata found
        # remove the entry
        firstentry = self.instance.pop(0)
        self.instance.metadata_is_fuzzy = firstentry.flags
        key = None
        for msg in firstentry.msgstr.splitlines():
            try:
                key, val = msg.split(':', 1)
            except ValueError:
                # no "name: value" separator: this line continues the
                # value of the previous metadata item, if any
                if key is not None:
                    self.instance.metadata[key] += '\n' + msg.strip()
            else:
                self.instance.metadata[key] = val.strip()
    return self.instance
1134
def add(self, symbol, states, next_state):
    """
    Register transitions in the state machine.

    For each state of ``states`` an entry is stored in ``self.transitions``
    under the key ``(symbol, state)``; its value is the pair
    ``(handler, next_state)`` where handler is the method of this object
    named ``handle_`` followed by ``next_state`` in lower case.

    Keyword arguments:

    symbol     -- string, the matched token (two chars symbol)
    states     -- list, a list of states (two chars symbols)
    next_state -- the next state the fsm will have after the action
    """
    for from_state in states:
        handler_name = 'handle_%s' % next_state.lower()
        handler = getattr(self, handler_name)
        self.transitions[(symbol, from_state)] = (handler, next_state)
1147
def process(self, symbol, linenum):
    """
    Run the transition registered for the pair (symbol, current state).

    The handler stored for that pair is called; when it returns a true
    value the machine moves to the state stored alongside it, otherwise the
    current state is kept.

    Keyword arguments:

    symbol  -- string, the matched token (two chars symbol)
    linenum -- integer, the current line number of the parsed file

    Raises IOError mentioning ``linenum`` when no transition exists for the
    pair or when the handler itself raises.
    """
    try:
        lookup_key = (symbol, self.current_state)
        handler, target_state = self.transitions[lookup_key]
        state_changes = handler()
        if state_changes:
            self.current_state = target_state
    except Exception:
        raise IOError('Syntax error in po file (line %s)' % linenum)
1163 1164 # state handlers 1165
def handle_he(self):
    """
    Handle a header comment.

    The current token, minus its first two characters (the comment
    marker), is appended to the header of the instance; successive header
    lines are separated by a newline.

    Returns True, like the other state handlers, so that process() moves
    the machine to the next state.
    """
    if self.instance.header != '':
        self.instance.header += '\n'
    self.instance.header += self.current_token[2:]
    return True
1172
def handle_tc(self):
    """
    Handle a translator comment.

    When the machine is in the MC, MS or MX state the entry being built is
    appended to the instance and a fresh POEntry is started. The token,
    minus its first two characters, is then added to the translator
    comment of the current entry (newline separated). Always returns True.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        # a comment after a message string belongs to a new entry
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    so_far = entry.tcomment
    if so_far != '':
        so_far += '\n'
    entry.tcomment = so_far + self.current_token[2:]
    return True
1182
def handle_gc(self):
    """
    Handle a generated comment.

    When the machine is in the MC, MS or MX state the entry being built is
    appended to the instance and a fresh POEntry is started. The token,
    minus its first three characters, is then added to the comment of the
    current entry (newline separated). Always returns True.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        # a comment after a message string belongs to a new entry
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    so_far = entry.comment
    if so_far != '':
        so_far += '\n'
    entry.comment = so_far + self.current_token[3:]
    return True
1192
def handle_oc(self):
    """
    Handle a file:num occurrence line.

    When the machine is in the MC, MS or MX state the entry being built is
    appended to the instance and a fresh POEntry is started. The token,
    minus its first three characters, is split on whitespace and each
    reference is appended to the occurrences of the current entry as a
    (file, line) tuple of strings.

    A reference is split on its LAST colon only, so file names containing
    colons keep working. When what follows the last colon is not a number,
    or when there is no colon at all, the whole reference is taken as the
    file name and the line is the empty string.

    Always returns True.
    """
    if self.current_state in ['MC', 'MS', 'MX']:
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    occurrences = self.current_entry.occurrences
    # split() without separator never yields empty strings
    for occurrence in self.current_token[3:].split():
        parts = occurrence.rsplit(':', 1)
        if len(parts) == 2 and parts[1].isdigit():
            occurrences.append((parts[0], parts[1]))
        else:
            # no line number in this reference
            occurrences.append((occurrence, ''))
    return True
1204
def handle_fl(self):
    """
    Handle a flags line.

    When the machine is in the MC, MS or MX state the entry being built is
    appended to the instance and a fresh POEntry is started. The token,
    minus its first three characters, is split on ', ' and the resulting
    flags are added to the flags of the current entry. Always returns True.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        # flags after a message string belong to a new entry
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    flags_text = self.current_token[3:]
    new_flags = _strsplit(flags_text, ', ')
    self.current_entry.flags += new_flags
    return True
1212
def handle_mi(self):
    """
    Handle a msgid.

    When the machine is in the MC, MS or MX state the entry being built is
    appended to the instance and a fresh POEntry is started. The current
    entry takes the obsolete marker of the line being parsed, and its msgid
    is set to the unescaped text found between the opening 'msgid "' and
    the last character of the token. Always returns True.
    """
    if self.current_state in ('MC', 'MS', 'MX'):
        # a msgid after a message string starts a new entry
        _listappend(self.instance, self.current_entry)
        self.current_entry = POEntry()
    entry = self.current_entry
    entry.obsolete = self.entry_obsolete
    quoted_text = self.current_token[7:-1]
    entry.msgid = unescape(quoted_text)
    return True
1221
def handle_mp(self):
    """
    Handle a msgid plural.

    The msgid_plural of the current entry is set to the unescaped text
    found between the opening 'msgid_plural "' and the last character of
    the token. Always returns True.
    """
    quoted_text = self.current_token[14:-1]
    self.current_entry.msgid_plural = unescape(quoted_text)
    return True
1226
def handle_ms(self):
    """
    Handle a msgstr.

    The msgstr of the current entry is set to the unescaped text found
    between the opening 'msgstr "' and the last character of the token.
    Always returns True.
    """
    quoted_text = self.current_token[8:-1]
    self.current_entry.msgstr = unescape(quoted_text)
    return True
1231
def handle_mx(self):
    """
    Handle a msgstr plural.

    The token looks like 'msgstr[N] "text"'. N (kept as a string) is used
    as key in the msgstr_plural mapping of the current entry, whose value
    becomes the unescaped text; N is also remembered in self.msgstr_index
    so that continuation lines know which plural form to extend.

    The index and the text are located from the closing bracket and the
    opening quote rather than from fixed offsets, so indexes of more than
    one digit are read correctly. A token without ']' or without an opening
    quote raises ValueError, which process() reports as a syntax error.

    Always returns True.
    """
    token = self.current_token
    # 7 == len('msgstr['), the index starts right after the bracket
    closing = token.index(']', 7)
    index = token[7:closing]
    value = token[token.index('"', closing) + 1:-1]
    self.current_entry.msgstr_plural[index] = unescape(value)
    self.msgstr_index = index
    return True
1238
def handle_mc(self):
    """
    Handle a msgid or msgstr continuation line.

    The unescaped content of the token (without its first and last
    character) is appended to the field selected by the current state:
    msgid (MI), msgid_plural (MP), msgstr (MS) or, for MX, the plural form
    stored under self.msgstr_index. In any other state nothing is changed.

    Always returns False so that the current state is kept.
    """
    state = self.current_state
    plain_fields = {'MI': 'msgid', 'MP': 'msgid_plural', 'MS': 'msgstr'}
    if state in plain_fields:
        entry = self.current_entry
        field = plain_fields[state]
        extended = getattr(entry, field) + unescape(self.current_token[1:-1])
        setattr(entry, field, extended)
    elif state == 'MX':
        plurals = self.current_entry.msgstr_plural
        which = self.msgstr_index
        plurals[which] = plurals[which] + unescape(self.current_token[1:-1])
    # don't change the current state
    return False
1254 1255 # }}} 1256 # class _MOFileParser {{{ 1257
1258 -class _MOFileParser(object):
1259 """ 1260 A class to parse binary mo files. 1261 """ 1262 BIG_ENDIAN = 0xde120495 1263 LITTLE_ENDIAN = 0x950412de 1264
1265 - def __init__(self, fpath):
1266 """_MOFileParser constructor.""" 1267 self.fhandle = open(fpath, 'rb') 1268 self.instance = MOFile(fpath)
1269
1270 - def parse_magicnumber(self):
1271 """ 1272 Parse the magic number and raise an exception if not valid. 1273 """
1274
1275 - def parse(self):
1276 """ 1277 Build the instance with the file handle provided in the 1278 constructor. 1279 """ 1280 magic_number = self._readbinary('<I', 4) 1281 if magic_number == self.LITTLE_ENDIAN: 1282 ii = '<II' 1283 elif magic_number == self.BIG_ENDIAN: 1284 ii = '>II' 1285 else: 1286 raise IOError('Invalid mo file, magic number is incorrect !') 1287 self.instance.magic_number = magic_number 1288 # parse the version number and the number of strings 1289 self.instance.version, numofstrings = self._readbinary(ii, 8) 1290 # original strings and translation strings hash table offset 1291 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8) 1292 # move to msgid hash table and read length and offset of msgids 1293 self.fhandle.seek(msgids_hash_offset) 1294 msgids_index = [] 1295 for i in range(numofstrings): 1296 _listappend(msgids_index, self._readbinary(ii, 8)) 1297 # move to msgstr hash table and read length and offset of msgstrs 1298 self.fhandle.seek(msgstrs_hash_offset) 1299 msgstrs_index = [] 1300 for i in range(numofstrings): 1301 _listappend(msgstrs_index, self._readbinary(ii, 8)) 1302 # build entries 1303 for i in range(numofstrings): 1304 self.fhandle.seek(msgids_index[i][1]) 1305 msgid = self.fhandle.read(msgids_index[i][0]) 1306 self.fhandle.seek(msgstrs_index[i][1]) 1307 msgstr = self.fhandle.read(msgstrs_index[i][0]) 1308 if i == 0: # metadata 1309 raw_metadata, metadata = _strsplit(msgstr, '\n'), {} 1310 for line in raw_metadata: 1311 tokens = _strsplit(line, ':', 1) 1312 if tokens[0] != '': 1313 try: 1314 metadata[tokens[0]] = _strstrip(tokens[1]) 1315 except IndexError: 1316 metadata[tokens[0]] = '' 1317 self.instance.metadata = metadata 1318 continue 1319 entry = MOEntry(msgid=msgid, msgstr=msgstr) 1320 _listappend(self.instance, entry) 1321 # close opened file 1322 self.fhandle.close() 1323 return self.instance
1324
1325 - def _readbinary(self, fmt, numbytes):
1326 """ 1327 Private method that unpack n bytes of data using format <fmt>. 1328 It returns a tuple or a mixed value if the tuple length is 1. 1329 """ 1330 bytes = self.fhandle.read(numbytes) 1331 tup = struct.unpack(fmt, bytes) 1332 if len(tup) == 1: 1333 return tup[0] 1334 return tup
# }}}
# __main__ {{{

if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            """Load the po (or mo) file at path f and render it as a string."""
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            s = str(p)
        import profile
        # %r produces a properly quoted and escaped string literal, so a
        # path holding quotes or backslashes cannot break the statement
        profile.run('test(%r)' % sys.argv[2])
    else:
        import doctest
        doctest.testmod()

# }}}