Package translate :: Package storage :: Module fpo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2011 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  import re 
 32  import copy 
 33  import cStringIO 
 34   
 35  from translate.lang import data 
 36  from translate.misc.multistring import multistring 
 37  from translate.storage import pocommon, base, cpo, poparser 
 38  from translate.storage.pocommon import encodingToUse 
 39   
 40  lsep = " " 
 41  """Seperator for #: entries""" 
 42   
 43  basic_header = r'''msgid "" 
 44  msgstr "" 
 45  "Content-Type: text/plain; charset=UTF-8\n" 
 46  "Content-Transfer-Encoding: 8bit\n" 
 47  ''' 
 48   
 49   
class pounit(pocommon.pounit):
    """A Gettext PO unit whose data is held as plain Python values.

    Source, target, context and the various comment lists are stored
    directly on the instance; serialisation is delegated to ``cpo`` (see
    :meth:`__str__`).
    """
    # Attribute names and the PO syntax they correspond to (listing carried
    # over from the original source; not all of them are set by this class):
    #   othercomments      # this is another comment
    #   automaticcomments  #. comment extracted from the source code
    #   sourcecomments     #: sourcefile.xxx:35
    #   prev_msgctxt       #| The previous values that msgctxt and msgid held
    #   prev_msgid
    #   prev_msgid_plural
    #   typecomments       #, fuzzy
    #   msgidcomment       _: within msgid
    #   msgctxt
    #   msgid
    #   msgstr

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit for ``source`` with empty comments and target.

        @param source: The msgid (plain string, or something
            ``multistring`` accepts for plurals).
        @param encoding: Encoding name, normalised through ``encodingToUse``.
        """
        # The parent constructor assigns the source; this must happen before
        # the target is set because settarget() consults hasplural().
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        self.target = u""

    def _initallcomments(self, blankall=False):
        """Reset every comment container when ``blankall`` is true."""
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomment = u""

    def getsource(self):
        """Return the stored msgid."""
        return self._source

    def setsource(self, source):
        """Store ``source`` as unicode, or as a multistring for plurals."""
        self._rich_source = None
        source = data.forceunicode(source or u"")
        source = source or u""
        if isinstance(source, (multistring, unicode)):
            self._source = source
        else:
            # list, dict, ...
            self._source = multistring(source)
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        return self._target

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value.

        @raise ValueError: if the unit has no plural but ``target`` is a
            list/dict holding other than exactly one element.
        """
        self._rich_target = None
        if self.hasplural():
            if isinstance(target, multistring):
                self._target = target
            else:
                # unicode, list, dict
                self._target = multistring(target)
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                self._target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        else:
            self._target = target
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        With ``origin=None`` the translator comments are followed by the
        automatic comments.

        @raise ValueError: if ``origin`` is not a recognised value.
        """
        if origin is None:
            parts = [u"\n".join(self.othercomments),
                     u"\n".join(self.automaticcomments)]
            # Join the two groups with a newline so that the last translator
            # line and the first automatic line do not run together.
            comments = u"\n".join([part for part in parts if part])
        elif origin == "translator":
            comments = u"\n".join(self.othercomments)
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"\n".join(self.automaticcomments)
        else:
            raise ValueError("Comment type not valid")
        return comments

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: Note text; may span several lines.
        @param origin: "programmer", "developer" or "source code" select the
            automatic comments; anything else selects translator comments.
        @param position: "append" or "prepend". Any other value makes the new
            lines replace the existing ones.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        autocomments = origin in ["programmer", "developer", "source code"]
        if autocomments:
            commentlist = self.automaticcomments
        else:
            commentlist = self.othercomments
        if text.endswith(u'\n'):
            text = text[:-1]
        newcomments = text.split(u"\n")
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Copy the unit deeply, except for the names in ``__shallow__``.

        ``memo`` defaults to ``None`` rather than a shared dict: the method
        writes into it, so a mutable default would accumulate entries across
        calls.
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.items():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Mark memo with ourself, so that we won't get deep copied
        # again
        memo[id(self)] = self
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see ``__deepcopy__``)."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the total length of the msgid (all plural forms)."""
        if self.hasplural():
            return len("".join(self.source.strings))
        return len(self.source)

    def _msgstrlen(self):
        """Return the total length of the msgstr (all plural forms)."""
        if self.hasplural():
            return len("".join(self.target.strings))
        return len(self.target)

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param authoritative: When true, automatic comments and source
            comments are not brought across from ``otherpo``.
        """

        def mergelists(list1, list2, split=False):
            """Merge the items of ``list2`` into ``list1`` in place."""
            # decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            # Determine the newline style of list2
            lineend = ""
            if list2 and list2[0]:
                for candidate in ["\n", "\r", "\n\r"]:
                    if list2[0].endswith(candidate):
                        lineend = candidate

            # Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                for item in list1:
                    splitlist1.extend(item.split())
                for item in list2:
                    splitlist2.extend(item.split())
                list1.extend([item for item in splitlist2 if item not in splitlist1])
            else:
                # Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        item = item.rstrip(lineend)
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any). XXX - remove
            if pocommon.extract_msgid_comment(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Return whether this unit has an empty id but a non-empty target."""
        #TODO: fix up nicely
        return not self.getid() and len(self.target) > 0

    def isblank(self):
        """Return whether msgid, msgstr and msgctxt are all empty.

        Headers and units carrying a msgid comment are never blank.
        """
        if self.isheader() or self.msgidcomment:
            return False
        return (self._msgidlen() == 0 and self._msgstrlen() == 0
                and len(self._msgctxt) == 0)

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.search(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) == present:
            return
        if present:
            self.typecomments.append("#, %s\n" % typecomment)
        else:
            # this should handle word boundaries properly ...
            pattern = "\\b%s\\b[ \t,]*" % typecomment
            stripped = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
            # drop lines that are left with nothing but the "#," marker
            self.typecomments = [tcline for tcline in stripped
                                 if tcline.strip() != "#,"]

    def istranslated(self):
        """Return whether the unit is translated and not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """Return whether the unit is neither header, blank nor obsolete."""
        return not (self.isheader() or self.isblank() or self.isobsolete())

    def isfuzzy(self):
        """Return whether the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def _domarkfuzzy(self, present=True):
        """Add or remove the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.sourcecomments = []
        self.automaticcomments = []
        super(pounit, self).makeobsolete()

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1

    def parse(self, src):
        """Deprecated: units are no longer parsed individually.

        @raise DeprecationWarning: always.
        """
        raise DeprecationWarning("Should not be parsing with a unit")

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        #TODO: rename to .locations
        return [pocommon.unquote_plus(loc) for loc in self.sourcecomments]

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String
        """
        # Locations containing spaces are quoted first
        if location.find(" ") != -1:
            location = pocommon.quote_plus(location)
        self.sourcecomments.extend(location.split())

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if text:
            return pocommon.extract_msgid_comment(text)
        return self.msgidcomment

    def getcontext(self):
        """Get the message context."""
        return self._msgctxt + self.msgidcomment

    def setcontext(self, context):
        """Set the message context (msgctxt), coerced to unicode."""
        context = data.forceunicode(context or u"")
        self._msgctxt = context

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
        #        unit_id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomment:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    @classmethod
    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source)
            newunit.target = unit.target
            # context: a KDE style msgid comment takes precedence over msgctxt
            newunit.msgidcomment = unit._extract_msgidcomments()
            if not newunit.msgidcomment:
                newunit._msgctxt = unit.getcontext()

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            for origin in ("developer", "translator"):
                notes = unit.getnotes(origin)
                if notes:
                    newunit.addnote(notes, origin)
            newunit.markfuzzy(unit.isfuzzy())
            if unit.isobsolete():
                newunit.makeobsolete()
            # only the first matching format flag is carried over
            for tc in ['python-format', 'c-format', 'php-format']:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    break
            return newunit
        else:
            return base.TranslationUnit.buildfromunit(unit)


class pofile(pocommon.pofile):
    """A .po file containing various units"""
    UnitClass = pounit

    def changeencoding(self, newencoding):
        """Deprecated: changes the encoding on the file.

        @raise DeprecationWarning: always.
        """
        # This should not be here but in poheader. It also shouldn't mangle the
        # header itself, but use poheader methods. All users are removed, so
        # we can deprecate after one release.
        raise DeprecationWarning

    def _build_self_from_cpo(self):
        """Builds up this store from the internal cpo store.

        A user must ensure that self._cpo_store already exists, and that it is
        deleted afterwards."""
        for unit in self._cpo_store.units:
            self.addunit(self.UnitClass.buildfromunit(unit))
        self._encoding = self._cpo_store._encoding

    def _build_cpo_from_self(self):
        """Builds the internal cpo store from the data in self.

        A user must ensure that self._cpo_store does not exist, and should
        delete it after using it."""
        self._cpo_store = cpo.pofile(noheader=True)
        for unit in self.units:
            if not unit.isblank():
                self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit, self._encoding))
        if not self._cpo_store.header():
            # only add a temporary header
            self._cpo_store.makeheader(charset=self._encoding, encoding="8bit")

    def parse(self, input):
        """Parses the given file or file source string.

        @raise base.ParseError: wrapping any exception raised while parsing.
        """
        import sys
        try:
            if hasattr(input, 'name'):
                self.filename = input.name
            elif not getattr(self, 'filename', ''):
                self.filename = ''
            self.units = []
            self._cpo_store = cpo.pofile(input, noheader=True)
            try:
                self._build_self_from_cpo()
            finally:
                # never leave the temporary store attached, even on failure
                del self._cpo_store
        except Exception:
            # sys.exc_info() keeps this independent of the
            # ``except X, e`` / ``except X as e`` syntax difference.
            raise base.ParseError(sys.exc_info()[1])

    def removeduplicates(self, duplicatestyle="merge"):
        """Make sure each msgid is unique ; merge comments etc from duplicates into original

        @param duplicatestyle: "merge" merges duplicates into the first unit
            with that id; "msgctxt" keeps them and adds their locations to
            the msgctxt.
        """
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        id_dict = {}
        uniqueunits = []
        # TODO: this is using a list as the pos aren't hashable, but this is slow.
        # probably not used frequently enough to worry about it, though.
        markedpos = []

        def addcomment(thepo):
            thepo.msgidcomment = " ".join(thepo.getlocations())
            markedpos.append(thepo)

        for thepo in self.units:
            unit_id = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unit_id in id_dict:
                if duplicatestyle == "merge":
                    if unit_id:
                        id_dict[unit_id].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unit_id]
                    if origpo not in markedpos:
                        origpo._msgctxt += " ".join(origpo.getlocations())
                        # NOTE(review): this records thepo, not origpo, as
                        # marked; kept as in the original - confirm intent.
                        markedpos.append(thepo)
                    thepo._msgctxt += " ".join(thepo.getlocations())
                    uniqueunits.append(thepo)
            else:
                if not unit_id:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        thepo._msgctxt += u" ".join(thepo.getlocations())
                id_dict[unit_id] = thepo
                uniqueunits.append(thepo)
        self.units = uniqueunits

    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here"""
        self._cpo_store = cpo.pofile(encoding=self._encoding, noheader=True)
        try:
            try:
                self._build_cpo_from_self()
            except UnicodeEncodeError:
                # The current encoding cannot represent the content: switch
                # the store and its header to UTF-8 and build again.
                self._encoding = "utf-8"
                self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
                self._build_cpo_from_self()
            return str(self._cpo_store)
        finally:
            # never leave the temporary store attached, even on failure
            del self._cpo_store
565