Switch i18n scripts to use Pology. Fixes #2539.

Still requires a patch to Pology to keep the source file ordering.

This was SVN commit r15894.
This commit is contained in:
leper 2014-10-26 00:28:35 +00:00
parent cf3e42f6fb
commit ae10d34d99
14 changed files with 148 additions and 1884 deletions

View file

@ -12,6 +12,7 @@
"excludeMasks": ["third_party/**", "tools/**"]
},
"options": {
"format": "c-format",
"keywords": {
"Translate": [1],
"TranslatePlural": [1, 2],

View file

@ -11,6 +11,7 @@
"gui/modmod/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],

View file

@ -11,7 +11,6 @@
"civs/**.json"
],
"options": {
"format": "none",
"keywords": [
"Name",
"Description",
@ -35,6 +34,7 @@
"gui/session/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],
@ -54,7 +54,6 @@
"gui/session/**.xml"
],
"options": {
"format": "none",
"keywords": {
"translatableAttribute": {
"locationAttributes": ["id"]
@ -79,6 +78,7 @@
"gui/loading/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],
@ -100,7 +100,6 @@
"gui/loading/**.xml"
],
"options": {
"format": "none",
"keywords": {
"translatableAttribute": {
"locationAttributes": ["id"]
@ -115,7 +114,6 @@
"gui/text/quotes.txt"
],
"options": {
"format": "none"
}
}
]
@ -132,6 +130,7 @@
"gui/lobby/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],
@ -151,7 +150,6 @@
"gui/lobby/**.xml"
],
"options": {
"format": "none",
"keywords": {
"translatableAttribute": {
"locationAttributes": ["id"]
@ -167,7 +165,6 @@
"gui/lobby/Terms_of_Use.txt"
],
"options": {
"format": "none"
}
}
]
@ -184,6 +181,7 @@
"gui/manual/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],
@ -203,7 +201,6 @@
"gui/manual/**.xml"
],
"options": {
"format": "none",
"keywords": {
"translatableAttribute": {
"locationAttributes": ["id"]
@ -219,7 +216,6 @@
"gui/manual/userreport.txt"
],
"options": {
"format": "none"
}
}
]
@ -245,6 +241,7 @@
"gui/summary/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],
@ -273,7 +270,6 @@
"gui/summary/**.xml"
],
"options": {
"format": "none",
"keywords": {
"translatableAttribute": {
"locationAttributes": ["id"]
@ -289,7 +285,6 @@
"gui/text/tips/**.txt"
],
"options": {
"format": "none"
}
}
]
@ -307,7 +302,6 @@
"simulation/templates/units/**.xml"
],
"options": {
"format": "none",
"keywords": {
"GenericName": {},
"SpecificName": {},
@ -342,7 +336,6 @@
"simulation/templates/structures/**.xml"
],
"options": {
"format": "none",
"keywords": {
"GenericName": {},
"SpecificName": {},
@ -384,7 +377,6 @@
]
},
"options": {
"format": "none",
"keywords": {
"GenericName": {},
"SpecificName": {},
@ -418,7 +410,6 @@
"simulation/data/technologies/**.json"
],
"options": {
"format": "none",
"keywords": [
"specificName",
"genericName",
@ -443,6 +434,7 @@
"simulation/components/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"translate": [1],
"translatePlural": [1, 2],
@ -463,7 +455,6 @@
"simulation/data/player_defaults.json"
],
"options": {
"format": "none",
"keywords": [
"Name"
]
@ -475,7 +466,6 @@
"simulation/data/map_sizes.json"
],
"options": {
"format": "none",
"keywords": [
"Name",
"LongName"
@ -488,7 +478,6 @@
"simulation/ai/**.json"
],
"options": {
"format": "none",
"keywords": [
"name",
"description"
@ -509,7 +498,6 @@
"maps/random/**.json"
],
"options": {
"format": "none",
"keywords": [
"Name",
"Description"
@ -524,6 +512,7 @@
"maps/random/**.js"
],
"options": {
"format": "javascript-format",
"keywords": {
"markForTranslation": [1],
"markForTranslationWithContext": [[1], 2]
@ -540,7 +529,6 @@
"maps/skirmishes/**.xml"
],
"options": {
"format": "none",
"keywords": {
"ScriptSettings": {
"extractJson": {

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python
#!/usr/bin/env python2
# -*- coding:utf-8 -*-
#
# Copyright (C) 2013 Wildfire Games.
# Copyright (C) 2014 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
@ -21,9 +21,8 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import codecs, os, re, sys
from potter.catalog import Catalog, Message
from potter.extract import getExtractorInstance
from potter.pofile import read_po, write_po
from pology.catalog import Catalog
from pology.message import Message
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
@ -31,50 +30,12 @@ projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardi
l10nFolderName = "l10n"
#def getAverageExpansionForEnglishString(string):
#"""
#Based on http://www.w3.org/International/articles/article-text-size.en
#"""
#length = len(string)
#if len <= 10:
#return length*3 # 200–300%
#if len <= 20:
#return length*2 # 180–200%
#if len <= 30:
#return length*1.8 # 160–180%
#if len <= 50:
#return length*1.6 # 140–160%
#if len <= 70:
#return length*1.7 # 151-170%
#return length*1.3 # 130%
#def enlarge(string, surroundWithSpaces):
#halfExpansion = int(getAverageExpansionForEnglishString(string)/2)
#if surroundWithSpaces: halfExpansion -= 1
#outputString = "x"*halfExpansion
#if surroundWithSpaces:
#outputString += " "
#outputString += string
#if surroundWithSpaces:
#outputString += " "
#outputString += "x"*halfExpansion
#return outputString
def checkTranslationsForSpam(inputFilePath):
with codecs.open(inputFilePath, 'r', 'utf-8') as fileObject:
templateCatalog = read_po(fileObject)
print(u"Checking", inputFilePath)
templateCatalog = Catalog(inputFilePath)
longStringCatalog = Catalog()
# If language codes were specified on the command line, filder by those.
# If language codes were specified on the command line, filter by those.
filters = sys.argv[1:]
# Load existing translation catalogs.
@ -88,30 +49,29 @@ def checkTranslationsForSpam(inputFilePath):
for filename in os.listdir(l10nFolderPath):
if len(filename) > 3 and filename[-3:] == ".po" and filename[:4] != "long":
if not filters or filename[:-charactersToSkip] in filters:
with codecs.open(os.path.join(l10nFolderPath, filename), 'r', 'utf-8') as fileObject:
existingTranslationCatalogs.append([filename[:-charactersToSkip], read_po(fileObject)])
if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
existingTranslationCatalogs.append([filename[:-charactersToSkip], os.path.join(l10nFolderPath, filename)])
urlPattern = re.compile(u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")
# Check the URLs in translations against the URLs in the translation template.
for languageCode, translationCatalog in existingTranslationCatalogs:
for languageCode, pofile in existingTranslationCatalogs:
translationCatalog = Catalog(pofile)
for templateMessage in templateCatalog:
translationMessage = translationCatalog.get(templateMessage.id, templateMessage.context)
translationMessage = translationCatalog.select_by_key(templateMessage.msgctxt, templateMessage.msgid)
if translationMessage:
if templateMessage.pluralizable:
templateSingularString, templatePluralString = templateMessage.id
templateUrls = urlPattern.findall(templateSingularString) # We assume that the same URL is used in both the plural and singular forms.
for translationString in translationMessage.string:
translationUrls = urlPattern.findall(translationString)
for translationUrl in translationUrls:
if translationUrl not in templateUrls:
print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(
languageCode,
translationUrl,
u", ".join(templateUrls)))
else:
templateUrls = urlPattern.findall(templateMessage.id)
translationUrls = urlPattern.findall(translationMessage.string)
templateSingularString = templateMessage.msgid
templateUrls = urlPattern.findall(templateMessage.msgid)
# Assert that the same URL is used in both the plural and singular forms.
if templateMessage.msgid_plural and len(templateMessage.msgstr) > 1:
pluralUrls = urlPattern.findall(templateMessage.msgstr[0])
for url in pluralUrls:
if url not in templateUrls:
print(u"Different URLs in singular and plural source strings for {} in {}".format(
templateMessage.msgid,
inputFilePath))
for translationString in translationMessage[0].msgstr:
translationUrls = urlPattern.findall(translationString)
for translationUrl in translationUrls:
if translationUrl not in templateUrls:
print(u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}".format(

View file

@ -1,6 +1,6 @@
# -*- coding:utf-8 -*-
#
# Copyright (C) 2013 Wildfire Games
# Copyright (C) 2014 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
@ -19,27 +19,35 @@
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
from __future__ import absolute_import, division, print_function, unicode_literals
import codecs, os, sys
import codecs, re, os, sys
import json as jsonParser
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
from textwrap import dedent
from potter.util import parse_encoding, pathmatch, relpath
def pathmatch(mask, path):
""" Matches paths to a mask, where the mask supports * and **.
try:
stringType = unicode
except:
stringType = str
Paths use / as the separator
* matches a non-empty sequence of characters without /.
** matches one or more path segments separated by /, where each
segment is a non-empty sequence of characters without /
:return: true iff path matches the mask, false otherwise
"""
s = re.split(r"([*][*]?)", mask)
p = ""
for i in xrange(len(s)):
if i % 2 != 0:
p = p + "[^/]+"
if len(s[i]) == 2:
p = p + "(/[^/]+)*"
else:
p = p + re.escape(s[i])
p = p + "$"
return re.match(p, path) != None
class Extractor(object):
@ -60,7 +68,7 @@ class Extractor(object):
def run(self):
""" Extracts messages.
:return: An iterator over ``(message, context, location, comment)`` tuples.
:return: An iterator over ``(message, plural, context, (location, pos), comment)`` tuples.
:rtype: ``iterator``
"""
directoryAbsolutePath = os.path.abspath(self.directoryPath)
@ -71,7 +79,7 @@ class Extractor(object):
folders.sort()
filenames.sort()
for filename in filenames:
filename = relpath(os.path.join(root, filename).replace(os.sep, '/'), self.directoryPath)
filename = os.path.relpath(os.path.join(root, filename).replace(os.sep, '/'), self.directoryPath)
for filemask in self.excludeMasks:
if pathmatch(filemask, filename):
break
@ -79,14 +87,14 @@ class Extractor(object):
for filemask in self.includeMasks:
if pathmatch(filemask, filename):
filepath = os.path.join(directoryAbsolutePath, filename)
for message, context, position, comments in self.extractFromFile(filepath):
yield message, context, filename + ":" + str(position), comments
for message, plural, context, breadcrumb, position, comments in self.extractFromFile(filepath):
yield message, plural, context, (filename + (":"+breadcrumb if breadcrumb else ""), position), comments
def extractFromFile(self, filepath):
""" Extracts messages from a specific file.
:return: An iterator over ``(message, context, position, comments)`` tuples.
:return: An iterator over ``(message, plural, context, breadcrumb, position, comments)`` tuples.
:rtype: ``iterator``
"""
pass
@ -102,7 +110,7 @@ class javascript(Extractor):
def extractJavascriptFromFile(self, fileObject):
from potter.jslexer import tokenize, unquote_string
from extractors.jslexer import tokenize, unquote_string
funcname = message_lineno = None
messages = []
last_argument = None
@ -257,10 +265,12 @@ class javascript(Extractor):
continue
messages = tuple(msgs)
if len(messages) == 1:
messages = messages[0]
message = messages[0]
plural = None
if len(messages) == 2:
plural = messages[1]
yield messages, context, lineno, comments
yield message, plural, context, None, lineno, comments
@ -281,7 +291,7 @@ class txt(Extractor):
for line in [line.strip("\n\r") for line in fileObject.readlines()]:
lineCount += 1
if line:
yield line, None, str(lineCount), []
yield line, None, None, None, lineCount, []
@ -311,7 +321,7 @@ class json(Extractor):
def extractFromFile(self, filepath):
with codecs.open(filepath, "r", 'utf-8') as fileObject:
for message, breadcrumbs in self.extractFromString(fileObject.read()):
yield message, None, self.formatBreadcrumbs(breadcrumbs), []
yield message, None, None, self.formatBreadcrumbs(breadcrumbs), -1, []
def extractFromString(self, string):
self.breadcrumbs = []
@ -344,7 +354,7 @@ class json(Extractor):
for keyword in dictionary:
self.breadcrumbs.append(keyword)
if keyword in self.keywords:
if isinstance(dictionary[keyword], stringType):
if isinstance(dictionary[keyword], unicode):
yield dictionary[keyword], self.breadcrumbs
elif isinstance(dictionary[keyword], list):
for message, breadcrumbs in self.extractList(dictionary[keyword]):
@ -364,7 +374,7 @@ class json(Extractor):
index = 0
for listItem in itemsList:
self.breadcrumbs.append(index)
if isinstance(listItem, stringType):
if isinstance(listItem, unicode):
yield listItem, self.breadcrumbs
del self.breadcrumbs[-1]
index += 1
@ -372,7 +382,7 @@ class json(Extractor):
def extractDictionary(self, dictionary):
for keyword in dictionary:
self.breadcrumbs.append(keyword)
if isinstance(dictionary[keyword], stringType):
if isinstance(dictionary[keyword], unicode):
yield dictionary[keyword], self.breadcrumbs
del self.breadcrumbs[-1]
@ -398,7 +408,7 @@ class xml(Extractor):
xmlDocument = etree.parse(fileObject)
for keyword in self.keywords:
for element in xmlDocument.iter(keyword):
position = str(element.sourceline)
position = element.sourceline
if element.text is not None:
context = None
comments = []
@ -406,15 +416,16 @@ class xml(Extractor):
jsonExtractor = self.getJsonExtractor()
jsonExtractor.setOptions(self.keywords[keyword]["extractJson"])
for message, breadcrumbs in jsonExtractor.extractFromString(element.text):
yield message, context, position + ":" + json.formatBreadcrumbs(breadcrumbs), comments
yield message, None, context, json.formatBreadcrumbs(breadcrumbs), position, comments
else:
breadcrumb = None
if "locationAttributes" in self.keywords[keyword]:
attributes = [element.get(attribute) for attribute in self.keywords[keyword]["locationAttributes"] if attribute in element.attrib]
position += " ({attributes})".format(attributes=", ".join(attributes))
breadcrumb = "({attributes})".format(attributes=", ".join(attributes))
if "tagAsContext" in self.keywords[keyword]:
context = keyword
if "context" in element.attrib:
context = element.get("context")
context = unicode(element.get("context"))
if "comment" in element.attrib:
comment = element.get("comment")
comment = u" ".join(comment.split()) # Remove tabs, line breaks and unecessary spaces.
@ -423,9 +434,9 @@ class xml(Extractor):
for splitText in element.text.split():
# split on whitespace is used for token lists, there, a leading '-' means the token has to be removed, so it's not to be processed here either
if splitText[0] != "-":
yield splitText, context, position, comments
yield unicode(splitText), None, context, breadcrumb, position, comments
else:
yield element.text, context, position, comments
yield unicode(element.text), None, context, breadcrumb, position, comments
# Hack from http://stackoverflow.com/a/2819788
@ -459,4 +470,4 @@ class ini(Extractor):
context = None
position = " ({})".format(keyword)
comments = []
yield message, context, position, comments
yield message, None, context, None, position, comments

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2008-2011 Edgewall Software
# Copyright (C) 2013 Wildfire Games
# Copyright (C) 2013-2014 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
@ -25,7 +25,7 @@
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/extractors/jslexer.py
"""A simple JavaScript 1.5 lexer which is used for the JavaScript
extractor.

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python
#!/usr/bin/env python2
# -*- coding:utf-8 -*-
#
# Copyright (C) 2013 Wildfire Games.
# Copyright (C) 2014 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
@ -21,9 +21,8 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import codecs, json, os, sys, textwrap
from potter.catalog import Catalog, Message
from potter.extract import getExtractorInstance
from potter.pofile import read_po, write_po
from pology.catalog import Catalog
from pology.message import Message
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
@ -31,57 +30,14 @@ projectRootDirectory = os.path.abspath(os.path.join(l10nToolsDirectory, os.pardi
l10nFolderName = "l10n"
#def getAverageExpansionForEnglishString(string):
#"""
#Based on http://www.w3.org/International/articles/article-text-size.en
#"""
#length = len(string)
#if len <= 10:
#return length*3 # 200–300%
#if len <= 20:
#return length*2 # 180–200%
#if len <= 30:
#return length*1.8 # 160–180%
#if len <= 50:
#return length*1.6 # 140–160%
#if len <= 70:
#return length*1.7 # 151-170%
#return length*1.3 # 130%
#def enlarge(string, surroundWithSpaces):
#halfExpansion = int(getAverageExpansionForEnglishString(string)/2)
#if surroundWithSpaces: halfExpansion -= 1
#outputString = "x"*halfExpansion
#if surroundWithSpaces:
#outputString += " "
#outputString += string
#if surroundWithSpaces:
#outputString += " "
#outputString += "x"*halfExpansion
#return outputString
def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath):
with codecs.open(inputFilePath, 'r', 'utf-8') as fileObject:
templateCatalog = read_po(fileObject)
longStringCatalog = Catalog()
templateCatalog = Catalog(inputFilePath)
longStringCatalog = Catalog(outputFilePath, create=True, truncate=True)
# Fill catalog with English strings.
for message in templateCatalog:
if message.pluralizable:
singularString, pluralString = message.id
message.string = (singularString, pluralString)
else:
message.string = message.id
longStringCatalog[message.id] = message
longStringCatalog.add(message)
# If language codes were specified on the command line, filter by those.
filters = sys.argv[1:]
@ -97,44 +53,46 @@ def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath):
for filename in os.listdir(l10nFolderPath):
if len(filename) > 3 and filename[-3:] == ".po" and filename[:4] != "long":
if not filters or filename[:-charactersToSkip] in filters:
with codecs.open(os.path.join(l10nFolderPath, filename), 'r', 'utf-8') as fileObject:
existingTranslationCatalogs.append(read_po(fileObject))
if os.path.basename(inputFilePath)[:-4] == filename.split('.')[-2]:
existingTranslationCatalogs.append(os.path.join(l10nFolderPath, filename))
# If any existing translation has more characters than the average expansion, use that instead.
for translationCatalog in existingTranslationCatalogs:
for pofile in existingTranslationCatalogs:
print(u"Merging", pofile)
translationCatalog = Catalog(pofile)
for longStringCatalogMessage in longStringCatalog:
translationMessage = translationCatalog.get(longStringCatalogMessage.id, longStringCatalogMessage.context)
if translationMessage:
if longStringCatalogMessage.pluralizable:
currentSingularString, currentPluralString = longStringCatalogMessage.string
longestSingularString = currentSingularString
longestPluralString = currentPluralString
translationMessage = translationCatalog.select_by_key(longStringCatalogMessage.msgctxt, longStringCatalogMessage.msgid)
if not translationMessage:
continue
candidateSingularString = translationMessage.string[0]
candidatePluralString = "" # There might be between 0 and infinite plural forms.
for candidateString in translationMessage.string[1:]:
if len(candidateString) > len(candidatePluralString): candidatePluralString = candidateString
if not longStringCatalogMessage.msgid_plural:
if len(translationMessage[0].msgstr[0]) > len(longStringCatalogMessage.msgstr[0]):
longStringCatalogMessage.msgstr = translationMessage[0].msgstr
translationMessage = longStringCatalogMessage
continue
changed = False
if len(candidateSingularString) > len(currentSingularString):
longestSingularString = candidateSingularString
changed = True
if len(candidatePluralString) > len(currentPluralString):
longestPluralString = candidatePluralString
changed = True
longestSingularString = translationMessage[0].msgstr[0]
longestPluralString = translationMessage[0].msgstr[1] if len(translationMessage[0].msgstr) > 1 else longestSingularString
if changed:
longStringCatalogMessage.string = (longestSingularString, longestPluralString)
longStringCatalog[longStringCatalogMessage.id] = longStringCatalogMessage
candidateSingularString = longStringCatalogMessage.msgstr[0]
candidatePluralString = "" # There might be between 0 and infinite plural forms.
for candidateString in longStringCatalogMessage.msgstr[1:]:
if len(candidateString) > len(candidatePluralString): candidatePluralString = candidateString
else:
if len(translationMessage.string) > len(longStringCatalogMessage.string):
longStringCatalogMessage.string = translationMessage.string
longStringCatalog[longStringCatalogMessage.id] = longStringCatalogMessage
changed = False
if len(candidateSingularString) > len(longestSingularString):
longestSingularString = candidateSingularString
changed = True
if len(candidatePluralString) > len(longestPluralString):
longestPluralString = candidatePluralString
changed = True
if changed:
longStringCatalogMessage.msgstr = [longestSingularString, longestPluralString]
translationMessage = longStringCatalogMessage
with codecs.open(outputFilePath, 'w', 'utf-8') as fileObject:
write_po(fileObject, longStringCatalog)
longStringCatalog.set_encoding("utf-8")
longStringCatalog.sync()
def main():
@ -145,6 +103,7 @@ def main():
for filename in filenames:
if len(filename) > 4 and filename[-4:] == ".pot" and os.path.basename(root) == "l10n":
foundPots += 1
print(u"Generating", "long." + filename[:-1])
generateLongStringTranslationFromPotIntoPo(os.path.join(root, filename), os.path.join(root, "long." + filename[:-1]))
if foundPots == 0:
print(u"This script did not work because no .pot files were found.")

View file

@ -1,575 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# Copyright (C) 2013 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.
# The name of the author may not be used to endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
"""Data structures for message catalogs."""
from __future__ import absolute_import, division, print_function, unicode_literals
from cgi import parse_header
from datetime import datetime, time as time_
from difflib import get_close_matches
from email import message_from_string
from copy import copy
import re
import time
from collections import OrderedDict
from potter.util import distinct, LOCALTZ, UTC, FixedOffsetTimezone
__all__ = ['Message', 'Catalog']
__docformat__ = 'restructuredtext en'
# Verbose-mode pattern for a Python %-style placeholder: a literal "%", an
# optional "(name)" mapping key, optional flag/width/precision/length parts,
# and a conversion character. Message.guessFormatFlag() searches message ids
# with it to decide on the "python-format" flag.
PYTHON_FORMAT = re.compile(r"""(?x)
\%
(?:\(([\w]*)\))?
(
[-#0\ +]?(?:\*|[\d]+)?
(?:\.(?:\*|[\d]+))?
[hlL]?
)
([diouxXeEfFgGcrs%])
""")
# Verbose-mode pattern for a C printf-style placeholder: a literal "%", an
# optional positional "N$" argument, optional flags, vector/width/precision
# parts, an optional length modifier and a conversion character.
# Message.guessFormatFlag() tries this pattern first and returns "c-format"
# when any message id matches.
C_FORMAT = re.compile(r"""(?x)
\%
(\d+\$)?
([-+ 0#]+)?
(v|\*(\d+\$)?v)?
0*
(\d+|\*(\d+\$)?)?
(\.(\d*|\*(\d+\$)?))?
[hlqLV]?
([%bcdefginopsuxDFOUX])
""")
class Message(object):
    """Representation of a single message in a catalog.

    Messages are ordered and compared by their id; for pluralizable messages
    the singular form (first element of the id) is the one that is compared.
    """

    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
                 user_comments=(), previous_id=(), lineno=None, context=None, formatFlag=None):
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found in the
                       PO file, if any
        :param context: the message context
        :param formatFlag: a format flag to add to ``flags``; when ``None`` and
                           ``id`` is non-empty, the flag is guessed with
                           :meth:`guessFormatFlag`
        """
        self.id = id  #: The message ID
        if not string and self.pluralizable:
            string = (u'', u'')
        self.string = string  #: The message translation
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        if id and formatFlag is None:
            formatFlag = self.guessFormatFlag()
        if formatFlag:
            self.flags.add(formatFlag)
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        # A single string must be wrapped, not iterated character by character.
        # ``type(u'')`` also covers unicode strings on Python 2, which is what
        # callers pass when ``unicode_literals`` is in effect.
        if isinstance(previous_id, (str, type(u''))):
            self.previous_id = [previous_id]
        else:
            self.previous_id = list(previous_id)
        self.lineno = lineno
        self.context = context

    def __repr__(self):
        return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
                                        list(self.flags))

    def __cmp__(self, obj):
        """Compare Messages, taking into account plural ids.

        :return: a negative number, zero or a positive number when this
                 message sorts before, equal to or after ``obj``;
                 ``NotImplemented`` when ``obj`` has no ``id`` to compare with
        """
        try:
            other = obj.id
        except AttributeError:
            return NotImplemented
        this = self.id
        if self.pluralizable:
            this = this[0]
        if isinstance(obj, Message) and obj.pluralizable:
            other = other[0]
        # Equivalent to the Python 2 builtin cmp(), which Python 3 removed.
        return (this > other) - (this < other)

    def __gt__(self, other):
        result = self.__cmp__(other)
        return result if result is NotImplemented else result > 0

    def __lt__(self, other):
        result = self.__cmp__(other)
        return result if result is NotImplemented else result < 0

    def __ge__(self, other):
        result = self.__cmp__(other)
        return result if result is NotImplemented else result >= 0

    def __le__(self, other):
        result = self.__cmp__(other)
        return result if result is NotImplemented else result <= 0

    def __eq__(self, other):
        result = self.__cmp__(other)
        return result if result is NotImplemented else result == 0

    def __ne__(self, other):
        result = self.__cmp__(other)
        return result if result is NotImplemented else result != 0

    def clone(self):
        """Return a new Message built from shallow copies of this one's fields.

        ``formatFlag`` is not passed on, so the constructor guesses it again
        for the copy.
        """
        return Message(*map(copy, (self.id, self.string, self.locations,
                                   self.flags, self.auto_comments,
                                   self.user_comments, self.previous_id,
                                   self.lineno, self.context)))

    @property
    def pluralizable(self):
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type: `bool`"""
        return isinstance(self.id, (list, tuple))

    def guessFormatFlag(self):
        """ If the message contains parameters, this function returns a string with the flag that represents the format
        of those parameters.

        C-style placeholders are checked on every id before Python-style ones,
        so "c-format" wins when both patterns match.

        :return: ``"c-format"``, ``"python-format"`` or ``None``
        :type: `string`"""
        ids = self.id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        if any(C_FORMAT.search(msgid) is not None for msgid in ids):
            return "c-format"
        if any(PYTHON_FORMAT.search(msgid) is not None for msgid in ids):
            return "python-format"
        return None
# Default header comment for a Catalog (the default value of its
# ``header_comment`` argument). The upper-case words PROJECT, YEAR and
# ORGANIZATION are placeholders that Catalog._get_header_comment() replaces
# when the header is read back.
DEFAULT_HEADER = u"""\
# Translation template for PROJECT.
# Copyright © YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
#"""
class Catalog(object):
"""Representation of a message catalog."""
def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
project=None, version=None, copyright_holder=None,
msgid_bugs_address=None, creation_date=None,
revision_date=None, charset='utf-8'):
"""Initialize the catalog object.
:param domain: the message domain
:param header_comment: the header comment as string, or `None` for the
default header
:param project: the project's name
:param version: the project's version
:param copyright_holder: the copyright holder of the catalog
:param msgid_bugs_address: the email address or URL to submit bug
reports to
:param creation_date: the date the catalog was created
:param revision_date: the date the catalog was revised
:param charset: the encoding to use in the output
"""
self.domain = domain #: The message domain
self._header_comment = header_comment
self._messages = OrderedDict()
self.project = project or 'PROJECT' #: The project name
self.version = version #: The project version
self.copyright_holder = copyright_holder or 'ORGANIZATION'
self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
self.charset = charset or 'utf-8'
if creation_date is None:
creation_date = datetime.now(LOCALTZ)
elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
creation_date = creation_date.replace(tzinfo=LOCALTZ)
self.creation_date = creation_date #: Creation date of the template
if revision_date is None:
revision_date = 'YEAR-MO-DA HO:MI+ZONE'
elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
revision_date = revision_date.replace(tzinfo=LOCALTZ)
self.revision_date = revision_date #: Last revision date of the catalog
self.obsolete = OrderedDict() #: Dictionary of obsolete messages
self._num_plurals = None
self._plural_expr = None
def _get_header_comment(self):
comment = self._header_comment
year = datetime.now(LOCALTZ).strftime('%Y')
if hasattr(self.revision_date, 'strftime'):
year = self.revision_date.strftime('%Y')
comment = comment.replace('PROJECT', self.project) \
.replace('YEAR', year) \
.replace('ORGANIZATION', self.copyright_holder)
return comment
def _set_header_comment(self, string):
self._header_comment = string
header_comment = property(_get_header_comment, _set_header_comment, doc="""\
The header comment for the catalog.
>>> catalog = Catalog(project='Foobar', version='1.0',
... copyright_holder='Foo Company')
>>> print catalog.header_comment #doctest: +ELLIPSIS
# Translations template for Foobar.
# Copyright (C) ... Foo Company
# This file is distributed under the same license as the Foobar project.
# FIRST AUTHOR <EMAIL@ADDRESS>, ....
#
The header can also be set from a string. Any known upper-case variables
will be replaced when the header is retrieved again:
>>> catalog = Catalog(project='Foobar', version='1.0',
... copyright_holder='Foo Company')
>>> catalog.header_comment = '''\\
... # The POT for my really cool PROJECT project.
... # Copyright (C) 1990-2003 ORGANIZATION
... # This file is distributed under the same license as the PROJECT
... # project.
... #'''
>>> print catalog.header_comment
# The POT for my really cool Foobar project.
# Copyright (C) 1990-2003 Foo Company
# This file is distributed under the same license as the Foobar
# project.
#
:type: `unicode`
""")
def _get_mime_headers(self):
headers = []
projectIdVersion = self.project
if self.version:
projectIdVersion += " " + self.version
headers.append(('Project-Id-Version', projectIdVersion))
headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
headers.append(('POT-Creation-Date', self.creation_date.strftime('%Y-%m-%d %H:%M%z')))
if isinstance(self.revision_date, (datetime, time_, int, float)):
headers.append(('PO-Revision-Date', self.revision_date.strftime('%Y-%m-%d %H:%M%z')))
else:
headers.append(('PO-Revision-Date', self.revision_date))
headers.append(('MIME-Version', '1.0'))
headers.append(('Content-Type',
'text/plain; charset=%s' % self.charset))
headers.append(('Content-Transfer-Encoding', '8bit'))
headers.append(('Generated-By', 'Potter 1.0\n'))
return headers
def _set_mime_headers(self, headers):
for name, value in headers:
name = name.lower()
if name == 'project-id-version':
parts = value.split(' ')
self.project = u' '.join(parts[:-1])
self.version = parts[-1]
elif name == 'report-msgid-bugs-to':
self.msgid_bugs_address = value
elif name == 'content-type':
mimetype, params = parse_header(value)
if 'charset' in params:
self.charset = params['charset'].lower()
elif name == 'plural-forms':
_, params = parse_header(' ;' + value)
try:
self._num_plurals = int(params.get('nplurals', 2))
except ValueError:
self._num_plurals = 2
self._plural_expr = params.get('plural', '(n != 1)')
elif name == 'pot-creation-date':
# FIXME: this should use dates.parse_datetime as soon as that
# is ready
value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1)
tt = time.strptime(value, '%Y-%m-%d %H:%M')
ts = time.mktime(tt)
# Separate the offset into a sign component, hours, and minutes
plus_minus_s, rest = tzoffset[0], tzoffset[1:]
hours_offset_s, mins_offset_s = rest[:2], rest[2:]
# Make them all integers
plus_minus = int(plus_minus_s + '1')
hours_offset = int(hours_offset_s)
mins_offset = int(mins_offset_s)
# Calculate net offset
net_mins_offset = hours_offset * 60
net_mins_offset += mins_offset
net_mins_offset *= plus_minus
# Create an offset object
tzoffset = FixedOffsetTimezone(net_mins_offset)
# Store the offset in a datetime object
dt = datetime.fromtimestamp(ts)
self.creation_date = dt.replace(tzinfo=tzoffset)
elif name == 'po-revision-date':
# Keep the value if it's not the default one
if 'YEAR' not in value:
# FIXME: this should use dates.parse_datetime as soon as
# that is ready
value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1)
tt = time.strptime(value, '%Y-%m-%d %H:%M')
ts = time.mktime(tt)
# Separate the offset into a sign component, hours, and
# minutes
plus_minus_s, rest = tzoffset[0], tzoffset[1:]
hours_offset_s, mins_offset_s = rest[:2], rest[2:]
# Make them all integers
plus_minus = int(plus_minus_s + '1')
hours_offset = int(hours_offset_s)
mins_offset = int(mins_offset_s)
# Calculate net offset
net_mins_offset = hours_offset * 60
net_mins_offset += mins_offset
net_mins_offset *= plus_minus
# Create an offset object
tzoffset = FixedOffsetTimezone(net_mins_offset)
# Store the offset in a datetime object
dt = datetime.fromtimestamp(ts)
self.revision_date = dt.replace(tzinfo=tzoffset)
mime_headers = property(fget=_get_mime_headers,
                        fset=_set_mime_headers, doc="""\
The MIME headers of the catalog, used for the special ``msgid ""`` entry.

Here's an example of the output for such a catalog template:

>>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
>>> catalog = Catalog(project='Foobar', version='1.0',
...                   creation_date=created)
>>> for name, value in catalog.mime_headers:
...     print '%s: %s' % (name, value)
Project-Id-Version: Foobar 1.0
Report-Msgid-Bugs-To: EMAIL@ADDRESS
POT-Creation-Date: 1990-04-01 15:30+0000
PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
Generated-By: Potter ...

:type: `list`
""")
def __contains__(self, id):
    """Tell whether a message with the given ID is stored in the catalog."""
    lookup_key = self._key_for(id)
    return lookup_key in self._messages
def __len__(self):
"""The number of messages in the catalog.
This does not include the special ``msgid ""`` entry."""
return len(self._messages)
def __iter__(self):
    """Yield every entry of the catalog as a `Message`.

    The first item is a synthetic header message built from
    ``mime_headers``; the stored messages follow in the iteration order of
    the underlying mapping.

    :rtype: ``iterator``
    """
    header_lines = ['%s: %s' % (name, value)
                    for name, value in self.mime_headers]
    yield Message(u'', '\n'.join(header_lines), flags=set())
    entries = self._messages
    for entry_key in entries:
        yield entries[entry_key]
def __repr__(self):
return '<%s %r>' % (type(self).__name__, self.domain)
def __delitem__(self, id):
    """Remove the message with the given ID (delegates to `delete`)."""
    # No context is passed, so only context-free messages are affected.
    self.delete(id)
def __getitem__(self, id):
    """Look up a message by ID (delegates to `get`).

    :param id: the message ID
    :return: the matching message, or `None` when the catalog has no such
             message
    :rtype: `Message`
    """
    found = self.get(id)
    return found
def __setitem__(self, id, message):
    """Add or update the message with the specified ID.

    >>> catalog = Catalog()
    >>> catalog[u'foo'] = Message(u'foo')
    >>> catalog[u'foo']
    <Message u'foo' (flags: [])>

    If a message with that ID is already in the catalog, it is updated
    to include the locations and flags of the new message.

    >>> catalog = Catalog()
    >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
    >>> catalog[u'foo'].locations
    [('main.py', 1)]
    >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
    >>> catalog[u'foo'].locations
    [('main.py', 1), ('utils.py', 5)]

    An empty ID denotes the header entry: its string is parsed as MIME
    headers and its user comments become the header comment; it is not
    stored among the regular messages.

    :param id: the message ID
    :param message: the `Message` object
    """
    assert isinstance(message, Message), 'expected a Message object'
    key = self._key_for(id, message.context)
    existing = self._messages.get(key)

    if existing:
        # Merge the new occurrence into the message that is already stored.
        if message.pluralizable and not existing.pluralizable:
            # The new message adds pluralization
            existing.id = message.id
            existing.string = message.string
        for field in ('locations', 'auto_comments', 'user_comments'):
            combined = getattr(existing, field) + getattr(message, field)
            setattr(existing, field, list(distinct(combined)))
        existing.flags |= message.flags
        return

    if id == '':
        # special treatment for the header message
        # message_from_string only works for str, not for unicode
        parsed = message_from_string(message.string.encode('utf8'))
        decoded = {}
        for header_name, header_value in parsed.items():
            decoded[header_name.decode('utf8')] = header_value.decode('utf8')
        self.mime_headers = decoded.items()
        comment_lines = ['# %s' % comment
                         for comment in message.user_comments]
        self.header_comment = '\n'.join(comment_lines)
        return

    if isinstance(id, (list, tuple)):
        assert isinstance(message.string, (list, tuple)), \
            'Expected sequence but got %s' % type(message.string)
    self._messages[key] = message
def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
        user_comments=(), previous_id=(), lineno=None, context=None,
        formatFlag=None):
    """Add or update the message with the specified ID.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo')
    <Message ...>
    >>> catalog[u'foo']
    <Message u'foo' (flags: [])>

    This is a convenience wrapper: it builds a `Message` from the given
    arguments and stores it through `__setitem__`.

    :param id: the message ID, or a ``(singular, plural)`` tuple for
               pluralizable messages
    :param string: the translated message string, or a
                   ``(singular, plural)`` tuple for pluralizable messages
    :param locations: a sequence of strings that determine where a message
                      was found
    :param flags: a set or sequence of flags
    :param auto_comments: a sequence of automatic comments
    :param user_comments: a sequence of user comments
    :param previous_id: the previous message ID, or a
                        ``(singular, plural)`` tuple for pluralizable
                        messages
    :param lineno: the line number on which the msgid line was found in the
                   PO file, if any
    :param context: the message context
    :param formatFlag: forwarded unchanged to the `Message` constructor
                       (presumably the format flag to set; confirm against
                       `Message`)
    :return: the newly added message
    :rtype: `Message`
    """
    positional = (id, string, locations, flags, auto_comments,
                  user_comments, previous_id)
    created = Message(*positional, lineno=lineno, context=context,
                      formatFlag=formatFlag)
    self[id] = created
    return created
def get(self, id, context=None):
    """Look up a message by ID and context.

    :param id: the message ID
    :param context: the message context, or ``None`` for no context
    :return: the matching message, or `None` when the catalog has no such
             message
    :rtype: `Message`
    """
    lookup_key = self._key_for(id, context)
    return self._messages.get(lookup_key)
def delete(self, id, context=None):
    """Remove the message with the given ID and context, if present.

    Deleting a message that is not in the catalog is a silent no-op.

    :param id: the message ID
    :param context: the message context, or ``None`` for no context
    """
    lookup_key = self._key_for(id, context)
    self._messages.pop(lookup_key, None)
@property
def num_plurals(self):
    """The number of plural forms; 2 when no count has been set."""
    count = self._num_plurals
    return 2 if count is None else count
def _key_for(self, id, context=None):
"""The key for a message is just the singular ID even for pluralizable
messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
messages.
"""
key = id
if isinstance(key, (list, tuple)):
key = id[0]
if context is not None:
key = (key, context)
return key

View file

@ -1,48 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# Copyright (C) 2013 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.
# The name of the author may not be used to endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
"""Basic infrastructure for extracting localizable messages from source files.
This module defines an extensible system for collecting localizable message
strings from a variety of sources. A native extractor for Python source files
is builtin, extractors for other sources can be added using very simple plugins.
The main entry point into the extraction functionality is the function
`getExtractorInstance`, which looks up an extractor class by name in
``potter.extractors`` and instantiates it.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
__all__ = ['getExtractorInstance']
__docformat__ = 'restructuredtext en'
def getExtractorInstance(code, directoryPath, filemasks, options=None):
    """Instantiate the extractor class named *code*.

    The class is looked up by name in the ``potter.extractors`` module.

    :param code: name of the extractor class inside ``potter.extractors``
    :param directoryPath: passed through to the extractor constructor
    :param filemasks: passed through to the extractor constructor
    :param options: extractor options; a fresh empty dict is used when
                    omitted, so instances never share one default mapping
    :return: the newly created extractor instance
    :raises AttributeError: if ``potter.extractors`` has no attribute *code*
    """
    if options is None:
        options = {}
    extractorsModule = __import__("potter.extractors", {}, {}, [code])
    extractorClass = getattr(extractorsModule, code)
    return extractorClass(directoryPath, filemasks, options)

View file

@ -1,236 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# Copyright (C) 2013 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.
# The name of the author may not be used to endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
"""Writing of files in the ``gettext`` MO (machine object) format.
:since: version 0.9
:see: `The Format of MO Files
<http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>`_
"""
import array
import struct
from catalog import Catalog, Message
__all__ = ['read_mo', 'write_mo']
__docformat__ = 'restructuredtext en'
# Magic numbers identifying a GNU MO file, as obtained when the first four
# bytes are read as a little-endian unsigned int. No ``L`` suffix: Python 2
# promotes to long automatically and Python 3 rejects the suffix.
LE_MAGIC = 0x950412de  # file was written little-endian
BE_MAGIC = 0xde120495  # file was written big-endian
def read_mo(fileobj):
    """Parse a binary MO file into a `Catalog`.

    :param fileobj: the file-like object to read the MO file from
    :return: a catalog object representing the parsed MO file
    :rtype: `Catalog`
    :raises IOError: if the magic number is unknown or a string-table entry
                     points outside the file

    :note: The implementation of this function is heavily based on the
           ``GNUTranslations._parse`` method of the ``gettext`` module in
           the standard library.
    """
    catalog = Catalog()
    headers = {}
    filename = getattr(fileobj, 'name', '')

    data = fileobj.read()
    size = len(data)

    # The magic number tells us which byte order the rest of the file uses.
    magic = struct.unpack('<I', data[:4])[0]
    if magic == LE_MAGIC:
        byte_order = '<'
    elif magic == BE_MAGIC:
        byte_order = '>'
    else:
        raise IOError(0, 'Bad magic number', filename)
    version, msgcount, origidx, transidx = struct.unpack(
        byte_order + '4I', data[4:20])
    pair_format = byte_order + 'II'

    # Walk both index tables in lock step; each entry is (length, offset).
    for _ in xrange(0, msgcount):
        mlen, moff = struct.unpack(pair_format, data[origidx:origidx + 8])
        tlen, toff = struct.unpack(pair_format, data[transidx:transidx + 8])
        mend = moff + mlen
        tend = toff + tlen
        if not (mend < size and tend < size):
            raise IOError(0, 'File is corrupt', filename)
        msg = data[moff:mend]
        tmsg = data[toff:tend]

        # An empty msgid marks the GNU-style metadata entry.
        if mlen == 0:
            lastkey = None
            for item in tmsg.splitlines():
                item = item.strip()
                if not item:
                    continue
                if ':' in item:
                    name, value = item.split(':', 1)
                    lastkey = name.strip().lower()
                    headers[lastkey] = value.strip()
                elif lastkey:
                    # continuation of the previous header
                    headers[lastkey] += '\n' + item

        ctxt = None
        if '\x04' in msg:  # context
            ctxt, msg = msg.split('\x04')

        charset = catalog.charset
        if '\x00' in msg:  # plural forms
            msg = msg.split('\x00')
            tmsg = tmsg.split('\x00')
            if charset:
                msg = [part.decode(charset) for part in msg]
                tmsg = [part.decode(charset) for part in tmsg]
        elif charset:
            msg = msg.decode(charset)
            tmsg = tmsg.decode(charset)

        catalog[msg] = Message(msg, tmsg, context=ctxt)

        # advance to next entry in the seek tables
        origidx += 8
        transidx += 8

    catalog.mime_headers = headers.items()
    return catalog
def write_mo(fileobj, catalog, use_fuzzy=False):
    """Serialize a catalog in the GNU MO file format.

    >>> from babel.messages import Catalog
    >>> from gettext import GNUTranslations
    >>> from StringIO import StringIO

    >>> catalog = Catalog(locale='en_US')
    >>> catalog.add('foo', 'Voh')
    <Message ...>
    >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
    <Message ...>
    >>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
    <Message ...>
    >>> catalog.add('Fizz', '')
    <Message ...>
    >>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
    <Message ...>
    >>> buf = StringIO()

    >>> write_mo(buf, catalog)
    >>> buf.seek(0)
    >>> translations = GNUTranslations(fp=buf)
    >>> translations.ugettext('foo')
    u'Voh'
    >>> translations.ungettext('bar', 'baz', 1)
    u'Bahr'
    >>> translations.ungettext('bar', 'baz', 2)
    u'Batz'
    >>> translations.ugettext('fuz')
    u'fuz'
    >>> translations.ugettext('Fizz')
    u'Fizz'
    >>> translations.ugettext('Fuzz')
    u'Fuzz'
    >>> translations.ugettext('Fuzzes')
    u'Fuzzes'

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param use_fuzzy: whether translations marked as "fuzzy" should be
                      included in the output
    """
    entries = list(catalog)
    if not use_fuzzy:
        # Index 0 is the header entry and is always kept.
        entries[1:] = [entry for entry in entries[1:] if not entry.fuzzy]
    entries.sort()

    charset = catalog.charset
    key_chunks = []
    value_chunks = []
    key_pos = value_pos = 0
    table = []
    for entry in entries:
        # For each string, we need size and file offset. Each string is NUL
        # terminated; the NUL does not count into the size.
        if entry.pluralizable:
            key = '\x00'.join([form.encode(charset) for form in entry.id])
            # Untranslated forms fall back to the singular/plural msgid.
            forms = []
            for idx, text in enumerate(entry.string):
                forms.append(text if text else entry.id[min(int(idx), 1)])
            value = '\x00'.join([form.encode(charset) for form in forms])
        else:
            key = entry.id.encode(charset)
            value = (entry.string or entry.id).encode(charset)
        if entry.context:
            key = '\x04'.join([entry.context.encode(charset), key])
        table.append((key_pos, len(key), value_pos, len(value)))
        key_chunks.append(key + '\x00')
        value_chunks.append(value + '\x00')
        key_pos += len(key) + 1
        value_pos += len(value) + 1
    ids = ''.join(key_chunks)
    strs = ''.join(value_chunks)

    # The header is 7 32-bit unsigned integers. We don't use hash tables, so
    # the keys start right after the index tables.
    header_size = 7 * 4
    keystart = header_size + 16 * len(entries)
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    key_index = []
    value_index = []
    for key_off, key_len, value_off, value_len in table:
        key_index.extend([key_len, key_off + keystart])
        value_index.extend([value_len, value_off + valuestart])
    offsets = key_index + value_index

    header = struct.pack(
        'Iiiiiii',
        LE_MAGIC,                         # magic
        0,                                # version
        len(entries),                     # number of entries
        header_size,                      # start of key index
        header_size + len(entries) * 8,   # start of value index
        0, 0                              # size and offset of hash table
    )
    fileobj.write(header + array.array("i", offsets).tostring() + ids + strs)

View file

@ -1,506 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# Copyright (C) 2013 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.
# The name of the author may not be used to endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
"""Reading and writing of files in the ``gettext`` PO (portable object)
format.
:see: `The Format of PO Files
<http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
"""
from __future__ import absolute_import, division, print_function, unicode_literals
from datetime import datetime
import os
import re
from potter.util import wraptext
from potter.catalog import Catalog, Message
__all__ = ['read_po', 'write_po']
__docformat__ = 'restructuredtext en'
def unescape(string):
    r"""Reverse `escape` the given string.

    The surrounding double quotes are dropped and the ``\\``, ``\t``,
    ``\r``, ``\n`` and ``\"`` escape sequences are expanded.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    :param string: the string to unescape
    """
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def _expand(match):
        char = match.group(1)
        # A backslash or double quote simply stands for itself.
        return control_chars.get(char, char)

    unquoted = string[1:-1]
    return re.sub(r'\\([\\trn"])', _expand, unquoted)
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    :param string: the string to denormalize
    """
    # Single-line values are just one quoted string.
    if '\n' not in string:
        return unescape(string)
    quoted_lines = string.splitlines()
    if string.startswith('""'):
        # Drop the empty first line that introduces a multi-line value.
        quoted_lines = quoted_lines[1:]
    return ''.join([unescape(quoted) for quoted in quoted_lines])
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset="utf-8"):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from datetime import datetime
    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', u'quux %(name)s')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), (u'bar', u'baaz'))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    .. versionadded:: 1.0
       Added support for explicit charset argument.

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :param charset: the character set of the catalog.
    :return: the catalog built from the file
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain, charset=charset)

    # Parser state. Scalars are wrapped in one-element lists so the nested
    # functions can rebind them (Python 2 has no ``nonlocal``).
    counter = [0]
    offset = [0]
    messages = []
    translations = []
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    context = []
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        # Flush the entry collected so far into the catalog and reset state.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        # Hand over copies: the shared buffers are cleared right below, and
        # Message may keep a reference to what it is given.
        message = Message(msgid, string, list(locations), set(flags),
                          list(auto_comments), list(user_comments),
                          lineno=offset[0] + 1, context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del context[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            in_msgctxt[0] = False
            msg = line[6:].lstrip()
            if msg.startswith('['):
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Remember that we are inside msgctxt so that continuation
            # lines of a multi-line context are collected, not dropped.
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            # Continuation line of whichever keyword came last.
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        # ``bytes`` is ``str`` on Python 2, so this decodes exactly the
        # lines that are not unicode yet.
        if isinstance(line, bytes):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
# Splits text into chunks that `normalize` may wrap between. The whole
# pattern is one capturing group, so the separators themselves are kept in
# the result of ``WORD_SEP.split()``.
_WORD_SEP_ALTERNATIVES = (
    r'\s+',                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',   # em-dash
)
WORD_SEP = re.compile('(' + '|'.join(_WORD_SEP_ALTERNATIVES) + ')')
def escape(string):
    r"""Escape the given string so that it can be included in double-quoted
    strings in ``PO`` files.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, wrapped in double quotes
    :rtype: `str` or `unicode`
    """
    # Order matters: backslashes are doubled first so the backslashes
    # introduced by the later replacements are left alone.
    replacements = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    escaped = string
    for raw, quoted in replacements:
        escaped = escaped.replace(raw, quoted)
    return '"%s"' % escaped
def normalize(string, prefix='', width=80):
    r"""Convert a string into a format that is appropriate for .po files.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    wrapping = bool(width and width > 0)
    if not wrapping:
        lines = string.splitlines(True)
    else:
        prefixlen = len(prefix)
        lines = []
        for source_line in string.splitlines(True):
            if len(escape(source_line)) + prefixlen <= width:
                lines.append(source_line)
                continue
            # Too long: re-flow the line chunk by chunk.
            pieces = WORD_SEP.split(source_line)
            total = len(pieces)
            pos = 0
            while pos < total:
                row = []
                used = 2  # the two enclosing quote characters
                while pos < total:
                    cost = len(escape(pieces[pos])) - 2 + prefixlen
                    if used + cost < width:
                        row.append(pieces[pos])
                        pos += 1
                        used += cost
                        continue
                    if not row:
                        # handle long chunks by putting them on a
                        # separate line
                        row.append(pieces[pos])
                        pos += 1
                    break
                lines.append(u''.join(row))

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    quoted = [(prefix + escape(piece)) for piece in lines]
    return u'""\n' + u'\n'.join(quoted)
def write_po(fileobj, catalog, width=80, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=['main.py:1',],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=['main.py:3',])
    <Message...>
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        return normalize(key, prefix=prefix, width=width)

    def _write(text):
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        # xgettext always wraps comments even if --no-wrap is passed;
        # provide the same behaviour
        if width and width > 0:
            _width = width
        else:
            _width = 80
        if isinstance(comment, (tuple, list)):
            # e.g. a (filename, lineno) location. Use %-formatting rather
            # than str(): this module uses unicode_literals, and str()
            # fails on non-ASCII text under Python 2.
            comment = ':'.join(['%s' % piece for piece in comment])
        for line in wraptext(comment, _width):
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if message.context:
            _write('%smsgctxt %s\n' % (prefix,
                                       _normalize(message.context, prefix)))
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))
            # Emit one msgstr per plural form of the catalog (2 unless a
            # Plural-Forms header said otherwise), padding with ''.
            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        # key= gives the same (stable) order as the old cmp-based sort and
        # also works on Python 3.
        messages.sort(key=lambda message: message.locations)

    for message in messages:
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                comment_header = u'\n'.join(lines)
            _write(comment_header + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            for location in message.locations:
                _write_comment(location, prefix=':')
        if message.flags:
            # Sorted so that the output does not depend on set ordering.
            _write('#%s\n' % ', '.join([''] + sorted(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')

View file

@ -1,300 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2011 Edgewall Software
# Copyright (C) 2013 Wildfire Games
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following
# disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.
# The name of the author may not be used to endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs:
# • http://babel.edgewall.org/log/trunk/babel/messages
# • http://trac.wildfiregames.com/browser/ps/trunk/source/tools/i18n/potter
"""Various utility classes and functions."""
from __future__ import absolute_import, division, print_function, unicode_literals
import codecs
from datetime import timedelta, tzinfo
import os
import re
import textwrap
import time
try:
from itertools import izip as zip
from itertools import imap as map
except ImportError:
pass # Python 3
missing = object()
__all__ = ['distinct', 'pathmatch', 'relpath', 'wraptext', 'UTC',
'LOCALTZ']
__docformat__ = 'restructuredtext en'
def distinct(iterable):
    """Iterate over ``iterable``, skipping items that were already produced.

    Only the first occurrence of each item is yielded, at the position where
    it first appeared, so the input ordering is kept (which a plain ``set``
    would not guarantee).  Items are tracked in a set and therefore have to
    be hashable.

    >>> list(distinct([1, 2, 1, 3, 4, 4]))
    [1, 2, 3, 4]
    >>> print(''.join(distinct('foobar')))
    fobar

    :param iterable: the iterable collection providing the data
    :return: the distinct items in the collection
    :rtype: ``iterator``
    """
    already_seen = set()
    for candidate in iterable:
        if candidate in already_seen:
            continue
        yield candidate
        already_seen.add(candidate)
# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    r'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)
def parse_encoding(fp):
    """Deduce the encoding of a source file from its magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: http://docs.python.org/ref/encodings.html

    Only the first two lines are inspected, and the second one only when the
    first line parses as a complete statement on its own (otherwise line two
    may be a continuation of line one).  The file position is restored before
    returning, even when an exception is raised.

    Both binary and text file objects are accepted; byte lines are decoded as
    latin-1 before being examined, so the encoding name is returned as text.

    (From Jeff Dairiki)

    :param fp: a seekable file object
    :return: ``'utf_8'`` if the file starts with a UTF-8 byte-order-mark,
             otherwise the encoding named by the magic comment, or `None` if
             there is no magic comment
    :raise SyntaxError: if the file has a UTF-8 byte-order-mark and a magic
                        comment that names an encoding other than UTF-8
    """
    # Imported locally so the block does not depend on a file-level import.
    # ``ast`` replaces the ``parser`` module, which newer Python releases no
    # longer ship.
    import ast

    def _as_text(line):
        # latin-1 maps every byte to exactly one code point, so decoding
        # cannot fail and the ASCII magic comment is left untouched.
        if isinstance(line, bytes):
            return line.decode('latin-1')
        return line

    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        if isinstance(line1, bytes):
            has_bom = line1.startswith(codecs.BOM_UTF8)
            if has_bom:
                line1 = line1[len(codecs.BOM_UTF8):]
        else:
            # In a decoded text stream the byte-order-mark shows up as U+FEFF.
            has_bom = line1.startswith(u'\ufeff')
            if has_bom:
                line1 = line1[1:]
        line1 = _as_text(line1)

        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                ast.parse(line1)
            except (SyntaxError, ValueError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
                # in which case we don't want to scan line2 for a magic
                # comment.  (ValueError covers lines containing null bytes.)
                pass
            else:
                m = PYTHON_MAGIC_COMMENT_re.match(_as_text(fp.readline()))

        if has_bom:
            if m:
                # A byte-order-mark together with a magic comment is fine as
                # long as the comment also says UTF-8; only a conflicting
                # declaration is an error.
                declared = m.group(1)
                normalized = declared[:12].lower().replace('_', '-')
                if normalized != 'utf-8' and \
                        not normalized.startswith('utf-8-'):
                    raise SyntaxError(
                        "encoding problem: %s with BOM" % declared)
            return 'utf_8'
        elif m:
            return m.group(1)
        else:
            return None
    finally:
        fp.seek(pos)
def pathmatch(pattern, filename):
    """Match a path name against an extended glob pattern.

    Compared to the ``fnmatch`` module of the standard library, this function:

     * matches against the complete (relative or absolute) path name rather
       than only the file name, and
     * understands ``**`` as "any number of directory levels".

    The pattern is translated piece by piece into a regular expression that
    must match the whole of ``filename``; ``os.sep`` in ``filename`` is
    treated as ``/``.  Only the wildcard runs ``?``, ``*`` and ``**``
    (optionally followed by ``/``) are known; any other run of ``?``/``*``
    characters raises `KeyError`.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False
    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    :return: `True` if the path name matches the pattern, `False` otherwise
    :rtype: `bool`
    """
    wildcard_regex = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }
    # Splitting on a capturing group alternates literal text (even
    # positions) with the wildcard runs that separated it (odd positions).
    tokens = re.split('([?*]+/?)', pattern)
    fragments = []
    for position, token in enumerate(tokens):
        if position % 2 == 1:
            fragments.append(wildcard_regex[token])
        elif token:
            fragments.append(re.escape(token))
    normalized_name = filename.replace(os.sep, '/')
    return re.match(''.join(fragments) + '$', normalized_name) is not None
class TextWrapper(textwrap.TextWrapper):
    """``textwrap.TextWrapper`` with a narrower set of break opportunities.

    Text is split only at runs of whitespace and at em-dashes (two or more
    hyphens between word-like characters); a single hyphen inside a word is
    not a break opportunity.
    """
    # First alternative: any whitespace.  Second alternative: an em-dash.
    wordsep_re = re.compile(
        r'(\s+|(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))'
    )
def wraptext(text, width=70, initial_indent='', subsequent_indent=''):
    """Wrap ``text`` into lines using this module's `TextWrapper`.

    This is a thin convenience layer over the standard library's
    ``textwrap`` machinery.  Words longer than ``width`` are kept intact
    rather than being broken up.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    :return: a list of lines
    :rtype: `list`
    """
    wrapper_options = dict(
        width=width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=False,
    )
    return TextWrapper(**wrapper_options).wrap(text)
try:
    relpath = os.path.relpath
except AttributeError:
    # Fallback for interpreters whose ``os.path`` has no ``relpath``.
    def relpath(path, start='.'):
        """Compute the relative path to one path from another.

        >>> relpath('foo/bar.txt', '').replace(os.sep, '/')
        'foo/bar.txt'
        >>> relpath('foo/bar.txt', 'foo').replace(os.sep, '/')
        'bar.txt'
        >>> relpath('foo/bar.txt', 'baz').replace(os.sep, '/')
        '../foo/bar.txt'
        >>> relpath('foo', 'foo') == os.curdir
        True

        :param path: the path to express relative to ``start``
        :param start: the directory the result is relative to
        :return: the relative path
        :rtype: `basestring`
        """
        # Empty components (from the leading separator, or from a start
        # directory that is the root itself) are dropped so that they
        # cannot be counted as directory levels.
        start_list = [part for part in os.path.abspath(start).split(os.sep)
                      if part]
        path_list = [part for part in os.path.abspath(path).split(os.sep)
                     if part]
        # Work out how much of the filepath is shared by start and path.
        i = len(os.path.commonprefix([start_list, path_list]))
        rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
        if not rel_list:
            # Both paths name the same directory; os.path.join() cannot be
            # called without arguments.
            return os.curdir
        return os.path.join(*rel_list)
ZERO = timedelta(0)
class FixedOffsetTimezone(tzinfo):
    """Fixed offset in minutes east from UTC.

    :param offset: the offset from UTC in minutes; positive values are east
                   of UTC
    :param name: the name reported by ``str()`` and ``tzname()``; when
                 omitted, one is generated from ``offset``
    """

    def __init__(self, offset, name=None):
        self._offset = timedelta(minutes=offset)
        if name is None:
            # '%+d' writes the sign itself, so a negative offset gives
            # 'Etc/GMT-60' instead of the malformed 'Etc/GMT+-60'.
            # NOTE: the number is the offset in minutes as passed in; the
            # generated label is not a tz database zone name.
            name = 'Etc/GMT%+d' % offset
        self.zone = name

    def __str__(self):
        return self.zone

    def __repr__(self):
        return '<FixedOffset "%s" %s>' % (self.zone, self._offset)

    def utcoffset(self, dt):
        """Return the fixed offset, regardless of ``dt``."""
        return self._offset

    def tzname(self, dt):
        """Return the zone name, regardless of ``dt``."""
        return self.zone

    def dst(self, dt):
        """Return a zero ``timedelta``: a fixed offset has no DST."""
        return ZERO
try:
    from pytz import UTC
except ImportError:
    # pytz is optional: without it, use a zero-offset zone from this module.
    UTC = FixedOffsetTimezone(offset=0, name='UTC')
"""`tzinfo` object for UTC (Universal Time).

:type: `tzinfo`
"""
# UTC offsets of the local zone, read from the ``time`` module when this
# module is imported.  ``time.timezone``/``time.altzone`` count seconds west
# of UTC, hence the sign flip.
STDOFFSET = timedelta(seconds=-time.timezone)
DSTOFFSET = timedelta(seconds=-time.altzone) if time.daylight else STDOFFSET
DSTDIFF = DSTOFFSET - STDOFFSET
class LocalTimezone(tzinfo):
    """`tzinfo` implementation describing the local time zone of the system.

    Whether daylight saving time applies to a given moment is decided by
    round-tripping it through ``time.mktime`` and ``time.localtime``.
    """

    def utcoffset(self, dt):
        """Return the local UTC offset that applies at ``dt``."""
        return DSTOFFSET if self._isdst(dt) else STDOFFSET

    def dst(self, dt):
        """Return the DST adjustment at ``dt`` (zero outside of DST)."""
        return DSTDIFF if self._isdst(dt) else ZERO

    def tzname(self, dt):
        """Return the standard or DST zone name that applies at ``dt``."""
        return time.tzname[self._isdst(dt)]

    def _isdst(self, dt):
        # The trailing -1 is the tm_isdst field: it leaves the DST decision
        # to mktime().
        time_fields = (dt.year, dt.month, dt.day,
                       dt.hour, dt.minute, dt.second,
                       dt.weekday(), 0, -1)
        local = time.localtime(time.mktime(time_fields))
        return local.tm_isdst > 0
LOCALTZ = LocalTimezone()
"""`tzinfo` object for local time-zone.

:type: `tzinfo`
"""

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python
#!/usr/bin/env python2
# -*- coding:utf-8 -*-
#
# Copyright (C) 2013 Wildfire Games.
# Copyright (C) 2014 Wildfire Games.
# This file is part of 0 A.D.
#
# 0 A.D. is free software: you can redistribute it and/or modify
@ -19,11 +19,13 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import codecs, json, os, textwrap
import codecs, json, string, os, textwrap
from potter.catalog import Catalog, Message
from potter.extract import getExtractorInstance
from potter.pofile import write_po
from pology.catalog import Catalog
from pology.message import Message
from pology.monitored import Monpair, Monlist
from lxml import etree
l10nToolsDirectory = os.path.dirname(os.path.realpath(__file__))
@ -67,7 +69,6 @@ def generateTemplatesForMessagesFile(messagesFilePath):
rootPath = os.path.dirname(messagesFilePath)
for templateSettings in settings:
if "skip" in templateSettings and templateSettings["skip"] == "yes":
continue
@ -75,26 +76,34 @@ def generateTemplatesForMessagesFile(messagesFilePath):
if "inputRoot" in templateSettings:
inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])
template = Catalog()
template.project = templateSettings["project"]
template.copyright_holder = templateSettings["copyrightHolder"]
template = Catalog(os.path.join(rootPath, templateSettings["output"]), create=True, truncate=True)
h = template.update_header(templateSettings["project"], "Translation template for %project.", "Copyright © "+"2014"+" "+templateSettings["copyrightHolder"], "This file is distributed under the same license as the %project project.")
h.remove_field("Report-Msgid-Bugs-To")
h.remove_field("Last-Translator")
h.remove_field("Plural-Forms")
h.remove_field("Language-Team")
h.remove_field("Language")
h.author = Monlist()
for rule in templateSettings["rules"]:
if "skip" in rule and rule["skip"] == "yes":
continue
options = rule.get("options", {})
extractor = getExtractorInstance(rule["extractor"], inputRootPath, rule["filemasks"], options)
for message, context, location, comments in extractor.run():
formatFlag = None
if "format" in options:
formatFlag = options["format"]
template.add(message, context=context, locations=[location], auto_comments=comments, formatFlag=formatFlag)
with codecs.open(os.path.join(rootPath, templateSettings["output"]), 'w', 'utf-8') as fileObject:
write_po(fileObject, template)
extractorClass = getattr(__import__("extractors.extractors", {}, {}, [rule["extractor"]]), rule["extractor"])
extractor = extractorClass(inputRootPath, rule["filemasks"], options)
formatFlag = None
if "format" in options:
formatFlag = options["format"]
for message, plural, context, location, comments in extractor.run():
msg = Message({"msgid": message, "msgid_plural": plural, "msgctxt": context, "auto_comment": comments, "flag": [formatFlag] if formatFlag and string.find(message, "%") != -1 else None, "source": [location]})
if template.get(msg):
template.get(msg).source.append(Monpair(location))
else:
template.add(msg)
template.set_encoding("utf-8")
template.sync()
print(u"Generated “{}” with {} messages.".format(templateSettings["output"], len(template)))