mirror of
https://gitea.wildfiregames.com/0ad/0ad
synced 2026-06-18 22:33:56 -07:00
Updates tinygettext to upstream commit 0e34a55e514763a48f8abe7812405ffe722a31f4. No actual code changes from the previously bundled version, but it was relicensed to zlib. Keeps the addition of precompiled.h and win32 dirent. This was SVN commit r16705.
500 lines
12 KiB
C++
500 lines
12 KiB
C++
// tinygettext - A gettext replacement that works directly on .po files
|
|
// Copyright (c) 2009 Ingo Ruhnke <grumbel@gmail.com>
|
|
//
|
|
// This software is provided 'as-is', without any express or implied
|
|
// warranty. In no event will the authors be held liable for any damages
|
|
// arising from the use of this software.
|
|
//
|
|
// Permission is granted to anyone to use this software for any purpose,
|
|
// including commercial applications, and to alter it and redistribute it
|
|
// freely, subject to the following restrictions:
|
|
//
|
|
// 1. The origin of this software must not be misrepresented; you must not
|
|
// claim that you wrote the original software. If you use this software
|
|
// in a product, an acknowledgement in the product documentation would be
|
|
// appreciated but is not required.
|
|
// 2. Altered source versions must be plainly marked as such, and must not be
|
|
// misrepresented as being the original software.
|
|
// 3. This notice may not be removed or altered from any source distribution.
|
|
|
|
#include "precompiled.h"
|
|
|
|
#include "tinygettext/po_parser.hpp"
|
|
|
|
#include <iostream>
|
|
#include <ctype.h>
|
|
#include <string>
|
|
#include <istream>
|
|
#include <string.h>
|
|
#include <unordered_map>
|
|
#include <stdlib.h>
|
|
|
|
#include "tinygettext/language.hpp"
|
|
#include "tinygettext/log_stream.hpp"
|
|
#include "tinygettext/iconv.hpp"
|
|
#include "tinygettext/dictionary.hpp"
|
|
#include "tinygettext/plural_forms.hpp"
|
|
|
|
namespace tinygettext {
|
|
|
|
bool POParser::pedantic = true;
|
|
|
|
void
|
|
POParser::parse(const std::string& filename, std::istream& in, Dictionary& dict)
|
|
{
|
|
POParser parser(filename, in, dict);
|
|
parser.parse();
|
|
}
|
|
|
|
// Empty tag type thrown by POParser::error() and caught in POParser::parse()
// to abandon the entry currently being parsed.
class POParserError {};
|
|
|
|
// Binds the parser to its input stream and target dictionary. Nothing is read
// from `in_` here; the state flags start cleared and the line counter at zero.
//
// filename_  : name reported in warning/error messages
// in_        : stream the .po text is read from
// dict_      : dictionary that receives the parsed translations
// use_fuzzy_ : when true, entries flagged as fuzzy are added as well
POParser::POParser(const std::string& filename_, std::istream& in_, Dictionary& dict_, bool use_fuzzy_)
  : filename(filename_)
  , in(in_)
  , dict(dict_)
  , use_fuzzy(use_fuzzy_)
  , running(false)
  , eof(false)
  , big5(false)
  , line_number(0)
  , current_line()
  , conv()
{
}
|
|
|
|
// Nothing to release explicitly; members clean up after themselves.
POParser::~POParser()
{}
|
|
|
|
void
|
|
POParser::warning(const std::string& msg)
|
|
{
|
|
log_warning << filename << ":" << line_number << ": warning: " << msg << ": " << current_line << std::endl;
|
|
//log_warning << "Line: " << current_line << std::endl;
|
|
}
|
|
|
|
void
|
|
POParser::error(const std::string& msg)
|
|
{
|
|
log_error << filename << ":" << line_number << ": error: " << msg << ": " << current_line << std::endl;
|
|
|
|
// Try to recover from an error by searching for start of another entry
|
|
do
|
|
next_line();
|
|
while(!eof && !is_empty_line());
|
|
|
|
throw POParserError();
|
|
}
|
|
|
|
void
|
|
POParser::next_line()
|
|
{
|
|
line_number += 1;
|
|
if (!std::getline(in, current_line))
|
|
eof = true;
|
|
}
|
|
|
|
void
|
|
POParser::get_string_line(std::ostringstream& out, size_t skip)
|
|
{
|
|
if (skip+1 >= static_cast<unsigned int>(current_line.size()))
|
|
error("unexpected end of line");
|
|
|
|
if (current_line[skip] != '"')
|
|
error("expected start of string '\"'");
|
|
|
|
std::string::size_type i;
|
|
for(i = skip+1; current_line[i] != '\"'; ++i)
|
|
{
|
|
if (big5 && static_cast<unsigned char>(current_line[i]) >= 0x81 && static_cast<unsigned char>(current_line[i]) <= 0xfe)
|
|
{
|
|
out << current_line[i];
|
|
|
|
i += 1;
|
|
|
|
if (i >= current_line.size())
|
|
error("invalid big5 encoding");
|
|
|
|
out << current_line[i];
|
|
}
|
|
else if (i >= current_line.size())
|
|
{
|
|
error("unexpected end of string");
|
|
}
|
|
else if (current_line[i] == '\\')
|
|
{
|
|
i += 1;
|
|
|
|
if (i >= current_line.size())
|
|
error("unexpected end of string in handling '\\'");
|
|
|
|
switch (current_line[i])
|
|
{
|
|
case 'a': out << '\a'; break;
|
|
case 'b': out << '\b'; break;
|
|
case 'v': out << '\v'; break;
|
|
case 'n': out << '\n'; break;
|
|
case 't': out << '\t'; break;
|
|
case 'r': out << '\r'; break;
|
|
case '"': out << '"'; break;
|
|
case '\\': out << '\\'; break;
|
|
default:
|
|
std::ostringstream err;
|
|
err << "unhandled escape '\\" << current_line[i] << "'";
|
|
warning(err.str());
|
|
|
|
out << current_line[i-1] << current_line[i];
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
out << current_line[i];
|
|
}
|
|
}
|
|
|
|
// process trailing garbage in line and warn if there is any
|
|
for(i = i+1; i < current_line.size(); ++i)
|
|
if (!isspace(current_line[i]))
|
|
{
|
|
warning("unexpected garbage after string ignoren");
|
|
break;
|
|
}
|
|
}
|
|
|
|
std::string
|
|
POParser::get_string(unsigned int skip)
|
|
{
|
|
std::ostringstream out;
|
|
|
|
if (skip+1 >= static_cast<unsigned int>(current_line.size()))
|
|
error("unexpected end of line");
|
|
|
|
if (current_line[skip] == ' ' && current_line[skip+1] == '"')
|
|
{
|
|
get_string_line(out, skip+1);
|
|
}
|
|
else
|
|
{
|
|
if (pedantic)
|
|
warning("keyword and string must be seperated by a single space");
|
|
|
|
for(;;)
|
|
{
|
|
if (skip >= static_cast<unsigned int>(current_line.size()))
|
|
error("unexpected end of line");
|
|
else if (current_line[skip] == '\"')
|
|
{
|
|
get_string_line(out, skip);
|
|
break;
|
|
}
|
|
else if (!isspace(current_line[skip]))
|
|
{
|
|
error("string must start with '\"'");
|
|
}
|
|
else
|
|
{
|
|
// skip space
|
|
}
|
|
|
|
skip += 1;
|
|
}
|
|
}
|
|
|
|
next:
|
|
next_line();
|
|
for(std::string::size_type i = 0; i < current_line.size(); ++i)
|
|
{
|
|
if (current_line[i] == '"')
|
|
{
|
|
if (i == 1)
|
|
if (pedantic)
|
|
warning("leading whitespace before string");
|
|
|
|
get_string_line(out, i);
|
|
goto next;
|
|
}
|
|
else if (isspace(current_line[i]))
|
|
{
|
|
// skip
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
return out.str();
|
|
}
|
|
|
|
// Returns true when `lhs` begins with the complete text of `rhs`.
// An empty `rhs` is a prefix of every string.
static bool has_prefix(const std::string& lhs, const std::string& rhs)
{
  const std::string::size_type prefix_len = rhs.length();
  return lhs.length() >= prefix_len && lhs.compare(0, prefix_len, rhs) == 0;
}
|
|
|
|
void
|
|
POParser::parse_header(const std::string& header)
|
|
{
|
|
std::string from_charset;
|
|
std::string::size_type start = 0;
|
|
for(std::string::size_type i = 0; i < header.length(); ++i)
|
|
{
|
|
if (header[i] == '\n')
|
|
{
|
|
std::string line = header.substr(start, i - start);
|
|
|
|
if (has_prefix(line, "Content-Type:"))
|
|
{
|
|
// from_charset = line.substr(len);
|
|
size_t len = strlen("Content-Type: text/plain; charset=");
|
|
if (line.compare(0, len, "Content-Type: text/plain; charset=") == 0)
|
|
{
|
|
from_charset = line.substr(len);
|
|
|
|
for(std::string::iterator ch = from_charset.begin(); ch != from_charset.end(); ++ch)
|
|
*ch = static_cast<char>(toupper(*ch));
|
|
}
|
|
else
|
|
{
|
|
warning("malformed Content-Type header");
|
|
}
|
|
}
|
|
else if (has_prefix(line, "Plural-Forms:"))
|
|
{
|
|
PluralForms plural_forms = PluralForms::from_string(line);
|
|
if (!plural_forms)
|
|
{
|
|
warning("unknown Plural-Forms given");
|
|
}
|
|
else
|
|
{
|
|
if (!dict.get_plural_forms())
|
|
{
|
|
dict.set_plural_forms(plural_forms);
|
|
}
|
|
else
|
|
{
|
|
if (dict.get_plural_forms() != plural_forms)
|
|
{
|
|
warning("Plural-Forms missmatch between .po file and dictionary");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
start = i+1;
|
|
}
|
|
}
|
|
|
|
if (from_charset.empty() || from_charset == "CHARSET")
|
|
{
|
|
warning("charset not specified for .po, fallback to utf-8");
|
|
from_charset = "UTF-8";
|
|
}
|
|
else if (from_charset == "BIG5")
|
|
{
|
|
big5 = true;
|
|
}
|
|
|
|
conv.set_charsets(from_charset, dict.get_charset());
|
|
}
|
|
|
|
bool
|
|
POParser::is_empty_line()
|
|
{
|
|
if (current_line.empty())
|
|
{
|
|
return true;
|
|
}
|
|
else if (current_line[0] == '#')
|
|
{ // handle comments as empty lines
|
|
if (current_line.size() == 1 || (current_line.size() >= 2 && isspace(current_line[1])))
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
for(std::string::iterator i = current_line.begin(); i != current_line.end(); ++i)
|
|
{
|
|
if (!isspace(*i))
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
POParser::prefix(const char* prefix_str)
|
|
{
|
|
return current_line.compare(0, strlen(prefix_str), prefix_str) == 0;
|
|
}
|
|
|
|
void
|
|
POParser::parse()
|
|
{
|
|
next_line();
|
|
|
|
// skip UTF-8 intro that some text editors produce
|
|
// see http://en.wikipedia.org/wiki/Byte-order_mark
|
|
if (current_line.size() >= 3 &&
|
|
current_line[0] == static_cast<char>(0xef) &&
|
|
current_line[1] == static_cast<char>(0xbb) &&
|
|
current_line[2] == static_cast<char>(0xbf))
|
|
{
|
|
current_line = current_line.substr(3);
|
|
}
|
|
|
|
// Parser structure
|
|
while(!eof)
|
|
{
|
|
try
|
|
{
|
|
bool fuzzy = false;
|
|
bool has_msgctxt = false;
|
|
std::string msgctxt;
|
|
std::string msgid;
|
|
|
|
while(prefix("#"))
|
|
{
|
|
if (current_line.size() >= 2 && current_line[1] == ',')
|
|
{
|
|
// FIXME: Rather simplistic hunt for fuzzy flag
|
|
if (current_line.find("fuzzy", 2) != std::string::npos)
|
|
fuzzy = true;
|
|
}
|
|
|
|
next_line();
|
|
}
|
|
|
|
if (!is_empty_line())
|
|
{
|
|
if (prefix("msgctxt"))
|
|
{
|
|
has_msgctxt = true;
|
|
msgctxt = get_string(7);
|
|
}
|
|
|
|
if (prefix("msgid"))
|
|
msgid = get_string(5);
|
|
else
|
|
error("expected 'msgid'");
|
|
|
|
if (prefix("msgid_plural"))
|
|
{
|
|
std::string msgid_plural = get_string(12);
|
|
std::vector<std::string> msgstr_num;
|
|
bool saw_nonempty_msgstr = false;
|
|
|
|
next:
|
|
if (is_empty_line())
|
|
{
|
|
if (msgstr_num.empty())
|
|
error("expected 'msgstr[N] (0 <= N <= 9)'");
|
|
}
|
|
else if (prefix("msgstr[") &&
|
|
current_line.size() > 8 &&
|
|
isdigit(current_line[7]) && current_line[8] == ']')
|
|
{
|
|
unsigned int number = static_cast<unsigned int>(current_line[7] - '0');
|
|
std::string msgstr = get_string(9);
|
|
|
|
if(!msgstr.empty())
|
|
saw_nonempty_msgstr = true;
|
|
|
|
if (number >= msgstr_num.size())
|
|
msgstr_num.resize(number+1);
|
|
|
|
msgstr_num[number] = conv.convert(msgstr);
|
|
goto next;
|
|
}
|
|
else
|
|
{
|
|
error("expected 'msgstr[N]'");
|
|
}
|
|
|
|
if (!is_empty_line())
|
|
error("expected 'msgstr[N]' or empty line");
|
|
|
|
if (saw_nonempty_msgstr)
|
|
{
|
|
if (use_fuzzy || !fuzzy)
|
|
{
|
|
if (!dict.get_plural_forms())
|
|
{
|
|
warning("msgstr[N] seen, but no Plural-Forms given");
|
|
}
|
|
else
|
|
{
|
|
if (msgstr_num.size() != dict.get_plural_forms().get_nplural())
|
|
{
|
|
warning("msgstr[N] count doesn't match Plural-Forms.nplural");
|
|
}
|
|
}
|
|
|
|
if (has_msgctxt)
|
|
dict.add_translation(msgctxt, msgid, msgid_plural, msgstr_num);
|
|
else
|
|
dict.add_translation(msgid, msgid_plural, msgstr_num);
|
|
}
|
|
|
|
if (0)
|
|
{
|
|
std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
|
|
std::cout << "msgid \"" << msgid << "\"" << std::endl;
|
|
std::cout << "msgid_plural \"" << msgid_plural << "\"" << std::endl;
|
|
for(std::vector<std::string>::size_type i = 0; i < msgstr_num.size(); ++i)
|
|
std::cout << "msgstr[" << i << "] \"" << conv.convert(msgstr_num[i]) << "\"" << std::endl;
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
}
|
|
else if (prefix("msgstr"))
|
|
{
|
|
std::string msgstr = get_string(6);
|
|
|
|
if (msgid.empty())
|
|
{
|
|
parse_header(msgstr);
|
|
}
|
|
else if(!msgstr.empty())
|
|
{
|
|
if (use_fuzzy || !fuzzy)
|
|
{
|
|
if (has_msgctxt)
|
|
dict.add_translation(msgctxt, msgid, conv.convert(msgstr));
|
|
else
|
|
dict.add_translation(msgid, conv.convert(msgstr));
|
|
}
|
|
|
|
if (0)
|
|
{
|
|
std::cout << (fuzzy?"fuzzy":"not-fuzzy") << std::endl;
|
|
std::cout << "msgid \"" << msgid << "\"" << std::endl;
|
|
std::cout << "msgstr \"" << conv.convert(msgstr) << "\"" << std::endl;
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
error("expected 'msgstr' or 'msgid_plural'");
|
|
}
|
|
}
|
|
|
|
if (!is_empty_line())
|
|
error("expected empty line");
|
|
|
|
next_line();
|
|
}
|
|
catch(POParserError&)
|
|
{
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace tinygettext
|
|
|
|
/* EOF */
|