Fixed: Translation files now all use UTF-8 by default (and LF instead of CRLF), fixes #128

This commit is contained in:
kervala 2016-10-29 19:50:28 +02:00
parent 7c4eb1b66b
commit 0fa7db915e
7 changed files with 41 additions and 58 deletions

View file

@ -21,7 +21,7 @@
namespace STRING_MANAGER namespace STRING_MANAGER
{ {
const ucstring nl("\r\n"); const ucstring nl("\n");
struct TStringInfo struct TStringInfo

View file

@ -162,11 +162,8 @@ public:
* EF,BB, BF. * EF,BB, BF.
* 16 bits encoding can be recognized by the official header : * 16 bits encoding can be recognized by the official header :
* FF, FE, which can be reversed if the data are MSB first. * FF, FE, which can be reversed if the data are MSB first.
*
* Optionally, you can force the reader to consider the file as
* UTF-8 encoded.
*/ */
static void readTextBuffer(uint8 *buffer, uint size, ucstring &result, bool forceUtf8 = false); static void readTextBuffer(uint8 *buffer, uint size, ucstring &result);
/** Remove any C style comment from the passed string. /** Remove any C style comment from the passed string.
*/ */

View file

@ -117,7 +117,7 @@ bool loadStringFile(const std::string filename, vector<TStringInfo> &stringInfos
*/ */
ucstring text; ucstring text;
CI18N::readTextFile(filename, text, false, false, true, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, text, false, false, true, CI18N::LINE_FMT_LF);
// CI18N::readTextBuffer(buffer, size, text); // CI18N::readTextBuffer(buffer, size, text);
// delete [] buffer; // delete [] buffer;
@ -313,7 +313,7 @@ bool readPhraseFile(const std::string &filename, vector<TPhrase> &phrases, bool
{ {
ucstring doc; ucstring doc;
CI18N::readTextFile(filename, doc, false, false, true, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, doc, false, false, true, CI18N::LINE_FMT_LF);
return readPhraseFileFromString(doc, filename, phrases, forceRehash); return readPhraseFileFromString(doc, filename, phrases, forceRehash);
} }
@ -577,7 +577,7 @@ ucstring preparePhraseFile(const vector<TPhrase> &phrases, bool removeDiffCommen
if (!c.Comments.empty()) if (!c.Comments.empty())
{ {
ucstring comment = tabLines(1, c.Comments); ucstring comment = tabLines(1, c.Comments);
ret += comment; // + '\r'+'\n'; ret += comment; // + '\n';
} }
if (!c.Conditions.empty()) if (!c.Conditions.empty())
{ {
@ -626,7 +626,7 @@ bool loadExcelSheet(const string filename, TWorksheet &worksheet, bool checkUniq
fp.close(); fp.close();
ucstring str; ucstring str;
CI18N::readTextFile(filename, str, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, str, false, false, false, CI18N::LINE_FMT_LF);
if (!readExcelSheet(str, worksheet, checkUnique)) if (!readExcelSheet(str, worksheet, checkUnique))
return false; return false;
@ -646,6 +646,8 @@ bool readExcelSheet(const ucstring &str, TWorksheet &worksheet, bool checkUnique
strArray[strArray.size()-1]= 0; strArray[strArray.size()-1]= 0;
memcpy(&strArray[0], &str[0], str.size()*sizeof(ucchar)); memcpy(&strArray[0], &str[0], str.size()*sizeof(ucchar));
// size of new line characters
size_t sizeOfNl = nl.length();
// **** Build array of lines. just point to strArray, and fill 0 where appropriated // **** Build array of lines. just point to strArray, and fill 0 where appropriated
vector<ucchar*> lines; vector<ucchar*> lines;
@ -660,10 +662,10 @@ bool readExcelSheet(const ucstring &str, TWorksheet &worksheet, bool checkUnique
// nldebug("Found line : [%s]", ucstring(&strArray[lastPos]).toString().c_str()); // nldebug("Found line : [%s]", ucstring(&strArray[lastPos]).toString().c_str());
lines.push_back(&strArray[lastPos]); lines.push_back(&strArray[lastPos]);
} }
lastPos = pos + 2; lastPos = pos + sizeOfNl;
} }
// Must add last line if no \r\n ending // Must add last line if no \n ending
if (lastPos < str.size()) if (lastPos < str.size())
{ {
pos= str.size(); pos= str.size();

View file

@ -743,7 +743,7 @@ void CI18N::_readTextFile(const string &filename,
// Transform the string in ucstring according to format header // Transform the string in ucstring according to format header
if (!text.empty()) if (!text.empty())
readTextBuffer((uint8*)&text[0], (uint)text.size(), result, forceUtf8); readTextBuffer((uint8*)&text[0], (uint)text.size(), result);
if (preprocess) if (preprocess)
{ {
@ -1137,32 +1137,17 @@ void CI18N::_readTextFile(const string &filename,
} }
} }
void CI18N::readTextBuffer(uint8 *buffer, uint size, ucstring &result, bool forceUtf8) void CI18N::readTextBuffer(uint8 *buffer, uint size, ucstring &result)
{ {
static uint8 utf16Header[] = { 0xffu, 0xfeu }; static uint8 utf16Header[] = { 0xffu, 0xfeu };
static uint8 utf16RevHeader[] = { 0xfeu, 0xffu }; static uint8 utf16RevHeader[] = { 0xfeu, 0xffu };
static uint8 utf8Header[] = { 0xefu, 0xbbu, 0xbfu }; static uint8 utf8Header[] = { 0xefu, 0xbbu, 0xbfu };
if (forceUtf8)
{
if (size>=3 && if (size>=3 &&
buffer[0]==utf8Header[0] && buffer[0]==utf8Header[0] &&
buffer[1]==utf8Header[1] && buffer[1]==utf8Header[1] &&
buffer[2]==utf8Header[2] buffer[2]==utf8Header[2]
) )
{
// remove utf8 header
buffer+= 3;
size-=3;
}
string text((char*)buffer, size);
result.fromUtf8(text);
}
else if (size>=3 &&
buffer[0]==utf8Header[0] &&
buffer[1]==utf8Header[1] &&
buffer[2]==utf8Header[2]
)
{ {
// remove utf8 header // remove utf8 header
buffer+= 3; buffer+= 3;
@ -1211,10 +1196,9 @@ void CI18N::readTextBuffer(uint8 *buffer, uint size, ucstring &result, bool forc
} }
else else
{ {
// hum.. ascii read ? // all text files without BOM are now parsed as UTF-8 by default
// so, just do a direct conversion
string text((char*)buffer, size); string text((char*)buffer, size);
result = text; result.fromUtf8(text);
} }
} }

View file

@ -745,11 +745,11 @@ int extractBotNames(int argc, char *argv[])
// saving the modified files // saving the modified files
ucstring s = prepareExcelSheet(botNames); ucstring s = prepareExcelSheet(botNames);
CI18N::writeTextFile(workBotNamesFile.asString(), s, false); CI18N::writeTextFile(workBotNamesFile.asString(), s);
s = prepareExcelSheet(transBotNames); s = prepareExcelSheet(transBotNames);
CI18N::writeTextFile(transBotNamesFile.asString(), s, false); CI18N::writeTextFile(transBotNamesFile.asString(), s);
s = prepareExcelSheet(fcts); s = prepareExcelSheet(fcts);
CI18N::writeTextFile(workTitleFile.asString(), s, false); CI18N::writeTextFile(workTitleFile.asString(), s);
return 0; return 0;
} }

View file

@ -315,7 +315,7 @@ void extractNewWords(string workSheetFileName, string columnId, IWordListBuilder
ucstring s = prepareExcelSheet(workSheet); ucstring s = prepareExcelSheet(workSheet);
try try
{ {
CI18N::writeTextFile(workSheetFileName.c_str(), s, false); CI18N::writeTextFile(workSheetFileName.c_str(), s);
} }
catch (const Exception &e) catch (const Exception &e)
{ {

View file

@ -190,7 +190,7 @@ bool readPhraseFile1(const std::string &filename, vector<TPhrase> &phrases, bool
{ {
ucstring doc; ucstring doc;
CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_LF);
verifyVersion(doc, 1); verifyVersion(doc, 1);
return readPhraseFileFromString(doc, filename, phrases, forceRehash); return readPhraseFileFromString(doc, filename, phrases, forceRehash);
} }
@ -199,7 +199,7 @@ bool readPhraseFile2(const std::string &filename, vector<TPhrase> &phrases, bool
{ {
ucstring doc; ucstring doc;
CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_LF);
verifyVersion(doc, 2); verifyVersion(doc, 2);
return readPhraseFileFromString(doc, filename, phrases, forceRehash); return readPhraseFileFromString(doc, filename, phrases, forceRehash);
} }
@ -352,7 +352,7 @@ bool mergeStringDiff(vector<TStringInfo> &strings, const string &language, const
{ {
// Check if the diff is translated // Check if the diff is translated
ucstring text; ucstring text;
CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_LF);
if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos)
{ {
LOG("Diff file [%s] is not translated, merging it later.\n", CFile::getFilename(diffs[i]).c_str()); LOG("Diff file [%s] is not translated, merging it later.\n", CFile::getFilename(diffs[i]).c_str());
@ -679,7 +679,7 @@ void cleanComment(const std::string & filename)
ucstring text; ucstring text;
uint nbOldValue=0; uint nbOldValue=0;
CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_LF);
ucstring newText; ucstring newText;
ucstring::size_type last = 0; ucstring::size_type last = 0;
@ -800,7 +800,7 @@ int mergeStringDiff(int argc, char *argv[])
{ {
// backup the original file // backup the original file
ucstring old; ucstring old;
CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF);
if (old != str) if (old != str)
CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename); CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename);
} }
@ -840,7 +840,7 @@ bool mergePhraseDiff(vector<TPhrase> &phrases, const string &language, bool only
{ {
// Check if the diff is translated // Check if the diff is translated
ucstring text; ucstring text;
CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_LF);
verifyVersion(text, 1); verifyVersion(text, 1);
if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos)
{ {
@ -1164,7 +1164,7 @@ int mergePhraseDiff(int argc, char *argv[], int version)
{ {
// backup the original file // backup the original file
ucstring old; ucstring old;
CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF);
if (old != str) if (old != str)
CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename); CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename);
} }
@ -1333,7 +1333,7 @@ int mergeClauseDiff(int argc, char *argv[])
{ {
// backup the original file // backup the original file
ucstring old; ucstring old;
CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF);
if (old != str) if (old != str)
CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename); CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename);
} }
@ -1358,7 +1358,7 @@ bool mergeWorksheetDiff(const std::string filename, TWorksheet &sheet, bool only
if (onlyTranslated) if (onlyTranslated)
{ {
ucstring text; ucstring text;
CI18N::readTextFile(fileList[i], text, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(fileList[i], text, false, false, false, CI18N::LINE_FMT_LF);
if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos)
{ {
LOG("Diff file [%s] is not translated, merging it later.\n", CFile::getFilename(fileList[i]).c_str()); LOG("Diff file [%s] is not translated, merging it later.\n", CFile::getFilename(fileList[i]).c_str());
@ -1666,7 +1666,7 @@ int makeWorksheetDiff(int argc, char *argv[], const std::string &additionFilenam
string fn(CFile::getFilenameWithoutExtension(referenceFilename)), ext(CFile::getExtension(referenceFilename)); string fn(CFile::getFilenameWithoutExtension(referenceFilename)), ext(CFile::getExtension(referenceFilename));
std::string diffName(diffDir+fn+"_diff_"+diffVersion+"."+ext); std::string diffName(diffDir+fn+"_diff_"+diffVersion+"."+ext);
CI18N::writeTextFile(diffName, str, false); CI18N::writeTextFile(diffName, str);
} }
@ -1691,9 +1691,9 @@ int mergeWorksheetDiff(int argc, char *argv[], const std::string &filename, cons
// there is no translated file yet, build one from the working file. // there is no translated file yet, build one from the working file.
ucstring str; ucstring str;
string addfn = addDir+additionFile; string addfn = addDir+additionFile;
CI18N::readTextFile(addfn, str, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(addfn, str, false, false, false, CI18N::LINE_FMT_LF);
str = str.substr(0, str.find(nl)+2); str = str.substr(0, str.find(nl)+2);
CI18N::writeTextFile(transDir+filename, str, false); CI18N::writeTextFile(transDir+filename, str);
// reread the file. // reread the file.
bool res = loadExcelSheet(transDir+filename, translated); bool res = loadExcelSheet(transDir+filename, translated);
nlassert(res); nlassert(res);
@ -1711,7 +1711,7 @@ int mergeWorksheetDiff(int argc, char *argv[], const std::string &filename, cons
{ {
// backup the original file // backup the original file
ucstring old; ucstring old;
CI18N::readTextFile(transDir+filename, old, false, true, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(transDir+filename, old, false, true, false, CI18N::LINE_FMT_LF);
if (old != str) if (old != str)
{ {
string fn(CFile::getFilenameWithoutExtension(filename)), ext(CFile::getExtension(filename)); string fn(CFile::getFilenameWithoutExtension(filename)), ext(CFile::getExtension(filename));
@ -1720,7 +1720,7 @@ int mergeWorksheetDiff(int argc, char *argv[], const std::string &filename, cons
} }
if (translated.size() > 0) if (translated.size() > 0)
CI18N::writeTextFile(transDir+filename, str, false); CI18N::writeTextFile(transDir+filename, str);
return 0; return 0;
} }
@ -1865,7 +1865,7 @@ void cropLines(const std::string &filename, uint32 nbLines)
LOG("Cropping %u lines from file '%s'\n", nbLines, filename.c_str()); LOG("Cropping %u lines from file '%s'\n", nbLines, filename.c_str());
CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_LF);
string text = utext.toUtf8(); string text = utext.toUtf8();
@ -1881,7 +1881,7 @@ void cropLines(const std::string &filename, uint32 nbLines)
utext.fromUtf8(text); utext.fromUtf8(text);
CI18N::writeTextFile(filename, utext, true); CI18N::writeTextFile(filename, utext);
} }
@ -1910,7 +1910,7 @@ int makeWork()
// change #include "*_en.txt" into #include "*_wk.txt" // change #include "*_en.txt" into #include "*_wk.txt"
ucstring utext; ucstring utext;
CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_LF);
string text = utext.toUtf8(); string text = utext.toUtf8();
bool changedFile = false; bool changedFile = false;
@ -1943,7 +1943,7 @@ int makeWork()
if (changedFile) if (changedFile)
{ {
utext.fromUtf8(text); utext.fromUtf8(text);
CI18N::writeTextFile(filename, utext, true); CI18N::writeTextFile(filename, utext);
} }
// change filename // change filename
@ -2348,7 +2348,7 @@ void patchWorkFile(vector<TPhrase> &updatedPhrase, const std::string & filename)
{ {
ucstring text; ucstring text;
if ( updatedPhrase.empty() ) { return; } if ( updatedPhrase.empty() ) { return; }
CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_LF);
vector<TPhrase>::const_iterator first(updatedPhrase.begin()); vector<TPhrase>::const_iterator first(updatedPhrase.begin());
vector<TPhrase>::const_iterator last(updatedPhrase.end()); vector<TPhrase>::const_iterator last(updatedPhrase.end());
for (; first != last; ++first) for (; first != last; ++first)
@ -2489,7 +2489,7 @@ bool mergePhraseDiff2(vector<TPhrase> &phrases, const string &language, bool onl
{ {
// Check if the diff is translated // Check if the diff is translated
ucstring text; ucstring text;
CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_LF);
verifyVersion(text, 2); verifyVersion(text, 2);
if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos)
{ {
@ -2947,7 +2947,7 @@ void preprocessTextFile(const std::string &filename,
// Transform the string in ucstring according to format header // Transform the string in ucstring according to format header
if (!text.empty()) if (!text.empty())
CI18N::readTextBuffer((uint8*)&text[0], (uint)text.size(), result, false); CI18N::readTextBuffer((uint8*)&text[0], (uint)text.size(), result);
@ -3029,7 +3029,7 @@ int mergePhraseDiff(int argc, char *argv[])
{ {
// backup the original file // backup the original file
ucstring old; ucstring old;
CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF);
if (old != str) if (old != str)
CFile::moveFile((historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename)).c_str(), filename.c_str()); CFile::moveFile((historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename)).c_str(), filename.c_str());
} }