From 1c872d9ef9b3ac73fc1851335c7e88c3044321b0 Mon Sep 17 00:00:00 2001 From: kervala Date: Sat, 29 Oct 2016 19:50:28 +0200 Subject: [PATCH] Fixed: Translations files are all using UTF-8 by default now (and LF instead of CRLF), fixes #128 --HG-- branch : develop --- code/nel/include/nel/misc/diff_tool.h | 2 +- code/nel/include/nel/misc/i18n.h | 5 +-- code/nel/src/misc/diff_tool.cpp | 14 +++--- code/nel/src/misc/i18n.cpp | 26 +++-------- .../translation_tools/extract_bot_names.cpp | 6 +-- .../extract_new_sheet_names.cpp | 2 +- code/ryzom/tools/translation_tools/main.cpp | 44 +++++++++---------- 7 files changed, 41 insertions(+), 58 deletions(-) diff --git a/code/nel/include/nel/misc/diff_tool.h b/code/nel/include/nel/misc/diff_tool.h index 75a3f181b..e122137cf 100644 --- a/code/nel/include/nel/misc/diff_tool.h +++ b/code/nel/include/nel/misc/diff_tool.h @@ -21,7 +21,7 @@ namespace STRING_MANAGER { - const ucstring nl("\r\n"); + const ucstring nl("\n"); struct TStringInfo diff --git a/code/nel/include/nel/misc/i18n.h b/code/nel/include/nel/misc/i18n.h index 5f270a490..a24596911 100644 --- a/code/nel/include/nel/misc/i18n.h +++ b/code/nel/include/nel/misc/i18n.h @@ -162,11 +162,8 @@ public: * EF,BB, BF. * 16 bits encoding can be recognized by the official header : * FF, FE, witch can be reversed if the data are MSB first. - * - * Optionally, you can force the reader to consider the file as - * UTF-8 encoded. */ - static void readTextBuffer(uint8 *buffer, uint size, ucstring &result, bool forceUtf8 = false); + static void readTextBuffer(uint8 *buffer, uint size, ucstring &result); /** Remove any C style comment from the passed string. 
*/ diff --git a/code/nel/src/misc/diff_tool.cpp b/code/nel/src/misc/diff_tool.cpp index f57746b1a..f4eb5b948 100644 --- a/code/nel/src/misc/diff_tool.cpp +++ b/code/nel/src/misc/diff_tool.cpp @@ -117,7 +117,7 @@ bool loadStringFile(const std::string filename, vector &stringInfos */ ucstring text; - CI18N::readTextFile(filename, text, false, false, true, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, text, false, false, true, CI18N::LINE_FMT_LF); // CI18N::readTextBuffer(buffer, size, text); // delete [] buffer; @@ -313,7 +313,7 @@ bool readPhraseFile(const std::string &filename, vector &phrases, bool { ucstring doc; - CI18N::readTextFile(filename, doc, false, false, true, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, doc, false, false, true, CI18N::LINE_FMT_LF); return readPhraseFileFromString(doc, filename, phrases, forceRehash); } @@ -577,7 +577,7 @@ ucstring preparePhraseFile(const vector &phrases, bool removeDiffCommen if (!c.Comments.empty()) { ucstring comment = tabLines(1, c.Comments); - ret += comment; // + '\r'+'\n'; + ret += comment; // + '\n'; } if (!c.Conditions.empty()) { @@ -626,7 +626,7 @@ bool loadExcelSheet(const string filename, TWorksheet &worksheet, bool checkUniq fp.close(); ucstring str; - CI18N::readTextFile(filename, str, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, str, false, false, false, CI18N::LINE_FMT_LF); if (!readExcelSheet(str, worksheet, checkUnique)) return false; @@ -646,6 +646,8 @@ bool readExcelSheet(const ucstring &str, TWorksheet &worksheet, bool checkUnique strArray[strArray.size()-1]= 0; memcpy(&strArray[0], &str[0], str.size()*sizeof(ucchar)); + // size of new line characters + size_t sizeOfNl = nl.length(); // **** Build array of lines. 
just point to strArray, and fill 0 where appropriated vector lines; @@ -660,10 +662,10 @@ bool readExcelSheet(const ucstring &str, TWorksheet &worksheet, bool checkUnique // nldebug("Found line : [%s]", ucstring(&strArray[lastPos]).toString().c_str()); lines.push_back(&strArray[lastPos]); } - lastPos = pos + 2; + lastPos = pos + sizeOfNl; } - // Must add last line if no \r\n ending + // Must add last line if no \n ending if (lastPos < str.size()) { pos= str.size(); diff --git a/code/nel/src/misc/i18n.cpp b/code/nel/src/misc/i18n.cpp index b64116c1c..6ac5a9d05 100644 --- a/code/nel/src/misc/i18n.cpp +++ b/code/nel/src/misc/i18n.cpp @@ -743,7 +743,7 @@ void CI18N::_readTextFile(const string &filename, // Transform the string in ucstring according to format header if (!text.empty()) - readTextBuffer((uint8*)&text[0], (uint)text.size(), result, forceUtf8); + readTextBuffer((uint8*)&text[0], (uint)text.size(), result); if (preprocess) { @@ -1137,28 +1137,13 @@ void CI18N::_readTextFile(const string &filename, } } -void CI18N::readTextBuffer(uint8 *buffer, uint size, ucstring &result, bool forceUtf8) +void CI18N::readTextBuffer(uint8 *buffer, uint size, ucstring &result) { static uint8 utf16Header[] = { 0xffu, 0xfeu }; static uint8 utf16RevHeader[] = { 0xfeu, 0xffu }; static uint8 utf8Header[] = { 0xefu, 0xbbu, 0xbfu }; - if (forceUtf8) - { - if (size>=3 && - buffer[0]==utf8Header[0] && - buffer[1]==utf8Header[1] && - buffer[2]==utf8Header[2] - ) - { - // remove utf8 header - buffer+= 3; - size-=3; - } - string text((char*)buffer, size); - result.fromUtf8(text); - } - else if (size>=3 && + if (size>=3 && buffer[0]==utf8Header[0] && buffer[1]==utf8Header[1] && buffer[2]==utf8Header[2] @@ -1211,10 +1196,9 @@ void CI18N::readTextBuffer(uint8 *buffer, uint size, ucstring &result, bool forc } else { - // hum.. ascii read ? 
- // so, just do a direct conversion + // all text files without BOM are now parsed as UTF-8 by default string text((char*)buffer, size); - result = text; + result.fromUtf8(text); } } diff --git a/code/ryzom/tools/translation_tools/extract_bot_names.cpp b/code/ryzom/tools/translation_tools/extract_bot_names.cpp index e25d0e2b0..2b4e32629 100644 --- a/code/ryzom/tools/translation_tools/extract_bot_names.cpp +++ b/code/ryzom/tools/translation_tools/extract_bot_names.cpp @@ -745,11 +745,11 @@ int extractBotNames(int argc, char *argv[]) // saving the modified files ucstring s = prepareExcelSheet(botNames); - CI18N::writeTextFile(workBotNamesFile.asString(), s, false); + CI18N::writeTextFile(workBotNamesFile.asString(), s); s = prepareExcelSheet(transBotNames); - CI18N::writeTextFile(transBotNamesFile.asString(), s, false); + CI18N::writeTextFile(transBotNamesFile.asString(), s); s = prepareExcelSheet(fcts); - CI18N::writeTextFile(workTitleFile.asString(), s, false); + CI18N::writeTextFile(workTitleFile.asString(), s); return 0; } diff --git a/code/ryzom/tools/translation_tools/extract_new_sheet_names.cpp b/code/ryzom/tools/translation_tools/extract_new_sheet_names.cpp index 791f51b77..4573c5eb5 100644 --- a/code/ryzom/tools/translation_tools/extract_new_sheet_names.cpp +++ b/code/ryzom/tools/translation_tools/extract_new_sheet_names.cpp @@ -315,7 +315,7 @@ void extractNewWords(string workSheetFileName, string columnId, IWordListBuilder ucstring s = prepareExcelSheet(workSheet); try { - CI18N::writeTextFile(workSheetFileName.c_str(), s, false); + CI18N::writeTextFile(workSheetFileName.c_str(), s); } catch (const Exception &e) { diff --git a/code/ryzom/tools/translation_tools/main.cpp b/code/ryzom/tools/translation_tools/main.cpp index 000da9551..9f0b33666 100644 --- a/code/ryzom/tools/translation_tools/main.cpp +++ b/code/ryzom/tools/translation_tools/main.cpp @@ -190,7 +190,7 @@ bool readPhraseFile1(const std::string &filename, vector &phrases, bool { ucstring doc; - 
CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_LF); verifyVersion(doc, 1); return readPhraseFileFromString(doc, filename, phrases, forceRehash); } @@ -199,7 +199,7 @@ bool readPhraseFile2(const std::string &filename, vector &phrases, bool { ucstring doc; - CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, doc, false, false, false, CI18N::LINE_FMT_LF); verifyVersion(doc, 2); return readPhraseFileFromString(doc, filename, phrases, forceRehash); } @@ -352,7 +352,7 @@ bool mergeStringDiff(vector &strings, const string &language, const { // Check if the diff is translated ucstring text; - CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_LF); if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) { LOG("Diff file [%s] is not translated, merging it later.\n", CFile::getFilename(diffs[i]).c_str()); @@ -679,7 +679,7 @@ void cleanComment(const std::string & filename) ucstring text; uint nbOldValue=0; - CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_LF); ucstring newText; ucstring::size_type last = 0; @@ -800,7 +800,7 @@ int mergeStringDiff(int argc, char *argv[]) { // backup the original file ucstring old; - CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF); if (old != str) CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename); } @@ -840,7 +840,7 @@ bool mergePhraseDiff(vector &phrases, const string &language, bool only { // Check if the diff is translated ucstring text; - CI18N::readTextFile(diffs[i], text, 
false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_LF); verifyVersion(text, 1); if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) { @@ -1164,7 +1164,7 @@ int mergePhraseDiff(int argc, char *argv[], int version) { // backup the original file ucstring old; - CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF); if (old != str) CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename); } @@ -1333,7 +1333,7 @@ int mergeClauseDiff(int argc, char *argv[]) { // backup the original file ucstring old; - CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF); if (old != str) CFile::moveFile(historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename), filename); } @@ -1358,7 +1358,7 @@ bool mergeWorksheetDiff(const std::string filename, TWorksheet &sheet, bool only if (onlyTranslated) { ucstring text; - CI18N::readTextFile(fileList[i], text, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(fileList[i], text, false, false, false, CI18N::LINE_FMT_LF); if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) { LOG("Diff file [%s] is not translated, merging it later.\n", CFile::getFilename(fileList[i]).c_str()); @@ -1666,7 +1666,7 @@ int makeWorksheetDiff(int argc, char *argv[], const std::string &additionFilenam string fn(CFile::getFilenameWithoutExtension(referenceFilename)), ext(CFile::getExtension(referenceFilename)); std::string diffName(diffDir+fn+"_diff_"+diffVersion+"."+ext); - CI18N::writeTextFile(diffName, str, false); + CI18N::writeTextFile(diffName, str); } @@ -1691,9 +1691,9 @@ int mergeWorksheetDiff(int argc, char *argv[], const std::string 
&filename, cons // there is no translated file yet, build one from the working file. ucstring str; string addfn = addDir+additionFile; - CI18N::readTextFile(addfn, str, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(addfn, str, false, false, false, CI18N::LINE_FMT_LF); - str = str.substr(0, str.find(nl)+2); + str = str.substr(0, str.find(nl)+nl.length()); // keep only the first line, including its line terminator (nl is no longer 2 characters long) - CI18N::writeTextFile(transDir+filename, str, false); + CI18N::writeTextFile(transDir+filename, str); // reread the file. bool res = loadExcelSheet(transDir+filename, translated); nlassert(res); @@ -1711,7 +1711,7 @@ int mergeWorksheetDiff(int argc, char *argv[], const std::string &filename, cons { // backup the original file ucstring old; - CI18N::readTextFile(transDir+filename, old, false, true, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(transDir+filename, old, false, true, false, CI18N::LINE_FMT_LF); if (old != str) { string fn(CFile::getFilenameWithoutExtension(filename)), ext(CFile::getExtension(filename)); @@ -1720,7 +1720,7 @@ int mergeWorksheetDiff(int argc, char *argv[], const std::string &filename, cons } if (translated.size() > 0) - CI18N::writeTextFile(transDir+filename, str, false); + CI18N::writeTextFile(transDir+filename, str); return 0; } @@ -1865,7 +1865,7 @@ void cropLines(const std::string &filename, uint32 nbLines) LOG("Cropping %u lines from file '%s'\n", nbLines, filename.c_str()); - CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_LF); string text = utext.toUtf8(); @@ -1881,7 +1881,7 @@ void cropLines(const std::string &filename, uint32 nbLines) utext.fromUtf8(text); - CI18N::writeTextFile(filename, utext, true); + CI18N::writeTextFile(filename, utext); } @@ -1910,7 +1910,7 @@ int makeWork() // change #include "*_en.txt" into #include "*_wk.txt" ucstring utext; - CI18N::readTextFile(filename, utext, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, utext, false, false, false, 
CI18N::LINE_FMT_LF); string text = utext.toUtf8(); bool changedFile = false; @@ -1943,7 +1943,7 @@ int makeWork() if (changedFile) { utext.fromUtf8(text); - CI18N::writeTextFile(filename, utext, true); + CI18N::writeTextFile(filename, utext); } // change filename @@ -2348,7 +2348,7 @@ void patchWorkFile(vector &updatedPhrase, const std::string & filename) { ucstring text; if ( updatedPhrase.empty() ) { return; } - CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, text, false, false, false, CI18N::LINE_FMT_LF); vector::const_iterator first(updatedPhrase.begin()); vector::const_iterator last(updatedPhrase.end()); for (; first != last; ++first) @@ -2489,7 +2489,7 @@ bool mergePhraseDiff2(vector &phrases, const string &language, bool onl { // Check if the diff is translated ucstring text; - CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(diffs[i], text, false, false, false, CI18N::LINE_FMT_LF); verifyVersion(text, 2); if (text.find(ucstring("DIFF NOT TRANSLATED")) != ucstring::npos) { @@ -2947,7 +2947,7 @@ void preprocessTextFile(const std::string &filename, // Transform the string in ucstring according to format header if (!text.empty()) - CI18N::readTextBuffer((uint8*)&text[0], (uint)text.size(), result, false); + CI18N::readTextBuffer((uint8*)&text[0], (uint)text.size(), result); @@ -3029,7 +3029,7 @@ int mergePhraseDiff(int argc, char *argv[]) { // backup the original file ucstring old; - CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_CRLF); + CI18N::readTextFile(filename, old, false, true, false, CI18N::LINE_FMT_LF); if (old != str) CFile::moveFile((historyDir+CFile::getFilenameWithoutExtension(filename)+"_"+diffVersion+"."+CFile::getExtension(filename)).c_str(), filename.c_str()); }