Added: More comprehensive codepage conversion for tstring
This commit is contained in:
parent
39c313aeb6
commit
8efe1d91c4
4 changed files with 381 additions and 58 deletions
|
@ -303,23 +303,23 @@ inline sint nlstricmp(const std::string &lhs, const std::string &rhs) { return s
|
|||
inline sint nlstricmp(const std::string &lhs, const char *rhs) { return stricmp(lhs.c_str(),rhs); }
|
||||
inline sint nlstricmp(const char *lhs, const std::string &rhs) { return stricmp(lhs,rhs.c_str()); }
|
||||
|
||||
// TODO: Can we prefix these with 'nl' like other methods?
|
||||
// TODO: Can we prefix these with 'nl' like other macros?
|
||||
// Macros helper to convert UTF-8 std::string and wchar_t*
|
||||
#define wideToUtf8(str) (ucstring((ucchar*)str).toUtf8())
|
||||
#define utf8ToWide(str) ((wchar_t*)ucstring::makeFromUtf8(str).c_str())
|
||||
// #define wideToUtf8(str) (ucstring((ucchar*)str).toUtf8())
|
||||
// #define utf8ToWide(str) ((wchar_t*)ucstring::makeFromUtf8(str).c_str())
|
||||
|
||||
// Macros helper to convert UTF-8 std::string and TCHAR*
|
||||
#ifdef _UNICODE
|
||||
#define tStrToUtf8(str) (ucstring((ucchar*)(LPCWSTR)str).toUtf8())
|
||||
#define utf8ToTStr(str) ((const wchar_t *)ucstring::makeFromUtf8(str).c_str())
|
||||
#define tstring wstring
|
||||
// #define tstring wstring
|
||||
#else
|
||||
// FIXME: This is not accurate, it should be a conversion between local charset and utf8
|
||||
#define tStrToUtf8(str) (std::string((LPCSTR)str))
|
||||
inline const char *nlutf8ToTStr(const char *str) { return str; }
|
||||
inline const char *nlutf8ToTStr(const std::string &str) { return str.c_str(); }
|
||||
#define utf8ToTStr(str) NLMISC::nlutf8ToTStr(str)
|
||||
#define tstring string
|
||||
// #define tstring string
|
||||
#endif
|
||||
|
||||
#if (NL_COMP_VC_VERSION <= 90)
|
||||
|
|
|
@ -246,55 +246,7 @@ inline bool fromString(const std::string &str, double &val) { bool ret = sscanf(
|
|||
// (str[0] == '1' || (str[0] & 0xD2) == 0x50)
|
||||
// - Kaetemi
|
||||
|
||||
inline bool fromString(const std::string &str, bool &val)
|
||||
{
|
||||
if (str.length() == 1)
|
||||
{
|
||||
const char c = str[0];
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case '1':
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'y':
|
||||
case 'Y':
|
||||
val = true;
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case 'f':
|
||||
case 'F':
|
||||
case 'n':
|
||||
case 'N':
|
||||
val = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
val = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string strl = toLower(str);
|
||||
if (strl == "true" || strl == "yes")
|
||||
{
|
||||
val = true;
|
||||
}
|
||||
else if (strl == "false" || strl == "no")
|
||||
{
|
||||
val = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
val = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
bool fromString(const std::string &str, bool &val);
|
||||
|
||||
inline bool fromString(const std::string &str, std::string &val) { val = str; return true; }
|
||||
|
||||
|
@ -305,6 +257,107 @@ inline bool fromString(const std::string &str, uint &val) { return sscanf(str.c_
|
|||
inline bool fromString(const std::string &str, sint &val) { return sscanf(str.c_str(), "%d", &val) == 1; }
|
||||
#endif // NL_COMP_VC6
|
||||
|
||||
// Convert local codepage to UTF-8
|
||||
// On Windows, the local codepage is the system's active ANSI code page (as reported by GetACP), so it varies per machine
|
||||
// On Linux, the local codepage is always UTF-8 (no-op)
|
||||
std::string mbcsToUtf8(const char *str, size_t len = 0);
|
||||
std::string mbcsToUtf8(const std::string &str);
|
||||
|
||||
// Convert wide codepage to UTF-8
|
||||
// On Windows, the wide codepage is UTF-16
|
||||
// On Linux, the wide codepage is UTF-32
|
||||
std::string wideToUtf8(const wchar_t *str, size_t len = 0);
|
||||
std::string wideToUtf8(const std::wstring &str);
|
||||
|
||||
// Convert UTF-8 to wide character set
|
||||
std::wstring utf8ToWide(const char *str, size_t len = 0);
|
||||
std::wstring utf8ToWide(const std::string &str);
|
||||
|
||||
// Convert UTF-8 to local multibyte character set
|
||||
std::string utf8ToMbcs(const char *str, size_t len = 0);
|
||||
std::string utf8ToMbcs(const std::string &str);
|
||||
|
||||
// Convert wide to local multibyte character set
|
||||
std::string wideToMbcs(const wchar_t *str, size_t len = 0);
|
||||
std::string wideToMbcs(const std::wstring &str);
|
||||
|
||||
// Convert local multibyte to wide character set
|
||||
std::wstring mbcsToWide(const char *str, size_t len = 0);
|
||||
std::wstring mbcsToWide(const std::string &str);
|
||||
|
||||
// Uniform C-string accessor: a narrow C string is already in the requested form.
inline const char* asCStr(const char *str)
{
	return str;
}
|
||||
// Uniform C-string accessor: exposes the null-terminated buffer owned by a std::string.
// The pointer is only valid while 'str' is alive and unmodified.
inline const char* asCStr(const std::string &str)
{
	const char *const buffer = str.c_str();
	return buffer;
}
|
||||
// Uniform C-string accessor: a wide C string is already in the requested form.
inline const wchar_t* asCStr(const wchar_t *str)
{
	return str;
}
|
||||
// Uniform C-string accessor: exposes the null-terminated buffer owned by a std::wstring.
// The pointer is only valid while 'str' is alive and unmodified.
inline const wchar_t* asCStr(const std::wstring &str)
{
	const wchar_t *const buffer = str.c_str();
	return buffer;
}
|
||||
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
#define nlUtf8ToMbcs(str) (NLMISC::utf8ToMbcs(str).c_str())
|
||||
#define nlMbcsToUtf8(str) (NLMISC::mbcsToUtf8(str).c_str())
|
||||
#else
|
||||
#define nlUtf8ToMbcs(str) (NLMISC::asCStr(str))
|
||||
#define nlMbcsToUtf8(str) (NLMISC::asCStr(str))
|
||||
#endif
|
||||
#define nlWideToUtf8(str) (NLMISC::wideToUtf8(str).c_str())
|
||||
// Converts UTF-8 to a wide C string for immediate use.
// The pointer refers to a temporary std::wstring and is only valid until the end of the full expression.
#define nlUtf8ToWide(str) (NLMISC::utf8ToWide(str).c_str())
|
||||
#define nlWideToMbcs(str) (NLMISC::wideToMbcs(str).c_str())
|
||||
#define nlMbcsToWide(str) (NLMISC::mbcsToWide(str).c_str())
|
||||
|
||||
// On Windows, tstring is either local multibyte or utf-16 wide
|
||||
// On Linux, tstring is always utf-8
|
||||
|
||||
#if defined(NL_OS_WINDOWS) && (defined(UNICODE) || defined(_UNICODE))
|
||||
typedef std::wstring tstring;
|
||||
typedef wchar_t tchar;
|
||||
#define nltmain wmain
|
||||
inline std::string tStrToUtf8(const tchar *str) { return wideToUtf8((const wchar_t *)str); }
|
||||
inline std::string tStrToUtf8(const tstring &str) { return wideToUtf8((const std::wstring &)str); }
|
||||
inline std::wstring tStrToWide(const tchar *str) { return (const wchar_t *)str; }
|
||||
inline std::wstring tStrToWide(const tstring &str) { return (const std::wstring &)str; }
|
||||
inline std::string tStrToMbcs(const tchar *str) { return wideToMbcs((const wchar_t *)str); }
|
||||
inline std::string tStrToMbcs(const tstring &str) { return wideToMbcs((const std::wstring &)str); }
|
||||
#define nlTStrToUtf8(str) (NLMISC::tStrToUtf8(str).c_str())
|
||||
#define nlTStrToWide(str) ((const wchar_t *)NLMISC::asCStr(str))
|
||||
#define nlTStrToMbcs(str) (NLMISC::tStrToMbcs(str).c_str())
|
||||
// UTF-8 C string to tstring (wide in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring utf8ToTStr(const char *str) { return utf8ToWide(str); }
|
||||
// UTF-8 std::string to tstring (wide in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring utf8ToTStr(const std::string &str) { return utf8ToWide(str); }
|
||||
inline tstring wideToTStr(const wchar_t *str) { return (const tchar *)str; }
|
||||
inline tstring wideToTStr(const std::wstring &str) { return (const tstring &)str; }
|
||||
// Local multibyte C string to tstring (wide in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring mbcsToTStr(const char *str) { return mbcsToWide(str); }
|
||||
// Local multibyte std::string to tstring (wide in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring mbcsToTStr(const std::string &str) { return mbcsToWide(str); }
|
||||
#define nlUtf8ToTStr(str) (NLMISC::utf8ToTStr(str).c_str())
|
||||
#define nlWideToTStr(str) ((const tchar *)NLMISC::asCStr(str))
|
||||
#define nlMbcsToTStr(str) (NLMISC::mbcsToTStr(str).c_str())
|
||||
#else
|
||||
typedef std::string tstring;
|
||||
typedef char tchar;
|
||||
#define nltmain main
|
||||
inline std::string tStrToUtf8(const tchar *str) { return mbcsToUtf8((const char *)str); }
|
||||
inline std::string tStrToUtf8(const tstring &str) { return mbcsToUtf8((const std::string &)str); }
|
||||
inline std::wstring tStrToWide(const tchar *str) { return mbcsToWide((const char *)str); }
|
||||
inline std::wstring tStrToWide(const tstring &str) { return mbcsToWide((const std::string &)str); }
|
||||
inline std::string tStrToMbcs(const tchar *str) { return (const char *)str; }
|
||||
inline std::string tStrToMbcs(const tstring &str) { return (const std::string &)str; }
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
#define nlTStrToUtf8(str) (NLMISC::tStrToUtf8(str).c_str())
|
||||
#else
|
||||
#define nlTStrToUtf8(str) ((const char *)NLMISC::asCStr(str))
|
||||
#endif
|
||||
#define nlTStrToWide(str) (NLMISC::tStrToWide(str).c_str())
|
||||
#define nlTStrToMbcs(str) ((const char *)NLMISC::asCStr(str))
|
||||
// UTF-8 C string to tstring (local multibyte in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring utf8ToTStr(const char *str) { return utf8ToMbcs(str); }
|
||||
// UTF-8 std::string to tstring (local multibyte in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring utf8ToTStr(const std::string &str) { return utf8ToMbcs(str); }
|
||||
// Wide C string to tstring (local multibyte in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring wideToTStr(const wchar_t *str) { return wideToMbcs(str); }
|
||||
// Wide std::wstring to tstring (local multibyte in this configuration).
// The result is returned directly; a const-reference cast would force an extra copy.
inline tstring wideToTStr(const std::wstring &str) { return wideToMbcs(str); }
|
||||
inline tstring mbcsToTStr(const char *str) { return (const tchar *)str; }
|
||||
inline tstring mbcsToTStr(const std::string &str) { return (const tstring &)str; }
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
#define nlUtf8ToTStr(str) (NLMISC::utf8ToTStr(str).c_str())
|
||||
#else
|
||||
#define nlUtf8ToTStr(str) ((const tchar *)NLMISC::asCStr(str))
|
||||
#endif
|
||||
#define nlWideToTStr(str) (NLMISC::wideToTStr(str).c_str())
|
||||
#define nlMbcsToTStr(str) ((const tchar *)NLMISC::asCStr(str))
|
||||
#endif
|
||||
|
||||
} // NLMISC
|
||||
|
||||
|
|
|
@ -173,6 +173,12 @@
|
|||
# define NL_NO_EXCEPTION_SPECS
|
||||
#endif
|
||||
|
||||
#if defined(NL_COMP_VC) && (NL_COMP_VC_VERSION >= 140)
|
||||
#define nlmove(v) std::move(v)
|
||||
#else
|
||||
#define nlmove(v) (v)
|
||||
#endif
|
||||
|
||||
// gcc 3.4 introduced ISO C++ with tough template rules
|
||||
//
|
||||
// NL_ISO_SYNTAX can be used using #if NL_ISO_SYNTAX or #if !NL_ISO_SYNTAX
|
||||
|
|
|
@ -17,23 +17,24 @@
|
|||
#include "stdmisc.h"
|
||||
|
||||
#include "nel/misc/string_common.h"
|
||||
#include "nel/misc/sstring.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifdef DEBUG_NEW
|
||||
#define new DEBUG_NEW
|
||||
#define new DEBUG_NEW
|
||||
#endif
|
||||
|
||||
namespace NLMISC
|
||||
{
|
||||
|
||||
string addSlashR (const string &str)
|
||||
string addSlashR(const string &str)
|
||||
{
|
||||
string formatedStr;
|
||||
// replace \n with \r\n
|
||||
for (uint i = 0; i < str.size(); i++)
|
||||
{
|
||||
if (str[i] == '\n' && i > 0 && str[i-1] != '\r')
|
||||
if (str[i] == '\n' && i > 0 && str[i - 1] != '\r')
|
||||
{
|
||||
formatedStr += '\r';
|
||||
}
|
||||
|
@ -42,7 +43,7 @@ string addSlashR (const string &str)
|
|||
return formatedStr;
|
||||
}
|
||||
|
||||
string removeSlashR (const string &str)
|
||||
string removeSlashR(const string &str)
|
||||
{
|
||||
string formatedStr;
|
||||
// remove \r
|
||||
|
@ -54,4 +55,267 @@ string removeSlashR (const string &str)
|
|||
return formatedStr;
|
||||
}
|
||||
|
||||
bool fromString(const std::string &str, bool &val)
|
||||
{
|
||||
if (str.length() == 1)
|
||||
{
|
||||
const char c = str[0];
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '1':
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'y':
|
||||
case 'Y':
|
||||
val = true;
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case 'f':
|
||||
case 'F':
|
||||
case 'n':
|
||||
case 'N':
|
||||
val = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
val = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string strl = toLower(str);
|
||||
if (strl == "true" || strl == "yes")
|
||||
{
|
||||
val = true;
|
||||
}
|
||||
else if (strl == "false" || strl == "no")
|
||||
{
|
||||
val = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
val = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
|
||||
std::string winWideToCp(const wchar_t *str, size_t len, UINT cp)
|
||||
{
|
||||
if (!len)
|
||||
len = wcslen(str);
|
||||
if (!len)
|
||||
return std::string();
|
||||
|
||||
// Convert from wide to codepage
|
||||
char *tmp = (char *)_malloca((len + 1) * 4);
|
||||
if (!tmp)
|
||||
return std::string();
|
||||
int tmpLen = WideCharToMultiByte(cp, 0,
|
||||
str, (int)(len + 1),
|
||||
tmp, (int)((len + 1) * 4),
|
||||
NULL, NULL);
|
||||
if (tmpLen <= 1)
|
||||
{
|
||||
_freea(tmp);
|
||||
return std::string();
|
||||
}
|
||||
|
||||
std::string res = tmp;
|
||||
_freea(tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string winCpToCp(const char *str, size_t len, UINT srcCp, UINT dstCp)
|
||||
{
|
||||
if (!len)
|
||||
len = strlen(str);
|
||||
if (!len)
|
||||
return std::string();
|
||||
|
||||
// First convert from codepage to wide
|
||||
wchar_t *tmp = (wchar_t *)_malloca((len + 1) * 4);
|
||||
if (!tmp)
|
||||
return std::string();
|
||||
int tmpLen = MultiByteToWideChar(srcCp, MB_PRECOMPOSED,
|
||||
str, (int)(len + 1), /* include null-termination */
|
||||
tmp, (int)((len + 1) * 4));
|
||||
if (tmpLen <= 1)
|
||||
{
|
||||
_freea(tmp);
|
||||
return std::string();
|
||||
}
|
||||
|
||||
// Then convert from wide to codepage
|
||||
std::string res = winWideToCp(tmp, (size_t)tmpLen - 1, dstCp); /* tmpLen includes null-term */
|
||||
_freea(tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
std::wstring winCpToWide(const char *str, size_t len, UINT cp)
|
||||
{
|
||||
if (!len)
|
||||
len = strlen(str);
|
||||
if (!len)
|
||||
return std::wstring();
|
||||
|
||||
// Convert from codepage to wide
|
||||
wchar_t *tmp = (wchar_t *)_malloca((len + 1) * 4);
|
||||
if (!tmp)
|
||||
return std::wstring();
|
||||
int tmpLen = MultiByteToWideChar(cp, MB_PRECOMPOSED,
|
||||
str, (int)(len + 1), /* include null-termination */
|
||||
tmp, (int)((len + 1) * 4));
|
||||
if (tmpLen <= 1)
|
||||
{
|
||||
_freea(tmp);
|
||||
return std::wstring();
|
||||
}
|
||||
|
||||
std::wstring res = tmp;
|
||||
_freea(tmp);
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Convert local codepage to UTF-8
|
||||
// On Windows, the local codepage is undetermined
|
||||
// On Linux, the local codepage is always UTF-8 (no-op)
|
||||
// Convert a local multibyte C string to UTF-8.
// \param str input text; null yields an empty string
// \param len number of bytes to use, or 0 to use the whole null-terminated string
// \return the UTF-8 text; where the local codepage is already UTF-8 this is a plain copy
std::string mbcsToUtf8(const char *str, size_t len)
{
	if (!str)
		return std::string();
#if defined(NL_OS_WINDOWS)
	UINT codePage = GetACP();
	// Windows 10 allows setting the local codepage to UTF-8
	if (codePage == CP_UTF8) /* 65001 */
		return len ? std::string(str, len) : std::string(str);
	return winCpToCp(str, len, CP_ACP, CP_UTF8);
#else
	// no conversion needed, but the requested length must still be honoured
	return len ? std::string(str, len) : std::string(str);
#endif
}
|
||||
|
||||
// Convert a local multibyte std::string to UTF-8.
// Outside Windows the input is returned unchanged.
std::string mbcsToUtf8(const std::string &str)
{
#if !defined(NL_OS_WINDOWS)
	return str; /* no-op */
#else
	// Nothing to convert when the text is empty or the ANSI codepage
	// is already UTF-8 (possible since Windows 10, 65001)
	const bool passThrough = str.empty() || (GetACP() == CP_UTF8);
	if (passThrough)
		return str;
	return winCpToCp(str.c_str(), str.size(), CP_ACP, CP_UTF8);
#endif
}
|
||||
|
||||
// Convert wide codepage to UTF-8
|
||||
// On Windows, the wide codepage is UTF-16
|
||||
// On Linux, the wide codepage is UTF-32
|
||||
// Convert a wide string to UTF-8.
// \param str wide input; null yields an empty string on the non-Windows path
// \param len number of wchar_t units to convert, or 0 to use the whole null-terminated string
// \return the UTF-8 encoded text
// On the non-Windows path, values that are not valid Unicode scalar values
// (lone surrogates, values above U+10FFFF) are encoded as U+FFFD.
std::string wideToUtf8(const wchar_t *str, size_t len)
{
#if defined(NL_OS_WINDOWS)
	return winWideToCp(str, len, CP_UTF8);
#else
	std::string res;
	if (!str)
		return res;
	if (!len)
		len = std::char_traits<wchar_t>::length(str);
	res.reserve(len);

	for (size_t i = 0; i < len; ++i)
	{
		// wchar_t may be signed; negative values become huge and are rejected below
		unsigned long cp = (unsigned long)str[i];

		// Only when wchar_t is narrower than 32 bits can the input be UTF-16:
		// merge a high/low surrogate pair into one code point
		if (sizeof(wchar_t) < 4 && cp >= 0xD800 && cp <= 0xDBFF && i + 1 < len)
		{
			const unsigned long low = (unsigned long)str[i + 1];
			if (low >= 0xDC00 && low <= 0xDFFF)
			{
				cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
				++i;
			}
		}

		if ((cp >= 0xD800 && cp <= 0xDFFF) || cp > 0x10FFFF)
			cp = 0xFFFD; // replacement character

		if (cp < 0x80)
		{
			res += (char)cp;
		}
		else if (cp < 0x800)
		{
			res += (char)(0xC0 | (cp >> 6));
			res += (char)(0x80 | (cp & 0x3F));
		}
		else if (cp < 0x10000)
		{
			res += (char)(0xE0 | (cp >> 12));
			res += (char)(0x80 | ((cp >> 6) & 0x3F));
			res += (char)(0x80 | (cp & 0x3F));
		}
		else
		{
			res += (char)(0xF0 | (cp >> 18));
			res += (char)(0x80 | ((cp >> 12) & 0x3F));
			res += (char)(0x80 | ((cp >> 6) & 0x3F));
			res += (char)(0x80 | (cp & 0x3F));
		}
	}
	return res;
#endif
}
|
||||
|
||||
std::string wideToUtf8(const std::wstring &str)
|
||||
{
|
||||
return wideToUtf8(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
// Convert UTF-8 to wide character set
|
||||
// Convert UTF-8 to the wide character set.
// \param str UTF-8 input; null yields an empty string on the non-Windows path
// \param len number of bytes to convert, or 0 to use the whole null-terminated string
// \return the wide text
// On the non-Windows path, malformed input (stray continuation bytes, truncated or
// overlong sequences, surrogates, values above U+10FFFF) decodes to U+FFFD.
std::wstring utf8ToWide(const char *str, size_t len)
{
#if defined(NL_OS_WINDOWS)
	return winCpToWide(str, len, CP_UTF8);
#else
	std::wstring res;
	if (!str)
		return res;
	if (!len)
		len = std::char_traits<char>::length(str);
	res.reserve(len);

	const unsigned char *bytes = (const unsigned char *)str;
	size_t i = 0;
	while (i < len)
	{
		const unsigned char lead = bytes[i++];
		unsigned long cp;
		unsigned long minCp; // smallest value allowed for this sequence length
		size_t extra;        // number of continuation bytes expected

		if (lead < 0x80) { cp = lead; extra = 0; minCp = 0; }
		else if ((lead & 0xE0) == 0xC0) { cp = lead & 0x1F; extra = 1; minCp = 0x80; }
		else if ((lead & 0xF0) == 0xE0) { cp = lead & 0x0F; extra = 2; minCp = 0x800; }
		else if ((lead & 0xF8) == 0xF0) { cp = lead & 0x07; extra = 3; minCp = 0x10000; }
		else
		{
			// stray continuation byte or invalid lead byte
			res += (wchar_t)0xFFFD;
			continue;
		}

		bool valid = true;
		for (size_t k = 0; k < extra; ++k)
		{
			// A byte that is not a continuation is left in place so it is
			// re-examined as the start of the next sequence
			if (i >= len || (bytes[i] & 0xC0) != 0x80)
			{
				valid = false;
				break;
			}
			cp = (cp << 6) | (unsigned long)(bytes[i] & 0x3F);
			++i;
		}

		if (!valid || cp < minCp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
			cp = 0xFFFD; // replacement character

		if (sizeof(wchar_t) < 4 && cp > 0xFFFF)
		{
			// wchar_t cannot hold the code point directly: emit a surrogate pair
			cp -= 0x10000;
			res += (wchar_t)(0xD800 + (cp >> 10));
			res += (wchar_t)(0xDC00 + (cp & 0x3FF));
		}
		else
		{
			res += (wchar_t)cp;
		}
	}
	return res;
#endif
}
|
||||
|
||||
std::wstring utf8ToWide(const std::string &str)
|
||||
{
|
||||
return utf8ToWide(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
// Convert UTF-8 to local multibyte character set
|
||||
// Convert a UTF-8 C string to the local multibyte character set.
// \param str input text; null yields an empty string
// \param len number of bytes to use, or 0 to use the whole null-terminated string
// \return the local multibyte text; where the local codepage is UTF-8 this is a plain copy
std::string utf8ToMbcs(const char *str, size_t len)
{
	if (!str)
		return std::string();
#if defined(NL_OS_WINDOWS)
	UINT codePage = GetACP();
	// Windows 10 allows setting the local codepage to UTF-8
	if (codePage == CP_UTF8) /* 65001 */
		return len ? std::string(str, len) : std::string(str);
	return winCpToCp(str, len, CP_UTF8, CP_ACP);
#else
	// no conversion needed, but the requested length must still be honoured
	return len ? std::string(str, len) : std::string(str);
#endif
}
|
||||
|
||||
// Convert a UTF-8 std::string to the local multibyte character set.
// Outside Windows the input is returned unchanged.
std::string utf8ToMbcs(const std::string &str)
{
#if !defined(NL_OS_WINDOWS)
	return str; /* no-op */
#else
	// Nothing to convert when the text is empty or the ANSI codepage
	// is already UTF-8 (possible since Windows 10, 65001)
	const bool passThrough = str.empty() || (GetACP() == CP_UTF8);
	if (passThrough)
		return str;
	return winCpToCp(str.c_str(), str.size(), CP_UTF8, CP_ACP);
#endif
}
|
||||
|
||||
// Convert wide to local multibyte character set
|
||||
std::string wideToMbcs(const wchar_t *str, size_t len)
|
||||
{
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
return winWideToCp(str, len, CP_ACP);
|
||||
#else
|
||||
return wideToUTf8(str, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string wideToMbcs(const std::wstring &str)
|
||||
{
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
return winWideToCp(str.c_str(), str.size(), CP_ACP);
|
||||
#else
|
||||
return wideToUTf8(str);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Convert local multibyte to wide character set
|
||||
std::wstring mbcsToWide(const char *str, size_t len)
|
||||
{
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
return winCpToWide(str, len, CP_ACP);
|
||||
#else
|
||||
return utf8ToWide(str, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
std::wstring mbcsToWide(const std::string &str)
|
||||
{
|
||||
#if defined(NL_OS_WINDOWS)
|
||||
return winCpToWide(str.c_str(), str.size(), CP_ACP);
|
||||
#else
|
||||
return utf8ToWide(str);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue