UTF8 和 gbk 互转的函数来自 https://www.cnblogs.com/zhongbin/p/3160641.html
判断 UTF8 编码的函数来自 https://blog.csdn.net/jiankekejian/article/details/106720432
对他们的代码进行了整理。
以下代码中，UTF-8 转 GBK 的函数和判断 UTF-8 编码的函数均已实测可用（VS2019 下可以运行）；GBK 转 UTF-8 的函数尚未测试。
/**
 * @brief Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string.
 *
 * The conversion goes through an intermediate UTF-16 buffer:
 * multibyte (CP_ACP) -> wide char -> multibyte (CP_UTF8).
 *
 * NOTE(review): the input is decoded with CP_ACP, i.e. the system ANSI code
 * page. That is GBK only on systems whose ANSI code page is 936 - confirm
 * this matches the deployment environment.
 *
 * @param[in]  lpGBKStr     Source string, NUL-terminated. May be null.
 * @param[out] lpUTF8Str    Destination buffer. Pass null to query the
 *                          required buffer size without converting.
 * @param[in]  nUTF8StrLen  Capacity of lpUTF8Str in bytes.
 * @return When lpUTF8Str is null: the value reported by the sizing call to
 *         WideCharToMultiByte (required bytes, terminator included).
 *         Otherwise: the value returned by the converting call.
 *         Returns 0 when lpGBKStr is null, when any conversion step fails,
 *         or when nUTF8StrLen is smaller than the required size.
 * @note Adapted from https://www.cnblogs.com/zhongbin/p/3160641.html
 */
int GBKToUTF8(unsigned char* lpGBKStr, unsigned char* lpUTF8Str, int nUTF8StrLen)
{
    if (!lpGBKStr)
    {
        return 0;
    }

    const char* lpSrc = reinterpret_cast<const char*>(lpGBKStr);

    // Sizing pass: number of wide characters needed, terminator included.
    const int nWideLen = ::MultiByteToWideChar(CP_ACP, 0, lpSrc, -1, nullptr, 0);
    if (nWideLen <= 0)
    {
        // Nothing has been allocated yet, so bailing out here cannot leak.
        return 0;
    }

    wchar_t* lpUnicodeStr = new wchar_t[nWideLen];
    int nRetLen = 0;

    // Every path below falls through to the single delete[] at the end, so the
    // intermediate buffer is released even when a conversion step fails.
    if (::MultiByteToWideChar(CP_ACP, 0, lpSrc, -1, lpUnicodeStr, nWideLen) > 0)
    {
        // Sizing pass for the UTF-8 output.
        const int nNeeded = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1,
                                                  nullptr, 0, nullptr, nullptr);
        if (!lpUTF8Str)
        {
            // Query mode: report the required size only.
            nRetLen = nNeeded;
        }
        else if (nNeeded > 0 && nUTF8StrLen >= nNeeded)
        {
            nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1,
                                            reinterpret_cast<char*>(lpUTF8Str),
                                            nUTF8StrLen, nullptr, nullptr);
        }
        // Otherwise the caller's buffer is too small: nRetLen stays 0.
    }

    delete[] lpUnicodeStr;
    return nRetLen;
}
/**
 * @brief Convert a NUL-terminated UTF-8 string to a NUL-terminated GBK string.
 *
 * The conversion goes through an intermediate UTF-16 buffer:
 * multibyte (CP_UTF8) -> wide char -> multibyte (CP_ACP).
 *
 * NOTE(review): the output is encoded with CP_ACP, i.e. the system ANSI code
 * page. That is GBK only on systems whose ANSI code page is 936 - confirm
 * this matches the deployment environment.
 *
 * @param[in]  lpUTF8Str   Source string, NUL-terminated. May be null.
 * @param[out] lpGBKStr    Destination buffer. Pass null to query the
 *                         required buffer size without converting.
 * @param[in]  nGBKStrLen  Capacity of lpGBKStr in bytes.
 * @return When lpGBKStr is null: the value reported by the sizing call to
 *         WideCharToMultiByte (required bytes, terminator included).
 *         Otherwise: the value returned by the converting call.
 *         Returns 0 when lpUTF8Str is null, when any conversion step fails,
 *         or when nGBKStrLen is smaller than the required size.
 * @note Adapted from https://www.cnblogs.com/zhongbin/p/3160641.html
 */
int UTF8ToGBK(char* lpUTF8Str, char* lpGBKStr, int nGBKStrLen)
{
    if (!lpUTF8Str)
    {
        return 0;
    }

    // Sizing pass: number of wide characters needed, terminator included.
    const int nWideLen = ::MultiByteToWideChar(CP_UTF8, 0, lpUTF8Str, -1, nullptr, 0);
    if (nWideLen <= 0)
    {
        // Nothing has been allocated yet, so bailing out here cannot leak.
        return 0;
    }

    wchar_t* lpUnicodeStr = new wchar_t[nWideLen];
    int nRetLen = 0;

    // Every path below falls through to the single delete[] at the end, so the
    // intermediate buffer is released even when a conversion step fails.
    if (::MultiByteToWideChar(CP_UTF8, 0, lpUTF8Str, -1, lpUnicodeStr, nWideLen) > 0)
    {
        // Sizing pass for the ANSI-code-page output.
        const int nNeeded = ::WideCharToMultiByte(CP_ACP, 0, lpUnicodeStr, -1,
                                                  nullptr, 0, nullptr, nullptr);
        if (!lpGBKStr)
        {
            // Query mode: report the required size only.
            nRetLen = nNeeded;
        }
        else if (nNeeded > 0 && nGBKStrLen >= nNeeded)
        {
            nRetLen = ::WideCharToMultiByte(CP_ACP, 0, lpUnicodeStr, -1,
                                            lpGBKStr, nGBKStrLen, nullptr, nullptr);
        }
        // Otherwise the caller's buffer is too small: nRetLen stays 0.
    }

    delete[] lpUnicodeStr;
    return nRetLen;
}
/**
 * @brief Check whether a NUL-terminated byte string is structurally valid UTF-8.
 *
 * Each non-ASCII lead byte must be followed by the number of continuation
 * bytes (10xxxxxx) that its bit pattern announces, and the string must not
 * end in the middle of a multi-byte sequence. Pure ASCII and the empty string
 * are accepted.
 *
 * This is a structural check only: it does not reject overlong encodings or
 * surrogate code points, and it still accepts the legacy 5- and 6-byte lead
 * forms (111110xx / 1111110x).
 *
 * @param[in] str  String to inspect. A null pointer yields false.
 * @return true if every byte sequence is well-formed and complete.
 * @note Adapted from https://blog.csdn.net/jiankekejian/article/details/106720432
 */
bool isUTF8(const char* str)
{
    if (str == nullptr)
    {
        return false;
    }

    // (byte & mask) == pattern identifies a lead byte that is followed by
    // `trailing` continuation bytes.
    struct LeadPattern
    {
        unsigned char mask;
        unsigned char pattern;
        int trailing;
    };
    static const LeadPattern kLeadPatterns[] = {
        {0xE0, 0xC0, 1},
        {0xF0, 0xE0, 2},
        {0xF8, 0xF0, 3},
        {0xFC, 0xF8, 4},
        {0xFE, 0xFC, 5},
    };

    int pending = 0; // continuation bytes still expected for the current sequence

    for (const char* p = str; *p != '\0'; ++p)
    {
        // Work on the unsigned value so the checks do not depend on whether
        // plain char is signed on this platform.
        const unsigned char byte = static_cast<unsigned char>(*p);

        if (pending > 0)
        {
            if ((byte & 0xC0) != 0x80)
            {
                return false; // expected a continuation byte
            }
            --pending;
            continue;
        }

        if (byte < 0x80)
        {
            continue; // plain ASCII
        }

        bool isLead = false;
        for (const LeadPattern& lead : kLeadPatterns)
        {
            if ((byte & lead.mask) == lead.pattern)
            {
                pending = lead.trailing;
                isLead = true;
                break;
            }
        }
        if (!isLead)
        {
            return false; // stray continuation byte, or 0xFE / 0xFF
        }
    }

    // A sequence that was started but never completed is not valid UTF-8.
    return pending == 0;
}