UTF8 和 GBK 编码字符串互转,UTF8 编码格式判断
[編輯] [转简体] (简体译文)
|
作者:huidong
| 分類:【編程】編碼
[
24 瀏覽
0 評論
8 贊
7 踩
]
概要
正文
UTF8 和 gbk 互转的函数来自 https://www.cnblogs.com/zhongbin/p/3160641.html
判断 UTF8 编码的函数来自 https://blog.csdn.net/jiankekejian/article/details/106720432
对他们的代码进行了整理。
以下代码中,UTF-8 转 GBK 已实测可用;GBK 转 UTF-8 尚未测试;判断 UTF-8 编码的函数也已实测通过。代码可在 VS2019 下编译运行。
/*
 * The two converters wrap the Win32 code-page APIs, so they are only compiled
 * for Windows targets (the caller is expected to have included <windows.h>).
 * isUTF8() further down is plain C++ and is available everywhere.
 */
#if defined(_WIN32) || defined(__CYGWIN__)

/**
 * Windows code page identifier for GBK (Simplified Chinese). Used instead of
 * CP_ACP, which only equals GBK when the system locale is Simplified Chinese.
 */
static const unsigned int kGbkCodePage = 936;

/**
 * @brief Re-encode a NUL-terminated multi-byte string from one Windows code
 *        page to another, going through UTF-16 as the intermediate form.
 * @param[in]  srcCodePage  Code page of @p src.
 * @param[in]  dstCodePage  Code page to produce.
 * @param[in]  src          NUL-terminated input string; may be NULL.
 * @param[out] dst          Output buffer, or NULL to only query the size.
 * @param[in]  dstCapacity  Size of @p dst in bytes.
 * @return If @p dst is NULL: the number of bytes required, including the
 *         terminating NUL. Otherwise: the number of bytes written, including
 *         the terminating NUL. Returns 0 if @p src is NULL, if a conversion
 *         fails, or if @p dstCapacity is too small.
 */
static int TranscodeViaUtf16(unsigned int srcCodePage, unsigned int dstCodePage,
                             const char* src, char* dst, int dstCapacity)
{
    if (!src)
        return 0;

    // First pass: ask how many UTF-16 code units are needed (terminator included).
    const int wideLen = ::MultiByteToWideChar(srcCodePage, 0, src, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    wchar_t* wide = new wchar_t[wideLen];
    int result = 0;

    if (::MultiByteToWideChar(srcCodePage, 0, src, -1, wide, wideLen) > 0)
    {
        const int needed = ::WideCharToMultiByte(dstCodePage, 0, wide, -1, NULL, 0, NULL, NULL);
        if (!dst)
        {
            // Size query only.
            result = needed;
        }
        else if (needed > 0 && dstCapacity >= needed)
        {
            result = ::WideCharToMultiByte(dstCodePage, 0, wide, -1, dst, dstCapacity, NULL, NULL);
        }
        // Otherwise the buffer is too small (or the conversion failed): result stays 0.
    }

    // Single release point, so no early return can leak the temporary buffer.
    delete[] wide;
    return result;
}

/**
 * @brief Convert a GBK encoded string to a UTF-8 encoded string.
 * @param[in]  lpGBKStr     Source GBK string (NUL-terminated).
 * @param[out] lpUTF8Str    Buffer receiving the UTF-8 string; pass NULL to
 *                          query the required buffer size.
 * @param[in]  nUTF8StrLen  Capacity of @p lpUTF8Str in bytes.
 * @return Number of bytes written including the terminating NUL, or the
 *         required size in bytes when @p lpUTF8Str is NULL. Returns 0 on
 *         failure, when @p lpGBKStr is NULL, or when the buffer is too small.
 * @note Based on code from https://www.cnblogs.com/zhongbin/p/3160641.html
 */
int GBKToUTF8(unsigned char* lpGBKStr, unsigned char* lpUTF8Str, int nUTF8StrLen)
{
    return TranscodeViaUtf16(kGbkCodePage, CP_UTF8,
                             reinterpret_cast<const char*>(lpGBKStr),
                             reinterpret_cast<char*>(lpUTF8Str),
                             nUTF8StrLen);
}

/**
 * @brief Convert a UTF-8 encoded string to a GBK encoded string.
 * @param[in]  lpUTF8Str   Source UTF-8 string (NUL-terminated).
 * @param[out] lpGBKStr    Buffer receiving the GBK string; pass NULL to query
 *                         the required buffer size.
 * @param[in]  nGBKStrLen  Capacity of @p lpGBKStr in bytes.
 * @return Number of bytes written including the terminating NUL, or the
 *         required size in bytes when @p lpGBKStr is NULL. Returns 0 on
 *         failure, when @p lpUTF8Str is NULL, or when the buffer is too small.
 * @note Based on code from https://www.cnblogs.com/zhongbin/p/3160641.html
 */
int UTF8ToGBK(char* lpUTF8Str, char* lpGBKStr, int nGBKStrLen)
{
    return TranscodeViaUtf16(CP_UTF8, kGbkCodePage, lpUTF8Str, lpGBKStr, nGBKStrLen);
}

#endif // _WIN32 || __CYGWIN__

/**
 * @brief Check whether a NUL-terminated byte string is well-formed UTF-8.
 *
 * Validation follows RFC 3629: only 1- to 4-byte sequences are accepted, and
 * truncated sequences, stray continuation bytes, overlong encodings, UTF-16
 * surrogates (U+D800..U+DFFF) and code points above U+10FFFF are rejected.
 *
 * @param[in] str  String to check; may be NULL.
 * @return true if every byte up to the terminator belongs to a valid UTF-8
 *         sequence (the empty string and pure ASCII are valid); false
 *         otherwise, or if @p str is NULL.
 * @note Adapted from https://blog.csdn.net/jiankekejian/article/details/106720432
 */
bool isUTF8(const char* str)
{
    if (!str)
        return false;

    // Work on unsigned bytes so comparisons do not depend on the signedness of char.
    const unsigned char* p = reinterpret_cast<const unsigned char*>(str);

    while (*p)
    {
        const unsigned char lead = *p++;
        if (lead < 0x80)
            continue; // single-byte (ASCII)

        int trailing = 0;          // continuation bytes still expected
        unsigned long codePoint = 0;
        unsigned long minimum = 0; // smallest code point allowed for this length

        if ((lead & 0xE0) == 0xC0)
        {
            trailing = 1;
            codePoint = lead & 0x1F;
            minimum = 0x80;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            trailing = 2;
            codePoint = lead & 0x0F;
            minimum = 0x800;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            trailing = 3;
            codePoint = lead & 0x07;
            minimum = 0x10000;
        }
        else
        {
            // Stray continuation byte, obsolete 5/6-byte lead, or 0xFE/0xFF.
            return false;
        }

        for (; trailing > 0; --trailing)
        {
            // The terminating NUL fails this test too, so a sequence that is
            // cut off by the end of the string is rejected without reading
            // past the terminator.
            const unsigned char next = *p;
            if ((next & 0xC0) != 0x80)
                return false;
            codePoint = (codePoint << 6) | (next & 0x3F);
            ++p;
        }

        if (codePoint < minimum)
            return false; // overlong encoding
        if (codePoint > 0x10FFFF)
            return false; // beyond the Unicode range
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            return false; // UTF-16 surrogate, not a scalar value
    }

    return true;
}