UTF8 和 GBK 编码字符串互转，UTF8 编码格式判断

[編輯] [转简体]

創建於 2021-08-09 | 修改於 2024-06-20

作者：huidong | 分類：【編程】編碼

[ 10 瀏覽 0 評論 2 贊 2 踩 ]

概要

正文

UTF8 和 gbk 互转的函数来自 https://www.cnblogs.com/zhongbin/p/3160641.html

判断 UTF8 编码的函数来自 https://blog.csdn.net/jiankekejian/article/details/106720432

对他们的代码进行了整理。

以下内容中，UTF8 转 gbk 实测可用， gbk 转 utf8 没试，判断 utf8 编码的函数也实测过了，VS2019 可以运行。

/**
 * @brief        GBK 编码字符串转 UTF-8 编码字符串
 * @param[in]    lpGBKStr: 原 gbk 字符串
 * @param[out]    lpUTF8Str: 转码后的 utf-8 字符串
 * @param[in]    nUTF8StrLen: utf-8 字符串的最大长度
 * @return        返回转码后字符串的长度
 * @note        代码来自 https://www.cnblogs.com/zhongbin/p/3160641.html
*/
int GBKToUTF8(unsigned char* lpGBKStr, unsigned char* lpUTF8Str, int nUTF8StrLen)
{
    wchar_t* lpUnicodeStr = NULL;
    int nRetLen = 0;
    if (!lpGBKStr) return 0;
    nRetLen = ::MultiByteToWideChar(CP_ACP, 0, (char*)lpGBKStr, -1, NULL, NULL);
    lpUnicodeStr = new WCHAR[nRetLen + 1];
    nRetLen = ::MultiByteToWideChar(CP_ACP, 0, (char*)lpGBKStr, -1, lpUnicodeStr, nRetLen);
    if (!nRetLen) return 0;
    nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1, NULL, 0, NULL, NULL);
    if (!lpUTF8Str)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return nRetLen;
    }
    if (nUTF8StrLen < nRetLen)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return 0;
    }
    nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1, (char*)lpUTF8Str, nUTF8StrLen, NULL, NULL);
    if (lpUnicodeStr) delete[] lpUnicodeStr;
    return nRetLen;

}

/**
 * @brief        UTF-8 编码字符串转 GBK 编码字符串
 * @param[in]    lpUTF8Str: 原 utf-8 字符串
 * @param[out]    lpGBKStr: 转码后的 gbk 字符串
 * @param[in]    nGBKStrLen: gbk 字符串的最大长度
 * @return        返回转换后字符串的长度
 * @note        代码来自 https://www.cnblogs.com/zhongbin/p/3160641.html
*/
int UTF8ToGBK(char* lpUTF8Str, char* lpGBKStr, int nGBKStrLen)
{
    wchar_t* lpUnicodeStr = NULL;
    int nRetLen = 0;
    if (!lpUTF8Str) return 0;
    nRetLen = ::MultiByteToWideChar(CP_UTF8, 0, (char*)lpUTF8Str, -1, NULL, NULL);
    lpUnicodeStr = new WCHAR[nRetLen + 1];
    nRetLen = ::MultiByteToWideChar(CP_UTF8, 0, (char*)lpUTF8Str, -1, lpUnicodeStr, nRetLen);
    if (!nRetLen) return 0;
    nRetLen = ::WideCharToMultiByte(CP_ACP, 0, lpUnicodeStr, -1, NULL, NULL, NULL, NULL);
    if (!lpGBKStr)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return nRetLen;
    }
    if (nGBKStrLen < nRetLen)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return 0;
    }
    nRetLen = ::WideCharToMultiByte(CP_ACP, 0, lpUnicodeStr, -1, (char*)lpGBKStr, nRetLen, NULL, NULL);
    if (lpUnicodeStr) delete[] lpUnicodeStr;
    return nRetLen;
}

/**
 * @brief        Check whether a NUL-terminated byte string is structurally valid UTF-8.
 * @param[in]    str: string to inspect; may be NULL
 * @return       true if every byte is either ASCII or belongs to a complete multi-byte
 *               sequence (a lead byte followed by the announced number of 10xxxxxx
 *               continuation bytes). false otherwise, including for NULL and for a
 *               string that ends in the middle of a sequence.
 * @note         Only bit patterns are checked: the legacy 5- and 6-byte forms are still
 *               accepted, and overlong encodings and surrogates are not rejected.
 * @note         Adapted from https://blog.csdn.net/jiankekejian/article/details/106720432
*/
bool isUTF8(const char* str)
{
    if (!str) return false;

    // A lead byte b announces (index + 1) continuation bytes when
    // (b & mask) == expected.
    static const struct
    {
        unsigned char mask;
        unsigned char expected;
    } kLeadBytes[] = { {0xE0,0xC0},{0xF0,0xE0},{0xF8,0xF0},{0xFC,0xF8},{0xFE,0xFC} };
    const int kLeadCount = static_cast<int>(sizeof(kLeadBytes) / sizeof(kLeadBytes[0]));

    int pending = 0;    // continuation bytes still owed by the current sequence
    for (const unsigned char* p = reinterpret_cast<const unsigned char*>(str); *p != 0; ++p)
    {
        const unsigned char byte = *p;

        if (pending > 0)
        {
            if ((byte & 0xC0) != 0x80) return false;
            --pending;
            continue;
        }

        if (byte < 0x80) continue;    // plain ASCII

        for (int k = 0; k < kLeadCount; ++k)
        {
            if ((byte & kLeadBytes[k].mask) == kLeadBytes[k].expected)
            {
                pending = k + 1;
                break;
            }
        }
        // No pattern matched: a stray continuation byte, or 0xFE / 0xFF.
        if (pending == 0) return false;
    }

    // A sequence cut off by the terminator is not valid UTF-8.
    return pending == 0;
}

<p><br/></p><p>UTF8 和 gbk 互转的函数来自 <a href="https://www.cnblogs.com/zhongbin/p/3160641.html">https://www.cnblogs.com/zhongbin/p/3160641.html</a> <br/></p><p>判断 UTF8 编码的函数来自 <a href="https://blog.csdn.net/jiankekejian/article/details/106720432">https://blog.csdn.net/jiankekejian/article/details/106720432</a>  <br/></p><p><br/></p><p>对他们的代码进行了整理。</p><p><br/></p><p>以下内容中，UTF8 转 gbk 实测可用， gbk 转 utf8 没试，判断 utf8 编码的函数也实测过了，VS2019 可以运行。</p><pre class="brush:cpp;toolbar:false">/**
 * @brief        GBK 编码字符串转 UTF-8 编码字符串
 * @param[in]    lpGBKStr: 原 gbk 字符串
 * @param[out]    lpUTF8Str: 转码后的 utf-8 字符串
 * @param[in]    nUTF8StrLen: utf-8 字符串的最大长度
 * @return        返回转码后字符串的长度
 * @note        代码来自 https://www.cnblogs.com/zhongbin/p/3160641.html
*/
int GBKToUTF8(unsigned char* lpGBKStr, unsigned char* lpUTF8Str, int nUTF8StrLen)
{
    wchar_t* lpUnicodeStr = NULL;
    int nRetLen = 0;
    if (!lpGBKStr) return 0;
    nRetLen = ::MultiByteToWideChar(CP_ACP, 0, (char*)lpGBKStr, -1, NULL, NULL);
    lpUnicodeStr = new WCHAR[nRetLen + 1];
    nRetLen = ::MultiByteToWideChar(CP_ACP, 0, (char*)lpGBKStr, -1, lpUnicodeStr, nRetLen);
    if (!nRetLen) return 0;
    nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1, NULL, 0, NULL, NULL);
    if (!lpUTF8Str)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return nRetLen;
    }
    if (nUTF8StrLen < nRetLen)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return 0;
    }
    nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1, (char*)lpUTF8Str, nUTF8StrLen, NULL, NULL);
    if (lpUnicodeStr) delete[] lpUnicodeStr;
    return nRetLen;

}

/**
 * @brief        UTF-8 编码字符串转 GBK 编码字符串
 * @param[in]    lpUTF8Str: 原 utf-8 字符串
 * @param[out]    lpGBKStr: 转码后的 gbk 字符串
 * @param[in]    nGBKStrLen: gbk 字符串的最大长度
 * @return        返回转换后字符串的长度
 * @note        代码来自 https://www.cnblogs.com/zhongbin/p/3160641.html
*/
int UTF8ToGBK(char* lpUTF8Str, char* lpGBKStr, int nGBKStrLen)
{
    wchar_t* lpUnicodeStr = NULL;
    int nRetLen = 0;
    if (!lpUTF8Str) return 0;
    nRetLen = ::MultiByteToWideChar(CP_UTF8, 0, (char*)lpUTF8Str, -1, NULL, NULL);
    lpUnicodeStr = new WCHAR[nRetLen + 1];
    nRetLen = ::MultiByteToWideChar(CP_UTF8, 0, (char*)lpUTF8Str, -1, lpUnicodeStr, nRetLen);
    if (!nRetLen) return 0;
    nRetLen = ::WideCharToMultiByte(CP_ACP, 0, lpUnicodeStr, -1, NULL, NULL, NULL, NULL);
    if (!lpGBKStr)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return nRetLen;
    }
    if (nGBKStrLen < nRetLen)
    {
        if (lpUnicodeStr) delete[] lpUnicodeStr;
        return 0;
    }
    nRetLen = ::WideCharToMultiByte(CP_ACP, 0, lpUnicodeStr, -1, (char*)lpGBKStr, nRetLen, NULL, NULL);
    if (lpUnicodeStr) delete[] lpUnicodeStr;
    return nRetLen;
}

/**
 * @brief    判断一个字符串是否为 UTF-8 编码
 * @note    来自 https://blog.csdn.net/jiankekejian/article/details/106720432 （有删改）
*/
bool isUTF8(const char* str)
{
    int length = strlen(str);
    int check_sub = 0;
    int i = 0;
    int j = 0;

    for (i = 0; i < length; i++)
    {
        if (check_sub == 0)
        {
            if ((str[i] >> 7) == 0)
            {
                continue;
            }
            struct
            {
                int cal;
                int cmp;
            } Utf8NumMap[] = { {0xE0,0xC0},{0xF0,0xE0},{0xF8,0xF0},{0xFC,0xF8},{0xFE,0xFC}, };
            for (j = 0; j < (sizeof(Utf8NumMap) / sizeof(Utf8NumMap[0])); j++)
            {
                if ((str[i] & Utf8NumMap[j].cal) == Utf8NumMap[j].cmp)
                {
                    check_sub = j + 1;
                    break;
                }
            }
            if (0 == check_sub)
            {
                return false;
            }
        }
        else
        {
            if ((str[i] & 0xC0) != 0x80)
            {
                return false;
            }
            check_sub--;
        }
    }
    return true;
}</pre><p><br/>  </p>

[贊 2] [踩 2]

評論區 0 條評論

+ 添加評論

昵稱：		必填
聯系方式：		公開，填「無」亦可
	在此瀏覽器上保存我的信息