Qt文件编码转换

主要使用QFile、QTextStream及QTextCodec三个类,先进行文件编码的识别(ANSI、UTF-8、UTF-8 BOM、UTF-16 LE、UTF-16 BE),再进行文件编码的转换。

主要用于Qt项目中源代码的批量转换,从ANSI或UTF-8转换为UTF-8 BOM。

#ifndef CODEHELPER_H
#define CODEHELPER_H

#include 
#include 

/// Text encodings that the helper can tell apart.
/// Kept as an unscoped enum so that both `ANSI` and `EncodingFormat::ANSI`
/// spellings remain valid for existing callers.
enum EncodingFormat
{
    ANSI    = 0,  ///< No BOM found and the content is not valid UTF-8.
    UTF16LE = 1,  ///< File starts with the byte order mark FF FE.
    UTF16BE = 2,  ///< File starts with the byte order mark FE FF.
    UTF8    = 3,  ///< No BOM found and the content decodes as UTF-8.
    UTF8BOM = 4   ///< File starts with the byte order mark EF BB BF.
};

// Helper for detecting the text encoding of a file and converting files
// to UTF-8 with a byte order mark.
class CodeHelper
{
public:
    CodeHelper();

    // Try to determine the encoding of the given file.
    // getFileEncoding returns the result as an EncodingFormat value;
    // getFileEncodingStr returns the same result as a display string.
    EncodingFormat getFileEncoding(const QString &filename);
    QString        getFileEncodingStr(const QString &filename);

    // Convert srcFile to UTF-8 with BOM and write the result to desFile.
    // Returns a bool success flag.
    bool translateFile2UTF8BOM(const QString &srcFile, const QString &desFile);

private:
    // Configure the codec of `stream` to match `encodingFormat`.
    void useRightCodec(QTextStream &stream, const EncodingFormat &encodingFormat);
};

#endif // CODEHELPER_H
#include "codehelper.h"

#include 
#include 
#include 
#include 

// Maps each EncodingFormat to its display name. Except for ANSI and UTF8BOM,
// the name doubles as the codec name passed to QTextCodec::codecForName().
// The template arguments must be spelled out: they cannot be deduced from a
// nested braced initializer list.
static const QMap<EncodingFormat, QString> code2StringMap = {
    {ANSI, "ANSI"},
    {UTF16LE, "UTF-16LE"},
    {UTF16BE, "UTF-16BE"},
    {UTF8, "UTF-8"},
    {UTF8BOM, "UTF-8BOM"}
};

// The helper keeps no state, so the compiler-generated constructor suffices.
CodeHelper::CodeHelper() = default;

// Detects the encoding of `filename` and returns its display name as stored
// in code2StringMap (e.g. "UTF-8BOM").
QString CodeHelper::getFileEncodingStr(const QString& filename)
{
    return code2StringMap.value(getFileEncoding(filename));
}

// Guesses the text encoding of `filename`.
//
// Detection order:
//   1. A leading byte order mark selects UTF-16LE (FF FE), UTF-16BE (FE FF)
//      or UTF-8 with BOM (EF BB BF).
//   2. Otherwise the whole content is decoded as UTF-8; if it decodes without
//      invalid or truncated sequences it is reported as UTF8, else as ANSI.
//
// Returns ANSI when the file cannot be opened, so the result is never
// indeterminate. An empty file is reported as UTF8.
EncodingFormat CodeHelper::getFileEncoding(const QString& filename)
{
    QFile file(filename);
    if(!file.open(QIODevice::ReadOnly)) {
        return EncodingFormat::ANSI;
    }

    // Read everything: a valid-UTF-8 check on only the first few bytes would
    // classify almost any file with an ASCII prefix as UTF-8.
    const QByteArray content = file.readAll();
    file.close();

    // startsWith() is safe for content shorter than the BOM, unlike at().
    if(content.startsWith("\xFF\xFE")) {
        return EncodingFormat::UTF16LE;
    }
    if(content.startsWith("\xFE\xFF")) {
        return EncodingFormat::UTF16BE;
    }
    if(content.startsWith("\xEF\xBB\xBF")) {
        return EncodingFormat::UTF8BOM;
    }

    QTextCodec* utf8Codec = QTextCodec::codecForName("UTF-8");
    if(utf8Codec == nullptr) {
        // Without a UTF-8 codec the content cannot be validated.
        return EncodingFormat::ANSI;
    }

    QTextCodec::ConverterState state;
    utf8Codec->toUnicode(content.constData(), content.size(), &state);

    // remainingChars > 0 means the data ended in the middle of a multi-byte
    // sequence, which is just as invalid as an undecodable byte.
    const bool validUtf8 = (state.invalidChars == 0) && (state.remainingChars == 0);
    return validUtf8 ? EncodingFormat::UTF8 : EncodingFormat::ANSI;
}

// Configures `stream` to read or write text in `encodingFormat`.
//
//   - UTF8BOM: UTF-8 codec, with BOM generation enabled on the stream.
//   - ANSI:    the system locale codec.
//   - others:  the codec whose name is stored in code2StringMap.
void CodeHelper::useRightCodec(QTextStream& stream, const EncodingFormat& encodingFormat)
{
    switch (encodingFormat) {
    case UTF8BOM:
        // "UTF-8BOM" is only a display name; the codec itself is plain UTF-8
        // and the BOM is emitted by the stream.
        stream.setGenerateByteOrderMark(true);
        stream.setCodec(QTextCodec::codecForName(code2StringMap.value(EncodingFormat::UTF8).toUtf8()));
        break;
    case ANSI:
        // "ANSI" is not a standard Qt codec name, so looking it up by name
        // would yield a null codec. Select the locale codec explicitly
        // instead of depending on the stream's null-codec fallback.
        stream.setCodec(QTextCodec::codecForLocale());
        break;
    default:
        stream.setCodec(QTextCodec::codecForName(code2StringMap.value(encodingFormat).toUtf8()));
        break;
    }
}

bool CodeHelper::translateFile2UTF8BOM(const QString &srcFile, const QString &desFile)
{
    if(!QFile::exists(srcFile)) {
        return false;
    }

    QFile file_src(srcFile);
    if(!file_src.open(QIODevice::ReadOnly)) {
        return false;
    }

    QTextStream stream_src(&file_src);
    useRightCodec(stream_src, getFileEncoding(srcFile));
    QString info_src = stream_src.readAll();
    file_src.close();

    QFile file_des(desFile);
    if(!file_des.open(QIODevice::WriteOnly)) {
        return false;
    }

    QTextStream stream_des(&file_des);
    useRightCodec(stream_des, EncodingFormat::UTF8BOM);
    stream_des << info_src;
    file_des.close();
    return true;
}

你可能感兴趣的:(Qt,qt)