sqlitebrowser/src/Data.cpp at master · mkll/sqlitebrowser

History

113 lines (99 loc) · 3.52 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

#include "Data.h"

#include <QTextCodec>

#include <algorithm>

// Note that these aren't all possible BOMs. But they are probably the most common ones.

// The size is needed at least for the ones with character zero in them.

static const QByteArray bom3("\xEF\xBB\xBF", 3);

static const QByteArray bom2a("\xFE\xFF", 2);

static const QByteArray bom2b("\xFF\xFE", 2);

static const QByteArray bom4a("\x00\x00\xFE\xFF", 4);

static const QByteArray bom4b("\xFF\xFE\x00\x00", 4);

bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)

{

// If the data starts with a Unicode BOM, we always assume it is text

if(startsWithBom(data))

return true;

// Truncate to the first few bytes for quick testing

int testSize = quickTest? std::min(512, data.size()) : data.size();

// If the quick test has been requested and we have to truncate the string, we have to use

// an approach where truncated multibyte characters are not interpreted as invalid characters.

if(quickTest && data.size() > testSize) {

// We can assume that the default encoding (UTF-8) and all the ISO-8859

// cannot contain character zero.

// This has to be checked explicitly because toUnicode() is using zero as

// a terminator for these encodings.

if((encoding.isEmpty() || encoding.startsWith("ISO-8859")) && data.contains('\0'))

return false;

QTextCodec::ConverterState state;

QTextCodec *codec = encoding.isEmpty()? QTextCodec::codecForName("UTF-8") : QTextCodec::codecForName(encoding.toUtf8());

const QString text = codec->toUnicode(data.constData(), testSize, &state);

return state.invalidChars == 0;

} else {

// Convert to Unicode if necessary

data = decodeString(data, encoding);

// Perform check

return QString(data).toUtf8() == data;

}

bool containsRightToLeft(const QString& text) {

for(QChar ch : text) {

switch(ch.direction()) {

case QChar::DirR:

case QChar::DirAL:

case QChar::DirRLE:

case QChar::DirRLO:

case QChar::DirRLI:

return true;

}

return false;

}

bool startsWithBom(const QByteArray& data)

{

if(data.startsWith(bom3) ||

data.startsWith(bom2a) || data.startsWith(bom2b) ||

data.startsWith(bom4a) || data.startsWith(bom4b))

return true;

else

return false;

}

QByteArray removeBom(QByteArray& data)

{

if(data.startsWith(bom3))

{

QByteArray bom = data.left(3);

data.remove(0, 3);

return bom;

} else if(data.startsWith(bom2a) || data.startsWith(bom2b)) {

QByteArray bom = data.left(2);

data.remove(0, 2);

return bom;

} else if(data.startsWith(bom4a) || data.startsWith(bom4b)) {

QByteArray bom = data.left(4);

data.remove(0, 4);

return bom;

} else {

return QByteArray();

}

QStringList toStringList(const QList<QByteArray>& list) {

QStringList strings;

for (const QByteArray &item : list) {

strings.append(QString::fromUtf8(item));

}

return strings;

}

QByteArray encodeString(const QByteArray& str, const QString& encoding)

{

if(encoding.isEmpty())

return str;

else

return QTextCodec::codecForName(encoding.toUtf8())->fromUnicode(str);

}

QByteArray decodeString(const QByteArray& str, const QString& encoding)

{

if(encoding.isEmpty())

return str;

else

return QTextCodec::codecForName(encoding.toUtf8())->toUnicode(str).toUtf8();

}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

Data.cpp

Latest commit

History

Data.cpp

File metadata and controls