forked from sqlitebrowser/sqlitebrowser
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathData.cpp
More file actions
113 lines (99 loc) · 3.52 KB
/
Data.cpp
File metadata and controls
113 lines (99 loc) · 3.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#include "Data.h"
#include <QTextCodec>
#include <algorithm>
// Byte order marks recognised by the helpers in this file. The list covers the
// common BOMs only, not every BOM in existence.
// Each length is passed explicitly so that the zero bytes inside the four-byte
// marks are kept instead of being read as a string terminator.
namespace {
const QByteArray bom3("\xEF\xBB\xBF", 3);
const QByteArray bom2a("\xFE\xFF", 2);
const QByteArray bom2b("\xFF\xFE", 2);
const QByteArray bom4a("\x00\x00\xFE\xFF", 4);
const QByteArray bom4b("\xFF\xFE\x00\x00", 4);
} // namespace
/**
 * Heuristically decides whether a byte array holds text in the given encoding.
 *
 * @param data      Bytes to inspect. Taken by value because the full check
 *                  re-assigns it with the decoded form.
 * @param encoding  Name of the text encoding. An empty name selects the default
 *                  (UTF-8). A name unknown to QTextCodec is handled like the
 *                  default in the quick-test path instead of dereferencing a
 *                  null codec.
 * @param quickTest When true and the data is longer than 512 bytes, only the
 *                  first 512 bytes are decoded.
 * @return true if the data is considered text, false otherwise.
 */
bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
{
    // If the data starts with a Unicode BOM, we always assume it is text
    if(startsWithBom(data))
        return true;

    // Truncate to the first few bytes for quick testing
    const int testSize = quickTest ? std::min(512, data.size()) : data.size();

    // If the quick test has been requested and we have to truncate the string, we have to use
    // an approach where truncated multibyte characters are not interpreted as invalid characters.
    if(quickTest && data.size() > testSize) {
        // codecForName() yields a null pointer for names it does not know, so an
        // unknown encoding falls back to the default codec.
        QTextCodec* codec = encoding.isEmpty() ? nullptr : QTextCodec::codecForName(encoding.toUtf8());
        const bool usesDefaultCodec = (codec == nullptr);
        if(usesDefaultCodec)
            codec = QTextCodec::codecForName("UTF-8");

        // We can assume that the default encoding (UTF-8) and all the ISO-8859
        // cannot contain character zero.
        // This has to be checked explicitly because toUnicode() is using zero as
        // a terminator for these encodings.
        if((usesDefaultCodec || encoding.startsWith("ISO-8859")) && data.contains('\0'))
            return false;

        // Without any usable codec the data cannot be verified as text
        if(codec == nullptr)
            return false;

        // Only the converter state matters here; the decoded string itself is discarded
        QTextCodec::ConverterState state;
        codec->toUnicode(data.constData(), testSize, &state);
        return state.invalidChars == 0;
    } else {
        // Convert to Unicode if necessary
        data = decodeString(data, encoding);

        // The data counts as text when turning it into a QString and back into
        // UTF-8 reproduces it byte for byte.
        return QString(data).toUtf8() == data;
    }
}
bool containsRightToLeft(const QString& text) {
for(QChar ch : text) {
switch(ch.direction()) {
case QChar::DirR:
case QChar::DirAL:
case QChar::DirRLE:
case QChar::DirRLO:
case QChar::DirRLI:
return true;
}
}
return false;
}
// Tells whether the data begins with one of the byte order marks known to this file.
bool startsWithBom(const QByteArray& data)
{
    return data.startsWith(bom3)                                // three-byte mark
        || data.startsWith(bom2a) || data.startsWith(bom2b)     // two-byte marks
        || data.startsWith(bom4a) || data.startsWith(bom4b);    // four-byte marks
}
/**
 * Strips a leading byte order mark from the data, if there is one.
 *
 * @param data Byte array to modify in place; a recognised BOM is removed from its front.
 * @return The removed BOM bytes, or an empty QByteArray when the data has no known BOM.
 */
QByteArray removeBom(QByteArray& data)
{
    // The four-byte marks must be tested before the two-byte ones: the mark
    // FF FE 00 00 begins with the two-byte mark FF FE, so testing the short
    // mark first would strip only half of the long one.
    int bomSize = 0;
    if(data.startsWith(bom4a) || data.startsWith(bom4b))
        bomSize = 4;
    else if(data.startsWith(bom3))
        bomSize = 3;
    else if(data.startsWith(bom2a) || data.startsWith(bom2b))
        bomSize = 2;

    if(bomSize == 0)
        return QByteArray();

    QByteArray bom = data.left(bomSize);
    data.remove(0, bomSize);
    return bom;
}
// Builds a QStringList by interpreting every byte array of the list as UTF-8.
QStringList toStringList(const QList<QByteArray>& list) {
    QStringList converted;
    converted.reserve(list.size());

    for (auto it = list.cbegin(); it != list.cend(); ++it)
        converted << QString::fromUtf8(*it);

    return converted;
}
/**
 * Converts a UTF-8 byte string into the given encoding.
 *
 * @param str      Bytes to convert; they are read as UTF-8.
 * @param encoding Name of the target encoding. When it is empty, or when
 *                 QTextCodec has no codec of that name, the input is returned
 *                 unchanged.
 * @return The re-encoded bytes, or a copy of str if no conversion takes place.
 */
QByteArray encodeString(const QByteArray& str, const QString& encoding)
{
    if(encoding.isEmpty())
        return str;

    // codecForName() returns a null pointer for unknown names; do not dereference it
    QTextCodec* const codec = QTextCodec::codecForName(encoding.toUtf8());
    if(codec == nullptr)
        return str;

    return codec->fromUnicode(QString::fromUtf8(str));
}
/**
 * Converts a byte string from the given encoding into UTF-8.
 *
 * @param str      Bytes to convert.
 * @param encoding Name of the encoding the bytes are in. When it is empty, or
 *                 when QTextCodec has no codec of that name, the input is
 *                 returned unchanged.
 * @return The UTF-8 bytes, or a copy of str if no conversion takes place.
 */
QByteArray decodeString(const QByteArray& str, const QString& encoding)
{
    if(encoding.isEmpty())
        return str;

    // codecForName() returns a null pointer for unknown names; do not dereference it
    QTextCodec* const codec = QTextCodec::codecForName(encoding.toUtf8());
    if(codec == nullptr)
        return str;

    return codec->toUnicode(str).toUtf8();
}