-
Notifications
You must be signed in to change notification settings - Fork 119
Expand file tree
/
Copy pathencoding.h
More file actions
120 lines (110 loc) · 5.54 KB
/
Copy pathencoding.h
File metadata and controls
120 lines (110 loc) · 5.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2008-2012 Artyom Beilis (Tonkikh) <[email protected]>
//
// See accompanying file COPYING.TXT file for licensing details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef CPPCMS_ENCODING_H
#define CPPCMS_ENCODING_H
#include <string>
#include <map>
#include <locale>
#include <cppcms/defs.h>
#include <cppcms/config.h>
namespace cppcms {
///
/// \brief This namespace holds various functions for dealing with text encodings
///
///
namespace encoding {
/// Note: all of these functions treat control characters that are invalid in HTML as illegal.
/// For example, NUL is a legal UTF-8 code point but it is illegal in terms of HTML validity; thus
/// valid_utf8 would return false.
///
/// Check whether the string in range [\a begin, \a end) is valid in the locale \a loc and
/// does not include characters that are illegal in HTML.
///
/// \param loc    locale the text is validated against
/// \param begin  start of the text range
/// \param end    end of the text range (exclusive)
/// \param count  receives the number of code points
/// \return true if the text passes the check, false otherwise
///
/// NOTE(review): the value left in \a count when the check fails is not stated here - confirm
/// against the implementation before relying on it.
///
bool CPPCMS_API valid(std::locale const &loc,char const *begin,char const *end,size_t &count);
///
/// Check whether the string in range [\a begin, \a end) is valid UTF-8 and does not include
/// characters that are illegal in HTML.
///
/// \param begin  start of the text range
/// \param end    end of the text range (exclusive)
/// \param count  receives the number of code points
/// \return true if the text passes the check, false otherwise
///
bool CPPCMS_API valid_utf8(char const *begin,char const *end,size_t &count);
///
/// Check whether the string in range [\a begin, \a end) is valid in the encoding \a encoding
/// and does not include characters that are illegal in HTML.
///
/// \param encoding  name of the encoding, given as a C string
/// \param begin     start of the text range
/// \param end       end of the text range (exclusive)
/// \param count     receives the number of code points
/// \return true if the text passes the check, false otherwise
///
bool CPPCMS_API valid(char const *encoding,char const *begin,char const *end,size_t &count);
///
/// Check whether the string in range [\a begin, \a end) is valid in the encoding \a encoding
/// and does not include characters that are illegal in HTML.
///
/// \param encoding  name of the encoding, given as a std::string
/// \param begin     start of the text range
/// \param end       end of the text range (exclusive)
/// \param count     receives the number of code points
/// \return true if the text passes the check, false otherwise
///
bool CPPCMS_API valid(std::string const &encoding,char const *begin,char const *end,size_t &count);
///
/// Returns true if ASCII is a strict subset of the encoding, i.e. all non-ASCII characters
/// are encoded using bytes >= 0x80.
///
/// This is very important for XML or HTML parsing, to prevent invalid detection of HTML-specific
/// characters. Filters that work with encodings that are not ASCII compatible should therefore
/// convert the text to UTF-8 and then convert it back.
///
/// The ASCII-compatible encodings are the UTF-8, ISO-8859-*, windows-12* and koi encoding families.
///
/// \param encoding  name of the encoding to test
///
bool CPPCMS_API is_ascii_compatible(std::string const &encoding);
///
/// Check whether the text in range [\a begin, \a end) is valid in the encoding \a encoding.
///
/// If the text is valid, returns true. Otherwise the invalid characters are removed
/// (if \a replace == 0) or replaced with \a replace, the result is saved to \a output,
/// and false is returned.
///
/// \param encoding  name of the encoding the text is checked against
/// \param begin     start of the text range
/// \param end       end of the text range (exclusive)
/// \param output    receives the filtered text when the input is not valid
/// \param replace   replacement character; 0 (the default) means invalid characters are removed
/// \return true if the text is valid, false if it had to be filtered
///
/// \note the replace functionality is not supported for all encodings, only for UTF-8, ISO-8859-*,
/// the single-byte windows-12XX encodings and the koi family
///
bool CPPCMS_API validate_or_filter( std::string const &encoding,
char const *begin,char const *end,
std::string &output,
char replace = 0);
///
/// Convert the string in range [\a begin, \a end) from the local 8-bit encoding of the
/// locale \a loc to UTF-8.
///
/// If illegal characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
/// \param loc    locale that determines the source encoding
/// \param begin  start of the text range
/// \param end    end of the text range (exclusive)
/// \return the converted UTF-8 text
///
std::string CPPCMS_API to_utf8(std::locale const &loc,char const *begin,char const *end);
///
/// Convert the string in range [\a begin, \a end) from the local 8-bit encoding \a encoding
/// to UTF-8.
///
/// If illegal characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
/// \param encoding  name of the source encoding
/// \param begin     start of the text range
/// \param end       end of the text range (exclusive)
/// \return the converted UTF-8 text
///
std::string CPPCMS_API to_utf8(char const *encoding,char const *begin,char const *end);
///
/// Convert the string \a str from the local 8-bit encoding of the locale \a loc to UTF-8.
///
/// If illegal characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
/// \param loc  locale that determines the source encoding
/// \param str  text to convert
/// \return the converted UTF-8 text
///
std::string CPPCMS_API to_utf8(std::locale const &loc,std::string const &str);
///
/// Convert the string \a str from the local 8-bit encoding \a encoding to UTF-8.
///
/// If illegal characters are found, the conversion is aborted and only the successfully
/// converted part is returned.
///
/// \param encoding  name of the source encoding
/// \param str       text to convert
/// \return the converted UTF-8 text
///
std::string CPPCMS_API to_utf8(char const *encoding,std::string const &str);
///
/// Convert the UTF-8 string in range [\a begin, \a end) to the local 8-bit encoding of the
/// locale \a loc.
///
/// If non-convertible characters are found, the conversion is aborted and only the
/// successfully converted part is returned.
///
/// \param loc    locale that determines the target encoding
/// \param begin  start of the UTF-8 text range
/// \param end    end of the UTF-8 text range (exclusive)
/// \return the text converted to the target encoding
///
std::string CPPCMS_API from_utf8(std::locale const &loc,char const *begin,char const *end);
///
/// Convert the UTF-8 string in range [\a begin, \a end) to the local 8-bit encoding \a encoding.
///
/// If non-convertible characters are found, the conversion is aborted and only the
/// successfully converted part is returned.
///
/// \param encoding  name of the target encoding
/// \param begin     start of the UTF-8 text range
/// \param end       end of the UTF-8 text range (exclusive)
/// \return the text converted to the target encoding
///
std::string CPPCMS_API from_utf8(char const *encoding,char const *begin,char const *end);
///
/// Convert the UTF-8 string \a str to the local 8-bit encoding of the locale \a loc.
///
/// If non-convertible characters are found, the conversion is aborted and only the
/// successfully converted part is returned.
///
/// \param loc  locale that determines the target encoding
/// \param str  UTF-8 text to convert
/// \return the text converted to the target encoding
///
std::string CPPCMS_API from_utf8(std::locale const &loc,std::string const &str);
///
/// Convert the UTF-8 string \a str to the local 8-bit encoding \a encoding.
///
/// If non-convertible characters are found, the conversion is aborted and only the
/// successfully converted part is returned.
///
/// \param encoding  name of the target encoding
/// \param str       UTF-8 text to convert
/// \return the text converted to the target encoding
///
std::string CPPCMS_API from_utf8(char const *encoding,std::string const &str);
} // encoding
} // cppcms
#endif