forked from artyom-beilis/cppcms
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxss.h
More file actions
461 lines (422 loc) · 16.7 KB
/
Copy pathxss.h
File metadata and controls
461 lines (422 loc) · 16.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
///////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2008-2012 Artyom Beilis (Tonkikh) <[email protected]>
//
// See accompanying file COPYING.TXT file for licensing details.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef CPPCMS_XSS_H
#define CPPCMS_XSS_H
#include <booster/copy_ptr.h>
#include <booster/regex.h>
#include <booster/function.h>
#include <cppcms/defs.h>
#include <string.h>
#include <string>
#include <algorithm>
namespace cppcms {
namespace json {
// Forward declaration: this header only refers to json::value by const reference
// (see rules(json::value const &)), so its full definition is not needed here.
class value;
}
///
/// \brief Namespace that holds Anti-Cross Site Scripting Filter support
///
/// The classes in this namespace provide filtering for safe
/// handling of HTML and for preventing XSS attacks
///
namespace xss {
/// \cond INTERNAL
namespace details {
// Forward declaration of the string type taken by the internal rules::valid_* checks.
class c_string;
}
// Forward declaration of the type returned by the private rules::impl() accessors.
struct basic_rules_holder;
/// \endcond
///
/// \brief The class that holds XSS filter rules
///
/// This is the main class that defines the white list rules used to handle
/// correct HTML input.
///
/// When using these rules you should be very strict about what you need and what you
/// allow.
///
/// Basically you need to specify:
///
/// -# The XHTML or HTML parsing rules - should be done first
/// -# The encoding of the text. If you do not specify the encoding
/// it would be assumed that it is ASCII compatible.
/// You may omit the encoding only if you know that it was already validated,
/// for example by using widgets::text; otherwise \b always specify the
/// encoding
/// -# Provide the list of tags that should be used. Specify only those you need.
/// .
/// Never allow tags like style, object, embed or of course script as they can be easily
/// used for XSS attacks
/// -# Provide essential HTML attributes - properties for tags you need.
/// Use add_uri_property for links like src for img or href for a.
/// It would check correctness of URI syntax and ensure that only white-listed
/// schemas are allowed (i.e. no javascript would be allowed).
/// .
/// Never allow the style attribute unless you specify a very strict white list of the
/// styles you really use. Styles can easily be exploited for both XSS and click-jacking. For example:
/// \code
/// <p style="width: expression(alert('XSS'));"></p>
/// \endcode
/// .
/// If you want to use styles specify very strict list of things you need like:
/// \code
/// add_property("p","style",booster::regex("text-align:(left|right|center)"));
/// \endcode
/// -# Do not allow comments unless you need them. Note not all comments are allowed. Comments
/// containing "<", ">" or "&" would be considered invalid as some exploits use them.
///
/// Remember: the stricter you are, the harder it is to mount an attack. Read about XSS, study
/// existing attacks to understand how they work, and then decide what you allow.
///
/// rules class can be treated as value for thread safe access, i.e. you can safely use
/// const reference and const member functions as long as you don't change the rules
/// under the hood.
///
/// The simplest way: define a global rules object at application startup, configure
/// it, and use it for filtering and validation - and make your attackers cry :-).
///
///
class CPPCMS_API rules {
public:
/// Default constructor
rules();
/// Copy constructor
rules(rules const &);
/// Move constructor
rules(rules &&);
/// Copy assignment. Note that, unlike the move assignment below, it returns a
/// \b const reference to this object.
rules const &operator=(rules const &);
/// Move assignment
rules &operator=(rules &&);
/// Destructor
~rules();
/// Create rules from JSON object \a r
///
/// The JSON object that defines the XSS prevention rules.
/// This object has the following properties:
///
/// - "xhtml" - boolean; default true - use XHTML (true) or HTML input
/// - "comments" - boolean; setting it to true allows comments, default false
/// - "numeric_entities" - boolean; setting it to true allows numeric_entities, default false
/// - "entities" - array of strings: list of allowed HTML entities besides lt, gt and amp
/// - "encoding" - string; the encoding of the text to validate, by default not checked and the
/// input is assumed to be ASCII compatible. Always specify it for multibyte encodings
/// like Shift-JIS or GBK as they are not ASCII compatible.
/// - "tags" - object with 3 properties of type array of string:
/// - "opening_and_closing" - the tags that should come in pair like "<b></b>"
/// - "stand_alone" - the tags that should appear stand alone like "<br/>"
/// - "any_tag" - the tags that can be both like "<input>"
/// - "attributes" - array of objects that define HTML attributes. Each object consists
/// of the following properties:
/// - "type" - string - the type of the attribute, one of: "boolean", "uri", "relative_uri",
/// "absolute_uri", "integer", "regex".
/// - "scheme" - string - the allowed URI scheme - a regular expression like "(http|ftp)". Used with
/// the "uri" and "absolute_uri" types
/// - "expression" - string - the regular expression that defines the value that the attribute
/// should match.
/// - "tags" - array of strings - list of tags that this attribute is allowed for.
/// - "attributes" - array of strings - list of names of the attribute
/// - "pairs" - array of objects that consist of two properties, "tag" and "attr", of
/// type string, that define the tag and the attribute that such a type of property
/// should be allowed for.
///
/// The extra properties that are not defined by this scheme are ignored
///
/// For example:
/// \code
/// {
/// "xhtml" : true,
/// "encoding" : "UTF-8",
/// "entities" : [ "nbsp" , "copy" ],
/// "comments" : false,
/// "numeric_entities" : false,
/// "tags" : {
/// "opening_and_closing" : [
/// "p", "b", "i", "tt",
/// "a",
/// "strong", "em",
/// "sub", "sup",
/// "ol", "ul", "li",
/// "dd", "dt", "dl",
/// "blockquote","code", "pre",
/// "span", "div"
/// ],
/// "stand_alone" : [ "br", "hr", "img" ]
/// },
/// "attributes": [
/// {
/// "tags" : [ "p", "li", "ul" ],
/// "attr" : [ "style" ],
/// "type" : "regex",
/// "expression" : "\\s*text-align:\\s*(center|left|right|justify);?\\s*"
/// },
/// {
/// "tags" : [ "span", "div" ],
/// "attr" : [ "class", "id" ],
/// "type" : "regex",
/// "expression" : "[a-zA-Z_0-9]+"
/// },
/// {
/// "pairs" : [
/// { "tag" : "a", "attr" : "href" },
/// { "tag" : "img", "attr" : "src" }
/// ],
/// "type" : "absolute_uri",
/// "scheme" : "(http|https|ftp)"
/// },
/// {
/// "tags" : [ "img" ],
/// "attr" : [ "alt" ],
/// "type" : "regex",
/// "expression" : ".*"
/// }
/// ]
/// }
/// \endcode
///
/// NOTE(review): the property list above names the per-attribute key "attributes", while the
/// example uses "attr" - confirm against the implementation which spelling is actually read.
///
rules(json::value const &r);
///
/// Create rules from the JSON object stored in the file \a file_name
///
/// \see rules(json::value const&)
///
rules(std::string const &file_name);
///
/// How to treat the input
///
typedef enum {
xhtml_input, ///< Assume that the input is XHTML
html_input ///< Assume that the input is HTML
} html_type;
///
/// The type of tag
///
typedef enum {
invalid_tag = 0, ///< This tag is invalid (returned by validate)
opening_and_closing = 1, ///< This tag should be opened and closed like em , or strong
stand_alone = 2, ///< This tag should stand alone (like hr or br)
any_tag = 3, ///< This tag can be used in both roles (like input)
} tag_type;
///
/// Get how to treat input - HTML or XHTML
///
html_type html() const;
///
/// Set how to treat input - HTML or XHTML. It should be called first, before you add any other
/// rules
///
void html(html_type t);
///
/// Add the tag that should be allowed to appear in the text. For HTML the name is case
/// insensitive, i.e. "br", "Br", "bR" and "BR" are valid tags for name "br".
///
/// The \a name should be ASCII only
///
void add_tag(std::string const &name,tag_type = any_tag);
///
/// Add allowed HTML entity, by default only "lt", "gt", "quot" and "amp" are allowed
///
void add_entity(std::string const &name);
///
/// Get if numeric entities are allowed, default is false
///
bool numeric_entities_allowed() const;
///
/// Set if numeric entities are allowed
///
void numeric_entities_allowed(bool v);
///
/// Functor that allows providing custom validations for different properties.
/// It receives the value to check as the character range [\a begin, \a end) and returns bool.
///
typedef booster::function<bool(char const *begin,char const *end)> validator_type;
///
/// Add the property that should be allowed to appear for a specific tag as a boolean property like
/// checked="checked". When the type
/// is HTML it is case insensitive.
///
/// The \a property should be ASCII only
///
void add_boolean_property(std::string const &tag_name,std::string const &property);
///
/// Add the property that should be checked using a custom functor
///
void add_property(std::string const &tag_name,std::string const &property,validator_type const &val);
///
/// Add the property that should be checked using a regular expression.
///
void add_property(std::string const &tag_name,std::string const &property,booster::regex const &r);
///
/// Add numeric property, same as add_property(tag_name,property,booster::regex("-?[0-9]+")) but
/// a little bit more efficient
///
void add_integer_property(std::string const &tag_name,std::string const &property);
///
/// Add URI property.
/// It should be used for properties like "href" or "src".
/// It is a very good idea to use it in order to prevent urls like javascript:alert('XSS')
///
/// Its behavior is the same as add_property(tag_name,property,rules::uri_validator());
///
void add_uri_property(std::string const &tag_name,std::string const &property);
///
/// Add URI property, using a regular expression that matches allowed schemas.
/// It should be used for properties like "href" or "src".
/// It is a very good idea to use it in order to prevent urls like javascript:alert('XSS')
///
/// Its behavior is the same as add_property(tag_name,property,rules::uri_validator(schema));
///
void add_uri_property(std::string const &tag_name,std::string const &property,std::string const &schema);
///
/// \deprecated use uri_validator
///
/// Create a regular expression that checks URI for safe inclusion in the property.
/// By default it allows only: http, https, ftp, mailto, news, nntp.
///
/// If you need finer control over allowed schemas, use uri_matcher(std::string const&).
///
CPPCMS_DEPRECATED static booster::regex uri_matcher();
///
/// \deprecated use uri_validator
///
/// Create a regular expression that checks URI for safe inclusion in the text, where
/// \a schema is a regular expression that matches specific protocols that can be used.
///
/// \note Don't add "^" or "$" tags, as this expression would be used in the construction of
/// another regular expression.
///
/// For example:
/// \code
/// booster::regex uri = uri_matcher("(http|https)");
/// \endcode
///
CPPCMS_DEPRECATED static booster::regex uri_matcher(std::string const &schema);
///
/// Create a validator that checks URI for safe inclusion in the property.
/// By default it allows only: http, https, ftp, mailto, news, nntp.
///
/// If you need finer control over allowed schemas, use uri_validator(std::string const&).
///
static validator_type uri_validator();
///
/// Create a validator that checks URI for safe inclusion in the property.
/// - \a scheme is a regular expression that matches specific protocols that can be used.
/// - \a absolute_only - set to true to prevent accepting relative URIs like "/files/img.png" or "test.html"
///
/// \note You don't need to add "^" or "$" tags to \a scheme
///
/// For example:
/// \code
/// uri_validator("(http|https)");
/// \endcode
///
static validator_type uri_validator(std::string const &scheme,bool absolute_only = false);
///
/// Create a validator that checks that this URI is relative and that it is safe for inclusion
/// in a URI property like href or src
///
static validator_type relative_uri_validator();
///
/// Check if the comments are allowed in the text
///
bool comments_allowed() const;
///
/// Set to true if the comments are allowed in the text
///
void comments_allowed(bool comments);
///
/// Set the character encoding of the source; otherwise the encoding is not checked and is
/// assumed valid. Invalid characters are removed from the text or replaced with a default character.
///
/// It is very important to specify this option. You may skip it if you are sure that
/// the input encoding was already validated using the cppcms::form::text widget that handles
/// character encoding validation by default.
///
/// In any case it is generally better to always specify this option.
///
///
/// \note the replace functionality is not supported for all encodings; only UTF-8, ISO-8859-* and single byte windows-12XX
/// encodings support such replacement with a default character. For all other encodings like Shift-JIS, the invalid
/// characters or characters that are invalid for use in HTML are removed.
///
void encoding(std::string const &enc);
/// \cond INTERNAL
///
/// Test if the tag is valid.
/// \a tag should be lower case for HTML or unchanged for XHTML
///
tag_type valid_tag(details::c_string const &tag) const;
///
/// Test if the boolean property (a property without a value) is valid.
/// \a tag and \a property should be lower case for HTML or unchanged for XHTML
///
bool valid_boolean_property(details::c_string const &tag,details::c_string const &property) const;
///
/// Test if the property and its \a value are valid;
///
/// \a tag and \a property should be lower case for HTML or unchanged for XHTML
///
bool valid_property(details::c_string const &tag,details::c_string const &property,details::c_string const &value) const;
///
/// Test if a specific HTML entity is valid
///
bool valid_entity(details::c_string const &val) const;
///
/// Get the encoding, returns an empty string if no encoding testing
/// is required
///
std::string encoding() const;
/// \endcond
private:
// Mutable and const access to the basic_rules_holder, which is only
// forward-declared in this header.
basic_rules_holder &impl();
basic_rules_holder const &impl() const;
// Opaque implementation data: the struct is declared here but defined outside
// this header, and is held through booster::copy_ptr.
struct data;
booster::copy_ptr<data> d;
};
///
/// \brief Selects how a filter deals with HTML that is not valid
///
enum filtering_method_type {
	remove_invalid, ///< Remove all invalid HTML from the input
	escape_invalid  ///< Escape all invalid HTML in the input, i.e. convert it to text
};
///
/// \brief Check the input in range [\a begin, \a end) according to the rules \a r.
///
/// It does not filter the input; it only checks its validity. It would be faster than
/// validate_and_filter_if_invalid or the filter functions, but it does not correct errors.
///
/// \return presumably true when the input is valid and false otherwise, matching
/// validate_and_filter_if_invalid - confirm against the implementation.
///
CPPCMS_API bool validate(char const *begin,char const *end,rules const &r);
///
/// \brief Validate the input in range [\a begin, \a end) according to the rules \a r and, if it is not valid, filter it
/// and save the filtered text into the \a filtered string using the filtering method \a method.
///
/// If the data was valid, \a filtered remains unchanged and the function returns true; otherwise it returns false
/// and the filtered data is saved.
///
/// NOTE(review): \a replacement_char is not described in this header. It is presumably the character
/// substituted for invalid characters (see the replacement note on rules::encoding), with the
/// default 0 meaning that no replacement character is given - confirm against the implementation.
///
CPPCMS_API bool validate_and_filter_if_invalid( char const *begin,
char const *end,
rules const &r,
std::string &filtered,
filtering_method_type method=remove_invalid,
char replacement_char = 0);
///
/// \brief Filter the input in range [\a begin, \a end) according to the rules \a r using the filtering
/// method \a method
///
/// The result is returned as a new std::string.
///
/// NOTE(review): \a replacement_char is not described in this header; presumably it is the character
/// substituted for invalid characters - confirm against the implementation.
///
CPPCMS_API std::string filter(char const *begin,
char const *end,
rules const &r,
filtering_method_type method=remove_invalid,
char replacement_char = 0);
///
/// \brief Filter the input text \a input according to the rules \a r using the filtering method \a method
///
/// Same parameters as the overload taking a [begin, end) character range, except that the
/// text is passed as a std::string. The result is returned as a new std::string.
///
/// NOTE(review): \a replacement_char is not described in this header; presumably it is the character
/// substituted for invalid characters - confirm against the implementation.
///
CPPCMS_API std::string filter(std::string const &input,
rules const &r,
filtering_method_type method=remove_invalid,
char replacement_char = 0);
} // xss
}
#endif