summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'Swiften/IDN/UTF8Validator.h')
-rw-r--r--Swiften/IDN/UTF8Validator.h67
1 files changed, 67 insertions, 0 deletions
diff --git a/Swiften/IDN/UTF8Validator.h b/Swiften/IDN/UTF8Validator.h
new file mode 100644
index 0000000..5df8769
--- /dev/null
+++ b/Swiften/IDN/UTF8Validator.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2015 Isode Limited.
3 * All rights reserved.
4 * See the COPYING file for more information.
5 */
6
7#pragma once
8
9#include <cstddef>
10
11namespace Swift {
12
// Checks whether the buffer [data, data + length) is a well-formed UTF-8 byte
// sequence according to the UTF8-octets grammar in
// https://tools.ietf.org/html/rfc3629#section-4 .
//
// Besides the structural checks (lead byte followed by the right number of
// 10xxxxxx continuation bytes), the following are rejected:
//  - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 0x80..0x9F, 0xF0 0x80..0x8F),
//  - UTF-16 surrogates U+D800..U+DFFF (0xED 0xA0..0xBF),
//  - code points above U+10FFFF (0xF4 0x90.. and lead bytes 0xF5..0xFF),
//  - sequences truncated by the end of the buffer.
//
// data:   pointer to the first code unit; only the low 8 bits of each element
//         are examined, so char, signed char and unsigned char all work.
// length: number of elements in data. A length of 0 is valid (empty string).
// Returns true if the whole buffer is valid UTF-8, false otherwise.
template <typename CharType>
bool UTF8IsValid(const CharType* data, size_t length) {
    size_t index = 0;
    while (index < length) {
        const unsigned int lead = static_cast<unsigned int>(data[index]) & 0xFFu;

        // One byte sequences (US-ASCII).
        if (lead < 0x80u) {
            ++index;
            continue;
        }

        // Number of continuation bytes and the allowed range of the *first*
        // continuation byte. The range is narrowed for some lead bytes to
        // exclude overlong forms, surrogates and values above U+10FFFF.
        size_t trailing = 0;
        unsigned int firstMin = 0x80u;
        unsigned int firstMax = 0xBFu;
        if (lead >= 0xC2u && lead <= 0xDFu) {
            // Two byte sequences; 0xC0 and 0xC1 could only encode overlong ASCII.
            trailing = 1;
        }
        else if (lead >= 0xE0u && lead <= 0xEFu) {
            // Three byte sequences.
            trailing = 2;
            if (lead == 0xE0u) {
                firstMin = 0xA0u; // below this is an overlong encoding
            }
            else if (lead == 0xEDu) {
                firstMax = 0x9Fu; // above this is a UTF-16 surrogate
            }
        }
        else if (lead >= 0xF0u && lead <= 0xF4u) {
            // Four byte sequences.
            trailing = 3;
            if (lead == 0xF0u) {
                firstMin = 0x90u; // below this is an overlong encoding
            }
            else if (lead == 0xF4u) {
                firstMax = 0x8Fu; // above this is beyond U+10FFFF
            }
        }
        else {
            // Stray continuation byte (0x80..0xBF) or a lead byte that can
            // never appear in UTF-8 (0xC0, 0xC1, 0xF5..0xFF).
            return false;
        }

        // Compare element counts instead of forming pointers past the end of
        // the buffer. index < length holds here, so the subtraction is safe.
        if ((length - index - 1) < trailing) {
            return false;
        }

        const unsigned int first = static_cast<unsigned int>(data[index + 1]) & 0xFFu;
        if (first < firstMin || first > firstMax) {
            return false;
        }
        for (size_t offset = 2; offset <= trailing; ++offset) {
            const unsigned int tail = static_cast<unsigned int>(data[index + offset]) & 0xFFu;
            if ((tail & 0xC0u) != 0x80u) {
                return false;
            }
        }

        index += trailing + 1;
    }
    return true;
}
66
67}