summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'Swiften/IDN/UTF8Validator.h')
-rw-r--r--Swiften/IDN/UTF8Validator.h67
1 files changed, 67 insertions, 0 deletions
diff --git a/Swiften/IDN/UTF8Validator.h b/Swiften/IDN/UTF8Validator.h
new file mode 100644
index 0000000..5df8769
--- /dev/null
+++ b/Swiften/IDN/UTF8Validator.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2015 Isode Limited.
+ * All rights reserved.
+ * See the COPYING file for more information.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace Swift {
+
+// UTF-8 validation based on the description in https://tools.ietf.org/html/rfc3629#section-3 .
+template <typename CharType>
+bool UTF8IsValid(const CharType* data, size_t length) {
+ bool isValid = true;
+ const CharType* current = data;
+ const CharType* end = data + length;
+ while (isValid && (current < end)) {
+ // one byte sequences
+ if ((*current & 0x80) == 0x0) {
+ current++;
+ continue;
+ }
+ // longer byte sequences
+ else {
+ // two byte sequences
+ if ((*current & 0xE0) == 0xC0) {
+ current++;
+ if ( (current < end) && ((*current & 0xC0) == 0x80) ) {
+ current++;
+ continue;
+ }
+ }
+ // three byte sequences
+ else if ((*current & 0xF0) == 0xE0) {
+ current++;
+ if ( ((current + 1) < end) && ((*current & 0xC0) == 0x80) ) {
+ current++;
+ if ((*current & 0xC0) == 0x80) {
+ current++;
+ continue;
+ }
+ }
+ }
+ // four byte sequences
+ else if ((*current & 0xF8) == 0xF0) {
+ current++;
+ if ( ((current + 2) < end) && ((*current & 0xC0) == 0x80) ) {
+ current++;
+ if ((*current & 0xC0) == 0x80) {
+ current++;
+ if ((*current & 0xC0) == 0x80) {
+ current++;
+ continue;
+ }
+ }
+ }
+ }
+ // invalid sequences
+ isValid = false;
+ }
+ }
+ return isValid;
+}
+
+}