diff options
Diffstat (limited to 'Swiften/IDN/UTF8Validator.h')
-rw-r--r-- | Swiften/IDN/UTF8Validator.h | 67 |
1 files changed, 67 insertions, 0 deletions
/*
 * Copyright (c) 2015 Isode Limited.
 * All rights reserved.
 * See the COPYING file for more information.
 */

#pragma once

#include <cstddef>

namespace Swift {

/**
 * Checks whether a buffer holds well-formed UTF-8 as defined by the syntax in
 * https://tools.ietf.org/html/rfc3629#section-4 .
 *
 * Besides the lead/continuation bit patterns, this rejects every sequence the
 * RFC declares invalid:
 *  - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 0x80..0x9F, 0xF0 0x80..0x8F),
 *  - UTF-16 surrogates U+D800..U+DFFF (0xED 0xA0..0xBF),
 *  - code points above U+10FFFF (0xF4 0x90.., lead bytes 0xF5..0xFF),
 *  - stray continuation bytes and sequences truncated by the end of the buffer.
 *
 * @param data   Pointer to the first code unit; only dereferenced when length > 0.
 * @param length Number of code units in the buffer.
 * @return true if the whole buffer is well-formed UTF-8 (an empty buffer is valid),
 *         false otherwise.
 *
 * Each code unit is converted to unsigned char before inspection, so the result
 * does not depend on whether CharType is a signed or unsigned character type.
 */
template <typename CharType>
bool UTF8IsValid(const CharType* data, size_t length) {
    const CharType* current = data;
    const CharType* const end = data + length;

    while (current < end) {
        const unsigned char lead = static_cast<unsigned char>(*current);

        // One byte sequences (US-ASCII).
        if (lead < 0x80) {
            ++current;
            continue;
        }

        // Number of continuation bytes that must follow the lead byte, and the
        // permitted range of the first of them. That range is narrower than
        // 0x80..0xBF for the lead bytes that could otherwise start an overlong
        // form, a surrogate, or a code point beyond U+10FFFF.
        size_t trailing = 0;
        unsigned char secondMin = 0x80;
        unsigned char secondMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            // Two byte sequences; 0xC0 and 0xC1 would be overlong ASCII.
            trailing = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF) {
            // Three byte sequences.
            trailing = 2;
            if (lead == 0xE0) {
                secondMin = 0xA0; // below this is an overlong form of U+0000..U+07FF
            }
            else if (lead == 0xED) {
                secondMax = 0x9F; // above this are the surrogates U+D800..U+DFFF
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4) {
            // Four byte sequences.
            trailing = 3;
            if (lead == 0xF0) {
                secondMin = 0x90; // below this is an overlong form of U+0000..U+FFFF
            }
            else if (lead == 0xF4) {
                secondMax = 0x8F; // above this exceeds U+10FFFF
            }
        }
        else {
            // Stray continuation byte (0x80..0xBF), overlong lead (0xC0, 0xC1)
            // or a lead byte that can only encode values above U+10FFFF.
            return false;
        }

        // The sequence must fit completely inside the buffer.
        if (static_cast<size_t>(end - current) <= trailing) {
            return false;
        }

        const unsigned char second = static_cast<unsigned char>(current[1]);
        if (second < secondMin || second > secondMax) {
            return false;
        }

        // Remaining continuation bytes only need the generic 10xxxxxx form.
        for (size_t i = 2; i <= trailing; ++i) {
            if ((static_cast<unsigned char>(current[i]) & 0xC0) != 0x80) {
                return false;
            }
        }

        current += trailing + 1;
    }
    return true;
}

}