diff options
Diffstat (limited to 'Sluift/tokenize.cpp')
-rw-r--r-- | Sluift/tokenize.cpp | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/Sluift/tokenize.cpp b/Sluift/tokenize.cpp new file mode 100644 index 0000000..b089cdb --- /dev/null +++ b/Sluift/tokenize.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013 Remko Tronçon + * Licensed under the GNU General Public License. + * See the COPYING file for more information. + */ + +#include <Sluift/tokenize.h> + +#include <boost/tokenizer.hpp> +#include <cctype> + +using namespace Swift; + +namespace { + struct LuaTokenizeFunctor { + void reset() { + } + + template<typename InputIterator, typename Token> + bool operator()(InputIterator& next, InputIterator& end, Token& result) { + while (next != end && std::isspace(*next)) { + ++next; + } + if (next == end) { + return false; + } + + std::vector<char> token; + char c = *next++; + token.push_back(c); + + // String literal + if (c == '\'' || c == '"') { + char quote = c; + bool inEscape = false; + for (; next != end; ++next) { + c = *next; + token.push_back(c); + if (inEscape) { + inEscape = false; + } + else if (c == '\\') { + inEscape = true; + } + else if (c == quote) { + break; + } + } + if (next != end) { + ++next; + } + } + // Identifier + else if (std::isalpha(c) || c == '_') { + while (next != end && (std::isalpha(*next) || *next == '_' || std::isdigit(*next))) { + token.push_back(*next); + ++next; + } + } + // Digit + else if (std::isdigit(c)) { + while (next != end && !std::isspace(*next)) { + token.push_back(*next); + ++next; + } + } + // Dots + else if (c == '.') { + while (next != end && *next == '.') { + token.push_back(*next); + ++next; + } + } + + result = Token(&token[0], token.size()); + return true; + } + }; +} + + +std::vector<std::string> Lua::tokenize(const std::string& input) { + boost::tokenizer<LuaTokenizeFunctor> tokenizer(input); + return std::vector<std::string>(tokenizer.begin(), tokenizer.end()); +} + |