summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'Sluift/tokenize.cpp')
-rw-r--r--Sluift/tokenize.cpp86
1 files changed, 86 insertions, 0 deletions
diff --git a/Sluift/tokenize.cpp b/Sluift/tokenize.cpp
new file mode 100644
index 0000000..b089cdb
--- /dev/null
+++ b/Sluift/tokenize.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2013 Remko Tronçon
+ * Licensed under the GNU General Public License.
+ * See the COPYING file for more information.
+ */
+
+#include <Sluift/tokenize.h>
+
+#include <boost/tokenizer.hpp>
+#include <cctype>
+
+using namespace Swift;
+
+namespace {
+    /*
+     * Character classification helpers.
+     *
+     * The <cctype> functions take an int that must be representable as
+     * unsigned char (or be EOF). Passing a plain char that holds a byte
+     * >= 0x80 is undefined behaviour when char is signed, so every call
+     * goes through a cast to unsigned char first.
+     */
+    inline bool isSpaceChar(char c) {
+        return std::isspace(static_cast<unsigned char>(c)) != 0;
+    }
+
+    inline bool isDigitChar(char c) {
+        return std::isdigit(static_cast<unsigned char>(c)) != 0;
+    }
+
+    /* First character of an identifier: a letter or an underscore. */
+    inline bool isIdentifierStart(char c) {
+        return c == '_' || std::isalpha(static_cast<unsigned char>(c)) != 0;
+    }
+
+    /* Any later character of an identifier: letter, underscore or digit. */
+    inline bool isIdentifierPart(char c) {
+        return isIdentifierStart(c) || isDigitChar(c);
+    }
+
+    /*
+     * Tokenizer function object handed to boost::tokenizer by Lua::tokenize.
+     *
+     * Each call to operator() skips leading whitespace and extracts exactly
+     * one token starting at 'next':
+     *   - a quoted string ('...' or "..."), including both quotes; a
+     *     backslash escapes the following character, and an unterminated
+     *     string runs to the end of the input;
+     *   - an identifier ([A-Za-z_][A-Za-z0-9_]* in the "C" locale);
+     *   - a token starting with a digit, which runs up to the next
+     *     whitespace character;
+     *   - a run of consecutive dots ('.', '..', '...');
+     *   - otherwise, the single character itself.
+     */
+    struct LuaTokenizeFunctor {
+        /* The functor keeps no state between tokens, so there is nothing to reset. */
+        void reset() {
+        }
+
+        /*
+         * Extracts the next token.
+         *
+         * next:   current read position; advanced past the extracted token.
+         * end:    end of the input.
+         * result: receives the token, constructed as Token(const char*, size).
+         *
+         * Returns false (leaving 'result' untouched) when only whitespace
+         * remains, true otherwise.
+         */
+        template<typename InputIterator, typename Token>
+        bool operator()(InputIterator& next, InputIterator& end, Token& result) {
+            while (next != end && isSpaceChar(*next)) {
+                ++next;
+            }
+            if (next == end) {
+                return false;
+            }
+
+            // The first character always belongs to the token, so 'token'
+            // is never empty and &token[0] below is valid.
+            std::vector<char> token;
+            char c = *next++;
+            token.push_back(c);
+
+            if (c == '\'' || c == '"') {
+                // String literal: consume up to and including the matching,
+                // unescaped closing quote (or to the end of the input).
+                const char quote = c;
+                bool inEscape = false;
+                while (next != end) {
+                    c = *next++;
+                    token.push_back(c);
+                    if (inEscape) {
+                        inEscape = false;
+                    }
+                    else if (c == '\\') {
+                        inEscape = true;
+                    }
+                    else if (c == quote) {
+                        break;
+                    }
+                }
+            }
+            else if (isIdentifierStart(c)) {
+                // Identifier
+                while (next != end && isIdentifierPart(*next)) {
+                    token.push_back(*next);
+                    ++next;
+                }
+            }
+            else if (isDigitChar(c)) {
+                // Number.
+                // NOTE(review): this greedily takes everything up to the next
+                // whitespace, so input such as "1)" yields the single token
+                // "1)". Kept unchanged to preserve the existing token stream.
+                while (next != end && !isSpaceChar(*next)) {
+                    token.push_back(*next);
+                    ++next;
+                }
+            }
+            else if (c == '.') {
+                // Dots: '.', '..' and '...' are grouped into one token.
+                while (next != end && *next == '.') {
+                    token.push_back(*next);
+                    ++next;
+                }
+            }
+            // Any other character forms a one-character token.
+
+            result = Token(&token[0], token.size());
+            return true;
+        }
+    };
+}
+
+
+/*
+ * Splits 'input' into tokens by running boost::tokenizer with
+ * LuaTokenizeFunctor over the whole string, and returns the tokens in the
+ * order they appear.
+ */
+std::vector<std::string> Lua::tokenize(const std::string& input) {
+    typedef boost::tokenizer<LuaTokenizeFunctor> LuaTokenizer;
+
+    LuaTokenizer tokens(input);
+    std::vector<std::string> result(tokens.begin(), tokens.end());
+    return result;
+}
+