diff options
author | HombreLaser <buran@silosneeded.com> | 2024-06-22 11:01:41 -0600 |
---|---|---|
committer | HombreLaser <buran@silosneeded.com> | 2024-06-22 11:01:41 -0600 |
commit | 10ecf4d95194a273cc6aab89c3a1bcf739a194cd (patch) | |
tree | 9163b606ef00f7e0e147bb240a85032cea6026e2 /src/tokenizer.cpp | |
parent | 0ef2b4da78f51002bdf45ab632c97195ece9a8b7 (diff) |
Add tokenizer
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r-- | src/tokenizer.cpp | 125 |
1 file changed, 125 insertions, 0 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..a156000 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,125 @@ +#include <string> +#include <array> +#include "include/tokenizer.hpp" +#include "exceptions/tokenizer_exception.hpp" + +Tokenizer::Tokenizer() { + tokens.fill(nullptr); + tokens_head = 0; +} + +Tokenizer::~Tokenizer() { + clearTokens(); +} + +void Tokenizer::clearTokens() { + for(auto token{ tokens.begin() }; *token != nullptr; ++token) + delete token; + + tokens.fill(nullptr); +} + +void Tokenizer::insertToken(Type type, int value = 0) { + Token *new_token = new Token { .type = type, .value = value}; + + tokens[tokens_head] = new_token; + tokens_head += 1; +} + + +const std::array<Token *, 16> +&Tokenizer::tokenize(const std::string &operation) { + to_tokenize = &operation; + current_char_index = 0; + + while(current_char_index < operation.length()) { + size_t unchanged_index = current_char_index; + + matchOperand(operation); + matchParens(operation); + matchOperation(operation); + + // No matches. 
+ if(unchanged_index == current_char_index) + throw TokenizerException("Invalid character detected."); + } + + return getTokens(); +} + +void Tokenizer::matchOperand(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + insertToken(operand, std::stoi(match.str(0))); + current_char_index += match.length(0); +} + +void Tokenizer::matchParens(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + switch(match.str(0)[0]) { + case '(': + insertToken(left_parens); + break; + case ')': + insertToken(right_parens); + break; + } + + current_char_index += 1; +} + +void Tokenizer::matchOperation(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + switch(match.str(0)[0]) { + case '+': + insertToken(sum); + break; + case '-': + insertToken(substraction); + break; + case '*': + insertToken(multiplication); + break; + case '/': + insertToken(division); + break; + } + + current_char_index += 1; +} + +const std::array<Token *, 16> &Tokenizer::getTokens() { + return tokens; +} |