diff options
author | HombreLaser <buran@silosneeded.com> | 2024-06-22 11:01:41 -0600 |
---|---|---|
committer | HombreLaser <buran@silosneeded.com> | 2024-06-22 11:01:41 -0600 |
commit | 10ecf4d95194a273cc6aab89c3a1bcf739a194cd (patch) | |
tree | 9163b606ef00f7e0e147bb240a85032cea6026e2 /src/tokenizer.cpp | |
parent | 0ef2b4da78f51002bdf45ab632c97195ece9a8b7 (diff) |
Add tokenizer
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r-- | src/tokenizer.cpp | 125 |
1 file changed, 125 insertions, 0 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..a156000 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,125 @@ +#include <string> +#include <array> +#include "include/tokenizer.hpp" +#include "exceptions/tokenizer_exception.hpp" + +Tokenizer::Tokenizer() { + tokens.fill(nullptr); + tokens_head = 0; +} + +Tokenizer::~Tokenizer() { + clearTokens(); +} + +void Tokenizer::clearTokens() { + for(auto token{ tokens.begin() }; *token != nullptr; ++token) + delete token; + + tokens.fill(nullptr); +} + +void Tokenizer::insertToken(Type type, int value = 0) { + Token *new_token = new Token { .type = type, .value = value}; + + tokens[tokens_head] = new_token; + tokens_head += 1; +} + + +const std::array<Token *, 16> +&Tokenizer::tokenize(const std::string &operation) { + to_tokenize = &operation; + current_char_index = 0; + + while(current_char_index < operation.length()) { + size_t unchanged_index = current_char_index; + + matchOperand(operation); + matchParens(operation); + matchOperation(operation); + + // No matches. 
+ if(unchanged_index == current_char_index) + throw TokenizerException("Invalid character detected."); + } + + return getTokens(); +} + +void Tokenizer::matchOperand(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + insertToken(operand, std::stoi(match.str(0))); + current_char_index += match.length(0); +} + +void Tokenizer::matchParens(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + switch(match.str(0)[0]) { + case '(': + insertToken(left_parens); + break; + case ')': + insertToken(right_parens); + break; + } + + current_char_index += 1; +} + +void Tokenizer::matchOperation(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + switch(match.str(0)[0]) { + case '+': + insertToken(sum); + break; + case '-': + insertToken(substraction); + break; + case '*': + insertToken(multiplication); + break; + case '/': + insertToken(division); + break; + } + + current_char_index += 1; +} + +const std::array<Token *, 16> &Tokenizer::getTokens() { + return tokens; +} |