From 10ecf4d95194a273cc6aab89c3a1bcf739a194cd Mon Sep 17 00:00:00 2001 From: HombreLaser Date: Sat, 22 Jun 2024 11:01:41 -0600 Subject: Add tokenizer --- src/tokenizer.cpp | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/tokenizer.cpp (limited to 'src/tokenizer.cpp') diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp new file mode 100644 index 0000000..a156000 --- /dev/null +++ b/src/tokenizer.cpp @@ -0,0 +1,125 @@ +#include +#include +#include "include/tokenizer.hpp" +#include "exceptions/tokenizer_exception.hpp" + +Tokenizer::Tokenizer() { + tokens.fill(nullptr); + tokens_head = 0; +} + +Tokenizer::~Tokenizer() { + clearTokens(); +} + +void Tokenizer::clearTokens() { + for(auto token{ tokens.begin() }; *token != nullptr; ++token) + delete token; + + tokens.fill(nullptr); +} + +void Tokenizer::insertToken(Type type, int value = 0) { + Token *new_token = new Token { .type = type, .value = value}; + + tokens[tokens_head] = new_token; + tokens_head += 1; +} + + +const std::array +&Tokenizer::tokenize(const std::string &operation) { + to_tokenize = &operation; + current_char_index = 0; + + while(current_char_index < operation.length()) { + size_t unchanged_index = current_char_index; + + matchOperand(operation); + matchParens(operation); + matchOperation(operation); + + // No matches. + if(unchanged_index == current_char_index) + throw TokenizerException("Invalid character detected."); + } + + return getTokens(); +} + +void Tokenizer::matchOperand(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + insertToken(operand, std::stoi(match.str(0))); + current_char_index += match.length(0); +} + +void Tokenizer::matchParens(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + switch(match.str(0)[0]) { + case '(': + insertToken(left_parens); + break; + case ')': + insertToken(right_parens); + break; + } + + current_char_index += 1; +} + +void Tokenizer::matchOperation(const std::string &operation) { + std::smatch match; + + if(current_char_index > operation.length()) + return; + + std::string remaining(operation.substr(current_char_index)); + bool result = std::regex_search(remaining, match, + operand_regex); + + if(!result) + return; + + switch(match.str(0)[0]) { + case '+': + insertToken(sum); + break; + case '-': + insertToken(substraction); + break; + case '*': + insertToken(multiplication); + break; + case '/': + insertToken(division); + break; + } + + current_char_index += 1; +} + +const std::array &Tokenizer::getTokens() { + return tokens; +} -- cgit v1.2.3