#include #include "include/tokenizer.hpp" #include "exceptions/tokenizer_exception.hpp" Tokenizer::Tokenizer() { tokens_head = 0; for(int i = 0; i < TOKENS_ARRAY_LEN; ++i) tokens[i] = nullptr; } Tokenizer::~Tokenizer() { clearTokens(); } void Tokenizer::clearTokens() { tokens_head = 0; for(int i = 0; i < TOKENS_ARRAY_LEN; ++i) { if(tokens[i] != nullptr) { delete tokens[i]; tokens[i] = nullptr; } } } void Tokenizer::insertToken(Type type, int value = 0) { Token *new_token = new Token { .type = type, .value = value}; tokens[tokens_head] = new_token; tokens_head += 1; } Token **Tokenizer::tokenize(const std::string &operation) { clearTokens(); to_tokenize = &operation; current_char_index = 0; while(current_char_index < operation.length()) { size_t unchanged_index = current_char_index; matchOperand(operation); matchParens(operation); matchOperation(operation); // No matches. if(unchanged_index == current_char_index) throw TokenizerException("Invalid character detected."); } // "Nil" terminated array. Intended to be the epsilon terminal in the grammar. tokens[tokens_head] = new Token { .type = nil, .value = 0 }; return getTokens(); } void Tokenizer::matchOperand(const std::string &operation) { std::smatch match; if(current_char_index > operation.length()) return; std::string remaining(operation.substr(current_char_index)); bool result = std::regex_search(remaining, match, operand_regex); if(!result) return; insertToken(operand, std::stoi(match.str(0))); current_char_index += match.length(0); } void Tokenizer::matchParens(const std::string &operation) { std::smatch match; if(current_char_index > operation.length()) return; std::string remaining(operation.substr(current_char_index)); bool result = std::regex_search(remaining, match, parens_regex); if(!result) return; switch(match.str(0)[0]) { case '(': insertToken(left_parens); break; case ')': insertToken(right_parens); break; } current_char_index += 1; } void Tokenizer::matchOperation(const std::string &operation) { std::smatch match; if(current_char_index > operation.length()) return; std::string remaining(operation.substr(current_char_index)); bool result = std::regex_search(remaining, match, operation_regex); if(!result) return; switch(match.str(0)[0]) { case '+': insertToken(sum); break; case '-': insertToken(substraction); break; case '*': insertToken(multiplication); break; case '/': insertToken(division); break; } current_char_index += 1; } Token **Tokenizer::getTokens() { return tokens; }