TudorLang/include/Lexer.hpp
2025-07-12 00:35:49 +08:00

118 lines
3.7 KiB
C++

#pragma once
#include <array>
#include <cstddef>
#include <exception>
#include <functional>
#include <stdexcept>
#include <string>
#include <variant>
#include <vector>
#include "Lexer.hpp"
#include "Tools.hpp"
namespace ztl{
namespace lexer{
// Base token type: `data` holds the token's text.
struct Token{std::string data;};
// Token categories. Each one derives from Token and adds no members of its
// own, so the category is carried only by the static type.
// NOTE(review): lexer() collects tokens in a std::vector<Token>, which keeps
// only the Token base of whatever is pushed -- the category type is not
// retained there. Confirm whether that is intended.
struct Keywords:Token{};
struct Identifiers:Token{};
struct Literals:Token{};
struct Operators:Token{};
struct Separators:Token{};
struct Comments:Token{};
// Presumably marks end of input; nothing in this header produces it -- verify against callers.
struct EOF_Token:Token{};
/// Cursor over an in-memory character buffer.
///
/// `idx` is the offset of the next unread character in `buffer`; every member
/// function reads from, and possibly advances, that offset. The type is kept
/// an aggregate (no constructors) so existing initialisation code still works.
struct CharProvider{
    std::string buffer;                             ///< Text being scanned.
    std::vector<char> white_spaces{'\n','\t',' '};  ///< Characters skip_whitespace() steps over.
    size_t idx=0;                                   ///< Offset of the next unread character.

    /// Returns the next unread character without advancing, or Err("got EOF")
    /// when the cursor is at the end of the buffer.
    Result<char> peek(){
        if (idx<buffer.size()) {
            return Result<char>(buffer[idx]);
        }
        return Result<char>(Err("got EOF"));
    }

    /// Returns the next unread character and advances past it; forwards the
    /// error from peek() unchanged when the buffer is exhausted.
    Result<char> consume(){
        return ztl::match(peek(), [this](char next){
            idx++;
            return Result<char>(next);
        },[](const Err &e){
            return Result<char>(e);
        });
    }

    /// Consumes the next character only if it equals `c`.
    ///
    /// @return true (cursor advanced) when the next character is `c`, false
    ///         (cursor untouched) when it is something else, or the error
    ///         from peek() when the buffer is exhausted.
    Result<bool> match(char c){
        // The expected character is captured explicitly and the peeked one is
        // given a distinct name: previously the lambda parameter shadowed `c`,
        // so the comparison was always true and any character was accepted.
        return ztl::match(peek(),[this,c](char next){
            if(next==c){
                idx++;
                return Result<bool>(true);
            }
            return Result<bool>(false);
        },[](const Err &e){
            return Result<bool>(e);
        });
    }

    /// Consumes the run of characters, starting at the cursor, for which
    /// `func` returns true, and returns that run.
    ///
    /// Scanning stops at the first character `func` rejects (which is left
    /// unread) or at the end of the buffer. An Err is returned, and nothing
    /// is consumed, when the very first character is rejected or the buffer
    /// is already exhausted.
    Result<std::string> consume_until(std::function<bool(char)> func){
        const size_t start_idx = idx;
        while(idx<buffer.size()&&func(buffer[idx])){
            idx++;
        }
        if(idx==start_idx){
            return Result<std::string>(Err("don't hava any string"));
        }
        // Length is exactly the number of consumed characters; the former
        // `+1` also returned the first character that was NOT consumed.
        return Result<std::string>(buffer.substr(start_idx,idx-start_idx));
    }

    /// Advances the cursor past every leading character listed in
    /// `white_spaces`; stops at the first other character or at end of buffer.
    void skip_whitespace(){
        while(idx<buffer.size()){
            const char next = buffer[idx];
            bool is_ws = false;
            for(char ws:white_spaces){
                if(next==ws){
                    is_ws = true;
                    break;
                }
            }
            if(!is_ws){
                return;
            }
            // Advance exactly once per whitespace character, even if
            // `white_spaces` happens to contain the same character twice.
            idx++;
        }
    }
};
/// True when `c` is one of the ASCII decimal digits '0' through '9'.
inline bool is_digit(char c){
    const bool below_zero = c<'0';
    const bool above_nine = c>'9';
    return !(below_zero||above_nine);
}
/// True when `c` is an ASCII letter, 'A'-'Z' or 'a'-'z'.
///
/// The two ranges are tested separately: a single 'A'..'z' range also admits
/// the six punctuation characters that sit between 'Z' and 'a' in ASCII
/// ('[', '\\', ']', '^', '_' and '`').
inline bool is_alpha(char c){
    const bool is_upper = 'A'<=c&&c<='Z';
    const bool is_lower = 'a'<=c&&c<='z';
    return is_upper||is_lower;
}
/// True when `c` satisfies is_digit() or is_alpha().
inline bool is_alphanumeric(char c){
    if(is_digit(c)){
        return true;
    }
    return is_alpha(c);
}
inline std::vector<Token> lexer(const std::string&s){
CharProvider cp(s);
std::vector<Token> tokens;
while(1){
bool should_break=false;
cp.skip_whitespace();
match(cp.peek(),[&cp,&tokens](char c){
if (is_alpha(c)){
tokens.push_back(Keywords{cp.consume_until(is_alphanumeric).unwrap()});
}
},[&should_break](const Err &e){
should_break=true;
});
if(should_break)break;
}
return tokens;
}
}
}