118 lines
3.7 KiB
C++
118 lines
3.7 KiB
C++
#pragma once
|
|
|
|
#include <array>
|
|
#include <cstddef>
|
|
#include <exception>
|
|
#include <functional>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <variant>
|
|
#include <vector>
|
|
#include "Lexer.hpp"
|
|
#include "Tools.hpp"
|
|
|
|
namespace ztl{
|
|
namespace lexer{
|
|
/// Base record shared by every token category.
///
/// `data` is the token's string payload; the lexer in this file fills it with
/// the text it consumed from the input.
struct Token {
    std::string data;
};

// Category tag types. Each adds no members of its own and simply inherits
// `data` from Token, so the category is carried purely by the static type.
struct Keywords : Token {};
struct Identifiers : Token {};
struct Literals : Token {};
struct Operators : Token {};
struct Separators : Token {};
struct Comments : Token {};
struct EOF_Token : Token {};
|
|
|
|
struct CharProvider{
|
|
std::string buffer;
|
|
std::vector<char> white_spaces{'\n','\t',' '};
|
|
size_t idx=0;
|
|
Result<char> peek(){
|
|
if (idx<buffer.size()) {
|
|
return Result<char>(buffer[idx]);
|
|
}else{
|
|
return Result<char>(Err("got EOF"));
|
|
}
|
|
}
|
|
Result<char> consume(){
|
|
return ztl::match(peek(), [this](char c){
|
|
idx++;
|
|
return Result<char>(c);
|
|
},[](const Err &e){
|
|
return Result<char>(e);
|
|
});
|
|
}
|
|
Result<bool> match(char c){
|
|
return ztl::match(peek(),[this](char c){
|
|
if(c==buffer[idx]){
|
|
idx++;
|
|
return Result<bool>(true);
|
|
}else{
|
|
return Result<bool>(false);
|
|
}
|
|
},[this](const Err &e){
|
|
return Result<bool>(e);
|
|
});
|
|
}
|
|
Result<std::string> consume_until(std::function<bool(char)> func){
|
|
size_t start_idx = idx;
|
|
while(idx<buffer.size()&&func(buffer[idx])){
|
|
idx++;
|
|
}
|
|
if(idx-start_idx==0){
|
|
return Result<std::string>(Err("don't hava any string"));
|
|
}
|
|
return Result<std::string>(buffer.substr(start_idx,idx-start_idx+1));
|
|
}
|
|
void skip_whitespace(){
|
|
while(1) {
|
|
if(
|
|
!ztl::match(peek(),[this](char c){
|
|
bool is_ws = false;
|
|
for(char ws:white_spaces){
|
|
if(c==ws){
|
|
consume();
|
|
is_ws=true;
|
|
}
|
|
}
|
|
return is_ws;
|
|
},[](const Err &e){
|
|
return false;
|
|
})
|
|
){
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Reports whether `c` is one of the decimal digit characters '0' through '9'.
inline bool is_digit(char c){
    const bool below_zero = c < '0';
    const bool above_nine = c > '9';
    return !(below_zero || above_nine);
}
|
|
|
|
/// Reports whether `c` is a letter in 'A'..'Z' or 'a'..'z'.
///
/// The two ranges are tested separately because a single 'A'..'z' comparison
/// also accepts the punctuation that sits between 'Z' and 'a' in ASCII
/// ('[', '\\', ']', '^', '_' and '`').
inline bool is_alpha(char c){
    const bool is_upper = 'A'<=c && c<='Z';
    const bool is_lower = 'a'<=c && c<='z';
    return is_upper || is_lower;
}
|
|
|
|
inline bool is_alphanumeric(char c){
|
|
return is_digit(c) || is_alpha(c);
|
|
}
|
|
|
|
inline std::vector<Token> lexer(const std::string&s){
|
|
CharProvider cp(s);
|
|
std::vector<Token> tokens;
|
|
while(1){
|
|
bool should_break=false;
|
|
cp.skip_whitespace();
|
|
match(cp.peek(),[&cp,&tokens](char c){
|
|
if (is_alpha(c)){
|
|
tokens.push_back(Keywords{cp.consume_until(is_alphanumeric).unwrap()});
|
|
}
|
|
},[&should_break](const Err &e){
|
|
should_break=true;
|
|
});
|
|
if(should_break)break;
|
|
}
|
|
return tokens;
|
|
}
|
|
}
|
|
} |