TudorLang/include/Lexer.hpp
2025-07-18 21:25:22 +08:00

118 lines
3.7 KiB
C++

#pragma once
#include <cctype>
#include <cstddef>
#include <format>
#include <fstream>
#include <iterator>
#include <ostream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
namespace ztl{
/// Result types for readFileToStr().
namespace readFileToStrType{
    /// Tag alternative returned when the file stream could not be opened.
    struct CannotOpenFile{};
    /// Either the complete file contents, or CannotOpenFile.
    using ReturnType = std::variant<
        std::string,
        CannotOpenFile
    >;
}
/// Reads the whole file at `filePath` into a string.
///
/// @param filePath Path of the file to read. It does not need to be
///                 null-terminated.
/// @return The file contents on success, or
///         readFileToStrType::CannotOpenFile if the stream failed to open.
inline readFileToStrType::ReturnType readFileToStr(const std::string_view &filePath){
    // A string_view carries no terminator guarantee, so handing data() to the
    // const char* constructor could open the wrong path or read past the view.
    // Materialize an owning, null-terminated std::string first.
    std::ifstream ifs{std::string(filePath)};
    if(!ifs){
        return readFileToStrType::CannotOpenFile{};
    }
    // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
    std::string contents(std::istreambuf_iterator<char>(ifs),{});
    return contents;
}
// X-macro: the single list of token categories. Each use site defines
// ZTL_FOR_EACH_V(v) to say what to generate per category, expands
// ZTL_FOR_EACH, then undefines the helper again.
// The last entry must NOT end with a line continuation, otherwise the
// following #define would be spliced into this macro's replacement list.
#define ZTL_FOR_EACH ZTL_FOR_EACH_V(Keyword)\
    ZTL_FOR_EACH_V(Identifier)\
    ZTL_FOR_EACH_V(Literal)\
    ZTL_FOR_EACH_V(Operator)\
    ZTL_FOR_EACH_V(Separator)\
    ZTL_FOR_EACH_V(Whitespace)

/// Category of a token. Enumerators are generated from ZTL_FOR_EACH, in list order.
#define ZTL_FOR_EACH_V(v)v,
enum class TokenType{
    ZTL_FOR_EACH
};
#undef ZTL_FOR_EACH_V

/// Returns the enumerator name of `t` as text (e.g. "Keyword").
///
/// @param t Token category to name.
/// @return The enumerator's identifier spelled as a string.
/// @throws std::runtime_error if `t` is not one of the listed enumerators.
inline std::string getTokenTypeName(TokenType t){
    // One `case` per category; #v stringizes the enumerator name.
#define ZTL_FOR_EACH_V(v)case TokenType::v:{return #v;}
    switch (t) {
        ZTL_FOR_EACH
        default:
            throw std::runtime_error("unknown TokenType");
    }
#undef ZTL_FOR_EACH_V
}
#undef ZTL_FOR_EACH
/// One token: its category together with its text.
struct Token{
    TokenType type;  ///< Category of the token.
    std::string str; ///< Text of the token.

    /// Streams the token as `{ TokenType: <category name>, str: <text> }`.
    friend std::ostream &operator<<(std::ostream &out, const Token &token){
        const std::string typeName = getTokenTypeName(token.type);
        return out << "{ TokenType: " << typeName << ", str: " << token.str << " }";
    }
};
/// Splits source text into tokens.
///
/// All work happens in the constructor; the result is stored in `tokens`.
/// Whitespace is skipped and never produces a token.
struct Lexer{
    /// Tokens in the order they appear in the input.
    std::vector<Token> tokens;

    /// Tokenizes `s`.
    ///
    /// Recognized input:
    ///  - a letter followed by letters/digits: Keyword if the word is "int"
    ///    or "print", otherwise Identifier
    ///  - a run of decimal digits: Literal
    ///  - '=', '+', '-': Operator
    ///  - ';', '(', ')': Separator
    ///
    /// @param s Source text to tokenize.
    /// @throws std::runtime_error on any other non-whitespace character. The
    ///         message contains the character and its 1-based line:column.
    Lexer(const std::string &s){
        // <cctype> classifiers are undefined for negative values other than
        // EOF, so every char is converted to unsigned char first.
        const auto isAlpha = [](char c){ return std::isalpha(static_cast<unsigned char>(c)) != 0; };
        const auto isAlnum = [](char c){ return std::isalnum(static_cast<unsigned char>(c)) != 0; };
        const auto isDigit = [](char c){ return std::isdigit(static_cast<unsigned char>(c)) != 0; };
        const auto isSpace = [](char c){ return std::isspace(static_cast<unsigned char>(c)) != 0; };

        static constexpr std::string_view keywords[] = {"int", "print"};

        std::size_t line{1};      // 1-based line of the character at index i
        std::size_t lineStart{};  // index of the first character of that line

        for(std::size_t i = 0; i < s.size(); i++){
            const char c = s[i];
            if(isAlpha(c)){
                // Word: extend i to the last alphanumeric character.
                const std::size_t begin = i;
                while(i + 1 < s.size() && isAlnum(s[i + 1])){
                    i++;
                }
                std::string word = s.substr(begin, i + 1 - begin);
                bool isKeyword = false;
                for(const std::string_view keyword : keywords){
                    if(word == keyword){
                        isKeyword = true;
                        break;
                    }
                }
                const TokenType type = isKeyword ? TokenType::Keyword : TokenType::Identifier;
                tokens.push_back(Token{type, std::move(word)});
            }else if(c == '\n'){
                // Count every newline individually and start the next line
                // right after it, so blank lines and indentation are both
                // reflected in reported positions.
                line++;
                lineStart = i + 1;
            }else if(isSpace(c)){
                continue;
            }else if(c == '=' || c == '+' || c == '-'){
                tokens.push_back(Token{TokenType::Operator, std::string(1, c)});
            }else if(isDigit(c)){
                // Number: extend i to the last digit of the run.
                const std::size_t begin = i;
                while(i + 1 < s.size() && isDigit(s[i + 1])){
                    i++;
                }
                tokens.push_back(Token{TokenType::Literal, s.substr(begin, i + 1 - begin)});
            }else if(c == ';' || c == '(' || c == ')'){
                tokens.push_back(Token{TokenType::Separator, std::string(1, c)});
            }else{
                throw std::runtime_error(std::format("unkown char '{}' at line {}:{}", c, line, i - lineStart + 1));
            }
        }
    }
};
}