212 lines
7.1 KiB
C++
212 lines
7.1 KiB
C++
#pragma once
|
||
|
||
#include <cxxabi.h>

#include <cstddef>
#include <cstdlib>
#include <functional>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include "Tools.hpp"
|
||
|
||
namespace ztl{
|
||
namespace lexer{
|
||
// Turns an Itanium-ABI mangled symbol name (for example the string returned by
// `typeid(T).name()` on GCC/Clang) into a human-readable name.
//
// Parameters:
//   mangled_name  NUL-terminated mangled name; a null pointer is accepted.
//
// Returns:
//   - the demangled name when `abi::__cxa_demangle` succeeds,
//   - the input text unchanged when it cannot be demangled,
//   - an empty string when `mangled_name` is null.
inline std::string demangle(const char* mangled_name) {
    // Constructing std::string from a null pointer is undefined behaviour, so
    // bail out before either return path can touch it.
    if (mangled_name == nullptr) {
        return std::string();
    }

    int status = 0;  // set to 0 by __cxa_demangle on success

    // Passing nullptr for both the output buffer and its length asks
    // __cxa_demangle to malloc() the result; the unique_ptr releases it with
    // free() on every exit path.
    std::unique_ptr<char, decltype(&std::free)> demangled_ptr(
        abi::__cxa_demangle(mangled_name, nullptr, nullptr, &status),
        std::free);

    if (status == 0 && demangled_ptr) {
        return std::string(demangled_ptr.get());
    }

    // Demangling failed: fall back to the original spelling.
    return std::string(mangled_name);
}
|
||
// Common base for every token category. It is a plain aggregate that carries
// only the text of the lexeme.
//
// A `got_token_type()` helper that returned `demangle(typeid(*this).name())`
// is intentionally left out: Token has no virtual functions, so `typeid(*this)`
// evaluated inside Token always names Token itself, never the derived category.
struct Token {
    // Text of the lexeme.
    std::string data;
};
|
||
// Token categories. Each one is an empty aggregate derived from Token, so it
// adds no state and can be brace-initialised from the lexeme text, e.g.
// `Keywords{"print"}`.
struct Keywords : Token {};       // word-like lexemes
struct Identifiers : Token {};    // identifier lexemes
struct Literals : Token {};       // literal values
struct Operators : Token {};      // operator lexemes
struct Separators : Token {};     // grouping punctuation
struct Comments : Token {};       // comment text
struct EndSeparators : Token {};  // statement terminators
struct EOF_Token : Token {};      // end-of-input marker
|
||
|
||
// Characters skipped between tokens: newline, tab and space.
const std::vector<char> white_spaces = {'\n', '\t', ' '};

// Punctuation characters that is_symbol() reports as symbols.
const std::vector<char> symbols = {'(', ')', '"'};
|
||
|
||
struct CharProvider{
|
||
std::string buffer;
|
||
|
||
size_t idx=0;
|
||
Result<char> peek(){
|
||
if (idx<buffer.size()) {
|
||
return Result<char>(buffer[idx]);
|
||
}else{
|
||
return Result<char>(Err("got EOF"));
|
||
}
|
||
}
|
||
Result<char> consume(){
|
||
return ztl::match(peek(), [this](char c){
|
||
idx++;
|
||
return Result<char>(c);
|
||
},[](const Err &e){
|
||
return Result<char>(e);
|
||
});
|
||
}
|
||
Result<bool> match(char c){
|
||
return ztl::match(peek(),[this](char c){
|
||
if(c==buffer[idx]){
|
||
idx++;
|
||
return Result<bool>(true);
|
||
}else{
|
||
return Result<bool>(false);
|
||
}
|
||
},[this](const Err &e){
|
||
return Result<bool>(e);
|
||
});
|
||
}
|
||
Result<std::string> consume_until(std::function<bool(char)> func){
|
||
size_t start_idx = idx;
|
||
while(idx<buffer.size()&&func(buffer[idx])){
|
||
idx++;
|
||
}
|
||
if(idx-start_idx==0){
|
||
return Result<std::string>(Err("don't hava any string"));
|
||
}
|
||
idx++;
|
||
return Result<std::string>(buffer.substr(start_idx,idx-start_idx-1));
|
||
}
|
||
void skip_whitespace(){
|
||
while(1) {
|
||
if(
|
||
!ztl::match(peek(),[this](char c){
|
||
bool is_ws = false;
|
||
for(char ws:white_spaces){
|
||
if(c==ws){
|
||
consume();
|
||
is_ws=true;
|
||
}
|
||
}
|
||
return is_ws;
|
||
},[](const Err &e){
|
||
return false;
|
||
})
|
||
){
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
};
|
||
|
||
// Reports whether `c` is one of the ASCII decimal digits '0'..'9'.
inline bool is_digit(char c) {
    if (c < '0') {
        return false;
    }
    return c <= '9';
}
|
||
|
||
// Reports whether `c` can start a word: an ASCII letter or an underscore.
//
// A single 'A'..'z' range test is not enough because the ASCII codes between
// 'Z' and 'a' are the punctuation characters [ \ ] ^ _ `. Only the underscore
// from that gap is accepted, so names such as `my_var` keep working while the
// other five characters are rejected.
inline bool is_alpha(char c) {
    const bool is_upper = 'A' <= c && c <= 'Z';
    const bool is_lower = 'a' <= c && c <= 'z';
    return is_upper || is_lower || c == '_';
}
|
||
|
||
inline bool is_alphanumeric(char c){
|
||
return is_digit(c) || is_alpha(c);
|
||
}
|
||
|
||
inline bool is_symbol(char c){
|
||
bool is_symbol = false;
|
||
for(char i:symbols){
|
||
if(c==i){
|
||
is_symbol=true;
|
||
break;
|
||
}
|
||
}
|
||
return is_symbol;
|
||
}
|
||
|
||
inline std::vector<Token> lexer(const std::string&s){
|
||
CharProvider cp(s);
|
||
std::vector<Token> tokens;
|
||
while(1){
|
||
bool should_break=false;
|
||
cp.skip_whitespace();
|
||
match(cp.peek(),[&cp,&tokens](char c){
|
||
if (is_alpha(c)){
|
||
tokens.push_back(Keywords{cp.consume_until(is_alphanumeric).unwrap()});
|
||
}else if(c=='('){
|
||
tokens.push_back(Separators{std::string()+cp.consume().unwrap()});
|
||
match(cp.match(')'),[&cp](bool matched){
|
||
if(!matched){
|
||
throw std::runtime_error("unmatched '('");
|
||
}
|
||
},[](const Err &e){
|
||
throw std::runtime_error(e);
|
||
});
|
||
}else if(c=='"'){
|
||
cp.consume().unwrap();
|
||
tokens.push_back(Literals{cp.consume_until([](char c)->bool{return c!='"';}).unwrap()});
|
||
match(cp.match('"'),[&cp](bool matched){
|
||
if(!matched){
|
||
throw std::runtime_error("unmatched '\"'");
|
||
}
|
||
},[](const Err &e){
|
||
throw std::runtime_error(e);
|
||
});
|
||
}else if(c==';'){
|
||
tokens.push_back(EndSeparators{std::string()+cp.consume().unwrap()});
|
||
}
|
||
else{
|
||
throw std::runtime_error(std::string("unknown token named :'")+c+"'");
|
||
}
|
||
},[&should_break](const Err &e){
|
||
should_break=true;
|
||
});
|
||
if(should_break)break;
|
||
}
|
||
return tokens;
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
namespace std {
|
||
template <typename T>
|
||
concept DerivedFromToken = std::is_base_of_v<ztl::lexer::Token, T>;
|
||
|
||
template<DerivedFromToken T>
|
||
std::ostream&operator<<(std::ostream &os, const T &t){
|
||
string name = ztl::get_T_name<T>();
|
||
// ztl::logger(ztl::demangle(typeid(t).name()));
|
||
// string name = ztl::demangle(typeid(t).name());
|
||
return os<<name<<" {data: '"<<t.data<<"'}";
|
||
}
|
||
|
||
template<class T>
|
||
std::ostream&operator<<(std::ostream &os, const std::vector<T> &v){
|
||
os<<"std::vector<"<<ztl::get_T_name<T>()<<"> "<<"{";
|
||
if(v.size()>0){
|
||
for(size_t i=0;i<v.size()-1;i++){
|
||
os<<" "<<v[i]<<",";
|
||
}
|
||
os<<" "<<v[v.size()-1];
|
||
}else{
|
||
os<<"}";
|
||
return os;
|
||
}
|
||
os<<"}";
|
||
return os;
|
||
}
|
||
} |