update
This commit is contained in:
parent
185c7f785c
commit
126f995ec3
5
include/Interpreter.hpp
Normal file
5
include/Interpreter.hpp
Normal file
@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
namespace ztl{
|
||||
|
||||
}
|
@ -1,212 +1,118 @@
|
||||
#pragma once
|
||||
|
||||
#include <cctype>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <format>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
#include <ostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
#include <cxxabi.h>
|
||||
#include "Tools.hpp"
|
||||
|
||||
namespace ztl{
|
||||
namespace lexer{
|
||||
inline std::string demangle(const char* mangled_name) {
|
||||
int status = 0;
|
||||
// 调用 __cxa_demangle 解 mangling
|
||||
char* demangled = abi::__cxa_demangle(
|
||||
mangled_name,
|
||||
nullptr, // 输出缓冲区(nullptr 表示自动分配)
|
||||
nullptr, // 缓冲区大小(nullptr 表示自动计算)
|
||||
&status // 输出状态(0 表示成功)
|
||||
);
|
||||
|
||||
// 用智能指针管理动态分配的内存,避免泄漏
|
||||
std::unique_ptr<char, decltype(&std::free)> demangled_ptr(demangled, std::free);
|
||||
|
||||
if (status == 0) {
|
||||
return std::string(demangled_ptr.get()); // 解 mangling 成功
|
||||
} else {
|
||||
return std::string(mangled_name); // 失败时返回原始名称
|
||||
}
|
||||
/// Types describing the outcome of readFileToStr.
namespace readFileToStrType {

/// Tag alternative produced when the input file stream fails to open.
struct CannotOpenFile {};

/// Either a std::string result or the CannotOpenFile tag.
using ReturnType = std::variant<std::string, CannotOpenFile>;

}  // namespace readFileToStrType
|
||||
/// Base token: carries only the text that was matched.
struct Token {
    std::string data;
    // std::string got_token_type() const {
    //     return demangle(typeid(*this).name());
    // }
};

// Empty tag subtypes that classify a token purely by its static type.
// NOTE(review): when one of these is stored by value as a plain Token
// (e.g. in a std::vector<Token>), the subtype is sliced away.
struct Keywords : Token {};
struct Identifiers : Token {};
struct Literals : Token {};
struct Operators : Token {};
struct Separators : Token {};
struct Comments : Token {};
struct EndSeparators : Token {};
struct EOF_Token : Token {};
|
||||
|
||||
/// Characters that CharProvider::skip_whitespace() discards.
const std::vector<char> white_spaces{
    '\n',
    '\t',
    ' ',
};

/// Characters that is_symbol() reports as symbols.
const std::vector<char> symbols{
    '(',
    ')',
    '"',
};
|
||||
|
||||
struct CharProvider{
|
||||
std::string buffer;
|
||||
|
||||
size_t idx=0;
|
||||
Result<char> peek(){
|
||||
if (idx<buffer.size()) {
|
||||
return Result<char>(buffer[idx]);
|
||||
}else{
|
||||
return Result<char>(Err("got EOF"));
|
||||
}
|
||||
}
|
||||
Result<char> consume(){
|
||||
return ztl::match(peek(), [this](char c){
|
||||
idx++;
|
||||
return Result<char>(c);
|
||||
},[](const Err &e){
|
||||
return Result<char>(e);
|
||||
});
|
||||
}
|
||||
Result<bool> match(char c){
|
||||
return ztl::match(peek(),[this](char c){
|
||||
if(c==buffer[idx]){
|
||||
idx++;
|
||||
return Result<bool>(true);
|
||||
}else{
|
||||
return Result<bool>(false);
|
||||
}
|
||||
},[this](const Err &e){
|
||||
return Result<bool>(e);
|
||||
});
|
||||
}
|
||||
Result<std::string> consume_until(std::function<bool(char)> func){
|
||||
size_t start_idx = idx;
|
||||
while(idx<buffer.size()&&func(buffer[idx])){
|
||||
idx++;
|
||||
}
|
||||
if(idx-start_idx==0){
|
||||
return Result<std::string>(Err("don't hava any string"));
|
||||
}
|
||||
idx++;
|
||||
return Result<std::string>(buffer.substr(start_idx,idx-start_idx-1));
|
||||
}
|
||||
void skip_whitespace(){
|
||||
while(1) {
|
||||
if(
|
||||
!ztl::match(peek(),[this](char c){
|
||||
bool is_ws = false;
|
||||
for(char ws:white_spaces){
|
||||
if(c==ws){
|
||||
consume();
|
||||
is_ws=true;
|
||||
}
|
||||
}
|
||||
return is_ws;
|
||||
},[](const Err &e){
|
||||
return false;
|
||||
})
|
||||
){
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
inline bool is_digit(char c){
|
||||
return '0'<=c&&c<='9';
|
||||
inline readFileToStrType::ReturnType readFileToStr(const std::string_view &filePath){
|
||||
std::ifstream ifs(filePath.data());
|
||||
if(!ifs){
|
||||
return readFileToStrType::CannotOpenFile{};
|
||||
}
|
||||
|
||||
/// Returns true when `c` is an ASCII letter (A-Z or a-z).
///
/// The two ranges are tested separately: a single 'A'..'z' range would also
/// accept the six punctuation characters that sit between 'Z' and 'a' in
/// ASCII ('[', '\\', ']', '^', '_', '`').
inline bool is_alpha(char c){
    const bool upper = 'A' <= c && c <= 'Z';
    const bool lower = 'a' <= c && c <= 'z';
    return upper || lower;
}
|
||||
|
||||
inline bool is_alphanumeric(char c){
|
||||
return is_digit(c) || is_alpha(c);
|
||||
}
|
||||
|
||||
inline bool is_symbol(char c){
|
||||
bool is_symbol = false;
|
||||
for(char i:symbols){
|
||||
if(c==i){
|
||||
is_symbol=true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return is_symbol;
|
||||
}
|
||||
|
||||
inline std::vector<Token> lexer(const std::string&s){
|
||||
CharProvider cp(s);
|
||||
std::vector<Token> tokens;
|
||||
while(1){
|
||||
bool should_break=false;
|
||||
cp.skip_whitespace();
|
||||
match(cp.peek(),[&cp,&tokens](char c){
|
||||
if (is_alpha(c)){
|
||||
tokens.push_back(Keywords{cp.consume_until(is_alphanumeric).unwrap()});
|
||||
}else if(c=='('){
|
||||
tokens.push_back(Separators{std::string()+cp.consume().unwrap()});
|
||||
match(cp.match(')'),[&cp](bool matched){
|
||||
if(!matched){
|
||||
throw std::runtime_error("unmatched '('");
|
||||
}
|
||||
},[](const Err &e){
|
||||
throw std::runtime_error(e);
|
||||
});
|
||||
}else if(c=='"'){
|
||||
cp.consume().unwrap();
|
||||
tokens.push_back(Literals{cp.consume_until([](char c)->bool{return c!='"';}).unwrap()});
|
||||
match(cp.match('"'),[&cp](bool matched){
|
||||
if(!matched){
|
||||
throw std::runtime_error("unmatched '\"'");
|
||||
}
|
||||
},[](const Err &e){
|
||||
throw std::runtime_error(e);
|
||||
});
|
||||
}else if(c==';'){
|
||||
tokens.push_back(EndSeparators{std::string()+cp.consume().unwrap()});
|
||||
}
|
||||
else{
|
||||
throw std::runtime_error(std::string("unknown token named :'")+c+"'");
|
||||
}
|
||||
},[&should_break](const Err &e){
|
||||
should_break=true;
|
||||
});
|
||||
if(should_break)break;
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template <typename T>
|
||||
concept DerivedFromToken = std::is_base_of_v<ztl::lexer::Token, T>;
|
||||
|
||||
template<DerivedFromToken T>
|
||||
std::ostream&operator<<(std::ostream &os, const T &t){
|
||||
string name = ztl::get_T_name<T>();
|
||||
// ztl::logger(ztl::demangle(typeid(t).name()));
|
||||
// string name = ztl::demangle(typeid(t).name());
|
||||
return os<<name<<" {data: '"<<t.data<<"'}";
|
||||
std::string s(std::istreambuf_iterator<char>(ifs),{});
|
||||
return s;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
std::ostream&operator<<(std::ostream &os, const std::vector<T> &v){
|
||||
os<<"std::vector<"<<ztl::get_T_name<T>()<<"> "<<"{";
|
||||
if(v.size()>0){
|
||||
for(size_t i=0;i<v.size()-1;i++){
|
||||
os<<" "<<v[i]<<",";
|
||||
}
|
||||
os<<" "<<v[v.size()-1];
|
||||
}else{
|
||||
os<<"}";
|
||||
// Single list of token kinds; expanded once for the enum and once for the
// name lookup so the two cannot drift apart.
#define ZTL_TOKEN_TYPES(X) \
    X(Keyword)             \
    X(Identifier)          \
    X(Literal)             \
    X(Operator)            \
    X(Separator)           \
    X(Whitespace)

/// Kind of a lexed token.
#define ZTL_TOKEN_ENUMERATOR(name) name,
enum class TokenType {
    ZTL_TOKEN_TYPES(ZTL_TOKEN_ENUMERATOR)
};
#undef ZTL_TOKEN_ENUMERATOR

/// Returns the enumerator's spelling (e.g. "Keyword").
/// Throws std::runtime_error for a value that is not a listed enumerator.
inline std::string getTokenTypeName(TokenType t){
#define ZTL_TOKEN_NAME_CASE(name) \
    case TokenType::name:         \
        return #name;
    switch (t) {
        ZTL_TOKEN_TYPES(ZTL_TOKEN_NAME_CASE)
    default:
        throw std::runtime_error("unknown TokenType");
    }
#undef ZTL_TOKEN_NAME_CASE
    throw std::runtime_error("unreachable");
}
#undef ZTL_TOKEN_TYPES
|
||||
|
||||
struct Token{
|
||||
TokenType type;
|
||||
std::string str;
|
||||
friend std::ostream&operator<<(std::ostream&os,const Token &t){
|
||||
os<<"{ TokenType: "<<getTokenTypeName(t.type)<<", str: "<<t.str<<" }";
|
||||
return os;
|
||||
}
|
||||
os<<"}";
|
||||
return os;
|
||||
}
|
||||
};
|
||||
/// Tokenizes a whole source string at construction time; the result is in `tokens`.
///
/// Recognised input:
///   - identifiers, classified as Keyword when they are "int" or "print"
///   - runs of decimal digits (Literal)
///   - '=' (Operator)
///   - ';', '(' and ')' (Separator)
///   - whitespace, which is skipped
/// Any other character raises std::runtime_error with its line and column.
struct Lexer{
    std::vector<Token> tokens;

    /// Deliberately not `explicit`, so a Lexer can still be copy-initialized
    /// from a std::string.
    Lexer(const std::string &s){
        // <cctype> classifiers have undefined behaviour for negative values,
        // so every char is widened through unsigned char first.
        const auto isAlpha = [](char ch){ return std::isalpha(static_cast<unsigned char>(ch)) != 0; };
        const auto isAlnum = [](char ch){ return std::isalnum(static_cast<unsigned char>(ch)) != 0; };
        const auto isDigit = [](char ch){ return std::isdigit(static_cast<unsigned char>(ch)) != 0; };
        const auto isSpace = [](char ch){ return std::isspace(static_cast<unsigned char>(ch)) != 0; };

        size_t line{1},lineStart{};
        // Every branch advances `i` itself and leaves it on the first
        // character it did not consume, so nothing after a multi-character
        // token is skipped.
        size_t i = 0;
        while(i<s.size()){
            const char ch = s[i];
            if(isAlpha(ch)){
                const size_t begin = i;
                while(i<s.size()&&isAlnum(s[i])){
                    ++i;
                }
                std::string nstr = s.substr(begin,i-begin);
                const static std::vector<std::string> keywords = {"int","print"};
                bool isKeyword = false;
                for(const std::string&k:keywords){
                    if(nstr==k){
                        isKeyword=true;
                        break;
                    }
                }
                tokens.emplace_back(isKeyword?TokenType::Keyword:TokenType::Identifier,std::move(nstr));
            }else if(ch=='\n'){
                // Count each newline on its own so blank lines keep the line
                // number accurate; columns are measured from the true start
                // of the line.
                ++line;
                ++i;
                lineStart=i;
            }else if(isSpace(ch)){
                ++i;
            }else if(ch=='='){
                tokens.emplace_back(TokenType::Operator,"=");
                ++i;
            }else if(isDigit(ch)){
                const size_t begin = i;
                while(i<s.size()&&isDigit(s[i])){
                    ++i;
                }
                tokens.emplace_back(TokenType::Literal,s.substr(begin,i-begin));
            }else if(ch==';'||ch=='('||ch==')'){
                tokens.emplace_back(TokenType::Separator,std::string(1,ch));
                ++i;
            }else{
                throw std::runtime_error(std::format("unkown char '{}' at line {}:{}",ch,line,i-lineStart+1));
            }
        }
    }
};
|
||||
}
|
@ -1,98 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
#include<fstream>
|
||||
|
||||
namespace ztl{
|
||||
namespace ztl {
|
||||
/// Writes "log: ", then every argument in order, then a newline to std::cout.
/// Compiles to an empty function when NDEBUG is defined.
template<class... Ts>
void logger(Ts&&... v) {
#ifndef NDEBUG
    std::cout << "log: ";
    ((std::cout << v), ...);
    std::cout << "\n";
#endif
}
|
||||
|
||||
/// Returns this instantiation's own signature as reported by the compiler's
/// __PRETTY_FUNCTION__ (a GCC/Clang extension).
template<class T>
std::string get_type_detail() {
    const char* const signature = __PRETTY_FUNCTION__;
    return std::string(signature);
}
|
||||
|
||||
template<class T>
|
||||
std::string get_T_name(){
|
||||
std::string type_detail = get_type_detail<T>();
|
||||
// logger(type_detail);
|
||||
size_t start = type_detail.find("T = ");
|
||||
if(start == std::string::npos) {
|
||||
throw std::runtime_error("Failed to get type name");
|
||||
}
|
||||
size_t end = type_detail.find(';', start);
|
||||
if(end == std::string::npos) {
|
||||
end = type_detail.size()-1;
|
||||
}
|
||||
// logger(type_detail.substr(start + 4, end - start - 4));
|
||||
return type_detail.substr(start + 4, end - start - 4);
|
||||
}
|
||||
|
||||
struct overloaded : Ts... {using Ts::operator()...;};
|
||||
/// Merges several callables into a single overload set, e.g. for std::visit.
template<class... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};

/// Deduction guide so `overloaded{f, g}` needs no explicit template arguments.
template<class... Callables>
overloaded(Callables...) -> overloaded<Callables...>;
|
||||
|
||||
template<class ...Ts, class ...Fs>
|
||||
auto match(std::variant<Ts...>& v, Fs&&... func) {
|
||||
return std::visit(ztl::overloaded{std::forward<Fs>(func)...}, v);
|
||||
}
|
||||
template<class ...Ts, class ...Fs>
|
||||
auto match(const std::variant<Ts...>& v, Fs&&... func) {
|
||||
return std::visit(ztl::overloaded{std::forward<Fs>(func)...}, v);
|
||||
}
|
||||
template<class ...Ts, class ...Fs>
|
||||
auto match(std::variant<Ts...>&& v, Fs&&... func) {
|
||||
return std::visit(ztl::overloaded{std::forward<Fs>(func)...}, std::move(v));
|
||||
}
|
||||
struct Err:std::string{};
|
||||
|
||||
// template<class T>
|
||||
// struct Ok{
|
||||
// T data;
|
||||
// };
|
||||
|
||||
template<class T>
|
||||
struct Result:std::variant<T,Err>{
|
||||
T unwrap(){
|
||||
return match<T>(*this,[](const T&t){
|
||||
return t;
|
||||
},[](const Err &e){
|
||||
throw std::runtime_error(e);
|
||||
T t;
|
||||
return t;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns command-line argument number `idx` read from the `__argc` / `__argv`
/// globals, or an Err when `idx` is not below `__argc`.
/// NOTE(review): `__argc` / `__argv` are not declared in the visible code —
/// presumably they come from the C runtime of the target toolchain; confirm.
inline Result<std::string> get_argv(size_t idx){
    if (idx >= __argc) {
        return Result<std::string>(Err("argv's index out of range"));
    }
    return Result<std::string>(std::string(__argv[idx]));
}
|
||||
|
||||
/// Reads the whole file at `file_path` into a string.
/// Returns an Err naming the path when the file cannot be opened.
inline Result<std::string> get_string_from_file(const std::string &file_path){
    std::ifstream input(file_path);
    if (!input) {
        return Result<std::string>(Err("Could not open file: " + file_path));
    }

    // Default-constructed istreambuf_iterator is the end-of-stream sentinel.
    const std::istreambuf_iterator<char> first(input);
    const std::istreambuf_iterator<char> last;
    std::string text(first, last);
    return Result<std::string>(text);
}
|
||||
overloaded (Ts...) -> overloaded<Ts...>;
|
||||
}
|
||||
|
||||
// NOTE(review): the C++ standard does not permit programs to add function
// overloads to namespace std; the overload is kept here so existing lookups
// keep working.
namespace std {

/// Prints a vector as "[ a, b, c]"; an empty vector prints as "[]".
template<class T>
ostream& operator<<(ostream& os, const vector<T>& v) {
    if (v.empty()) {
        os << "[]";
        return os;
    }

    os << "[";
    bool first = true;
    for (const T& element : v) {
        if (!first) {
            os << ",";
        }
        os << " " << element;
        first = false;
    }
    os << "]";
    return os;
}

}  // namespace std
|
27
src/main.cpp
27
src/main.cpp
@ -1,11 +1,26 @@
|
||||
#include "Lexer.hpp"
|
||||
#include "Tools.hpp"
|
||||
#include <format>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <variant>
|
||||
|
||||
int main(){
|
||||
std::string content = ztl::get_string_from_file(ztl::get_argv(1).unwrap()).unwrap();
|
||||
ztl::logger("Reading file: ", ztl::get_argv(1).unwrap());
|
||||
ztl::logger("File content:\n", content);
|
||||
std::vector<ztl::lexer::Token> tokens = ztl::lexer::lexer(content);
|
||||
ztl::logger("Tokens parsed:\n", tokens);
|
||||
|
||||
/// Entry point: lexes the file named by the first command-line argument and
/// prints the resulting token vector to stdout.
///
/// Returns 0 on success. Any failure (missing argument, unreadable file, lexer
/// error) is reported on stderr and yields exit code 1, instead of letting the
/// exception escape main and end in std::terminate.
int main(int argc,char *argv[]){
    try{
        if(argc<2){
            throw std::runtime_error("Usage : ztl <filepath>");
        }
        ztl::Lexer lexer = std::visit(ztl::overloaded{
            // Taken by value so the file contents are moved, not copied.
            [](std::string s) -> std::string {
                return s;
            },
            // Explicit return type: this branch only throws.
            [&argv](const ztl::readFileToStrType::CannotOpenFile &) -> std::string {
                throw std::runtime_error(std::format("Cannot open file :{}",argv[1]));
            }
        },ztl::readFileToStr(argv[1]));

        std::cout<<"Lexer() return vector:\n"<<lexer.tokens<<'\n';
        return 0;
    }catch(const std::exception &e){
        std::cerr<<e.what()<<'\n';
        return 1;
    }
}
|
@ -1 +1,2 @@
|
||||
print("hello world");
|
||||
int a =1;
|
||||
print(a);
|
@ -1,20 +1,3 @@
|
||||
#include "Lexer.hpp"
|
||||
#include "Tools.hpp"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define NV(v) {std::cout<<#v<<" : '"<<(v)<<"'\n";}
|
||||
|
||||
|
||||
|
||||
// Smoke test: lexes a one-statement script with ztl::lexer::lexer and logs the tokens.
int main(){
|
||||
// Instantiates get_T_name for Keywords; the returned name is discarded.
ztl::get_T_name<ztl::lexer::Keywords>();
|
||||
std::string hello_world = R"(
|
||||
print("hello ztl!");
|
||||
)";
|
||||
std::cout<<"This is a ztl's test\n";
|
||||
// NV prints the expression text followed by its value.
NV(hello_world)
|
||||
std::vector<ztl::lexer::Token> lexer_hello_world = ztl::lexer::lexer(hello_world);
|
||||
ztl::logger(lexer_hello_world);
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user