update

2025-07-18 18:50:14 +08:00 · 2025-07-18 18:50:14 +08:00 · 126f995ec3
commit 126f995ec3
parent 185c7f785c
6 changed files with 150 additions and 311 deletions
--- a/include/Interpreter.hpp
+++ b/include/Interpreter.hpp
@ -0,0 +1,5 @@
+#pragma once
+
+// Placeholder for the interpreter: the ztl namespace is opened here but
+// declares nothing yet.
+namespace ztl{
+    
+}
--- a/include/Lexer.hpp
+++ b/include/Lexer.hpp
@ -1,212 +1,118 @@
 #pragma once

+#include <cctype>
 #include <cstddef>
-#include <functional>
-#include <memory>
+#include <format>
+#include <fstream>
+#include <iterator>
 #include <ostream>
 #include <stdexcept>
 #include <string>
-#include <type_traits>
+#include <string_view>
+#include <utility>
+#include <variant>
 #include <vector>
-#include <cxxabi.h>
-#include "Tools.hpp"

 namespace ztl{
-    namespace lexer{
-        inline std::string demangle(const char* mangled_name) {
-        int status = 0;
-        // 调用 __cxa_demangle 解 mangling
-        char* demangled = abi::__cxa_demangle(
-            mangled_name,
-            nullptr,   // 输出缓冲区（nullptr 表示自动分配）
-            nullptr,   // 缓冲区大小（nullptr 表示自动计算）
-            &status    // 输出状态（0 表示成功）
-        );

-        // 用智能指针管理动态分配的内存，避免泄漏
-        std::unique_ptr<char, decltype(&std::free)> demangled_ptr(demangled, std::free);
-
-        if (status == 0) {
-            return std::string(demangled_ptr.get());  // 解 mangling 成功
-        } else {
-            return std::string(mangled_name);  // 失败时返回原始名称
-        }
+    // Result vocabulary for readFileToStr().
+    namespace readFileToStrType{
+        // Tag alternative returned when the input stream could not be opened.
+        struct CannotOpenFile{};
+        // Either the file contents (std::string) or the CannotOpenFile tag.
+        using ReturnType = std::variant<
+            std::string,
+            CannotOpenFile
+        >;
    }
-        struct Token{
-            std::string data;
-            // std::string got_token_type() const {
-            //     return demangle(typeid(*this).name());
-            // }
-        };
-        struct Keywords:Token{};
-        struct Identifiers:Token{};
-        struct Literals:Token{};
-        struct Operators:Token{};
-        struct Separators:Token{};
-        struct Comments:Token{};
-        struct EndSeparators:Token{};
-        struct EOF_Token:Token{};
-
-        const std::vector<char> white_spaces{'\n','\t',' '};
-        const std::vector<char> symbols{'(',')','"'};
-
-        struct CharProvider{
-            std::string buffer;
-            
-            size_t idx=0;
-            Result<char> peek(){
-                if (idx<buffer.size()) {
-                    return Result<char>(buffer[idx]);
-                }else{
-                    return Result<char>(Err("got EOF"));
-                }
-            }
-            Result<char> consume(){
-                return ztl::match(peek(), [this](char c){
-                    idx++;
-                    return Result<char>(c);
-                },[](const Err &e){
-                    return Result<char>(e);
-                });
-            }
-            Result<bool> match(char c){
-                return ztl::match(peek(),[this](char c){
-                    if(c==buffer[idx]){
-                        idx++;
-                        return Result<bool>(true);
-                    }else{
-                        return Result<bool>(false);
-                    }
-                },[this](const Err &e){
-                    return Result<bool>(e);
-                });
-            }
-            Result<std::string> consume_until(std::function<bool(char)> func){
-                size_t start_idx = idx;
-                while(idx<buffer.size()&&func(buffer[idx])){
-                    idx++;
-                }
-                if(idx-start_idx==0){
-                    return Result<std::string>(Err("don't hava any string"));
-                }
-                idx++; 
-                return Result<std::string>(buffer.substr(start_idx,idx-start_idx-1));
-            }
-            void skip_whitespace(){
-                while(1) {
-                    if(
-                        !ztl::match(peek(),[this](char c){
-                            bool is_ws = false;
-                            for(char ws:white_spaces){
-                                if(c==ws){
-                                    consume();
-                                    is_ws=true;
-                                }
-                            }
-                            return is_ws;
-                        },[](const Err &e){
-                            return false;
-                        })
-                    ){
-                        return;
-                    }
-                }
-            }
-        };
-
-        inline bool is_digit(char c){
-            return '0'<=c&&c<='9';
+    // Reads the whole file at `filePath` into a string.
+    //
+    // Returns the file contents on success, or
+    // readFileToStrType::CannotOpenFile when the stream cannot be opened.
+    inline readFileToStrType::ReturnType readFileToStr(const std::string_view &filePath){
+        // A string_view is not guaranteed to be NUL-terminated, so passing
+        // filePath.data() to the stream could read past the end of the view.
+        // Copy it into a std::string, which always is.
+        const std::string path(filePath);
+        std::ifstream ifs(path);
+        if(!ifs){
+            return readFileToStrType::CannotOpenFile{};
        }
-
-        inline bool is_alpha(char c){
-            return 'A'<=c&&c<='z';
-        }
-
-        inline bool is_alphanumeric(char c){
-            return is_digit(c) || is_alpha(c);
-        }
-
-        inline bool is_symbol(char c){
-            bool is_symbol = false;
-            for(char i:symbols){
-                if(c==i){
-                    is_symbol=true;
-                    break;
-                }
-            }
-            return is_symbol;
-        }
-        
-        inline std::vector<Token> lexer(const std::string&s){
-            CharProvider cp(s);
-            std::vector<Token> tokens;
-            while(1){
-                bool should_break=false;
-                cp.skip_whitespace();
-                match(cp.peek(),[&cp,&tokens](char c){
-                    if (is_alpha(c)){
-                        tokens.push_back(Keywords{cp.consume_until(is_alphanumeric).unwrap()});
-                    }else if(c=='('){
-                        tokens.push_back(Separators{std::string()+cp.consume().unwrap()});
-                        match(cp.match(')'),[&cp](bool matched){
-                            if(!matched){
-                                throw std::runtime_error("unmatched '('");
-                            }
-                        },[](const Err &e){
-                            throw std::runtime_error(e);
-                        });
-                    }else if(c=='"'){
-                        cp.consume().unwrap();
-                        tokens.push_back(Literals{cp.consume_until([](char c)->bool{return c!='"';}).unwrap()});
-                        match(cp.match('"'),[&cp](bool matched){
-                            if(!matched){
-                                throw std::runtime_error("unmatched '\"'");
-                            }
-                        },[](const Err &e){
-                            throw std::runtime_error(e);
-                        });
-                    }else if(c==';'){
-                        tokens.push_back(EndSeparators{std::string()+cp.consume().unwrap()});
-                    }
-                    else{
-                        throw std::runtime_error(std::string("unknown token named :'")+c+"'");
-                    }
-                },[&should_break](const Err &e){
-                    should_break=true;
-                });
-                if(should_break)break;
-            }
-            return tokens;
-        }
-    }
-    
-}
-
-namespace std {
-    template <typename T>
-    concept DerivedFromToken = std::is_base_of_v<ztl::lexer::Token, T>;
-
-    template<DerivedFromToken T>
-    std::ostream&operator<<(std::ostream &os, const T &t){
-        string name = ztl::get_T_name<T>();
-        // ztl::logger(ztl::demangle(typeid(t).name()));
-        // string name = ztl::demangle(typeid(t).name());
-        return os<<name<<" {data: '"<<t.data<<"'}";
+        // Slurp the stream from its current position to end-of-file.
+        std::string s(std::istreambuf_iterator<char>(ifs),{});
+        return s;
    }

-    template<class T>
-    std::ostream&operator<<(std::ostream &os, const std::vector<T> &v){
-        os<<"std::vector<"<<ztl::get_T_name<T>()<<"> "<<"{";
-        if(v.size()>0){
-            for(size_t i=0;i<v.size()-1;i++){
-                os<<" "<<v[i]<<",";
-            }
-            os<<" "<<v[v.size()-1];
-        }else{
-            os<<"}";
+    // X-macro list of token categories. FOR_EACH expands to one FOR_EACH_V(name)
+    // per category; FOR_EACH_V is (re)defined before each expansion to choose
+    // what is generated. The trailing backslash on the last entry continues
+    // onto the following empty line, so that line must stay empty.
+    #define FOR_EACH FOR_EACH_V(Keyword)\
+    FOR_EACH_V(Identifier)\
+    FOR_EACH_V(Literal)\
+    FOR_EACH_V(Operator)\
+    FOR_EACH_V(Separator)\
+    FOR_EACH_V(Whitespace)\
+
+    // First expansion: each entry becomes an enumerator of TokenType.
+    #define FOR_EACH_V(v)v,
+    enum class TokenType{
+        FOR_EACH
+    };
+    #undef FOR_EACH_V
+    // Returns the enumerator's spelling (e.g. "Keyword") for `t`.
+    // Throws std::runtime_error if `t` holds a value that is not in the list.
+    inline std::string getTokenTypeName(TokenType t){
+        switch (t) {
+            // Second expansion: each entry becomes `case TokenType::X:{return "X";}`.
+            #define FOR_EACH_V(v)case TokenType::v:{return #v;}
+            FOR_EACH
+            default:
+                throw std::runtime_error("unknown TokenType");
+        }
+        // Every switch path above returns or throws; this only guards the end
+        // of the function.
+        throw std::runtime_error("unreachable");
+    }
+    // Both helper macros are removed so they do not leak out of this header.
+    #undef FOR_EACH_V
+    #undef FOR_EACH
+
+    // One lexed token: its category and the text stored for it.
+    struct Token{
+        TokenType type;
+        std::string str;
+        // Streams the token as "{ TokenType: <category name>, str: <text> }".
+        friend std::ostream&operator<<(std::ostream&os,const Token &t){
+            os<<"{ TokenType: "<<getTokenTypeName(t.type)<<", str: "<<t.str<<" }";
            return os;
        }
-        os<<"}";
-        return os;
-    }
+    };
+    // Tokenizes a ztl source string eagerly, at construction time.
+    //
+    // Recognised input:
+    //   - words: a letter followed by letters/digits; "int" and "print" become
+    //     Keyword tokens, every other word an Identifier token
+    //   - unsigned decimal digit runs (Literal)
+    //   - the operator '='
+    //   - the separators ';', '(' and ')'
+    // Whitespace only separates tokens and is never emitted.
+    struct Lexer{
+        // Tokens in source order, filled by the constructor.
+        std::vector<Token> tokens;
+
+        // Scans `s` and fills `tokens`.
+        //
+        // Deliberately not `explicit`: callers copy-initialise a Lexer directly
+        // from a std::string.
+        //
+        // Throws std::runtime_error when a character starts no known token; the
+        // message carries the 1-based line and column of that character.
+        Lexer(const std::string &s){
+            // The <cctype> classifiers are undefined for negative char values,
+            // so every call goes through unsigned char.
+            const auto isAlpha = [](char c){return std::isalpha(static_cast<unsigned char>(c))!=0;};
+            const auto isAlnum = [](char c){return std::isalnum(static_cast<unsigned char>(c))!=0;};
+            const auto isDigit = [](char c){return std::isdigit(static_cast<unsigned char>(c))!=0;};
+            const auto isSpace = [](char c){return std::isspace(static_cast<unsigned char>(c))!=0;};
+
+            static constexpr std::string_view keywords[] = {"int","print"};
+
+            // `line` is 1-based; `lineStart` is the index of the first character
+            // of the current line, used to derive the column.
+            size_t line{1},lineStart{};
+            for(size_t i=0;i<s.size();i++){
+                const char c = s[i];
+                if(isAlpha(c)){
+                    const size_t begin = i;
+                    // Look ahead (bounds check first) and stop ON the last
+                    // character of the word: the loop's own i++ then moves to
+                    // the character that follows it.
+                    while(i+1<s.size()&&isAlnum(s[i+1])){
+                        i++;
+                    }
+                    std::string word = s.substr(begin,i+1-begin);
+                    bool isKeyword = false;
+                    for(const std::string_view k:keywords){
+                        if(word==k){
+                            isKeyword=true;
+                            break;
+                        }
+                    }
+                    tokens.emplace_back(isKeyword?TokenType::Keyword:TokenType::Identifier,std::move(word));
+                }else if(c=='\n'){
+                    // Count every newline individually so blank lines and
+                    // indentation do not skew the reported line/column.
+                    line++;
+                    lineStart=i+1;
+                }
+                else if(isSpace(c)){
+                    continue;
+                }else if(c=='='){
+                    tokens.emplace_back(TokenType::Operator,"=");
+                }else if(isDigit(c)){
+                    const size_t begin = i;
+                    // Same look-ahead scheme as for words. Advancing past the
+                    // literal here would make the loop's i++ skip the character
+                    // right after the number (e.g. the ';' in "a =1;").
+                    while(i+1<s.size()&&isDigit(s[i+1])){
+                        i++;
+                    }
+                    tokens.emplace_back(TokenType::Literal,s.substr(begin,i+1-begin));
+                }
+                else if(c==';'||c=='('||c==')'){
+                    tokens.emplace_back(TokenType::Separator,std::string(1,c));
+                }
+                else{
+                    throw std::runtime_error(std::format("unkown char '{}' at line {}:{}",c,line,i-lineStart+1));
+                }
+            }
+        }
+    };
 }
--- a/include/Tools.hpp
+++ b/include/Tools.hpp
@ -1,98 +1,27 @@
 #pragma once
+
 #include <cstddef>
-#include <iostream>
-#include <stdexcept>
-#include <string>
-#include <utility>
-#include <variant>
+#include <ostream>
 #include <vector>
-#include<fstream>
-
-namespace ztl{
+namespace ztl {
+    // Combines several callables into one type by inheriting from all of them
+    // and pulling every operator() into a single overload set (used with
+    // std::visit in main.cpp).
    template<class ...Ts>
-    void logger(Ts&&...v){
-        #ifndef NDEBUG
-        std::cout<<"log: ";
-        (std::cout<<...<<v);
-        std::cout<<"\n";
-        #endif
-    }
-
-    template<class T>
-    std::string get_type_detail(){
-        return __PRETTY_FUNCTION__;
-    }
-
-    template<class T>
-    std::string get_T_name(){
-        std::string type_detail = get_type_detail<T>();
-        // logger(type_detail);
-        size_t start = type_detail.find("T = ");
-        if(start == std::string::npos) {
-            throw std::runtime_error("Failed to get type name");
-        }
-        size_t end = type_detail.find(';', start);
-        if(end == std::string::npos) {
-            end = type_detail.size()-1;
-        }
-        // logger(type_detail.substr(start + 4, end - start - 4));
-        return type_detail.substr(start + 4, end - start - 4);
-    }
-
+    struct overloaded : Ts... {using Ts::operator()...;};
+    // Deduction guide: lets `overloaded{f, g, ...}` deduce Ts from its arguments.
    template<class ...Ts>
-    struct overloaded:Ts...{using Ts::operator()...;};
-
-    template<class ...Ts>
-    overloaded(Ts...)->overloaded<Ts...>;
-
-    template<class ...Ts, class ...Fs>
-    auto match(std::variant<Ts...>& v, Fs&&... func) {
-        return std::visit(ztl::overloaded{std::forward<Fs>(func)...}, v);
-    }
-    template<class ...Ts, class ...Fs>
-    auto match(const std::variant<Ts...>& v, Fs&&... func) {
-        return std::visit(ztl::overloaded{std::forward<Fs>(func)...}, v);
-    }
-    template<class ...Ts, class ...Fs>
-    auto match(std::variant<Ts...>&& v, Fs&&... func) {
-        return std::visit(ztl::overloaded{std::forward<Fs>(func)...}, std::move(v));
-    }
-    struct Err:std::string{};
-
-    // template<class T>
-    // struct Ok{
-    //     T data;
-    // };
-
-    template<class T>
-    struct Result:std::variant<T,Err>{
-        T unwrap(){
-            return match<T>(*this,[](const T&t){
-                return t;
-            },[](const Err &e){
-                throw std::runtime_error(e);
-                T t;
-                return t;
-            });
-        }
-    };
-
-    inline Result<std::string> get_argv(size_t idx){
-        if(idx>=__argc){
-            return Result<std::string>(Err("argv's index out of range"));
-        }else{
-            return Result<std::string>(std::string(__argv[idx]));
-        }
-    }
-
-    inline Result<std::string> get_string_from_file(const std::string &file_path){
-        std::ifstream file(file_path);
-        if (!file) {
-            return Result<std::string>(Err("Could not open file: " + file_path));
-        }
-        std::string content((std::istreambuf_iterator<char>(file)),
-                            std::istreambuf_iterator<char>());
-        return Result<std::string>(content);
-    }
+    overloaded (Ts...) -> overloaded<Ts...>;
 }

+// NOTE(review): the C++ standard does not permit adding function overloads to
+// namespace std; the overload is kept here so that existing callers keep
+// finding it through argument-dependent lookup.
+namespace std{
+    // Streams a vector as "[]" when empty, otherwise "[ e0, e1, ..., eN]",
+    // writing each element with its own operator<<.
+    template<class T>
+    ostream&operator<<(ostream&os,const vector<T>&v){
+        if(v.empty()){
+            return os<<"[]";
+        }
+        os<<"[";
+        // A comma goes before every element except the first.
+        bool needComma = false;
+        for(const T &element:v){
+            if(needComma){
+                os<<",";
+            }
+            os<<" "<<element;
+            needComma = true;
+        }
+        return os<<"]";
+    }
+}
--- a/src/main.cpp
+++ b/src/main.cpp
@ -1,11 +1,26 @@
 #include "Lexer.hpp"
 #include "Tools.hpp"
+#include <format>
+#include <iostream>
+#include <stdexcept>
 #include <string>
+#include <variant>

-int main(){
-    std::string content = ztl::get_string_from_file(ztl::get_argv(1).unwrap()).unwrap();
-    ztl::logger("Reading file: ", ztl::get_argv(1).unwrap());
-    ztl::logger("File content:\n", content);
-    std::vector<ztl::lexer::Token> tokens = ztl::lexer::lexer(content);
-    ztl::logger("Tokens parsed:\n", tokens);
+
+// Entry point: lexes the file named by argv[1] and prints the token list.
+//
+// Returns 0 on success. On a missing argument, an unreadable file or a lexing
+// error, writes a message to stderr and returns 1 -- an exception escaping
+// main() would call std::terminate and may never show its message.
+int main(int argc,char *argv[]){
+    if(argc<2){
+        std::cerr<<"Usage : ztl <filepath>"<<'\n';
+        return 1;
+    }
+    try{
+        // readFileToStr yields either the file contents or a CannotOpenFile
+        // tag; the string alternative is converted to a Lexer by its
+        // constructor.
+        ztl::Lexer lexer = std::visit(ztl::overloaded{
+            [](std::string &&s)->std::string{
+                return s;
+            },
+            [&argv](const ztl::readFileToStrType::CannotOpenFile &)->std::string{
+                throw std::runtime_error(std::format("Cannot open file :{}",argv[1]));
+            }
+        },ztl::readFileToStr(argv[1]));
+
+        std::cout<<"Lexer() return vector:\n"<<lexer.tokens<<'\n';
+    }catch(const std::exception &e){
+        std::cerr<<e.what()<<'\n';
+        return 1;
+    }
+    
 }
--- a/tests/main.ztl
+++ b/tests/main.ztl
@ -1 +1,2 @@
-print("hello world");
+int a =1;
+print(a);
--- a/tests/test.cpp
+++ b/tests/test.cpp
@ -1,20 +1,3 @@
-#include "Lexer.hpp"
-#include "Tools.hpp"
-#include <iostream>
-#include <string>
-#include <vector>
-
-#define NV(v) {std::cout<<#v<<" : '"<<(v)<<"'\n";}
-
-
-
 int main(){
-    ztl::get_T_name<ztl::lexer::Keywords>();
-    std::string hello_world = R"(
-        print("hello ztl!");
-    )";
-    std::cout<<"This is a ztl's test\n";
-    NV(hello_world)
-    std::vector<ztl::lexer::Token> lexer_hello_world = ztl::lexer::lexer(hello_world);
-    ztl::logger(lexer_hello_world);
+    // Placeholder: the previous lexer smoke test was removed in this commit,
+    // so this test program currently does nothing.
+    
 }