From dbd9d0cf957b24774e8a6a6e4758f113b276af12 Mon Sep 17 00:00:00 2001 From: zs Date: Sun, 30 Nov 2025 21:15:05 +0800 Subject: [PATCH] feat: export DB into souffle facts to allow offline analysis --- .gitignore | 1 + CMakeLists.txt | 1 + ccls_schema.dl | 25 +++ src/main.cc | 10 +- src/pipeline.cc | 20 +- src/pipeline.hh | 2 +- src/souffle_exporter.cc | 438 ++++++++++++++++++++++++++++++++++++++++ src/souffle_exporter.hh | 49 +++++ 8 files changed, 542 insertions(+), 4 deletions(-) create mode 100644 ccls_schema.dl create mode 100644 src/souffle_exporter.cc create mode 100644 src/souffle_exporter.hh diff --git a/.gitignore b/.gitignore index cce6259b..592fe4d8 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ !/LICENSE !/README.md !/meow/ +!/ccls_schema.dl diff --git a/CMakeLists.txt b/CMakeLists.txt index af9bcc1a..33413d82 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,6 +213,7 @@ target_sources(ccls PRIVATE src/query.cc src/sema_manager.cc src/serializer.cc + src/souffle_exporter.cc src/test.cc src/utils.cc src/working_files.cc diff --git a/ccls_schema.dl b/ccls_schema.dl new file mode 100644 index 00000000..40a53c63 --- /dev/null +++ b/ccls_schema.dl @@ -0,0 +1,25 @@ +// Usr is uint64_t, requires SOUFFLE_DOMAIN_64BIT build +.type Usr <: unsigned +.type File <: symbol +.type Name <: symbol +.type Line <: number +.type Kind <: symbol +.type Role <: symbol + +.decl function(usr: Usr, name: Name, file: File, line: Line, kind: Kind) +.decl type(usr: Usr, name: Name, file: File, line: Line, kind: Kind) +.decl variable(usr: Usr, name: Name, file: File, line: Line) +.decl calls(caller: Usr, callee: Usr) +.decl inherits(derived: Usr, base: Usr) +.decl overrides(derived: Usr, base: Usr) +.decl member_func(type: Usr, func: Usr) +.decl member_var(type: Usr, var: Usr) +.decl has_type(var: Usr, type: Usr) +.decl reference(usr: Usr, file: File, line: Line, role: Role) +.decl declaration(usr: Usr, file: File, line: Line) +.decl definition(usr: Usr, file: File, 
line: Line) +.decl includes(includer: File, includee: File) +.decl func_derived(base: Usr, derived: Usr) +.decl type_derived(base: Usr, derived: Usr) +.decl type_instances(type: Usr, instance: Usr) +.decl symbol_in_file(file: File, line: Line, column: number, usr: Usr, kind: Kind, role: Role) diff --git a/src/main.cc b/src/main.cc index 5700e426..56e352b8 100644 --- a/src/main.cc +++ b/src/main.cc @@ -41,6 +41,8 @@ opt opt_verbose("v", desc("verbosity, from -3 (fatal) to 2 (verbose)"), ini opt opt_test_index("test-index", ValueOptional, init("!"), desc("run index tests"), cat(C)); opt opt_index("index", desc("standalone mode: index a project and exit"), value_desc("root"), cat(C)); +opt opt_souffle_output("souffle-output", desc("export index results as Souffle facts to directory"), + value_desc("dir"), cat(C)); list opt_init("init", desc("extra initialization options in JSON"), cat(C)); opt opt_log_file("log-file", desc("stderr or log file"), value_desc("file"), init("stderr"), cat(C)); opt opt_log_file_append("log-file-append", desc("append to log file"), cat(C)); @@ -121,7 +123,13 @@ int main(int argc, char **argv) { if (opt_index.size()) { SmallString<256> root(opt_index); sys::fs::make_absolute(root); - pipeline::standalone(std::string(root.data(), root.size())); + std::string souffle_dir; + if (opt_souffle_output.size()) { + SmallString<256> souffle(opt_souffle_output); + sys::fs::make_absolute(souffle); + souffle_dir = std::string(souffle.data(), souffle.size()); + } + pipeline::standalone(std::string(root.data(), root.size()), souffle_dir); } else { // The thread that reads from stdin and dispatchs commands to the main // thread. 
diff --git a/src/pipeline.cc b/src/pipeline.cc index a6ba1181..1020d90e 100644 --- a/src/pipeline.cc +++ b/src/pipeline.cc @@ -12,6 +12,7 @@ #include "project.hh" #include "query.hh" #include "sema_manager.hh" +#include "souffle_exporter.hh" #include #include @@ -764,14 +765,18 @@ void mainLoop() { quit(manager); } -void standalone(const std::string &root) { +void standalone(const std::string &root, const std::string &souffle_output) { Project project; WorkingFiles wfiles; VFS vfs; + DB db; SemaManager manager( nullptr, nullptr, [](const std::string &, const std::vector &) {}, [](const RequestId &id) {}); MessageHandler handler; + if (!souffle_output.empty()) { + handler.db = &db; + } handler.project = &project; handler.wfiles = &wfiles; handler.vfs = &vfs; @@ -787,7 +792,12 @@ void standalone(const std::string &root) { printf("entries: %4d\n", entries); } while (1) { - (void)on_indexed->dequeueAll(); + auto updates = on_indexed->dequeueAll(); + if (!souffle_output.empty()) { + for (auto &update : updates) { + db.applyIndexUpdate(&update); + } + } int64_t enqueued = stats.enqueued, completed = stats.completed; if (tty) { printf("\rcompleted: %4" PRId64 "/%" PRId64, completed, enqueued); @@ -799,6 +809,12 @@ void standalone(const std::string &root) { } if (tty) puts(""); + + if (!souffle_output.empty()) { + SouffleExporter exporter(souffle_output); + exporter.exportDB(db); + } + quit(manager); } diff --git a/src/pipeline.hh b/src/pipeline.hh index 2c71a97b..7ed7e482 100644 --- a/src/pipeline.hh +++ b/src/pipeline.hh @@ -57,7 +57,7 @@ void launchStdout(); void indexer_Main(SemaManager *manager, VFS *vfs, Project *project, WorkingFiles *wfiles); void indexerSort(const std::unordered_map &dir2prio); void mainLoop(); -void standalone(const std::string &root); +void standalone(const std::string &root, const std::string &souffle_output = ""); void index(const std::string &path, const std::vector &args, IndexMode mode, bool must_exist, RequestId id = {}); diff --git 
a/src/souffle_exporter.cc b/src/souffle_exporter.cc
new file mode 100644
index 00000000..cca19a43
--- /dev/null
+++ b/src/souffle_exporter.cc
@@ -0,0 +1,447 @@
+// Copyright 2017-2018 ccls Authors
+// SPDX-License-Identifier: Apache-2.0
+
+#include "souffle_exporter.hh"
+
+#include "log.hh"
+#include "query.hh"
+
+#include <llvm/Support/FileSystem.h>
+#include <llvm/Support/FormatVariadic.h>
+
+#include <cerrno>
+#include <cstdint>
+#include <utility>
+
+namespace ccls {
+
+namespace {
+// Formats `vals` with the llvm::formatv pattern `fmt` and appends the result to `f`.
+template <typename... Ts> void writeFmt(FILE *f, const char *fmt, Ts &&...vals) {
+  auto str = llvm::formatv(fmt, std::forward<Ts>(vals)...).str();
+  fputs(str.c_str(), f);
+}
+
+// Backslash-escapes tab, newline, carriage return and backslash so a value can
+// never break the tab-separated, one-fact-per-line layout of a facts file.
+std::string escapeTSV(const std::string &str) {
+  std::string result;
+  result.reserve(str.size());
+  for (char c : str) {
+    switch (c) {
+    case '\t':
+      result += "\\t";
+      break;
+    case '\n':
+      result += "\\n";
+      break;
+    case '\r':
+      result += "\\r";
+      break;
+    case '\\':
+      result += "\\\\";
+      break;
+    default:
+      result += c;
+      break;
+    }
+  }
+  return result;
+}
+
+// Returns the escaped path of file `file_id`, or "" when the id is out of range
+// or the file has no definition. Callers treat "" as "skip this row".
+std::string getEscapeFilePath(const DB &db, int file_id) {
+  if (file_id < 0 || file_id >= static_cast<int>(db.files.size()))
+    return "";
+  const auto &file = db.files[file_id];
+  if (!file.def)
+    return "";
+  return escapeTSV(file.def->path);
+}
+
+// Spelling of a Kind as written to the `kind` column of symbol_in_file rows.
+const char *kind2Str(Kind kind) {
+  switch (kind) {
+  case Kind::Invalid:
+    return "Invalid";
+  case Kind::File:
+    return "File";
+  case Kind::Type:
+    return "Type";
+  case Kind::Func:
+    return "Func";
+  case Kind::Var:
+    return "Var";
+  default:
+    return "ERROR";
+  }
+}
+
+// Renders a Role bit set as '|'-joined flag names, e.g. "Reference|Call".
+// Role::None and Role::All are spelled out as "None" and "All".
+std::string role2Str(Role role) {
+  if (role == Role::None)
+    return "None";
+  if (role == Role::All)
+    return "All";
+
+  std::string result;
+  auto add = [&](Role r, const char *name) {
+    if (uint16_t(role) & uint16_t(r)) {
+      if (!result.empty())
+        result += "|";
+      result += name;
+    }
+  };
+
+  add(Role::Declaration, "Declaration");
+  add(Role::Definition, "Definition");
+  add(Role::Reference, "Reference");
+  add(Role::Read, "Read");
+  add(Role::Write, "Write");
+  add(Role::Call, "Call");
+  add(Role::Dynamic, "Dynamic");
+  add(Role::Address, "Address");
+  add(Role::Implicit, "Implicit");
+
+  return result.empty() ? "None" : result;
+}
+
+// Spelling of a SymbolKind as written to the `kind` column of function and type rows.
+const char *symbolKind2Str(SymbolKind kind) {
+  switch (kind) {
+  case SymbolKind::Unknown:
+    return "Unknown";
+  case SymbolKind::File:
+    return "File";
+  case SymbolKind::Module:
+    return "Module";
+  case SymbolKind::Namespace:
+    return "Namespace";
+  case SymbolKind::Package:
+    return "Package";
+  case SymbolKind::Class:
+    return "Class";
+  case SymbolKind::Method:
+    return "Method";
+  case SymbolKind::Property:
+    return "Property";
+  case SymbolKind::Field:
+    return "Field";
+  case SymbolKind::Constructor:
+    return "Constructor";
+  case SymbolKind::Enum:
+    return "Enum";
+  case SymbolKind::Interface:
+    return "Interface";
+  case SymbolKind::Function:
+    return "Function";
+  case SymbolKind::Variable:
+    return "Variable";
+  case SymbolKind::Constant:
+    return "Constant";
+  case SymbolKind::String:
+    return "String";
+  case SymbolKind::Number:
+    return "Number";
+  case SymbolKind::Boolean:
+    return "Boolean";
+  case SymbolKind::Array:
+    return "Array";
+  case SymbolKind::Object:
+    return "Object";
+  case SymbolKind::Key:
+    return "Key";
+  case SymbolKind::Null:
+    return "Null";
+  case SymbolKind::EnumMember:
+    return "EnumMember";
+  case SymbolKind::Struct:
+    return "Struct";
+  case SymbolKind::Event:
+    return "Event";
+  case SymbolKind::Operator:
+    return "Operator";
+  case SymbolKind::TypeParameter:
+    return "TypeParameter";
+  case SymbolKind::FirstNonStandard:
+    return "FirstNonStandard";
+  case SymbolKind::TypeAlias:
+    return "TypeAlias";
+  case SymbolKind::Parameter:
+    return "Parameter";
+  case SymbolKind::StaticMethod:
+    return "StaticMethod";
+  case SymbolKind::Macro:
+    return "Macro";
+  default:
+    return "ERROR";
+  }
+}
+
+// Writes one `declaration` row for each entry of `decls` whose file path can be resolved.
+template <typename Decls> void writeDeclarations(FILE *out, const DB &db, uint64_t usr, const Decls &decls) {
+  for (const auto &decl : decls) {
+    const std::string file = getEscapeFilePath(db, decl.file_id);
+    if (!file.empty())
+      writeFmt(out, "{0}\t{1}\t{2}\n", usr, file, decl.range.start.line + 1);
+  }
+}
+
+// Writes one `reference` row, including its role, for each entry of `uses` whose file path can be resolved.
+template <typename Uses> void writeReferences(FILE *out, const DB &db, uint64_t usr, const Uses &uses) {
+  for (const auto &use : uses) {
+    const std::string file = getEscapeFilePath(db, use.file_id);
+    if (!file.empty())
+      writeFmt(out, "{0}\t{1}\t{2}\t{3}\n", usr, file, use.range.start.line + 1, role2Str(use.role));
+  }
+}
+} // namespace
+
+// Creates `output_dir` (including missing parents) and opens one `<relation>.facts`
+// file per relation inside it. Failures are logged and leave the affected handles
+// null; the export* methods check the handles they need before writing.
+SouffleExporter::SouffleExporter(const std::string &output_dir) {
+  auto ec = llvm::sys::fs::create_directories(output_dir);
+  if (ec) {
+    LOG_S(ERROR) << "Failed to create output directory " << output_dir << ": " << ec.message();
+    return;
+  }
+
+  auto open_file = [&output_dir](const char *name) {
+    std::string path = output_dir + "/" + name;
+    FILE *f = fopen(path.c_str(), "w");
+    if (!f) {
+      LOG_S(ERROR) << "Failed to open " << path << " for writing. errno: " << errno;
+    }
+    return f;
+  };
+
+  function_file_ = open_file("function.facts");
+  type_file_ = open_file("type.facts");
+  variable_file_ = open_file("variable.facts");
+  calls_file_ = open_file("calls.facts");
+  inherits_file_ = open_file("inherits.facts");
+  overrides_file_ = open_file("overrides.facts");
+  member_func_file_ = open_file("member_func.facts");
+  member_var_file_ = open_file("member_var.facts");
+  has_type_file_ = open_file("has_type.facts");
+  reference_file_ = open_file("reference.facts");
+  declaration_file_ = open_file("declaration.facts");
+  definition_file_ = open_file("definition.facts");
+  includes_file_ = open_file("includes.facts");
+  func_derived_file_ = open_file("func_derived.facts");
+  type_derived_file_ = open_file("type_derived.facts");
+  type_instances_file_ = open_file("type_instances.facts");
+  symbol_in_file_file_ = open_file("symbol_in_file.facts");
+}
+
+// Closes every facts file that was successfully opened.
+SouffleExporter::~SouffleExporter() {
+  auto close_file = [](FILE *&f) {
+    if (f) {
+      fclose(f);
+      f = nullptr;
+    }
+  };
+
+  close_file(function_file_);
+  close_file(type_file_);
+  close_file(variable_file_);
+  close_file(calls_file_);
+  close_file(inherits_file_);
+  close_file(overrides_file_);
+  close_file(member_func_file_);
+  close_file(member_var_file_);
+  close_file(has_type_file_);
+  close_file(reference_file_);
+  close_file(declaration_file_);
+  close_file(definition_file_);
+  close_file(includes_file_);
+  close_file(func_derived_file_);
+  close_file(type_derived_file_);
+  close_file(type_instances_file_);
+  close_file(symbol_in_file_file_);
+}
+
+// Writes every file, function, type and variable of `db` to the facts files.
+void SouffleExporter::exportDB(const DB &db) {
+  std::size_t total_symbols = 0;
+  for (auto &file : db.files) {
+    total_symbols += file.symbol2refcnt.size();
+  }
+  LOG_S(INFO) << "Exporting " << total_symbols << " symbols, " << db.funcs.size() << " functions, " << db.types.size()
+              << " types, " << db.vars.size() << " variables, " << db.files.size() << " files";
+
+  for (auto &file : db.files) {
+    exportFile(file);
+  }
+
+  for (auto &func : db.funcs) {
+    exportFunction(func, db);
+  }
+
+  for (auto &type : db.types) {
+    exportType(type, db);
+  }
+
+  for (auto &var : db.vars) {
+    exportVariable(var, db);
+  }
+}
+
+// Emits the `includes` and `symbol_in_file` rows of one indexed file.
+void SouffleExporter::exportFile(const QueryFile &file) {
+  if (!file.def) {
+    return;
+  }
+
+  const std::string file_path = escapeTSV(file.def->path);
+
+  if (includes_file_) {
+    for (auto &include : file.def->includes) {
+      writeFmt(includes_file_, "{0}\t{1}\n", file_path, escapeTSV(include.resolved_path));
+    }
+  }
+
+  if (symbol_in_file_file_) {
+    for (const auto &[sym, refcnt] : file.symbol2refcnt) {
+      if (refcnt > 0) {
+        // The line is written as the stored value + 1; the column is written unchanged.
+        writeFmt(symbol_in_file_file_, "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n", file_path, sym.range.start.line + 1,
+                 sym.range.start.column, sym.usr, kind2Str(sym.kind), role2Str(sym.role));
+      }
+    }
+  }
+}
+
+// Emits function, definition, calls, overrides, declaration, func_derived and
+// reference rows for `func`. Nothing is written unless all of those files are open.
+void SouffleExporter::exportFunction(const QueryFunc &func, const DB &db) {
+  if (!function_file_ || !definition_file_ || !calls_file_ || !overrides_file_ || !declaration_file_ ||
+      !func_derived_file_ || !reference_file_) {
+    return;
+  }
+
+  auto def = func.anyDef();
+
+  if (def) {
+    if (def->spell) {
+      auto def_file = getEscapeFilePath(db, def->spell->file_id);
+      auto def_line = def->spell->range.start.line + 1;
+
+      // Rows whose file cannot be resolved are dropped, as for declarations and references.
+      if (!def_file.empty()) {
+        writeFmt(function_file_, "{0}\t{1}\t{2}\t{3}\t{4}\n", func.usr, escapeTSV(def->detailed_name), def_file,
+                 def_line, symbolKind2Str(def->kind));
+        if (def->spell->role & Role::Definition) {
+          writeFmt(definition_file_, "{0}\t{1}\t{2}\n", func.usr, def_file, def_line);
+        }
+      }
+    }
+
+    for (auto &callee : def->callees) {
+      if (callee.kind == Kind::Func) {
+        writeFmt(calls_file_, "{0}\t{1}\n", func.usr, callee.usr);
+      }
+    }
+
+    for (auto &base_usr : def->bases) {
+      writeFmt(overrides_file_, "{0}\t{1}\n", func.usr, base_usr);
+    }
+  }
+
+  writeDeclarations(declaration_file_, db, func.usr, func.declarations);
+
+  for (auto &derived_usr : func.derived) {
+    writeFmt(func_derived_file_, "{0}\t{1}\n", func.usr, derived_usr);
+  }
+
+  writeReferences(reference_file_, db, func.usr, func.uses);
+}
+
+// Emits type, definition, inherits, member_func, member_var, declaration,
+// type_derived, type_instances and reference rows for `type`. Nothing is written
+// unless all of those files are open.
+void SouffleExporter::exportType(const QueryType &type, const DB &db) {
+  if (!type_file_ || !definition_file_ || !inherits_file_ || !member_func_file_ || !member_var_file_ ||
+      !declaration_file_ || !type_derived_file_ || !type_instances_file_ || !reference_file_) {
+    return;
+  }
+
+  auto def = type.anyDef();
+
+  if (def) {
+    if (def->spell) {
+      auto def_file = getEscapeFilePath(db, def->spell->file_id);
+      auto def_line = def->spell->range.start.line + 1;
+
+      // Rows whose file cannot be resolved are dropped, as for declarations and references.
+      if (!def_file.empty()) {
+        writeFmt(type_file_, "{0}\t{1}\t{2}\t{3}\t{4}\n", type.usr, escapeTSV(def->detailed_name), def_file, def_line,
+                 symbolKind2Str(def->kind));
+        if (def->spell->role & Role::Definition) {
+          writeFmt(definition_file_, "{0}\t{1}\t{2}\n", type.usr, def_file, def_line);
+        }
+      }
+    }
+
+    for (auto &base_usr : def->bases) {
+      writeFmt(inherits_file_, "{0}\t{1}\n", type.usr, base_usr);
+    }
+
+    for (auto &func_usr : def->funcs) {
+      writeFmt(member_func_file_, "{0}\t{1}\n", type.usr, func_usr);
+    }
+
+    for (auto &member : def->vars) {
+      writeFmt(member_var_file_, "{0}\t{1}\n", type.usr, member.first);
+    }
+  }
+
+  writeDeclarations(declaration_file_, db, type.usr, type.declarations);
+
+  for (auto &derived_usr : type.derived) {
+    writeFmt(type_derived_file_, "{0}\t{1}\n", type.usr, derived_usr);
+  }
+
+  for (auto &instance_usr : type.instances) {
+    writeFmt(type_instances_file_, "{0}\t{1}\n", type.usr, instance_usr);
+  }
+
+  writeReferences(reference_file_, db, type.usr, type.uses);
+}
+
+// Emits variable, definition, has_type, declaration and reference rows for `var`.
+// Nothing is written unless all of those files are open.
+void SouffleExporter::exportVariable(const QueryVar &var, const DB &db) {
+  if (!variable_file_ || !definition_file_ || !has_type_file_ || !declaration_file_ || !reference_file_) {
+    return;
+  }
+
+  auto def = var.anyDef();
+
+  if (def && def->spell) {
+    auto def_file = getEscapeFilePath(db, def->spell->file_id);
+    auto def_line = def->spell->range.start.line + 1;
+
+    // Rows whose file cannot be resolved are dropped, as for declarations and references.
+    if (!def_file.empty()) {
+      writeFmt(variable_file_, "{0}\t{1}\t{2}\t{3}\n", var.usr, escapeTSV(def->detailed_name), def_file, def_line);
+      if (def->spell->role & Role::Definition) {
+        writeFmt(definition_file_, "{0}\t{1}\t{2}\n", var.usr, def_file, def_line);
+      }
+    }
+
+    // No has_type row is written when the recorded type USR is 0.
+    if (def->type) {
+      writeFmt(has_type_file_, "{0}\t{1}\n", var.usr, def->type);
+    }
+  }
+
+  writeDeclarations(declaration_file_, db, var.usr, var.declarations);
+  writeReferences(reference_file_, db, var.usr, var.uses);
+}
+
+} // namespace ccls
diff --git a/src/souffle_exporter.hh b/src/souffle_exporter.hh
new file mode 100644
index 00000000..b4908d35
--- /dev/null
+++ b/src/souffle_exporter.hh
@@ -0,0 +1,58 @@
+// Copyright 2017-2018 ccls Authors
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <cstdio>
+#include <string>
+
+namespace ccls {
+
+struct QueryFunc;
+struct QueryType;
+struct QueryVar;
+struct QueryFile;
+struct DB;
+
+// Dumps a ccls DB as tab-separated Souffle fact files, one `<relation>.facts`
+// file per relation declared in ccls_schema.dl.
+class SouffleExporter {
+public:
+  // Creates `output_dir` if needed and opens the fact files for writing.
+  explicit SouffleExporter(const std::string &output_dir);
+  ~SouffleExporter();
+
+  // The exporter owns raw FILE handles; copying would close each of them twice.
+  SouffleExporter(const SouffleExporter &) = delete;
+  SouffleExporter &operator=(const SouffleExporter &) = delete;
+
+  // Writes all files, functions, types and variables of `db`.
+  void exportDB(const DB &db);
+
+private:
+  void exportFile(const QueryFile &file);
+  void exportFunction(const QueryFunc &func, const DB &db);
+  void exportType(const QueryType &type, const DB &db);
+  void exportVariable(const QueryVar &var, const DB &db);
+
+  // One output stream per relation; null when the file could not be opened.
+  FILE *function_file_ = nullptr;
+  FILE *type_file_ = nullptr;
+  FILE *variable_file_ = nullptr;
+  FILE *calls_file_ = nullptr;
+  FILE *inherits_file_ = nullptr;
+  FILE *overrides_file_ = nullptr;
+  FILE *member_func_file_ = nullptr;
+  FILE *member_var_file_ = nullptr;
+  FILE *has_type_file_ = nullptr;
+  FILE *reference_file_ = nullptr;
+  FILE *declaration_file_ = nullptr;
+  FILE *definition_file_ = nullptr;
+  FILE *includes_file_ = nullptr;
+  FILE *func_derived_file_ = nullptr;
+  FILE *type_derived_file_ = nullptr;
+  FILE *type_instances_file_ = nullptr;
+  FILE *symbol_in_file_file_ = nullptr;
+};
+
+} // namespace ccls