feat: export DB into souffle facts to allow offline analysis

This commit is contained in:
zs 2025-11-30 21:15:05 +08:00
parent 80d1964394
commit dbd9d0cf95
8 changed files with 542 additions and 4 deletions

1
.gitignore vendored
View File

@ -9,3 +9,4 @@
!/LICENSE
!/README.md
!/meow/
!/ccls_schema.dl

View File

@ -213,6 +213,7 @@ target_sources(ccls PRIVATE
src/query.cc
src/sema_manager.cc
src/serializer.cc
src/souffle_exporter.cc
src/test.cc
src/utils.cc
src/working_files.cc

25
ccls_schema.dl Normal file
View File

@ -0,0 +1,25 @@
// Schema for the fact files written by ccls' SouffleExporter (one tab-separated
// `<relation>.facts` file per relation declared below).
//
// Conventions used by the exporter:
//   - Tab, newline, carriage return and backslash inside file paths and names
//     are written as the two-character sequences \t, \n, \r and \\.
//   - Every `line` value is the stored start line plus one.
//   - Rows whose file cannot be resolved to a path are generally skipped.

// Usr is uint64_t, requires SOUFFLE_DOMAIN_64BIT build
.type Usr <: unsigned
// Path of an indexed file.
.type File <: symbol
// Detailed name of a function, type or variable.
.type Name <: symbol
.type Line <: number
// Kind name. `function`/`type` rows carry symbol-kind names such as "Function",
// "Method", "Class" or "Struct"; `symbol_in_file` rows carry one of
// "Invalid", "File", "Type", "Func", "Var". "ERROR" marks an unrecognised value.
.type Kind <: symbol
// "None", "All", or a '|'-joined list drawn from Declaration, Definition,
// Reference, Read, Write, Call, Dynamic, Address, Implicit.
.type Role <: symbol
// One row per function whose selected definition has a spelling location.
.decl function(usr: Usr, name: Name, file: File, line: Line, kind: Kind)
// One row per type whose selected definition has a spelling location.
.decl type(usr: Usr, name: Name, file: File, line: Line, kind: Kind)
// One row per variable whose selected definition has a spelling location.
.decl variable(usr: Usr, name: Name, file: File, line: Line)
// caller's definition lists callee among its callees (function callees only).
.decl calls(caller: Usr, callee: Usr)
// Populated from the `bases` list of a type definition.
.decl inherits(derived: Usr, base: Usr)
// Populated from the `bases` list of a function definition.
.decl overrides(derived: Usr, base: Usr)
// Populated from the `funcs` list of a type definition.
.decl member_func(type: Usr, func: Usr)
// Populated from the `vars` list of a type definition.
.decl member_var(type: Usr, var: Usr)
// Emitted when a variable definition records a non-zero type Usr.
.decl has_type(var: Usr, type: Usr)
// One row per recorded use of a function, type or variable.
.decl reference(usr: Usr, file: File, line: Line, role: Role)
// One row per recorded declaration of a function, type or variable.
.decl declaration(usr: Usr, file: File, line: Line)
// Emitted when the spelling of the selected definition carries the Definition role.
.decl definition(usr: Usr, file: File, line: Line)
// includee is the resolved path of an include directive found in includer.
.decl includes(includer: File, includee: File)
// Populated from the `derived` list of a function.
.decl func_derived(base: Usr, derived: Usr)
// Populated from the `derived` list of a type.
.decl type_derived(base: Usr, derived: Usr)
// Populated from the `instances` list of a type.
.decl type_instances(type: Usr, instance: Usr)
// One row per symbol occurrence in a file with a positive reference count.
// `column` is written exactly as stored (no +1 adjustment, unlike `line`).
.decl symbol_in_file(file: File, line: Line, column: number, usr: Usr, kind: Kind, role: Role)

View File

@ -41,6 +41,8 @@ opt<int> opt_verbose("v", desc("verbosity, from -3 (fatal) to 2 (verbose)"), ini
opt<std::string> opt_test_index("test-index", ValueOptional, init("!"), desc("run index tests"), cat(C));
opt<std::string> opt_index("index", desc("standalone mode: index a project and exit"), value_desc("root"), cat(C));
opt<std::string> opt_souffle_output("souffle-output", desc("export index results as Souffle facts to directory"),
value_desc("dir"), cat(C));
list<std::string> opt_init("init", desc("extra initialization options in JSON"), cat(C));
opt<std::string> opt_log_file("log-file", desc("stderr or log file"), value_desc("file"), init("stderr"), cat(C));
opt<bool> opt_log_file_append("log-file-append", desc("append to log file"), cat(C));
@ -121,7 +123,13 @@ int main(int argc, char **argv) {
if (opt_index.size()) {
SmallString<256> root(opt_index);
sys::fs::make_absolute(root);
pipeline::standalone(std::string(root.data(), root.size()));
std::string souffle_dir;
if (opt_souffle_output.size()) {
SmallString<256> souffle(opt_souffle_output);
sys::fs::make_absolute(souffle);
souffle_dir = std::string(souffle.data(), souffle.size());
}
pipeline::standalone(std::string(root.data(), root.size()), souffle_dir);
} else {
// The thread that reads from stdin and dispatches commands to the main
// thread.

View File

@ -12,6 +12,7 @@
#include "project.hh"
#include "query.hh"
#include "sema_manager.hh"
#include "souffle_exporter.hh"
#include <rapidjson/document.h>
#include <rapidjson/writer.h>
@ -764,14 +765,18 @@ void mainLoop() {
quit(manager);
}
void standalone(const std::string &root) {
void standalone(const std::string &root, const std::string &souffle_output) {
Project project;
WorkingFiles wfiles;
VFS vfs;
DB db;
SemaManager manager(
nullptr, nullptr, [](const std::string &, const std::vector<Diagnostic> &) {}, [](const RequestId &id) {});
MessageHandler handler;
if (!souffle_output.empty()) {
handler.db = &db;
}
handler.project = &project;
handler.wfiles = &wfiles;
handler.vfs = &vfs;
@ -787,7 +792,12 @@ void standalone(const std::string &root) {
printf("entries: %4d\n", entries);
}
while (1) {
(void)on_indexed->dequeueAll();
auto updates = on_indexed->dequeueAll();
if (!souffle_output.empty()) {
for (auto &update : updates) {
db.applyIndexUpdate(&update);
}
}
int64_t enqueued = stats.enqueued, completed = stats.completed;
if (tty) {
printf("\rcompleted: %4" PRId64 "/%" PRId64, completed, enqueued);
@ -799,6 +809,12 @@ void standalone(const std::string &root) {
}
if (tty)
puts("");
if (!souffle_output.empty()) {
SouffleExporter exporter(souffle_output);
exporter.exportDB(db);
}
quit(manager);
}

View File

@ -57,7 +57,7 @@ void launchStdout();
void indexer_Main(SemaManager *manager, VFS *vfs, Project *project, WorkingFiles *wfiles);
void indexerSort(const std::unordered_map<std::string, int> &dir2prio);
void mainLoop();
void standalone(const std::string &root);
void standalone(const std::string &root, const std::string &souffle_output = "");
void index(const std::string &path, const std::vector<const char *> &args, IndexMode mode, bool must_exist,
RequestId id = {});

438
src/souffle_exporter.cc Normal file
View File

@ -0,0 +1,438 @@
// Copyright 2017-2018 ccls Authors
// SPDX-License-Identifier: Apache-2.0
#include "souffle_exporter.hh"
#include "log.hh"
#include "query.hh"
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/FormatVariadic.h>
namespace ccls {
namespace {
// Renders `vals` into the llvm::formatv pattern `fmt` and appends the text to `f`.
// The result of fputs is not inspected.
template <typename... Args> void writeFmt(FILE *f, const char *fmt, Args &&...vals) {
  const std::string rendered = llvm::formatv(fmt, std::forward<Args>(vals)...).str();
  fputs(rendered.c_str(), f);
}
// Returns a copy of `str` that is safe to place in a tab-separated fact file:
// tab, newline, carriage return and backslash are replaced by the two-character
// sequences \t, \n, \r and \\; every other byte is copied unchanged.
std::string escapeTSV(const std::string &str) {
  std::string escaped;
  escaped.reserve(str.size());
  for (const char ch : str) {
    if (ch == '\t')
      escaped.append("\\t");
    else if (ch == '\n')
      escaped.append("\\n");
    else if (ch == '\r')
      escaped.append("\\r");
    else if (ch == '\\')
      escaped.append("\\\\");
    else
      escaped.push_back(ch);
  }
  return escaped;
}
// Looks up `file_id` in `db.files` and returns the TSV-escaped path of that file.
// Returns an empty string when the id is out of range or the file has no definition.
std::string getEscapeFilePath(const DB &db, int file_id) {
  const bool in_range = file_id >= 0 && file_id < static_cast<int>(db.files.size());
  if (in_range) {
    const auto &entry = db.files[file_id];
    if (entry.def)
      return escapeTSV(entry.def->path);
  }
  return "";
}
// Maps a Kind to its textual name; any value not listed below yields "ERROR".
const char *kind2Str(Kind kind) {
  if (kind == Kind::Invalid)
    return "Invalid";
  if (kind == Kind::File)
    return "File";
  if (kind == Kind::Type)
    return "Type";
  if (kind == Kind::Func)
    return "Func";
  if (kind == Kind::Var)
    return "Var";
  return "ERROR";
}
// Renders a Role bit set as text. Role::None and Role::All map to "None" and
// "All"; otherwise the names of all set flags are joined with '|' in the fixed
// order of the table below. A value with none of the listed flags yields "None".
std::string role2Str(Role role) {
  if (role == Role::None)
    return "None";
  if (role == Role::All)
    return "All";

  struct Flag {
    Role bit;
    const char *label;
  };
  static const Flag kFlags[] = {
      {Role::Declaration, "Declaration"}, {Role::Definition, "Definition"}, {Role::Reference, "Reference"},
      {Role::Read, "Read"},               {Role::Write, "Write"},           {Role::Call, "Call"},
      {Role::Dynamic, "Dynamic"},         {Role::Address, "Address"},       {Role::Implicit, "Implicit"},
  };

  const uint16_t mask = static_cast<uint16_t>(role);
  std::string joined;
  for (const Flag &flag : kFlags) {
    if (!(mask & static_cast<uint16_t>(flag.bit)))
      continue;
    if (!joined.empty())
      joined += "|";
    joined += flag.label;
  }
  if (joined.empty())
    return "None";
  return joined;
}
// Maps a SymbolKind to a string identical to the enumerator's own name; any
// value not listed below yields "ERROR".
const char *symbolKind2Str(SymbolKind kind) {
// Expands to `case SymbolKind::X: return "X";` so label and text cannot drift apart.
#define CCLS_SYMBOL_KIND_CASE(enumerator)                                                                              \
  case SymbolKind::enumerator:                                                                                         \
    return #enumerator;
  switch (kind) {
    CCLS_SYMBOL_KIND_CASE(Unknown)
    CCLS_SYMBOL_KIND_CASE(File)
    CCLS_SYMBOL_KIND_CASE(Module)
    CCLS_SYMBOL_KIND_CASE(Namespace)
    CCLS_SYMBOL_KIND_CASE(Package)
    CCLS_SYMBOL_KIND_CASE(Class)
    CCLS_SYMBOL_KIND_CASE(Method)
    CCLS_SYMBOL_KIND_CASE(Property)
    CCLS_SYMBOL_KIND_CASE(Field)
    CCLS_SYMBOL_KIND_CASE(Constructor)
    CCLS_SYMBOL_KIND_CASE(Enum)
    CCLS_SYMBOL_KIND_CASE(Interface)
    CCLS_SYMBOL_KIND_CASE(Function)
    CCLS_SYMBOL_KIND_CASE(Variable)
    CCLS_SYMBOL_KIND_CASE(Constant)
    CCLS_SYMBOL_KIND_CASE(String)
    CCLS_SYMBOL_KIND_CASE(Number)
    CCLS_SYMBOL_KIND_CASE(Boolean)
    CCLS_SYMBOL_KIND_CASE(Array)
    CCLS_SYMBOL_KIND_CASE(Object)
    CCLS_SYMBOL_KIND_CASE(Key)
    CCLS_SYMBOL_KIND_CASE(Null)
    CCLS_SYMBOL_KIND_CASE(EnumMember)
    CCLS_SYMBOL_KIND_CASE(Struct)
    CCLS_SYMBOL_KIND_CASE(Event)
    CCLS_SYMBOL_KIND_CASE(Operator)
    CCLS_SYMBOL_KIND_CASE(TypeParameter)
    CCLS_SYMBOL_KIND_CASE(FirstNonStandard)
    CCLS_SYMBOL_KIND_CASE(TypeAlias)
    CCLS_SYMBOL_KIND_CASE(Parameter)
    CCLS_SYMBOL_KIND_CASE(StaticMethod)
    CCLS_SYMBOL_KIND_CASE(Macro)
  default:
    return "ERROR";
  }
#undef CCLS_SYMBOL_KIND_CASE
}
} // namespace
// Creates `output_dir` (including missing parents) and opens one
// `<relation>.facts` file inside it for writing, truncating existing files.
//
// Failures are logged, not thrown: if the directory cannot be created every
// handle stays null; if an individual file cannot be opened only that handle
// stays null.
SouffleExporter::SouffleExporter(const std::string &output_dir)
    : function_file_(nullptr), type_file_(nullptr), variable_file_(nullptr), calls_file_(nullptr),
      inherits_file_(nullptr), overrides_file_(nullptr), member_func_file_(nullptr), member_var_file_(nullptr),
      has_type_file_(nullptr), reference_file_(nullptr), declaration_file_(nullptr), definition_file_(nullptr),
      includes_file_(nullptr), func_derived_file_(nullptr), type_derived_file_(nullptr), type_instances_file_(nullptr),
      symbol_in_file_file_(nullptr) {
  if (auto ec = llvm::sys::fs::create_directories(output_dir)) {
    LOG_S(ERROR) << "Failed to create output directory " << output_dir << ": " << ec.message();
    return;
  }

  auto open_file = [&output_dir](const char *name) {
    std::string path = output_dir + "/" + name;
    FILE *f = fopen(path.c_str(), "w");
    if (!f) {
      // Capture errno immediately: the logging front-end runs before the stream
      // operands are evaluated and may overwrite it.
      const int open_errno = errno;
      LOG_S(ERROR) << "Failed to open " << path << " for writing. errno: " << open_errno;
    }
    return f;
  };

  function_file_ = open_file("function.facts");
  type_file_ = open_file("type.facts");
  variable_file_ = open_file("variable.facts");
  calls_file_ = open_file("calls.facts");
  inherits_file_ = open_file("inherits.facts");
  overrides_file_ = open_file("overrides.facts");
  member_func_file_ = open_file("member_func.facts");
  member_var_file_ = open_file("member_var.facts");
  has_type_file_ = open_file("has_type.facts");
  reference_file_ = open_file("reference.facts");
  declaration_file_ = open_file("declaration.facts");
  definition_file_ = open_file("definition.facts");
  includes_file_ = open_file("includes.facts");
  func_derived_file_ = open_file("func_derived.facts");
  type_derived_file_ = open_file("type_derived.facts");
  type_instances_file_ = open_file("type_instances.facts");
  symbol_in_file_file_ = open_file("symbol_in_file.facts");
}
// Closes every fact file that was opened and resets its handle to null.
//
// fclose is where buffered rows are finally written out, so a failure here
// (for example a full disk) means the file on disk is incomplete; it is logged
// rather than silently ignored.
SouffleExporter::~SouffleExporter() {
  struct Slot {
    const char *relation;
    FILE **handle;
  };
  const Slot slots[] = {
      {"function", &function_file_},
      {"type", &type_file_},
      {"variable", &variable_file_},
      {"calls", &calls_file_},
      {"inherits", &inherits_file_},
      {"overrides", &overrides_file_},
      {"member_func", &member_func_file_},
      {"member_var", &member_var_file_},
      {"has_type", &has_type_file_},
      {"reference", &reference_file_},
      {"declaration", &declaration_file_},
      {"definition", &definition_file_},
      {"includes", &includes_file_},
      {"func_derived", &func_derived_file_},
      {"type_derived", &type_derived_file_},
      {"type_instances", &type_instances_file_},
      {"symbol_in_file", &symbol_in_file_file_},
  };
  for (const Slot &slot : slots) {
    if (!*slot.handle)
      continue;
    if (fclose(*slot.handle) != 0)
      LOG_S(ERROR) << "Failed to flush and close " << slot.relation << ".facts; its contents may be incomplete";
    *slot.handle = nullptr;
  }
}
// Writes the whole database to the fact files: first per-file facts, then
// functions, types and variables, in that order. Logs a summary of the entity
// counts before writing.
void SouffleExporter::exportDB(const DB &db) {
  // Container sizes are unsigned; accumulate in size_t to avoid narrowing to int.
  size_t total_symbols = 0;
  for (const auto &file : db.files)
    total_symbols += file.symbol2refcnt.size();

  LOG_S(INFO) << "Exporting " << total_symbols << " symbols, " << db.funcs.size() << " functions, " << db.types.size()
              << " types, " << db.vars.size() << " variables, " << db.files.size() << " files";

  for (const auto &file : db.files)
    exportFile(file);
  for (const auto &func : db.funcs)
    exportFunction(func, db);
  for (const auto &type : db.types)
    exportType(type, db);
  for (const auto &var : db.vars)
    exportVariable(var, db);
}
// Emits the per-file relations for `file`:
//   includes(file, resolved include path)       - one row per include
//   symbol_in_file(file, line, column, usr, kind, role)
//                                               - one row per symbol whose
//                                                 reference count is positive
// Files without a definition are skipped. Each relation is written only if its
// output file was opened successfully.
void SouffleExporter::exportFile(const QueryFile &file) {
  if (!file.def)
    return;

  // The path is the same for every row of this file, so escape it once.
  const std::string escaped_path = escapeTSV(file.def->path);

  if (includes_file_) {
    for (const auto &include : file.def->includes)
      writeFmt(includes_file_, "{0}\t{1}\n", escaped_path, escapeTSV(include.resolved_path));
  }

  if (symbol_in_file_file_) {
    for (const auto &[sym, refcnt] : file.symbol2refcnt) {
      if (refcnt <= 0)
        continue;
      // Line is written as start line + 1; the column is written as stored.
      writeFmt(symbol_in_file_file_, "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n", escaped_path, sym.range.start.line + 1,
               sym.range.start.column, sym.usr, kind2Str(sym.kind), role2Str(sym.role));
    }
  }
}
// Emits all facts about one function:
//   function / definition  - from the spelling of the selected definition
//   calls                  - one row per callee of kind Func
//   overrides              - one row per entry in the definition's bases
//   declaration            - one row per declaration
//   func_derived           - one row per entry in `derived`
//   reference              - one row per use
// Nothing is written unless every output file used here was opened
// successfully. Rows that need a file path are skipped when the path cannot be
// resolved, so no relation ever receives an empty File column.
void SouffleExporter::exportFunction(const QueryFunc &func, const DB &db) {
  if (!function_file_ || !definition_file_ || !calls_file_ || !overrides_file_ || !declaration_file_ ||
      !func_derived_file_ || !reference_file_) {
    return;
  }

  if (auto def = func.anyDef()) {
    if (def->spell) {
      auto def_file = getEscapeFilePath(db, def->spell->file_id);
      auto def_line = def->spell->range.start.line + 1;
      if (!def_file.empty()) {
        writeFmt(function_file_, "{0}\t{1}\t{2}\t{3}\t{4}\n", func.usr, escapeTSV(def->detailed_name), def_file,
                 def_line, symbolKind2Str(def->kind));
        // Guarded by the path check as well: a definition row without a file
        // would be inconsistent with every other relation.
        if (def->spell->role & Role::Definition) {
          writeFmt(definition_file_, "{0}\t{1}\t{2}\n", func.usr, def_file, def_line);
        }
      }
    }
    for (const auto &callee : def->callees) {
      if (callee.kind == Kind::Func) {
        writeFmt(calls_file_, "{0}\t{1}\n", func.usr, callee.usr);
      }
    }
    for (const auto &base_usr : def->bases) {
      writeFmt(overrides_file_, "{0}\t{1}\n", func.usr, base_usr);
    }
  }

  for (const auto &decl : func.declarations) {
    std::string decl_file = getEscapeFilePath(db, decl.file_id);
    int decl_line = decl.range.start.line + 1;
    if (!decl_file.empty()) {
      writeFmt(declaration_file_, "{0}\t{1}\t{2}\n", func.usr, decl_file, decl_line);
    }
  }

  for (const auto &derived_usr : func.derived) {
    writeFmt(func_derived_file_, "{0}\t{1}\n", func.usr, derived_usr);
  }

  for (const auto &use : func.uses) {
    std::string use_file = getEscapeFilePath(db, use.file_id);
    int use_line = use.range.start.line + 1;
    if (!use_file.empty()) {
      writeFmt(reference_file_, "{0}\t{1}\t{2}\t{3}\n", func.usr, use_file, use_line, role2Str(use.role));
    }
  }
}
// Emits all facts about one type:
//   type / definition      - from the spelling of the selected definition
//   inherits               - one row per entry in the definition's bases
//   member_func            - one row per entry in the definition's funcs
//   member_var             - one row per entry in the definition's vars
//   declaration            - one row per declaration
//   type_derived           - one row per entry in `derived`
//   type_instances         - one row per entry in `instances`
//   reference              - one row per use
// Nothing is written unless every output file used here was opened
// successfully. Rows that need a file path are skipped when the path cannot be
// resolved, so no relation ever receives an empty File column.
void SouffleExporter::exportType(const QueryType &type, const DB &db) {
  if (!type_file_ || !definition_file_ || !inherits_file_ || !member_func_file_ || !member_var_file_ ||
      !declaration_file_ || !type_derived_file_ || !type_instances_file_ || !reference_file_) {
    return;
  }

  if (auto def = type.anyDef()) {
    if (def->spell) {
      auto def_file = getEscapeFilePath(db, def->spell->file_id);
      auto def_line = def->spell->range.start.line + 1;
      if (!def_file.empty()) {
        writeFmt(type_file_, "{0}\t{1}\t{2}\t{3}\t{4}\n", type.usr, escapeTSV(def->detailed_name), def_file, def_line,
                 symbolKind2Str(def->kind));
        // Guarded by the path check as well: a definition row without a file
        // would be inconsistent with every other relation.
        if (def->spell->role & Role::Definition) {
          writeFmt(definition_file_, "{0}\t{1}\t{2}\n", type.usr, def_file, def_line);
        }
      }
    }
    for (const auto &base_usr : def->bases) {
      writeFmt(inherits_file_, "{0}\t{1}\n", type.usr, base_usr);
    }
    for (const auto &func_usr : def->funcs) {
      writeFmt(member_func_file_, "{0}\t{1}\n", type.usr, func_usr);
    }
    // Only the Usr half of each entry is exported; the second half is unused here.
    for (const auto &[var_usr, offset] : def->vars) {
      writeFmt(member_var_file_, "{0}\t{1}\n", type.usr, var_usr);
    }
  }

  for (const auto &decl : type.declarations) {
    auto decl_file = getEscapeFilePath(db, decl.file_id);
    auto decl_line = decl.range.start.line + 1;
    if (!decl_file.empty()) {
      writeFmt(declaration_file_, "{0}\t{1}\t{2}\n", type.usr, decl_file, decl_line);
    }
  }

  for (const auto &derived_usr : type.derived) {
    writeFmt(type_derived_file_, "{0}\t{1}\n", type.usr, derived_usr);
  }

  for (const auto &instance_usr : type.instances) {
    writeFmt(type_instances_file_, "{0}\t{1}\n", type.usr, instance_usr);
  }

  for (const auto &use : type.uses) {
    auto use_file = getEscapeFilePath(db, use.file_id);
    auto use_line = use.range.start.line + 1;
    if (!use_file.empty()) {
      writeFmt(reference_file_, "{0}\t{1}\t{2}\t{3}\n", type.usr, use_file, use_line, role2Str(use.role));
    }
  }
}
// Emits all facts about one variable:
//   variable / definition  - from the spelling of the selected definition
//   has_type               - when that definition records a non-zero type Usr
//   declaration            - one row per declaration
//   reference              - one row per use
// Nothing is written unless every output file used here was opened
// successfully. Rows that need a file path are skipped when the path cannot be
// resolved, so no relation ever receives an empty File column.
void SouffleExporter::exportVariable(const QueryVar &var, const DB &db) {
  if (!variable_file_ || !definition_file_ || !has_type_file_ || !declaration_file_ || !reference_file_) {
    return;
  }

  auto def = var.anyDef();
  if (def && def->spell) {
    auto def_file = getEscapeFilePath(db, def->spell->file_id);
    auto def_line = def->spell->range.start.line + 1;
    if (!def_file.empty()) {
      writeFmt(variable_file_, "{0}\t{1}\t{2}\t{3}\n", var.usr, escapeTSV(def->detailed_name), def_file, def_line);
      // Guarded by the path check as well: a definition row without a file
      // would be inconsistent with every other relation.
      if (def->spell->role & Role::Definition) {
        writeFmt(definition_file_, "{0}\t{1}\t{2}\n", var.usr, def_file, def_line);
      }
    }
    // has_type_file_ is already known to be open thanks to the guard above.
    if (def->type) {
      writeFmt(has_type_file_, "{0}\t{1}\n", var.usr, def->type);
    }
  }

  for (const auto &decl : var.declarations) {
    auto decl_file = getEscapeFilePath(db, decl.file_id);
    auto decl_line = decl.range.start.line + 1;
    if (!decl_file.empty()) {
      writeFmt(declaration_file_, "{0}\t{1}\t{2}\n", var.usr, decl_file, decl_line);
    }
  }

  for (const auto &use : var.uses) {
    auto use_file = getEscapeFilePath(db, use.file_id);
    auto use_line = use.range.start.line + 1;
    if (!use_file.empty()) {
      writeFmt(reference_file_, "{0}\t{1}\t{2}\t{3}\n", var.usr, use_file, use_line, role2Str(use.role));
    }
  }
}
} // namespace ccls

49
src/souffle_exporter.hh Normal file
View File

@ -0,0 +1,49 @@
// Copyright 2017-2018 ccls Authors
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <stdio.h>
#include <string>
namespace ccls {
struct QueryFunc;
struct QueryType;
struct QueryVar;
struct QueryFile;
struct DB;

// Writes the contents of a DB as tab-separated Souffle fact files, one
// `<relation>.facts` file per relation, inside a single output directory.
//
// The object owns the open FILE handles and closes them in its destructor, so
// it is neither copyable nor movable: a copy would close the same handles twice.
class SouffleExporter {
public:
  // Creates `output_dir` if needed and opens the fact files in it. Failures
  // are logged; the affected handles stay null.
  explicit SouffleExporter(const std::string &output_dir);
  ~SouffleExporter();

  SouffleExporter(const SouffleExporter &) = delete;
  SouffleExporter &operator=(const SouffleExporter &) = delete;

  // Writes facts for every file, function, type and variable in `db`.
  void exportDB(const DB &db);

private:
  void exportFile(const QueryFile &file);
  void exportFunction(const QueryFunc &func, const DB &db);
  void exportType(const QueryType &type, const DB &db);
  void exportVariable(const QueryVar &var, const DB &db);

  // One handle per relation; null when the file could not be opened.
  FILE *function_file_ = nullptr;
  FILE *type_file_ = nullptr;
  FILE *variable_file_ = nullptr;
  FILE *calls_file_ = nullptr;
  FILE *inherits_file_ = nullptr;
  FILE *overrides_file_ = nullptr;
  FILE *member_func_file_ = nullptr;
  FILE *member_var_file_ = nullptr;
  FILE *has_type_file_ = nullptr;
  FILE *reference_file_ = nullptr;
  FILE *declaration_file_ = nullptr;
  FILE *definition_file_ = nullptr;
  FILE *includes_file_ = nullptr;
  FILE *func_derived_file_ = nullptr;
  FILE *type_derived_file_ = nullptr;
  FILE *type_instances_file_ = nullptr;
  FILE *symbol_in_file_file_ = nullptr;
};
} // namespace ccls