diff --git a/command_line.cc b/command_line.cc
new file mode 100644
index 00000000..1f4bc18b
--- /dev/null
+++ b/command_line.cc
@@ -0,0 +1,184 @@
+#include
+#include
+#include
+
+#include "compilation_database_loader.h"
+#include "indexer.h"
+#include "query.h"
+// Parses "declaration" / "definition" into |*obj|. Returns false (leaving |*obj| untouched) for anything else.
+bool ParsePreferredSymbolLocation(const std::string& content, PreferredSymbolLocation* obj) {
+#define PARSE_AS(name, string) \
+  if (content == string) { \
+    *obj = name; \
+    return true; \
+  }
+  // |string| is already a quoted literal: compare it directly. Stringizing it with # would embed the quotes.
+  PARSE_AS(PreferredSymbolLocation::Declaration, "declaration");
+  PARSE_AS(PreferredSymbolLocation::Definition, "definition");
+
+  return false;
+#undef PARSE_AS
+}
+// Parses a --command value such as "callees" into |*obj|. Returns false for an unrecognized name.
+bool ParseCommand(const std::string& content, Command* obj) {
+#define PARSE_AS(name, string) \
+  if (content == string) { \
+    *obj = name; \
+    return true; \
+  }
+  // As above, |string| is compared as-is (no # stringizing).
+  PARSE_AS(Command::Callees, "callees");
+  PARSE_AS(Command::Callers, "callers");
+  PARSE_AS(Command::FindAllUsages, "find-all-usages");
+  PARSE_AS(Command::FindInterestingUsages, "find-interesting-usages");
+  PARSE_AS(Command::GotoReferenced, "goto-referenced");
+  PARSE_AS(Command::Hierarchy, "hierarchy");
+  PARSE_AS(Command::Outline, "outline");
+  PARSE_AS(Command::Search, "search");
+
+  return false;
+#undef PARSE_AS
+}
+// Collects "-switch [value]" pairs from argv into a map; a switch without a value maps to "".
+// Prints an error and exits with status 1 if a value appears with no switch directly before it.
+std::unordered_map ParseOptions(int argc, char** argv) {
+  std::unordered_map output;
+
+  std::string previous_arg;
+
+  for (int i = 1; i < argc; ++i) {
+    std::string arg = argv[i];
+
+    if (arg[0] != '-') {
+      if (previous_arg.size() == 0) {
+        std::cerr << "Invalid arguments; switches must start with -" << std::endl;
+        exit(1);
+      }
+
+      output[previous_arg] = arg;
+      previous_arg = "";
+    }
+    else {
+      output[arg] = "";
+      previous_arg = arg;
+    }
+  }
+
+  return output;
+}
+// Returns true if |option| was present on the command line.
+bool HasOption(const std::unordered_map& options, const std::string& option) {
+  return options.find(option) != options.end();
+}
+// Command line front end: prints help text, lists the files for --project, or validates --command.
+int main2(int argc, char** argv) {
+  std::unordered_map options = ParseOptions(argc, argv);
+
+  if (argc == 1 || options.find("--help") != options.end()) {
+    std::cout << R"help(clang-indexer help:
+
+  General:
+    --help          Print this help information.
+    --help-commands
+                    Print all available query commands.
+    --project       Path to compile_commands.json. Needed for the server, and
+                    optionally by clients if there are multiple servers running.
+    --print-config
+                    Emit all configuration data this executable is using.
+
+
+  Server:
+    --server        If present, this binary will run in server mode. The binary
+                    will not return until killed or an exit is requested. The
+                    server computes and caches an index of the entire program
+                    which is then queried by short-lived client processes. A
+                    client is created by running this binary with a --command
+                    flag.
+    --cache-dir     Directory to cache the index and other useful information. If
+                    a previous cache is present, the database will try to reuse
+                    it. If this flag is not present, the database will be
+                    in-memory only.
+    --threads       Number of threads to use for indexing and querying tasks.
+                    This value is optional; a good estimate is computed by
+                    default.
+
+
+  Client:
+    --command       Execute a query command against the index. See
+                    --help-commands for a listing of valid commands and a
+                    description of what they do. Presence of this flag indicates
+                    that the indexer is in client mode; this flag is mutually
+                    exclusive with --server.
+    --location      Location of the query. Some commands require only a file,
+                    other require a line and column as well. Format is
+                    filename[:line:column]. For example, "foobar.cc" and
+                    "foobar.cc:1:10" are valid inputs.
+    --preferred-symbol-location
+                    When looking up symbols, try to return either the
+                    'declaration' or the 'definition'. Defaults to 'definition'.
+)help";
+    exit(0);
+  }
+
+  if (HasOption(options, "--help-commands")) {
+    std::cout << R"(Available commands:
+
+  callees:
+  callers:
+    Emit all functions (with location) that this function calls ("callees") or
+    that call this function ("callers"). Requires a location.
+
+  find-all-usages:
+    Emit every usage of the given symbol. This is intended to support a rename
+    refactoring. This output contains many uninteresting usages of symbols;
+    prefer find-interesting-usages. Requires a location.
+
+  find-interesting-usages:
+    Emit only usages of the given symbol which are semantically interesting.
+    Requires a location.
+
+  goto-referenced:
+    Find an associated reference (either definition or declaration) for the
+    given symbol. Requires a location.
+
+  hierarchy:
+    List the type hierarchy (ie, inherited and derived members) for the given
+    method or type. Requires a location.
+
+  outline:
+    Emit a file outline, listing all of the symbols in the file.
+
+  search:
+    Search for a symbol by name.
+)";
+    exit(0);
+  }
+
+  if (HasOption(options, "--project")) {
+    std::vector entries = LoadCompilationEntriesFromDirectory(options["--project"]);
+
+
+    std::vector dbs;
+    for (const CompilationEntry& entry : entries) {
+      std::cout << "Parsing " << entry.filename << std::endl;
+      //IndexedFile db = Parse(2, entry.filename, entry.args);
+      //dbs.emplace_back(db);
+      //std::cout << db.ToString() << std::endl << std::endl;
+    }
+
+    std::cin.get();
+    exit(0);
+  }
+
+  if (HasOption(options, "--command")) {
+    Command command;
+    if (!ParseCommand(options["--command"], &command))
+      Fail("Unknown command \"" + options["--command"] + "\"; see --help-commands");
+    // NOTE(review): nothing runs |command| yet, so control falls through to the
+    // "Invalid arguments" error below even when the command parsed successfully.
+  }
+
+  std::cout << "Invalid arguments. Try --help.";
+  exit(1);
+  return 0;
+}
diff --git a/compilation_database_loader.h b/compilation_database_loader.h
index d95c26d3..cbb5638a 100644
--- a/compilation_database_loader.h
+++ b/compilation_database_loader.h
@@ -9,4 +9,8 @@ struct CompilationEntry {
   std::vector args;
 };
+// TODO: Add support for loading when there is no compilation_database.json
+// file. We will just recursively scan the directory and support a global
+// set of defines and include directories.
+ std::vector LoadCompilationEntriesFromDirectory(const std::string& project_directory); \ No newline at end of file diff --git a/full_tests/index_delta/a_v0.cc b/full_tests/index_delta/a_v0.cc new file mode 100644 index 00000000..f3d9c2f8 --- /dev/null +++ b/full_tests/index_delta/a_v0.cc @@ -0,0 +1,7 @@ +void called(); + +void caller() { + +} + +void missing() {} \ No newline at end of file diff --git a/full_tests/index_delta/a_v1.cc b/full_tests/index_delta/a_v1.cc new file mode 100644 index 00000000..5ab07112 --- /dev/null +++ b/full_tests/index_delta/a_v1.cc @@ -0,0 +1,7 @@ +void called(); + +void caller() { + called(); +} + +void added() {} \ No newline at end of file diff --git a/function_output_iterator.hpp b/function_output_iterator.hpp new file mode 100644 index 00000000..3c5a68dd --- /dev/null +++ b/function_output_iterator.hpp @@ -0,0 +1,62 @@ +// (C) Copyright Jeremy Siek 2001. +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +// Revision History: + +// 27 Feb 2001 Jeremy Siek +// Initial checkin. 
+ +#ifndef BOOST_FUNCTION_OUTPUT_ITERATOR_HPP +#define BOOST_FUNCTION_OUTPUT_ITERATOR_HPP + +#include + +namespace boost { +namespace iterators { + + template + class function_output_iterator { + typedef function_output_iterator self; + public: + typedef std::output_iterator_tag iterator_category; + typedef void value_type; + typedef void difference_type; + typedef void pointer; + typedef void reference; + + explicit function_output_iterator() {} + + explicit function_output_iterator(const UnaryFunction& f) + : m_f(f) {} + + struct output_proxy { + output_proxy(UnaryFunction& f) : m_f(f) { } + template output_proxy& operator=(const T& value) { + m_f(value); + return *this; + } + UnaryFunction& m_f; + }; + output_proxy operator*() { return output_proxy(m_f); } + self& operator++() { return *this; } + self& operator++(int) { return *this; } + private: + UnaryFunction m_f; + }; + + template + inline function_output_iterator + make_function_output_iterator(const UnaryFunction& f = UnaryFunction()) { + return function_output_iterator(f); + } + +} // namespace iterators + +using iterators::function_output_iterator; +using iterators::make_function_output_iterator; + +} // namespace boost + +#endif // BOOST_FUNCTION_OUTPUT_ITERATOR_HPP \ No newline at end of file diff --git a/indexer.cpp b/indexer.cpp index 113949dc..82ebb7cb 100644 --- a/indexer.cpp +++ b/indexer.cpp @@ -2,37 +2,47 @@ #include "serializer.h" -IndexedFile::IndexedFile() {} +IndexedFile::IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db) + : usr_to_id(usr_to_id), file_db(file_db) { + + // Preallocate any existing resolved ids. 
+ for (const auto& entry : usr_to_id->usr_to_type_id) + types.push_back(IndexedTypeDef(entry.second, entry.first)); + for (const auto& entry : usr_to_id->usr_to_func_id) + funcs.push_back(IndexedFuncDef(entry.second, entry.first)); + for (const auto& entry : usr_to_id->usr_to_var_id) + vars.push_back(IndexedVarDef(entry.second, entry.first)); +} // TODO: Optimize for const char*? TypeId IndexedFile::ToTypeId(const std::string& usr) { - auto it = usr_to_type_id.find(usr); - if (it != usr_to_type_id.end()) + auto it = usr_to_id->usr_to_type_id.find(usr); + if (it != usr_to_id->usr_to_type_id.end()) return it->second; - TypeId id(types.size()); + TypeId id(usr_to_id->group, types.size()); types.push_back(IndexedTypeDef(id, usr)); - usr_to_type_id[usr] = id; + usr_to_id->usr_to_type_id[usr] = id; return id; } FuncId IndexedFile::ToFuncId(const std::string& usr) { - auto it = usr_to_func_id.find(usr); - if (it != usr_to_func_id.end()) + auto it = usr_to_id->usr_to_func_id.find(usr); + if (it != usr_to_id->usr_to_func_id.end()) return it->second; - FuncId id(funcs.size()); + FuncId id(usr_to_id->group, funcs.size()); funcs.push_back(IndexedFuncDef(id, usr)); - usr_to_func_id[usr] = id; + usr_to_id->usr_to_func_id[usr] = id; return id; } VarId IndexedFile::ToVarId(const std::string& usr) { - auto it = usr_to_var_id.find(usr); - if (it != usr_to_var_id.end()) + auto it = usr_to_id->usr_to_var_id.find(usr); + if (it != usr_to_id->usr_to_var_id.end()) return it->second; - VarId id(vars.size()); + VarId id(usr_to_id->group, vars.size()); vars.push_back(IndexedVarDef(id, usr)); - usr_to_var_id[usr] = id; + usr_to_id->usr_to_var_id[usr] = id; return id; } @@ -50,13 +60,13 @@ VarId IndexedFile::ToVarId(const CXCursor& cursor) { IndexedTypeDef* IndexedFile::Resolve(TypeId id) { - return &types[id.local_id]; + return &types[id.id]; } IndexedFuncDef* IndexedFile::Resolve(FuncId id) { - return &funcs[id.local_id]; + return &funcs[id.id]; } IndexedVarDef* IndexedFile::Resolve(VarId 
id) { - return &vars[id.local_id]; + return &vars[id.id]; } std::string IndexedFile::ToString() { @@ -311,7 +321,7 @@ void VisitDeclForTypeUsageVisitorHandler(clang::Cursor cursor, VisitDeclForTypeU if (param->is_interesting) { IndexedTypeDef* ref_type_def = db->Resolve(ref_type_id); - Location loc = db->file_db.Resolve(cursor, true /*interesting*/); + Location loc = db->file_db->Resolve(cursor, true /*interesting*/); ref_type_def->AddUsage(loc); } } @@ -434,7 +444,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { var_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, var_def->def.short_name); //} - Location decl_loc = db->file_db.Resolve(decl->loc, false /*interesting*/); + Location decl_loc = db->file_db->Resolve(decl->loc, false /*interesting*/); if (decl->isDefinition) var_def->def.definition = decl_loc; else @@ -480,7 +490,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { func_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, func_def->def.short_name); //} - Location decl_loc = db->file_db.Resolve(decl->loc, false /*interesting*/); + Location decl_loc = db->file_db->Resolve(decl->loc, false /*interesting*/); if (decl->isDefinition) func_def->def.definition = decl_loc; else @@ -597,7 +607,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { type_def->def.short_name = decl->entityInfo->name; type_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, type_def->def.short_name); - Location decl_loc = db->file_db.Resolve(decl->loc, true /*interesting*/); + Location decl_loc = db->file_db->Resolve(decl->loc, true /*interesting*/); type_def->def.definition = decl_loc.WithInteresting(false); type_def->AddUsage(decl_loc); break; @@ -631,7 +641,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { // } assert(decl->isDefinition); - Location decl_loc = db->file_db.Resolve(decl->loc, true 
/*interesting*/); + Location decl_loc = db->file_db->Resolve(decl->loc, true /*interesting*/); type_def->def.definition = decl_loc.WithInteresting(false); type_def->AddUsage(decl_loc); @@ -660,7 +670,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { } default: - std::cout << "!! Unhandled indexDeclaration: " << clang::Cursor(decl->cursor).ToString() << " at " << db->file_db.Resolve(decl->loc, false /*interesting*/).ToString() << std::endl; + std::cout << "!! Unhandled indexDeclaration: " << clang::Cursor(decl->cursor).ToString() << " at " << db->file_db->Resolve(decl->loc, false /*interesting*/).ToString() << std::endl; std::cout << " entityInfo->kind = " << decl->entityInfo->kind << std::endl; std::cout << " entityInfo->USR = " << decl->entityInfo->USR << std::endl; if (decl->declAsContainer) @@ -696,7 +706,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re { VarId var_id = db->ToVarId(ref->referencedEntity->cursor); IndexedVarDef* var_def = db->Resolve(var_id); - var_def->uses.push_back(db->file_db.Resolve(ref->loc, false /*interesting*/)); + var_def->uses.push_back(db->file_db->Resolve(ref->loc, false /*interesting*/)); break; } @@ -718,7 +728,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re // Don't report duplicate usages. // TODO: search full history? 
- Location loc = db->file_db.Resolve(ref->loc, false /*interesting*/); + Location loc = db->file_db->Resolve(ref->loc, false /*interesting*/); if (param->last_func_usage_location == loc) break; param->last_func_usage_location = loc; @@ -746,8 +756,8 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re if (ref->referencedEntity->kind == CXIdxEntity_CXXConstructor || ref->referencedEntity->kind == CXIdxEntity_CXXDestructor) { - Location parent_loc = db->file_db.Resolve(ref->parentEntity->cursor, true /*interesting*/); - Location our_loc = db->file_db.Resolve(ref->loc, true /*is_interesting*/); + Location parent_loc = db->file_db->Resolve(ref->parentEntity->cursor, true /*interesting*/); + Location our_loc = db->file_db->Resolve(ref->loc, true /*is_interesting*/); if (!parent_loc.IsEqualTo(our_loc)) { IndexedFuncDef* called_def = db->Resolve(called_id); assert(called_def->def.declaring_type.has_value()); @@ -783,16 +793,16 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re // Foo f; // } // - referenced_def->AddUsage(db->file_db.Resolve(ref->loc, false /*interesting*/)); + referenced_def->AddUsage(db->file_db->Resolve(ref->loc, false /*interesting*/)); break; } default: - std::cout << "!! Unhandled indexEntityReference: " << cursor.ToString() << " at " << db->file_db.Resolve(ref->loc, false /*interesting*/).ToString() << std::endl; + std::cout << "!! 
Unhandled indexEntityReference: " << cursor.ToString() << " at " << db->file_db->Resolve(ref->loc, false /*interesting*/).ToString() << std::endl; std::cout << " ref->referencedEntity->kind = " << ref->referencedEntity->kind << std::endl; if (ref->parentEntity) std::cout << " ref->parentEntity->kind = " << ref->parentEntity->kind << std::endl; - std::cout << " ref->loc = " << db->file_db.Resolve(ref->loc, false /*interesting*/).ToString() << std::endl; + std::cout << " ref->loc = " << db->file_db->Resolve(ref->loc, false /*interesting*/).ToString() << std::endl; std::cout << " ref->kind = " << ref->kind << std::endl; if (ref->parentEntity) std::cout << " parentEntity = " << clang::Cursor(ref->parentEntity->cursor).ToString() << std::endl; @@ -807,7 +817,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re static bool DUMP_AST = true; -IndexedFile Parse(std::string filename, std::vector args) { +IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector args) { clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/); clang::TranslationUnit tu(index, filename, args); @@ -830,7 +840,7 @@ IndexedFile Parse(std::string filename, std::vector args) { */ }; - IndexedFile db; + IndexedFile db(usr_to_id, file_db); NamespaceHelper ns; IndexParam param(&db, &ns); clang_indexTranslationUnit(index_action, ¶m, callbacks, sizeof(callbacks), @@ -971,7 +981,9 @@ int main(int argc, char** argv) { // Run test. 
std::cout << "[START] " << path << std::endl;
-    IndexedFile db = Parse(path, {});
+    UsrToIdResolver usr_to_id(1);
+    FileDb file_db(1);
+    IndexedFile db = Parse(&usr_to_id, &file_db, path, {});
     std::string actual_output = db.ToString();
     //WriteToFile("output.json", actual_output);
diff --git a/indexer.h b/indexer.h
index a4662972..acd975c5 100644
--- a/indexer.h
+++ b/indexer.h
@@ -23,25 +23,154 @@ struct IndexedTypeDef;
 struct IndexedFuncDef;
 struct IndexedVarDef;
-using FileId = int64_t;
 using namespace std::experimental;
+
+using GroupId = int;
+// Typed identifier. |id| values are only comparable within the same |group| (asserted below).
+template
+struct Id {
+  GroupId group;
+  uint64_t id;
+
+  Id() : group(0), id(0) {} // Needed for containers. Do not use directly. |group| is zeroed so the asserts below never read garbage.
+  Id(GroupId group, uint64_t id) : group(group), id(id) {}
+
+  bool operator==(const Id& other) const {
+    assert(group == other.group && "Cannot compare Ids from different groups");
+    return id == other.id;
+  }
+
+  bool operator<(const Id& other) const {
+    assert(group == other.group);
+    return id < other.id;
+  }
+};
+
+namespace std {
+  template
+  struct hash> {
+    size_t operator()(const Id& k) const {
+      return ((hash()(k.id) ^ (hash()(k.group) << 1)) >> 1);
+    }
+  };
+}
+
+
+template
+bool operator==(const Id& a, const Id& b) {
+  assert(a.group == b.group && "Cannot compare Ids from different groups");
+  return a.id == b.id;
+}
+
+struct _FakeFileType {};
+using FileId = Id<_FakeFileType>;
+using TypeId = Id;
+using FuncId = Id;
+using VarId = Id;
+// Source position: file (group + id), line and column, plus an |interesting| flag.
+struct Location {
+  bool interesting;
+  int raw_file_group;
+  int raw_file_id;
+  int line;
+  int column;
+
+  Location() {
+    interesting = false;
+    raw_file_group = -1;
+    raw_file_id = -1;
+    line = -1;
+    column = -1;
+  }
+
+  Location(bool interesting, FileId file, uint32_t line, uint32_t column) {
+    this->interesting = interesting;
+    this->raw_file_group = file.group;
+    this->raw_file_id = file.id;
+    this->line = line;
+    this->column = column;
+  }
+  // Rebuilds the FileId from the stored raw pair; the Id constructor takes (group, id) in that order.
+  FileId file_id() {
+    return FileId(raw_file_group, raw_file_id);
+  }
+
+  std::string ToString() {
+    // Output looks like this:
+    //
+    //  *1:2:3
+    //
+    // * => interesting
+    // 1 => file id
+    // 2 => line
+    // 3 => column
+
+    std::string result;
+    if (interesting)
+      result += '*';
+    result += std::to_string(raw_file_id);
+    result += ':';
+    result += std::to_string(line);
+    result += ':';
+    result += std::to_string(column);
+    return result;
+  }
+
+  // Compare two Locations and check if they are equal. Ignores the value of
+  // |interesting|.
+  // operator== doesn't seem to work properly...
+  bool IsEqualTo(const Location& o) const {
+    // When comparing, ignore the value of |interesting|.
+    return
+      raw_file_group == o.raw_file_group &&
+      raw_file_id == o.raw_file_id &&
+      line == o.line &&
+      column == o.column;
+  }
+  // Equality ignores |interesting| (see IsEqualTo); the ordering below does compare it, as its first key.
+  bool operator==(const Location& o) const {
+    return IsEqualTo(o);
+  }
+  bool operator<(const Location& o) const {  // Lexicographic, so it is a valid strict weak ordering.
+    return
+      interesting != o.interesting ? interesting < o.interesting :
+      raw_file_group != o.raw_file_group ? raw_file_group < o.raw_file_group :
+      raw_file_id != o.raw_file_id ? raw_file_id < o.raw_file_id :
+      line != o.line ? line < o.line :
+      column < o.column;
+  }
+
+  Location WithInteresting(bool interesting) {
+    Location result = *this;
+    result.interesting = interesting;
+    return result;
+  }
+};
+
+#if false
 // TODO: Move off of this weird wrapper, use struct with custom wrappers
 // directly.
 BEGIN_BITFIELD_TYPE(Location, uint64_t)
-ADD_BITFIELD_MEMBER(interesting, /*start:*/ 0, /*len:*/ 1); // 2 values
-ADD_BITFIELD_MEMBER(file_id, /*start:*/ 1, /*len:*/ 29); // 536,870,912 values
-ADD_BITFIELD_MEMBER(line, /*start:*/ 30, /*len:*/ 20); // 1,048,576 values
-ADD_BITFIELD_MEMBER(column, /*start:*/ 50, /*len:*/ 14); // 16,384 values
+ADD_BITFIELD_MEMBER(interesting, /*start:*/ 0, /*len:*/ 1); // 2 values
+ADD_BITFIELD_MEMBER(raw_file_group, /*start:*/ 1, /*len:*/ 4); // 16 values, ok if they wrap around.
+ADD_BITFIELD_MEMBER(raw_file_id, /*start:*/ 5, /*len:*/ 25); // 33,554,432 values +ADD_BITFIELD_MEMBER(line, /*start:*/ 30, /*len:*/ 20); // 1,048,576 values +ADD_BITFIELD_MEMBER(column, /*start:*/ 50, /*len:*/ 14); // 16,384 values -Location(bool interesting, FileId file_id, uint32_t line, uint32_t column) { +Location(bool interesting, FileId file, uint32_t line, uint32_t column) { this->interesting = interesting; - this->file_id = file_id; + this->raw_file_group = file.group; + this->raw_file_id = file.id; this->line = line; this->column = column; } +FileId file_id() { + return FileId(raw_file_id, raw_file_group); +} + std::string ToString() { // Output looks like this: // @@ -55,7 +184,7 @@ std::string ToString() { std::string result; if (interesting) result += '*'; - result += std::to_string(file_id); + result += std::to_string(raw_file_id); result += ':'; result += std::to_string(line); result += ':'; @@ -78,15 +207,17 @@ Location WithInteresting(bool interesting) { } END_BITFIELD_TYPE() +#endif -struct IndexedFileDb { +struct FileDb { + GroupId group; std::unordered_map file_path_to_file_id; std::unordered_map file_id_to_file_path; - IndexedFileDb() { + FileDb(GroupId group) : group(group) { // Reserve id 0 for unfound. - file_path_to_file_id[""] = 0; - file_id_to_file_path[0] = ""; + file_path_to_file_id[""] = FileId(group, 0); + file_id_to_file_path[FileId(group, 0)] = ""; } Location Resolve(const CXSourceLocation& cx_loc, bool interesting) { @@ -103,7 +234,7 @@ struct IndexedFileDb { file_id = it->second; } else { - file_id = file_path_to_file_id.size(); + file_id = FileId(group, file_path_to_file_id.size()); file_path_to_file_id[path] = file_id; file_id_to_file_path[file_id] = path; } @@ -128,34 +259,32 @@ struct IndexedFileDb { template -struct LocalId { - uint64_t local_id; +struct Ref { + Id id; + Location loc; - LocalId() : local_id(0) {} // Needed for containers. Do not use directly. 
-  explicit LocalId(uint64_t local_id) : local_id(local_id) {}
+  Ref(Id id, Location loc) : id(id), loc(loc) {}
-  bool operator==(const LocalId& other) {
-    return local_id == other.local_id;
+  bool operator==(const Ref& other) const {
+    return id == other.id && loc == other.loc;
+  }
+  bool operator!=(const Ref& other) const {
+    return !(*this == other);
+  }
+  bool operator<(const Ref& other) const {  // Lexicographic: order by |id|, break ties with |loc|.
+    return id < other.id || (!(other.id < id) && loc < other.loc);
   }
 };
 template
-bool operator==(const LocalId& a, const LocalId& b) {
-  return a.local_id == b.local_id;
+bool operator==(const Ref& a, const Ref& b) {
+  return a.id == b.id && a.loc == b.loc;
+}
+template
+bool operator!=(const Ref& a, const Ref& b) {
+  return !(a == b);
 }
-using TypeId = LocalId;
-using FuncId = LocalId;
-using VarId = LocalId;
-
-
-template
-struct Ref {
-  LocalId id;
-  Location loc;
-
-  Ref(LocalId id, Location loc) : id(id), loc(loc) {}
-};
 using TypeRef = Ref;
 using FuncRef = Ref;
 using VarRef = Ref;
@@ -197,6 +326,24 @@ struct TypeDefDefinitionData {
   std::vector vars;
   TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {}
+
+  bool operator==(const TypeDefDefinitionData& other) const {
+    return
+      id == other.id &&
+      usr == other.usr &&
+      short_name == other.short_name &&
+      qualified_name == other.qualified_name &&
+      definition == other.definition &&
+      alias_of == other.alias_of &&
+      parents == other.parents &&
+      types == other.types &&
+      funcs == other.funcs &&
+      vars == other.vars;
+  }
+
+  bool operator!=(const TypeDefDefinitionData& other) const {
+    return !(*this == other);
+  }
 };
 struct IndexedTypeDef {
@@ -213,8 +360,21 @@ struct IndexedTypeDef {
   IndexedTypeDef(TypeId id, const std::string& usr);
   void AddUsage(Location loc, bool insert_if_not_present = true);
+
+  bool operator<(const IndexedTypeDef& other) const {
+    return def.id < other.def.id;
+  }
 };
+namespace std {
+  template <>
+  struct hash {
+    size_t operator()(const IndexedTypeDef& k) const {
+      return hash()(k.def.usr);
+    }
+  };
+}
+ struct FuncDefDefinitionData { // General metadata. FuncId id; @@ -238,6 +398,23 @@ struct FuncDefDefinitionData { FuncDefDefinitionData(FuncId id, const std::string& usr) : id(id), usr(usr) { assert(usr.size() > 0); } + + bool operator==(const FuncDefDefinitionData& other) const { + return + id == other.id && + usr == other.usr && + short_name == other.short_name && + qualified_name == other.qualified_name && + definition == other.definition && + declaring_type == other.declaring_type && + base == other.base && + locals == other.locals && + callees == other.callees; + } + + bool operator!=(const FuncDefDefinitionData& other) const { + return !(*this == other); + } }; struct IndexedFuncDef { @@ -265,8 +442,22 @@ struct IndexedFuncDef { IndexedFuncDef(FuncId id, const std::string& usr) : def(id, usr) { assert(usr.size() > 0); } + + bool operator<(const IndexedFuncDef& other) const { + return def.id < other.def.id; + } }; +namespace std { + template <> + struct hash { + size_t operator()(const IndexedFuncDef& k) const { + return hash()(k.def.usr); + } + }; +} + + struct VarDefDefinitionData { // General metadata. VarId id; @@ -285,6 +476,22 @@ struct VarDefDefinitionData { optional declaring_type; VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {} + + bool operator==(const VarDefDefinitionData& other) const { + return + id == other.id && + usr == other.usr && + short_name == other.short_name && + qualified_name == other.qualified_name && + declaration == other.declaration && + definition == other.definition && + variable_type == other.variable_type && + declaring_type == other.declaring_type; + } + + bool operator!=(const VarDefDefinitionData& other) const { + return !(*this == other); + } }; struct IndexedVarDef { @@ -292,29 +499,47 @@ struct IndexedVarDef { // Usages. 
std::vector uses; - + bool is_system_def = false; IndexedVarDef(VarId id, const std::string& usr) : def(id, usr) { assert(usr.size() > 0); } + + bool operator<(const IndexedVarDef& other) const { + return def.id < other.def.id; + } }; +namespace std { + template <> + struct hash { + size_t operator()(const IndexedVarDef& k) const { + return hash()(k.def.usr); + } + }; +} -struct IndexedFile { +struct UsrToIdResolver { // NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs // to get fixed up when inserting into the real db. + GroupId group; std::unordered_map usr_to_type_id; std::unordered_map usr_to_func_id; std::unordered_map usr_to_var_id; + UsrToIdResolver(GroupId group) : group(group) {} +}; + +struct IndexedFile { + FileDb* file_db; + UsrToIdResolver* usr_to_id; + std::vector types; std::vector funcs; std::vector vars; - IndexedFileDb file_db; - - IndexedFile(); + IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db); TypeId ToTypeId(const std::string& usr); FuncId ToFuncId(const std::string& usr); @@ -332,32 +557,4 @@ struct IndexedFile { -// TODO: Maybe instead of clearing/adding diffs, we should just clear out the -// entire previous index and readd the new one? That would be simpler. -// TODO: ^^^ I don't think we can do this. It will probably stall the main -// indexer for far too long since we will have to iterate over tons of -// data. -// TODO: Idea: when indexing and joining to the main db, allow many dbs that -// are joined to. So that way even if the main db is busy we can -// still be joining. Joining the partially joined db to the main -// db should be faster since we will have larger data lanes to use. 
-struct IndexedTypeDefDiff {}; -struct IndexedFuncDefDiff {}; -struct IndexedVarDefDiff {}; - -struct IndexedFileDiff { - std::vector removed_types; - std::vector removed_funcs; - std::vector removed_vars; - - std::vector added_types; - std::vector added_funcs; - std::vector added_vars; - - // TODO: Instead of change, maybe we just remove and then add again? not sure. - std::vector changed_types; - std::vector changed_funcs; - std::vector changed_vars; -}; - -IndexedFile Parse(std::string filename, std::vector args); \ No newline at end of file +IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector args); \ No newline at end of file diff --git a/query.cc b/query.cc index 334b19fa..4b32c93f 100644 --- a/query.cc +++ b/query.cc @@ -1,19 +1,200 @@ #include "query.h" #include +#include +#include #include #include #include +#include "function_output_iterator.hpp" #include "compilation_database_loader.h" #include "optional.h" #include "indexer.h" -struct FileDatabase { - std::unordered_map filename_to_file_id; - std::unordered_map file_id_to_filename; +//#define CATCH_CONFIG_MAIN +//#include "catch.hpp" + +// TODO: Make all copy constructors explicit. + +struct IdMap { + // The first vector is indexed by TId::group. + // The second vector is indexed by TId::id. + template + using GroupMap = std::vector>; + + GroupId target_group; + int64_t next_file_id = 1; + int64_t next_type_id = 1; + int64_t next_func_id = 1; + int64_t next_var_id = 1; + + GroupMap remap_file_id; + GroupMap remap_type_id; + GroupMap remap_func_id; + GroupMap remap_var_id; + + IdMap(GroupId target_group) : target_group(target_group) {} + + template + inline TId GenericRemap(GroupMap* map, int64_t* next_id, TId from) { + // PERF: If this function is a hot-spot we can pull the group computation + // out, ie, + // + // IdMap id_map; + // GroupIdMap group_map = id_map.ResolveIdGroup(file.group) + // for (...) 
+    //   group_map.Remap(id)
+
+    // Find the group that |from| belongs to. Create groups if needed.
+    if (from.group >= map->size())
+      map->resize(from.group + 1);
+
+    // If the group doesn't have an ID already mapped out for |from|, map it.
+    /*
+    // TODO: The concern with this approach is that it going to waste huge
+    // amounts of memory, because the first 16k+ ids can be unused.
+    std::vector& group = (*map)[from.group];
+
+    if (from.id >= group.size()) {
+      group.reserve(from.id + 1);
+      for (size_t i = group.size(); i < from.id; ++i)
+        group.emplace_back(TId(target_group, (*next_id)++));
+    }
+    */
+
+    auto& group = (*map)[from.group];  // Bind by reference: a by-value copy would discard every mapping added below.
+
+    // Lookup the id from the group or add it.
+    auto it = group.find(from);
+    if (it == group.end()) {
+      TId result(target_group, (*next_id)++);
+      group[from] = result;
+      return result;
+    }
+    return it->second;
+  }
+
+  template
+  inline std::vector GenericVectorRemap(GroupMap* map, int64_t* next_id, const std::vector& from) {
+    if (from.empty())
+      return {};
+
+    int group_id = from[0].group;  // NOTE(review): every element of |from| is looked up in from[0]'s group.
+    if (group_id >= map->size())
+      map->resize(group_id + 1);
+
+    auto& group = (*map)[group_id];  // Bind by reference so ids assigned in the loop persist in |map|.
+
+    std::vector result;
+    result.reserve(from.size());
+    for (TId id : from) {
+      // Lookup the id from the group or add it.
+ auto it = group.find(id); + if (it == group.end()) { + TId new_id(target_group, (*next_id)++); + group[id] = new_id; + result.push_back(new_id); + } + else { + result.push_back(it->second); + } + } + + return result; + } + + FileId Remap(FileId from) { + return GenericRemap(&remap_file_id, &next_file_id, from); + } + Location Remap(Location from) { + FileId file = Remap(from.file_id()); + from.raw_file_group = file.group; + from.raw_file_id = file.id; + return from; + } + TypeId Remap(TypeId from) { + return GenericRemap(&remap_type_id, &next_type_id, from); + } + FuncId Remap(FuncId from) { + return GenericRemap(&remap_func_id, &next_func_id, from); + } + VarId Remap(VarId from) { + return GenericRemap(&remap_var_id, &next_var_id, from); + } + FuncRef Remap(FuncRef from) { + from.id = Remap(from.id); + from.loc = Remap(from.loc); + return from; + } + TypeDefDefinitionData Remap(TypeDefDefinitionData def) { + def.id = Remap(def.id); + if (def.definition) + def.definition = Remap(def.definition.value()); + if (def.alias_of) + def.alias_of = Remap(def.alias_of.value()); + def.parents = Remap(def.parents); + def.types = Remap(def.types); + def.funcs = Remap(def.funcs); + def.vars = Remap(def.vars); + return def; + } + FuncDefDefinitionData Remap(FuncDefDefinitionData def) { + def.id = Remap(def.id); + if (def.definition) + def.definition = Remap(def.definition.value()); + if (def.declaring_type) + def.declaring_type = Remap(def.declaring_type.value()); + if (def.base) + def.base = Remap(def.base.value()); + def.locals = Remap(def.locals); + def.callees = Remap(def.callees); + return def; + } + VarDefDefinitionData Remap(VarDefDefinitionData def) { + def.id = Remap(def.id); + if (def.declaration) + def.declaration = Remap(def.declaration.value()); + if (def.definition) + def.definition = Remap(def.definition.value()); + if (def.variable_type) + def.variable_type = Remap(def.variable_type.value()); + if (def.declaring_type) + def.declaring_type = 
Remap(def.declaring_type.value()); + return def; + } + + + //std::vector Remap(const std::vector& from) { + // return GenericVectorRemap(&remap_file_id, &next_file_id, from); + //} + std::vector Remap(const std::vector& from) { + std::vector result; + result.reserve(from.size()); + for (Location l : from) + result.push_back(Remap(l)); + return result; + } + std::vector Remap(const std::vector& from) { + return GenericVectorRemap(&remap_type_id, &next_type_id, from); + } + std::vector Remap(const std::vector& from) { + return GenericVectorRemap(&remap_func_id, &next_func_id, from); + } + std::vector Remap(const std::vector& from) { + return GenericVectorRemap(&remap_var_id, &next_var_id, from); + } + std::vector Remap(const std::vector& from) { + std::vector result; + result.reserve(from.size()); + for (FuncRef r : from) + result.push_back(Remap(r)); + return result; + } }; + + enum class SymbolKind { Type, Func, Var }; struct SymbolIdx { SymbolKind kind; @@ -45,6 +226,20 @@ struct MergeableUpdate { std::vector to_remove; }; +template +MergeableUpdate MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector& removed, const std::vector& added) { + MergeableUpdate update; + update.id = id_map->Remap(symbol_id); + update.to_remove = id_map->Remap(removed); + update.to_add = id_map->Remap(added); + return update; +} + +// NOTE: When not inside of a |def| object, there can be duplicates of the same +// information if that information is contributed from separate sources. +// If we need to avoid this duplication in the future, we will have to +// add a refcount. 
+ struct QueryableTypeDef { TypeDefDefinitionData def; std::vector derived; @@ -53,6 +248,12 @@ struct QueryableTypeDef { using DefUpdate = TypeDefDefinitionData; using DerivedUpdate = MergeableUpdate; using UsesUpdate = MergeableUpdate; + + QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed) + : def(id_map.Remap(indexed.def)) { + derived = id_map.Remap(indexed.derived); + uses = id_map.Remap(indexed.uses); + } }; struct QueryableFuncDef { @@ -67,6 +268,14 @@ struct QueryableFuncDef { using DerivedUpdate = MergeableUpdate; using CallersUpdate = MergeableUpdate; using UsesUpdate = MergeableUpdate; + + QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed) + : def(id_map.Remap(indexed.def)) { + declarations = id_map.Remap(indexed.declarations); + derived = id_map.Remap(indexed.derived); + callers = id_map.Remap(indexed.callers); + uses = id_map.Remap(indexed.uses); + } }; struct QueryableVarDef { @@ -75,9 +284,16 @@ struct QueryableVarDef { using DefUpdate = VarDefDefinitionData; using UsesUpdate = MergeableUpdate; + + QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed) + : def(id_map.Remap(indexed.def)) { + uses = id_map.Remap(indexed.uses); + } }; struct QueryableFile { + FileId file_id; + // Symbols declared in the file. std::vector declared_symbols; // Symbols which have definitions in the file. @@ -91,6 +307,8 @@ struct QueryableEntry { // The query database is heavily optimized for fast queries. It is stored // in-memory. struct QueryableDatabase { + IdMap id_map; + // Indicies between lookup vectors are related to symbols, ie, index 5 in // |qualified_names| matches index 5 in |symbols|. std::vector qualified_names; @@ -102,220 +320,324 @@ struct QueryableDatabase { std::vector vars; // |files| is indexed by FileId. Retrieve a FileId from a path using - // |file_locator|. - FileDatabase file_locator; + // |file_db|. 
+ FileDb file_db; std::vector files; + + // When importing data into the global db we need to remap ids from an + // arbitrary group into the global group. + IdMap local_id_group_to_global_id_group; }; -struct Query { -}; + + + + + struct CachedIndexedFile { // Path to the file indexed. std::string path; - - // Full in-memory storage for the index. Empty if not loaded into memory. - // |path| can be used to fetch the index from disk. - optional index; + // GroupId of the indexed file. + GroupId group; + + // TODO: Make sure that |previous_index| and |current_index| use the same id + // to USR mapping. This lets us greatly speed up difference computation. + + // The previous index. This is used for index updates, so we only apply a + // an update diff when changing the global db. + optional previous_index; + IndexedFile current_index; + + CachedIndexedFile(const IndexedFile& indexed) + : group(indexed.usr_to_id->group), current_index(indexed) {} }; -struct DocumentDiff { +struct IndexUpdate { + IdMap* id_map; + + // Type updates. + std::vector types_removed; + std::vector types_added; + std::vector types_def_changed; + std::vector types_derived; + std::vector types_uses; + + // Function updates. + std::vector funcs_removed; + std::vector funcs_added; + std::vector funcs_def_changed; + std::vector funcs_declarations; + std::vector funcs_derived; + std::vector funcs_callers; + std::vector funcs_uses; + + // Variable updates. + std::vector vars_removed; + std::vector vars_added; + std::vector vars_def_changed; + std::vector vars_uses; + + IndexUpdate(IdMap* id_map) : id_map(id_map) {} }; -// Compute a diff between |original| and |updated|. 
-//rapidjson::Document DiffIndex(rapidjson::Document original, rapidjson::Document updated) { - -//} - - -bool ParsePreferredSymbolLocation(const std::string& content, PreferredSymbolLocation* obj) { -#define PARSE_AS(name, string) \ - if (content == #string) { \ - *obj = name; \ - return true; \ - } - - PARSE_AS(PreferredSymbolLocation::Declaration, "declaration"); - PARSE_AS(PreferredSymbolLocation::Definition, "definition"); - - return false; -#undef PARSE_AS +template +TValue* TryFind(std::unordered_set& set, TValue* value) { + // TODO: Make |value| a const ref? + auto it = set.find(value); + if (it == set.end()) + return nullptr; + return *it; } -bool ParseCommand(const std::string& content, Command* obj) { -#define PARSE_AS(name, string) \ - if (content == #string) { \ - *obj = name; \ - return true; \ - } - - PARSE_AS(Command::Callees, "callees"); - PARSE_AS(Command::Callers, "callers"); - PARSE_AS(Command::FindAllUsages, "find-all-usages"); - PARSE_AS(Command::FindInterestingUsages, "find-interesting-usages"); - PARSE_AS(Command::GotoReferenced, "goto-referenced"); - PARSE_AS(Command::Hierarchy, "hierarchy"); - PARSE_AS(Command::Outline, "outline"); - PARSE_AS(Command::Search, "search"); - - return false; -#undef PARSE_AS +template +std::unordered_set CreateSet(std::vector& elements) { + std::unordered_set result; + result.reserve(elements.size()); + for (T& element : elements) + result.insert(&element); + return result; } + +// Compares |previous| and |current|, adding all elements that are +// in |previous| but not |current| to |removed|, and all elements +// that are in |current| but not |previous| to |added|. +// +// Returns true iff |removed| or |added| are non-empty. +template +bool ComputeDifferenceForUpdate( + std::vector& previous, std::vector& current, + std::vector* removed, std::vector* added) { + + // We need to sort to use std::set_difference. 
+ std::sort(previous.begin(), previous.end()); + std::sort(current.begin(), current.end()); + + // Returns the elements in |previous| that are not in |current|. + std::set_difference( + previous.begin(), previous.end(), + current.begin(), current.end(), + std::back_inserter(*removed)); + // Returns the elmeents in |current| that are not in |previous|. + std::set_difference( + current.begin(), current.end(), + previous.begin(), previous.end(), + std::back_inserter(*added)); + + return !removed->empty() || !added->empty(); +} + +#if false +template +void CompareGroups( + std::vector& previous_data, std::vector& current_data, + std::function on_removed, std::function on_added, std::function on_found) { + // TODO: It could be faster to use set_intersection and set_difference to + // compute these values. We will have to presort the input by ID, though. + + // Precompute sets so we stay around O(3N) instead of O(N^2). Otherwise + // lookups for duplicate elements will be O(N) and we need them to be O(1). + std::unordered_set previous_set = CreateSet(previous_data); + std::unordered_set current_set = CreateSet(current_data); + + // TODO: TryFind is just comparing pointers which obviously fails because they point to different memory... + + for (T* current_entry : current_set) { + // Possibly updated. + if (T* previous_entry = TryFind(previous_set, current_entry)) + on_found(previous_entry, current_entry); + // Added + else + on_added(current_entry); + } + for (T* previous_entry : previous_set) { + // Removed + if (!TryFind(current_set, previous_entry)) + on_removed(previous_entry); +} +} +#endif + +template +void CompareGroups( + std::vector& previous_data, std::vector& current_data, + std::function on_removed, std::function on_added, std::function on_found) { + // TODO: It could be faster to use set_intersection and set_difference to + // compute these values. We will have to presort the input by ID, though. 
+ + std::sort(previous_data.begin(), previous_data.end()); + std::sort(current_data.begin(), current_data.end()); + + /* + std::set_difference( + current_data.begin(), current_data.end(), + previous_data.begin(), previous_data.end(), + boost::make_function_output_iterator([](const T& val) { + + })); + */ + + auto prev_it = previous_data.begin(); + auto curr_it = current_data.begin(); + while (prev_it != previous_data.end() && curr_it != current_data.end()) { + // same id + if (prev_it->def.id == curr_it->def.id) { + on_found(&*prev_it, &*curr_it); + ++prev_it; + ++curr_it; + } + + // prev_id is smaller - prev_it has data curr_it does not have. + else if (prev_it->def.id < curr_it->def.id) { + on_removed(&*prev_it); + ++prev_it; + } + + // prev_id is bigger - curr_it has data prev_it does not have. + else { + on_added(&*curr_it); + ++curr_it; + } + } + + // if prev_it still has data, that means it is not in curr_it and was removed. + while (prev_it != previous_data.end()) { + on_removed(&*prev_it); + ++prev_it; + } + + // if curr_it still has data, that means it is not in prev_it and was added. + while (curr_it != current_data.end()) { + on_added(&*curr_it); + ++curr_it; + } +} + +// TODO: make this const correct. +IndexUpdate ComputeDiff(IdMap* id_map, IndexedFile& previous, IndexedFile& current) { +#define JOIN(a, b) a##b + // |query_name| is the name of the variable on the query type. + // |index_name| is the name of the variable on the index type. + // |type| is the type of the variable. +#define PROCESS_UPDATE_DIFF(query_name, index_name, type) \ + { \ + /* Check for changes. 
*/ \ + std::vector removed, added; \ + bool did_add = ComputeDifferenceForUpdate(JOIN(previous->, index_name), JOIN(current->, index_name), &removed, &added); \ + if (did_add) {\ + std::cout << "Adding mergeable update on " << current->def.short_name << " (" << current->def.usr << ") for field " << #index_name << std::endl; \ + JOIN(update., query_name).push_back(MakeMergeableUpdate(id_map, current->def.id, removed, added)); \ + } \ + } + + assert(previous.usr_to_id == current.usr_to_id); + assert(previous.file_db == current.file_db); + IndexUpdate update(id_map); + + // Types + CompareGroups(previous.types, current.types, + /*onRemoved:*/[&update, &id_map](IndexedTypeDef* def) { + update.types_removed.push_back(id_map->Remap(def->def.id)); + }, + /*onAdded:*/[&update, &id_map](IndexedTypeDef* def) { + update.types_added.push_back(QueryableTypeDef(*id_map, *def)); + }, + /*onChanged:*/[&update, &id_map](IndexedTypeDef* previous, IndexedTypeDef* current) { + if (previous->def != current->def) + update.types_def_changed.push_back(id_map->Remap(current->def)); + + PROCESS_UPDATE_DIFF(types_derived, derived, TypeId); + PROCESS_UPDATE_DIFF(types_uses, uses, Location); + }); + + // Functions + CompareGroups(previous.funcs, current.funcs, + /*onRemoved:*/[&update, &id_map](IndexedFuncDef* def) { + update.funcs_removed.push_back(id_map->Remap(def->def.id)); + }, + /*onAdded:*/[&update, &id_map](IndexedFuncDef* def) { + update.funcs_added.push_back(QueryableFuncDef(*id_map, *def)); + }, + /*onChanged:*/[&update, &id_map](IndexedFuncDef* previous, IndexedFuncDef* current) { + if (previous->def != current->def) + update.funcs_def_changed.push_back(id_map->Remap(current->def)); + PROCESS_UPDATE_DIFF(funcs_declarations, declarations, Location); + PROCESS_UPDATE_DIFF(funcs_derived, derived, FuncId); + PROCESS_UPDATE_DIFF(funcs_callers, callers, FuncRef); + PROCESS_UPDATE_DIFF(funcs_uses, uses, Location); + }); + + // Variables + CompareGroups(previous.vars, current.vars, + 
/*onRemoved:*/[&update, &id_map](IndexedVarDef* def) { + update.vars_removed.push_back(id_map->Remap(def->def.id)); + }, + /*onAdded:*/[&update, &id_map](IndexedVarDef* def) { + update.vars_added.push_back(QueryableVarDef(*id_map, *def)); + }, + /*onChanged:*/[&update, &id_map](IndexedVarDef* previous, IndexedVarDef* current) { + if (previous->def != current->def) + update.vars_def_changed.push_back(id_map->Remap(current->def)); + PROCESS_UPDATE_DIFF(vars_uses, uses, Location); + }); + + return update; + +#undef PROCESS_UPDATE_DIFF +#undef JOIN +} + +// Merge the contents of |source| into |destination|. +void Merge(const IndexUpdate& source, IndexUpdate* destination) { + // TODO. +} + +// Insert the contents of |update| into |db|. +void ApplyIndexUpdate(const IndexUpdate& update, QueryableDatabase* db) { + +} + + + +int ma333in(int argc, char** argv) { + // TODO: Unify UserToIdResolver and FileDb + UsrToIdResolver usr_to_id(1); + FileDb file_db(1); + + IndexedFile indexed_file_a = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v0.cc", {}); + std::cout << indexed_file_a.ToString() << std::endl; + + std::cout << std::endl; + IndexedFile indexed_file_b = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v1.cc", {}); + std::cout << indexed_file_b.ToString() << std::endl; + + // TODO: We don't need to do ID remapping when computting a diff. Well, we need to do it for the IndexUpdate. + IdMap dest_ids(2); + IndexUpdate update = ComputeDiff(&dest_ids, indexed_file_a, indexed_file_b); + + return 0; +} + + + + +// TODO: Idea: when indexing and joining to the main db, allow many dbs that +// are joined to. So that way even if the main db is busy we can +// still be joining. Joining the partially joined db to the main +// db should be faster since we will have larger data lanes to use. // TODO: I think we can run libclang multiple times in one process. So we might // only need two processes. 
Still, for perf reasons it would be good if -// we could stay in one process. +// we could stay in one process. We could probably just use shared +// memory. May want to run libclang in separate process to protect from +// crashes/issues there. // TODO: allow user to store configuration as json? file in home dir; also // allow local overrides (scan up dirs) // TODO: add opt to dump config when starting (--dump-config) -// TODO: allow user to decide some indexer choices, ie, do we define -// TODO: may want to run indexer in separate process to avoid indexer/compiler crashes? - -std::unordered_map ParseOptions(int argc, char** argv) { - std::unordered_map output; - - std::string previous_arg; - - for (int i = 1; i < argc; ++i) { - std::string arg = argv[i]; - - if (arg[0] != '-') { - if (previous_arg.size() == 0) { - std::cerr << "Invalid arguments; switches must start with -" << std::endl; - exit(1); - } - - output[previous_arg] = arg; - previous_arg = ""; - } - else { - output[arg] = ""; - previous_arg = arg; - } - } - - return output; -} - -bool HasOption(const std::unordered_map& options, const std::string& option) { - return options.find(option) != options.end(); -} - -int main2(int argc, char** argv) { - std::unordered_map options = ParseOptions(argc, argv); - - if (argc == 1 || options.find("--help") != options.end()) { - std::cout << R"help(clang-indexer help: - - General: - --help Print this help information. - --help-commands - Print all available query commands. - --project Path to compile_commands.json. Needed for the server, and - optionally by clients if there are multiple servers running. - --print-config - Emit all configuration data this executable is using. - - - Server: - --server If present, this binary will run in server mode. The binary - will not return until killed or an exit is requested. The - server computes and caches an index of the entire program - which is then queried by short-lived client processes. 
A - client is created by running this binary with a --command - flag. - --cache-dir Directory to cache the index and other useful information. If - a previous cache is present, the database will try to reuse - it. If this flag is not present, the database will be - in-memory only. - --threads Number of threads to use for indexing and querying tasks. - This value is optional; a good estimate is computed by - default. - - - Client: - --command Execute a query command against the index. See - --command-help for a listing of valid commands and a - description of what they do. Presence of this flag indicates - that the indexer is in client mode; this flag is mutually - exclusive with --server. - --location Location of the query. Some commands require only a file, - other require a line and column as well. Format is - filename[:line:column]. For example, "foobar.cc" and - "foobar.cc:1:10" are valid inputs. - --preferred-symbol-location - When looking up symbols, try to return either the - 'declaration' or the 'definition'. Defaults to 'definition'. -)help"; - exit(0); - } - - if (HasOption(options, "--help-commands")) { - std::cout << R"(Available commands: - - callees: - callers: - Emit all functions (with location) that this function calls ("callees") or - that call this function ("callers"). Requires a location. - - find-all-usages: - Emit every usage of the given symbol. This is intended to support a rename - refactoring. This output contains many uninteresting usages of symbols; - prefer find-interesting-usges. Requires a location. - - find-interesting-usages: - Emit only usages of the given symbol which are semantically interesting. - Requires a location. - - goto-referenced: - Find an associated reference (either definition or declaration) for the - given symbol. Requires a location. - - hierarchy: - List the type hierarchy (ie, inherited and derived members) for the given - method or type. Requires a location. 
- - outline: - Emit a file outline, listing all of the symbols in the file. - - search: - Search for a symbol by name. -)"; - exit(0); - } - - if (HasOption(options, "--project")) { - std::vector entries = LoadCompilationEntriesFromDirectory(options["--project"]); - - - std::vector dbs; - for (const CompilationEntry& entry : entries) { - std::cout << "Parsing " << entry.filename << std::endl; - IndexedFile db = Parse(entry.filename, entry.args); - - dbs.emplace_back(db); - std::cout << db.ToString() << std::endl << std::endl; - } - - std::cin.get(); - exit(0); - } - - if (HasOption(options, "--command")) { - Command command; - if (!ParseCommand(options["--command"], &command)) - Fail("Unknown command \"" + options["--command"] + "\"; see --help-commands"); - - - } - - std::cout << "Invalid arguments. Try --help."; - exit(1); - return 0; -} +// TODO: allow user to decide some indexer choices, ie, do we mark prototype parameters as usages? diff --git a/serializer.cc b/serializer.cc index 9176044f..02d4a48b 100644 --- a/serializer.cc +++ b/serializer.cc @@ -2,110 +2,6 @@ #include "indexer.h" -#if false -template -void Emit(Reader& a, const char* key, T& v) { - static_assert(false); // Must be specialized. -} -template -void Emit(Writer& a, const char* key, T& v) { - static_assert(false); // Must be specialized. 
-} - -template<> -void Emit(Reader& r, const char* key, int& v) { - v = r[key].GetInt(); -} - -template<> -void Emit(Writer& w, const char* key, int &v) { - w.Key(key); - w.Int(v); -} - -void StartObject(Reader& r) {} -void StartObject(Writer& w) { - w.StartObject(); -} - -void EndObject(Reader& r) {} -void EndObject(Writer& w) { - w.EndObject(); -} - -void StartArray(Reader& r) {} -void StartArray(Writer& w) { - w.StartArray(); -} - -void EndArray(Reader& r) {} -void EndArray(Writer& w) { - w.EndArray(); -} - -struct Object { - //Location l; - int a = 0, b = 0, c = 0; -}; - -/* -void EmitKey(Reader& r, const char* key) { - w.Key(key); -} -void EmitKey(Writer& w, const char* key) { - w = w[key]; -} -*/ - -template -void Serialize(S& stream, Object& obj) { - StartObject(stream); - Emit(stream, "a", obj.a); - Emit(stream, "b", obj.b); - Emit(stream, "b", obj.c); - EndObject(stream); -} - -/* -template -C& operator&(C& stream, T& t) { -t.serialize(stream); -} -*/ - -int main(int argc, char** argv) { - - rapidjson::StringBuffer output; - rapidjson::PrettyWriter writer(output); - writer.SetFormatOptions( - rapidjson::PrettyFormatOptions::kFormatSingleLineArray); - writer.SetIndent(' ', 2); - - Object foo; - foo.a = 10; - Serialize(writer, foo); - std::cout << output.GetString() << std::endl; - - std::cout << "----" << std::endl; - - rapidjson::Document doc; - //doc = doc["foo"]; - doc.Parse(output.GetString()); - Object foo2; - Serialize(doc, foo2); - - std::cin.get(); - //Reader r; - //foo.Serialize(r); - - return 0; -} -#endif - - - - - @@ -134,26 +30,26 @@ void Serialize(Writer& writer, const char* key, const std::vector& loc } template -void Serialize(Writer& writer, const char* key, LocalId id) { +void Serialize(Writer& writer, const char* key, Id id) { if (key) writer.Key(key); - writer.Uint64(id.local_id); + writer.Uint64(id.id); } template -void Serialize(Writer& writer, const char* key, optional> id) { +void Serialize(Writer& writer, const char* key, optional> 
id) { if (id) { Serialize(writer, key, id.value()); } } template -void Serialize(Writer& writer, const char* key, const std::vector>& ids) { +void Serialize(Writer& writer, const char* key, const std::vector>& ids) { if (ids.size() == 0) return; if (key) writer.Key(key); writer.StartArray(); - for (LocalId id : ids) + for (Id id : ids) Serialize(writer, nullptr, id); writer.EndArray(); } @@ -161,7 +57,7 @@ void Serialize(Writer& writer, const char* key, const std::vector>& i template void Serialize(Writer& writer, const char* key, Ref ref) { if (key) writer.Key(key); - std::string s = std::to_string(ref.id.local_id) + "@" + ref.loc.ToString(); + std::string s = std::to_string(ref.id.id) + "@" + ref.loc.ToString(); writer.String(s.c_str()); } @@ -191,8 +87,8 @@ void Serialize(Writer& writer, const char* key, uint64_t value) { } void Serialize(Writer& writer, IndexedFile* file) { - auto it = file->usr_to_type_id.find(""); - if (it != file->usr_to_type_id.end()) { + auto it = file->usr_to_id->usr_to_type_id.find(""); + if (it != file->usr_to_id->usr_to_type_id.end()) { file->Resolve(it->second)->def.short_name = ""; assert(file->Resolve(it->second)->uses.size() == 0); }