From aaa3542670a9e4ada6973d3077df8d73d9e0ab40 Mon Sep 17 00:00:00 2001 From: Jacob Dufault Date: Sun, 26 Feb 2017 11:45:59 -0800 Subject: [PATCH] rework-id --- indexer.cpp | 46 +++--- indexer.h | 35 +++-- query.cc | 399 +++++++++++++++++++++++++++++++++++++------------- query.h | 62 ++++---- serializer.cc | 4 +- 5 files changed, 376 insertions(+), 170 deletions(-) diff --git a/indexer.cpp b/indexer.cpp index f058860f..3f0652fb 100644 --- a/indexer.cpp +++ b/indexer.cpp @@ -2,47 +2,51 @@ #include "serializer.h" -IndexedFile::IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db) - : usr_to_id(usr_to_id), file_db(file_db) { +IndexedFile::IndexedFile(IdCache* id_cache, FileDb* file_db) + : id_cache(id_cache), file_db(file_db) { + // TODO: Reconsider if we should still be reusing the same id_cache. // Preallocate any existing resolved ids. - for (const auto& entry : usr_to_id->usr_to_type_id) + for (const auto& entry : id_cache->usr_to_type_id) types.push_back(IndexedTypeDef(entry.second, entry.first)); - for (const auto& entry : usr_to_id->usr_to_func_id) + for (const auto& entry : id_cache->usr_to_func_id) funcs.push_back(IndexedFuncDef(entry.second, entry.first)); - for (const auto& entry : usr_to_id->usr_to_var_id) + for (const auto& entry : id_cache->usr_to_var_id) vars.push_back(IndexedVarDef(entry.second, entry.first)); } // TODO: Optimize for const char*? TypeId IndexedFile::ToTypeId(const std::string& usr) { - auto it = usr_to_id->usr_to_type_id.find(usr); - if (it != usr_to_id->usr_to_type_id.end()) + auto it = id_cache->usr_to_type_id.find(usr); + if (it != id_cache->usr_to_type_id.end()) return it->second; - TypeId id(usr_to_id->group, types.size()); + TypeId id(id_cache->group, types.size()); types.push_back(IndexedTypeDef(id, usr)); - usr_to_id->usr_to_type_id[usr] = id; + id_cache->usr_to_type_id[usr] = id; + id_cache->type_id_to_usr[id] = usr; return id; } FuncId IndexedFile::ToFuncId(const std::string& usr) { - auto it = usr_to_id->usr_to_func_id.find(usr); - if (it != usr_to_id->usr_to_func_id.end()) + auto it = id_cache->usr_to_func_id.find(usr); + if (it != id_cache->usr_to_func_id.end()) return it->second; - FuncId id(usr_to_id->group, funcs.size()); + FuncId id(id_cache->group, funcs.size()); funcs.push_back(IndexedFuncDef(id, usr)); - usr_to_id->usr_to_func_id[usr] = id; + id_cache->usr_to_func_id[usr] = id; + id_cache->func_id_to_usr[id] = usr; return id; } VarId IndexedFile::ToVarId(const std::string& usr) { - auto it = usr_to_id->usr_to_var_id.find(usr); - if (it != usr_to_id->usr_to_var_id.end()) + auto it = id_cache->usr_to_var_id.find(usr); + if (it != id_cache->usr_to_var_id.end()) return it->second; - VarId id(usr_to_id->group, vars.size()); + VarId id(id_cache->group, vars.size()); vars.push_back(IndexedVarDef(id, usr)); - usr_to_id->usr_to_var_id[usr] = id; + id_cache->usr_to_var_id[usr] = id; + id_cache->var_id_to_usr[id] = usr; return id; } @@ -822,7 +826,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re static bool DUMP_AST = true; -IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector args) { +IndexedFile Parse(IdCache* id_cache, FileDb* file_db, std::string filename, std::vector args) { clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/); clang::TranslationUnit tu(index, filename, args); @@ -845,7 +849,7 @@ IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filen */ }; - IndexedFile db(usr_to_id, file_db); + IndexedFile db(id_cache, file_db); NamespaceHelper ns; IndexParam param(&db, &ns); clang_indexTranslationUnit(index_action, ¶m, callbacks, sizeof(callbacks), @@ -986,9 +990,9 @@ int main55555(int argc, char** argv) { // Run test. std::cout << "[START] " << path << std::endl; - UsrToIdResolver usr_to_id(1); + IdCache id_cache(1); FileDb file_db(1); - IndexedFile db = Parse(&usr_to_id, &file_db, path, {}); + IndexedFile db = Parse(&id_cache, &file_db, path, {}); std::string actual_output = db.ToString(); //WriteToFile("output.json", actual_output); diff --git a/indexer.h b/indexer.h index d6a272f7..cc98a4b5 100644 --- a/indexer.h +++ b/indexer.h @@ -295,6 +295,7 @@ using VarRef = Ref; // TODO: Either eliminate the defs created as a by-product of cross-referencing, // or do not emit things we don't have definitions for. +template struct TypeDefDefinitionData { // General metadata. TypeId id; @@ -327,7 +328,7 @@ struct TypeDefDefinitionData { TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {} - bool operator==(const TypeDefDefinitionData& other) const { + bool operator==(const TypeDefDefinitionData& other) const { return id == other.id && usr == other.usr && @@ -341,13 +342,13 @@ struct TypeDefDefinitionData { vars == other.vars; } - bool operator!=(const TypeDefDefinitionData& other) const { + bool operator!=(const TypeDefDefinitionData& other) const { return !(*this == other); } }; struct IndexedTypeDef { - TypeDefDefinitionData def; + TypeDefDefinitionData<> def; // Immediate derived types. std::vector derived; @@ -375,6 +376,7 @@ namespace std { }; } +template struct FuncDefDefinitionData { // General metadata. FuncId id; @@ -399,7 +401,7 @@ struct FuncDefDefinitionData { assert(usr.size() > 0); } - bool operator==(const FuncDefDefinitionData& other) const { + bool operator==(const FuncDefDefinitionData& other) const { return id == other.id && usr == other.usr && @@ -412,13 +414,13 @@ struct FuncDefDefinitionData { callees == other.callees; } - bool operator!=(const FuncDefDefinitionData& other) const { + bool operator!=(const FuncDefDefinitionData& other) const { return !(*this == other); } }; struct IndexedFuncDef { - FuncDefDefinitionData def; + FuncDefDefinitionData<> def; // Places the function is forward-declared. std::vector declarations; @@ -457,7 +459,7 @@ namespace std { }; } - +template struct VarDefDefinitionData { // General metadata. VarId id; @@ -477,7 +479,7 @@ struct VarDefDefinitionData { VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {} - bool operator==(const VarDefDefinitionData& other) const { + bool operator==(const VarDefDefinitionData& other) const { return id == other.id && usr == other.usr && @@ -489,13 +491,13 @@ struct VarDefDefinitionData { declaring_type == other.declaring_type; } - bool operator!=(const VarDefDefinitionData& other) const { + bool operator!=(const VarDefDefinitionData& other) const { return !(*this == other); } }; struct IndexedVarDef { - VarDefDefinitionData def; + VarDefDefinitionData<> def; // Usages. std::vector uses; @@ -520,26 +522,29 @@ namespace std { }; } -struct UsrToIdResolver { +struct IdCache { // NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs // to get fixed up when inserting into the real db. GroupId group; std::unordered_map usr_to_type_id; std::unordered_map usr_to_func_id; std::unordered_map usr_to_var_id; + std::unordered_map type_id_to_usr; + std::unordered_map func_id_to_usr; + std::unordered_map var_id_to_usr; - UsrToIdResolver(GroupId group) : group(group) {} + IdCache(GroupId group) : group(group) {} }; struct IndexedFile { FileDb* file_db; - UsrToIdResolver* usr_to_id; + IdCache* id_cache; std::vector types; std::vector funcs; std::vector vars; - IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db); + IndexedFile(IdCache* id_cache, FileDb* file_db); TypeId ToTypeId(const std::string& usr); FuncId ToFuncId(const std::string& usr); @@ -557,4 +562,4 @@ struct IndexedFile { -IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector args); \ No newline at end of file +IndexedFile Parse(IdCache* id_cache, FileDb* file_db, std::string filename, std::vector args); \ No newline at end of file diff --git a/query.cc b/query.cc index 5b86b414..58f92d24 100644 --- a/query.cc +++ b/query.cc @@ -17,6 +17,7 @@ // TODO: Make all copy constructors explicit. +#if false struct IdMap { // TODO: id resolution is broken. We need to resolve same fundamental USR to same ID. Problem is that multiple USRs // can have different source IDs. @@ -164,13 +165,13 @@ struct IdMap { FileId Remap(FileId from) { return GenericRemap(&remap_file_id, &group_file_id_to_usr, &usr_to_file_id, &next_file_id, from); } - TypeId Remap(TypeId from) { + Usr Remap(TypeId from) { return GenericRemap(&remap_type_id, &group_type_id_to_usr, &usr_to_type_id, &next_type_id, from); } - FuncId Remap(FuncId from) { + Usr Remap(FuncId from) { return GenericRemap(&remap_func_id, &group_func_id_to_usr, &usr_to_func_id, &next_func_id, from); } - VarId Remap(VarId from) { + Usr Remap(VarId from) { return GenericRemap(&remap_var_id, &group_var_id_to_usr, &usr_to_var_id, &next_var_id, from); } Location Remap(Location from) { @@ -179,12 +180,12 @@ struct IdMap { from.raw_file_id = file.id; return from; } - FuncRef Remap(FuncRef from) { + UsrRef Remap(FuncRef from) { from.id = Remap(from.id); from.loc = Remap(from.loc); return from; } - TypeDefDefinitionData Remap(TypeDefDefinitionData def) { + QueryableTypeDef::DefUpdate Remap(QueryableTypeDef::DefUpdate def) { def.id = Remap(def.id); if (def.definition) def.definition = Remap(def.definition.value()); @@ -196,7 +197,7 @@ struct IdMap { def.vars = Remap(def.vars); return def; } - FuncDefDefinitionData Remap(FuncDefDefinitionData def) { + QueryableFuncDef::DefUpdate Remap(QueryableFuncDef::DefUpdate def) { def.id = Remap(def.id); if (def.definition) def.definition = Remap(def.definition.value()); @@ -208,7 +209,7 @@ struct IdMap { def.callees = Remap(def.callees); return def; } - VarDefDefinitionData Remap(VarDefDefinitionData def) { + QueryableVarDef::DefUpdate Remap(QueryableVarDef::DefUpdate def) { def.id = Remap(def.id); if (def.declaration) def.declaration = Remap(def.declaration.value()); @@ -267,61 +268,149 @@ struct IdMap { result.push_back(Remap(l)); return result; } - std::vector Remap(const std::vector& from) { + std::vector Remap(const std::vector& from) { return GenericVectorRemap(&remap_type_id, &next_type_id, from); } - std::vector Remap(const std::vector& from) { + std::vector Remap(const std::vector& from) { return GenericVectorRemap(&remap_func_id, &next_func_id, from); } - std::vector Remap(const std::vector& from) { + std::vector Remap(const std::vector& from) { return GenericVectorRemap(&remap_var_id, &next_var_id, from); } - std::vector Remap(const std::vector& from) { - std::vector result; + std::vector Remap(const std::vector& from) { + std::vector result; result.reserve(from.size()); for (FuncRef r : from) result.push_back(Remap(r)); return result; } }; +#endif + + + + +// TODO: Switch over to QueryableLocation. Figure out if there is +// a good way to get the indexer using it. I don't think so +// since we may discover more files while indexing a file. +// +// We could also reuse planned USR caching system for file +// paths. +struct CachedFileDb { + using Id = int64_t; + std::vector file_names; +}; + +struct QueryableLocation { + CachedFileDb::Id id; + int line; + int column; + bool is_interesting; +}; -template -MergeableUpdate MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector& removed, const std::vector& added) { - MergeableUpdate update; - update.id = id_map->Remap(symbol_id); - update.to_remove = id_map->Remap(removed); - update.to_add = id_map->Remap(added); - return update; -} // NOTE: When not inside of a |def| object, there can be duplicates of the same // information if that information is contributed from separate sources. // If we need to avoid this duplication in the future, we will have to // add a refcount. - -QueryableTypeDef::QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed) - : def(id_map.Remap(indexed.def)) { - derived = id_map.Remap(indexed.derived); - uses = id_map.Remap(indexed.uses); +template +std::vector Transform(const std::vector& input, std::function op) { + std::vector result; + result.reserve(input.size()); + for (const In& in : input) + result.push_back(op(in)); + return result; } -QueryableFuncDef::QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed) - : def(id_map.Remap(indexed.def)) { - declarations = id_map.Remap(indexed.declarations); - derived = id_map.Remap(indexed.derived); - callers = id_map.Remap(indexed.callers); - uses = id_map.Remap(indexed.uses); +Usr MapIdToUsr(IdCache& id_cache, TypeId& id) { + return id_cache.type_id_to_usr[id]; +} +Usr MapIdToUsr(IdCache& id_cache, FuncId& id) { + return id_cache.func_id_to_usr[id]; +} +Usr MapIdToUsr(IdCache& id_cache, VarId& id) { + return id_cache.var_id_to_usr[id]; +} +Location MapIdToUsr(IdCache& id_cache, Location& ids); // FIXME: We will need additional data to map locations. + +std::vector MapIdToUsr(IdCache& id_cache, std::vector& ids) { + return Transform(ids, [&](TypeId id) { return id_cache.type_id_to_usr[id]; }); +} +std::vector MapIdToUsr(IdCache& id_cache, std::vector& ids) { + return Transform(ids, [&](FuncId id) { return id_cache.func_id_to_usr[id]; }); +} +std::vector MapIdToUsr(IdCache& id_cache, std::vector& ids) { + return Transform(ids, [&](VarId id) { return id_cache.var_id_to_usr[id]; }); +} +std::vector MapIdToUsr(IdCache& id_cache, std::vector& ids) { + return Transform(ids, [&](FuncRef ref) { + UsrRef result; + result.loc = ref.loc; // FIXME: Patch proper location. Fix when fixing MapIdToUsr(Location). I'm thinking we will have a GlobalLocation type. + result.usr = id_cache.func_id_to_usr[ref.id]; + return result; + }); +} +std::vector MapIdToUsr(IdCache& id_cache, std::vector& ids); // FIXME: We will need additional data to map locations. +QueryableTypeDef::DefUpdate MapIdToUsr(IdCache& id_cache, TypeDefDefinitionData<>& def) { + QueryableTypeDef::DefUpdate result(def.usr, def.usr); + if (result.definition) + result.definition = MapIdToUsr(id_cache, def.definition.value()); + if (result.alias_of) + result.alias_of = MapIdToUsr(id_cache, def.alias_of.value()); + result.parents = MapIdToUsr(id_cache, def.parents); + result.types = MapIdToUsr(id_cache, def.types); + result.funcs = MapIdToUsr(id_cache, def.funcs); + result.vars = MapIdToUsr(id_cache, def.vars); + return result; +} +QueryableFuncDef::DefUpdate MapIdToUsr(IdCache& id_cache, FuncDefDefinitionData<>& def) { + QueryableFuncDef::DefUpdate result(def.usr, def.usr); + if (result.definition) + result.definition = MapIdToUsr(id_cache, def.definition.value()); + if (result.declaring_type) + result.declaring_type = MapIdToUsr(id_cache, def.declaring_type.value()); + if (result.base) + result.base = MapIdToUsr(id_cache, def.base.value()); + result.locals = MapIdToUsr(id_cache, def.locals); + result.callees = MapIdToUsr(id_cache, def.callees); + return result; +} +QueryableVarDef::DefUpdate MapIdToUsr(IdCache& id_cache, VarDefDefinitionData<>& def) { + QueryableVarDef::DefUpdate result(def.usr, def.usr); + if (result.declaration) + result.declaration = MapIdToUsr(id_cache, def.declaration.value()); + if (result.definition) + result.definition = MapIdToUsr(id_cache, def.definition.value()); + if (result.variable_type) + result.variable_type = MapIdToUsr(id_cache, def.variable_type.value()); + if (result.declaring_type) + result.declaring_type = MapIdToUsr(id_cache, def.declaring_type.value()); + return result; } -QueryableVarDef::QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed) - : def(id_map.Remap(indexed.def)) { - uses = id_map.Remap(indexed.uses); +QueryableTypeDef::QueryableTypeDef(IdCache& id_cache, IndexedTypeDef& indexed) + : def(MapIdToUsr(id_cache, indexed.def)) { + derived = MapIdToUsr(id_cache, indexed.derived); + uses = MapIdToUsr(id_cache, indexed.uses); +} + +QueryableFuncDef::QueryableFuncDef(IdCache& id_cache, IndexedFuncDef& indexed) + : def(MapIdToUsr(id_cache, indexed.def)) { + declarations = MapIdToUsr(id_cache, indexed.declarations); + derived = MapIdToUsr(id_cache, indexed.derived); + callers = MapIdToUsr(id_cache, indexed.callers); + uses = MapIdToUsr(id_cache, indexed.uses); +} + +QueryableVarDef::QueryableVarDef(IdCache& id_cache, IndexedVarDef& indexed) + : def(MapIdToUsr(id_cache, indexed.def)) { + uses = MapIdToUsr(id_cache, indexed.uses); } struct QueryableEntry { @@ -335,14 +424,16 @@ struct QueryableEntry { +// TODO: For space reasons, it may make sense to map Usr -> offset inside of global storage. But not for intermediate or disk-storage. +// We can probably eliminate most of that pain by coming up with our own UsrDb concept which interns the Usr strings. We can make +// the pain of a global UsrDb less by +// (parallel)clangindex -> (main)commit USRs to global -> (parallel)transfer IDs to global USRs -> (main)import - +// TODO: remove GroupId concept. struct CachedIndexedFile { // Path to the file indexed. std::string path; - // GroupId of the indexed file. - GroupId group; // TODO: Make sure that |previous_index| and |current_index| use the same id // to USR mapping. This lets us greatly speed up difference computation. @@ -352,8 +443,7 @@ struct CachedIndexedFile { optional previous_index; IndexedFile current_index; - CachedIndexedFile(const IndexedFile& indexed) - : group(indexed.usr_to_id->group), current_index(indexed) {} + CachedIndexedFile(const IndexedFile& indexed) : current_index(indexed) {} }; template @@ -372,18 +462,33 @@ void RemoveRange(std::vector* dest, const std::vector& to_remove) { dest->erase(it); } -struct IndexUpdate { - IdMap* id_map; + + + + + + + + + + + + + + + + +struct IndexUpdate { // Type updates. - std::vector types_removed; + std::vector types_removed; std::vector types_added; std::vector types_def_changed; std::vector types_derived; std::vector types_uses; // Function updates. - std::vector funcs_removed; + std::vector funcs_removed; std::vector funcs_added; std::vector funcs_def_changed; std::vector funcs_declarations; @@ -392,14 +497,14 @@ struct IndexUpdate { std::vector funcs_uses; // Variable updates. - std::vector vars_removed; + std::vector vars_removed; std::vector vars_added; std::vector vars_def_changed; std::vector vars_uses; - IndexUpdate(IdMap* id_map) : id_map(id_map) {} - IndexUpdate(IdMap* id_map, IndexedFile& file); + IndexUpdate(IndexedFile& file); +#if false void Remap(IdMap* map) { id_map = map; @@ -427,7 +532,9 @@ struct IndexUpdate { #undef INDEX_UPDATE_REMAP } +#endif +#if false // Merges the contents of |update| into this IndexUpdate instance. void Merge(const IndexUpdate& update) { #define INDEX_UPDATE_MERGE(name) \ @@ -454,19 +561,27 @@ struct IndexUpdate { #undef INDEX_UPDATE_MERGE } +#endif }; -IndexUpdate::IndexUpdate(IdMap* id_map, IndexedFile& file) : id_map(id_map) { - id_map->Import(file.file_db, file.usr_to_id); - +IndexUpdate::IndexUpdate(IndexedFile& file) { for (IndexedTypeDef& def : file.types) - types_added.push_back(QueryableTypeDef(*id_map, def)); + types_added.push_back(QueryableTypeDef(*file.id_cache, def)); for (IndexedFuncDef& def : file.funcs) - funcs_added.push_back(QueryableFuncDef(*id_map, def)); + funcs_added.push_back(QueryableFuncDef(*file.id_cache, def)); for (IndexedVarDef& def : file.vars) - vars_added.push_back(QueryableVarDef(*id_map, def)); + vars_added.push_back(QueryableVarDef(*file.id_cache, def)); } +#if false +template +MergeableUpdate MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector& removed, const std::vector& added) { + MergeableUpdate update; + update.id = id_map->Remap(symbol_id); + update.to_remove = id_map->Remap(removed); + update.to_add = id_map->Remap(added); + return update; +} template TValue* TryFind(std::unordered_set& set, TValue* value) { @@ -639,6 +754,23 @@ IndexUpdate ComputeDiff(IdMap* id_map, IndexedFile& previous, IndexedFile& curre #undef JOIN } +#endif + + + + + + + + + + + + + + + + @@ -660,40 +792,106 @@ struct QueryableDatabase { std::vector funcs; std::vector vars; - // TypeId to index in |types| (same for funcs, vars) - std::unordered_map type_id_to_index; - std::unordered_map func_id_to_index; - std::unordered_map var_id_to_index; + // Lookup symbol based on a usr. + std::unordered_map usr_to_symbol; // |files| is indexed by FileId. Retrieve a FileId from a path using // |file_db|. FileDb file_db; std::vector files; - // When importing data into the global db we need to remap ids from an - // arbitrary group into the global group. - IdMap id_map; - QueryableDatabase(GroupId group); // Insert the contents of |update| into |db|. void ApplyIndexUpdate(IndexUpdate* update); + + void RemoveUsrs(const std::vector& to_remove); + void Import(const std::vector& defs); + void Import(const std::vector& defs); + void Import(const std::vector& defs); + void Update(const std::vector& updates); + void Update(const std::vector& updates); + void Update(const std::vector& updates); }; -template -void RemoveAll(std::unordered_map* id_map, std::vector* defs, const std::vector& ids_to_remove) { - auto to_erase = std::remove_if(defs->begin(), defs->end(), [&](const TDef& def) { +void QueryableDatabase::RemoveUsrs(const std::vector& to_remove) { + // TODO: Removing usrs is tricky because it means we will have to rebuild idx locations. I'm thinking we just nullify + // the entry instead of actually removing the data. The index could be massive. + + /* + usr_to_symbol.erase(std::remove_if(usr_to_symbol.begin(), usr_to_symbol.end(), [&to_remove](const std::string& usr) { // TODO: make ids_to_remove a set? - return std::find(ids_to_remove.begin(), ids_to_remove.end(), def.def.id) != ids_to_remove.end(); - }); + return std::find(to_remove.begin(), to_remove.end(), usr) != to_remove.end(); + }), usr_to_symbol.end()); - for (auto it = to_erase; it != defs->end(); ++it) { - id_map->erase(it->def.id); + + types.erase(std::remove_if(types.begin(), types.end(), [&to_remove](const QueryableTypeDef& def) { + return false; + }), types.end()); + */ + /* + for (auto it = to_erase; it != usr_to_symbol.end(); ++it) { + switch (it->second.kind) { + case SymbolKind::Type: + types.erase(types.begin() + it->second.type_idx); + break; + case SymbolKind::Func: + funcs.erase(funcs.begin() + it->second.func_idx); + break; + case SymbolKind::Var: + vars.erase(vars.begin() + it->second.var_idx); + break; + } } - - defs->erase(to_erase, defs->end()); + */ } +void QueryableDatabase::Import(const std::vector& defs) { + for (auto& def : defs) { + usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Type, types.size()); + types.push_back(def); + } +} + +void QueryableDatabase::Import(const std::vector& defs) { + for (auto& def : defs) { + usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Func, funcs.size()); + funcs.push_back(def); + } +} + +void QueryableDatabase::Import(const std::vector& defs) { + for (auto& def : defs) { + usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Var, vars.size()); + vars.push_back(def); + } +} + +void QueryableDatabase::Update(const std::vector& updates) { + for (auto& def : updates) { + SymbolIdx idx = usr_to_symbol[def.usr]; + assert(idx.kind == SymbolKind::Type); + types[idx.idx].def = def; + } +} + +void QueryableDatabase::Update(const std::vector& updates) { + for (auto& def : updates) { + SymbolIdx idx = usr_to_symbol[def.usr]; + assert(idx.kind == SymbolKind::Func); + funcs[idx.idx].def = def; + } +} + +void QueryableDatabase::Update(const std::vector& updates) { + for (auto& def : updates) { + SymbolIdx idx = usr_to_symbol[def.usr]; + assert(idx.kind == SymbolKind::Var); + vars[idx.idx].def = def; + } +} + + template void AddAll(std::unordered_map* id_map, std::vector* defs, const std::vector& to_add) { for (const TDef& def : to_add) { @@ -711,40 +909,36 @@ void ApplyUpdates(std::unordered_map* id_map, std::vector* defs, } } -QueryableDatabase::QueryableDatabase(GroupId group) : id_map(group), file_db(group) {} +QueryableDatabase::QueryableDatabase(GroupId group) : file_db(group) {} void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) { - id_map.Import(update->id_map); - #define JOIN(a, b) a##b -#define HANDLE_MERGEABLE(update_var_name, def_var_name, index_name, storage_name) \ +#define HANDLE_MERGEABLE(update_var_name, def_var_name, storage_name) \ for (auto merge_update : JOIN(update->, update_var_name)) { \ - int index = JOIN(index_name, [merge_update.id]); \ - auto* def = &JOIN(storage_name, [index]); \ + SymbolIdx index = usr_to_symbol[merge_update.usr]; \ + auto* def = &JOIN(storage_name, [index.idx]); \ AddRange(JOIN(&def->, def_var_name), merge_update.to_add); \ RemoveRange(JOIN(&def->, def_var_name), merge_update.to_remove); \ } - update->Remap(&id_map); + RemoveUsrs(update->types_removed); + Import(update->types_added); + Update(update->types_def_changed); + HANDLE_MERGEABLE(types_derived, derived, types); + HANDLE_MERGEABLE(types_uses, uses, types); - RemoveAll(&type_id_to_index, &types, update->types_removed); - AddAll(&type_id_to_index, &types, update->types_added); - ApplyUpdates(&type_id_to_index, &types, update->types_def_changed); - HANDLE_MERGEABLE(types_derived, derived, type_id_to_index, types); - HANDLE_MERGEABLE(types_uses, uses, type_id_to_index, types); + RemoveUsrs(update->funcs_removed); + Import(update->funcs_added); + Update(update->funcs_def_changed); + HANDLE_MERGEABLE(funcs_declarations, declarations, funcs); + HANDLE_MERGEABLE(funcs_derived, derived, funcs); + HANDLE_MERGEABLE(funcs_callers, callers, funcs); + HANDLE_MERGEABLE(funcs_uses, uses, funcs); - RemoveAll(&func_id_to_index, &funcs, update->funcs_removed); - AddAll(&func_id_to_index, &funcs, update->funcs_added); - ApplyUpdates(&func_id_to_index, &funcs, update->funcs_def_changed); - HANDLE_MERGEABLE(funcs_declarations, declarations, func_id_to_index, funcs); - HANDLE_MERGEABLE(funcs_derived, derived, func_id_to_index, funcs); - HANDLE_MERGEABLE(funcs_callers, callers, func_id_to_index, funcs); - HANDLE_MERGEABLE(funcs_uses, uses, func_id_to_index, funcs); - - RemoveAll(&var_id_to_index, &vars, update->vars_removed); - AddAll(&var_id_to_index, &vars, update->vars_added); - ApplyUpdates(&var_id_to_index, &vars, update->vars_def_changed); - HANDLE_MERGEABLE(vars_uses, uses, var_id_to_index, vars); + RemoveUsrs(update->vars_removed); + Import(update->vars_added); + Update(update->vars_def_changed); + HANDLE_MERGEABLE(vars_uses, uses, vars); #undef HANDLE_MERGEABLE #undef JOIN @@ -761,25 +955,24 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) { int main(int argc, char** argv) { // TODO: Unify UserToIdResolver and FileDb - UsrToIdResolver usr_to_id(1); + IdCache id_cache(1); FileDb file_db(1); - IndexedFile indexed_file_a = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v0.cc", {}); + IndexedFile indexed_file_a = Parse(&id_cache, &file_db, "full_tests/index_delta/a_v0.cc", {}); std::cout << indexed_file_a.ToString() << std::endl; std::cout << std::endl; - IndexedFile indexed_file_b = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v1.cc", {}); + IndexedFile indexed_file_b = Parse(&id_cache, &file_db, "full_tests/index_delta/a_v1.cc", {}); std::cout << indexed_file_b.ToString() << std::endl; - // TODO: We don't need to do ID remapping when computting a diff. Well, we need to do it for the IndexUpdate. - IdMap dest_ids(2); - IndexUpdate import(&dest_ids, indexed_file_a); - dest_ids.Import(indexed_file_b.file_db, indexed_file_b.usr_to_id); - IndexUpdate update = ComputeDiff(&dest_ids, indexed_file_a, indexed_file_b); - + IndexUpdate import(indexed_file_a); + /* + dest_ids.Import(indexed_file_b.file_db, indexed_file_b.id_cache); + IndexUpdate update = ComputeDiff(indexed_file_a, indexed_file_b); + */ QueryableDatabase db(5); db.ApplyIndexUpdate(&import); - db.ApplyIndexUpdate(&update); + //db.ApplyIndexUpdate(&update); return 0; } diff --git a/query.h b/query.h index d1617cc0..7f30edeb 100644 --- a/query.h +++ b/query.h @@ -23,8 +23,14 @@ enum class PreferredSymbolLocation { }; using Usr = std::string; +struct UsrRef { + Usr usr; + Location loc; -struct IdMap; + bool operator==(const UsrRef& other) const { + return usr == other.usr && loc == other.loc; + } +}; // There are two sources of reindex updates: the (single) definition of a // symbol has changed, or one of many users of the symbol has changed. @@ -36,63 +42,61 @@ struct IdMap; // that it can be merged with other updates before actually being applied to // the main database. See |MergeableUpdate|. -template +template struct MergeableUpdate { // The type/func/var which is getting new usages. - TId id; + Usr usr; // Entries to add and remove. std::vector to_add; std::vector to_remove; }; - struct QueryableTypeDef { - TypeDefDefinitionData def; - std::vector derived; + TypeDefDefinitionData def; + std::vector derived; std::vector uses; - using DefUpdate = TypeDefDefinitionData; - using DerivedUpdate = MergeableUpdate; - using UsesUpdate = MergeableUpdate; + using DefUpdate = TypeDefDefinitionData; + using DerivedUpdate = MergeableUpdate; + using UsesUpdate = MergeableUpdate; - QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed); + QueryableTypeDef(IdCache& id_cache, IndexedTypeDef& indexed); }; struct QueryableFuncDef { - FuncDefDefinitionData def; + FuncDefDefinitionData def; std::vector declarations; - std::vector derived; - std::vector callers; + std::vector derived; + std::vector callers; std::vector uses; - using DefUpdate = FuncDefDefinitionData; - using DeclarationsUpdate = MergeableUpdate; - using DerivedUpdate = MergeableUpdate; - using CallersUpdate = MergeableUpdate; - using UsesUpdate = MergeableUpdate; + using DefUpdate = FuncDefDefinitionData; + using DeclarationsUpdate = MergeableUpdate; + using DerivedUpdate = MergeableUpdate; + using CallersUpdate = MergeableUpdate; + using UsesUpdate = MergeableUpdate; - QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed); + QueryableFuncDef(IdCache& id_cache, IndexedFuncDef& indexed); }; struct QueryableVarDef { - VarDefDefinitionData def; + VarDefDefinitionData def; std::vector uses; - using DefUpdate = VarDefDefinitionData; - using UsesUpdate = MergeableUpdate; + using DefUpdate = VarDefDefinitionData; + using UsesUpdate = MergeableUpdate; - QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed); + QueryableVarDef(IdCache& id_cache, IndexedVarDef& indexed); }; -enum class SymbolKind { Type, Func, Var }; +enum class SymbolKind { Invalid, Type, Func, Var }; struct SymbolIdx { SymbolKind kind; - union { - uint64_t type_idx; - uint64_t func_idx; - uint64_t var_idx; - }; + uint64_t idx; + + SymbolIdx() : kind(SymbolKind::Invalid), idx(-1) {} // Default ctor needed by stdlib. Do not use. + SymbolIdx(SymbolKind kind, uint64_t idx) : kind(kind), idx(idx) {} }; diff --git a/serializer.cc b/serializer.cc index 5b0824f8..5c7cfe52 100644 --- a/serializer.cc +++ b/serializer.cc @@ -87,8 +87,8 @@ void Serialize(Writer& writer, const char* key, uint64_t value) { } void Serialize(Writer& writer, IndexedFile* file) { - auto it = file->usr_to_id->usr_to_type_id.find(""); - if (it != file->usr_to_id->usr_to_type_id.end()) { + auto it = file->id_cache->usr_to_type_id.find(""); + if (it != file->id_cache->usr_to_type_id.end()) { file->Resolve(it->second)->def.short_name = ""; assert(file->Resolve(it->second)->uses.size() == 0); }