From 69b1dcadaa2c4c11d75fc9b937a2f4f0e833e553 Mon Sep 17 00:00:00 2001 From: Jacob Dufault Date: Thu, 6 Apr 2017 22:42:57 -0700 Subject: [PATCH] cleanup some query code to prepare for space optimizations --- src/command_line.cc | 18 +-- src/query.cc | 289 ++++++++++++++++++++------------------------ src/query.h | 138 +++++++-------------- src/string_db.cc | 40 ++++++ src/string_db.h | 154 +++++++++++++++++++++++ 5 files changed, 377 insertions(+), 262 deletions(-) create mode 100644 src/string_db.cc create mode 100644 src/string_db.h diff --git a/src/command_line.cc b/src/command_line.cc index 4f36b80e..0c7e3a95 100644 --- a/src/command_line.cc +++ b/src/command_line.cc @@ -272,7 +272,7 @@ void IndexMain(IndexRequestQueue* requests, IndexResponseQueue* responses) { // from the primary file though, so that should be ok. We need to cleanup indexer output. optional old_index = LoadCachedFile(request->path); if (old_index.has_value()) { - IndexUpdate update(old_index.value()); + IndexUpdate update = IndexUpdate::CreateImport(old_index.value()); IndexTranslationUnitResponse response(update); responses->Enqueue(response); time.ResetAndPrint("Loading cached index"); @@ -296,7 +296,7 @@ void IndexMain(IndexRequestQueue* requests, IndexResponseQueue* responses) { time.ResetAndPrint("Loading previous index"); if (old_index) { // Apply delta update. - IndexUpdate update(old_index.value(), new_index); + IndexUpdate update = IndexUpdate::CreateDelta(old_index.value(), new_index); IndexTranslationUnitResponse response(update); time.ResetAndPrint("Creating delta index update/response"); responses->Enqueue(response); @@ -304,7 +304,7 @@ void IndexMain(IndexRequestQueue* requests, IndexResponseQueue* responses) { } else { // Apply full update. 
- IndexUpdate update(new_index); + IndexUpdate update = IndexUpdate::CreateImport(new_index); IndexTranslationUnitResponse response(update); time.ResetAndPrint("Creating index update/response"); responses->Enqueue(response); @@ -322,7 +322,7 @@ QueryableFile* FindFile(QueryableDatabase* db, const std::string& filename) { // TODO: hashmap lookup. for (auto& file : db->files) { // std::cerr << " - Have file " << file.file_id << std::endl; - if (file.file_id == filename) { + if (file.def.usr == filename) { //std::cerr << "Found file " << filename << std::endl; return &file; } @@ -516,7 +516,7 @@ void QueryDbMainLoop( int target_line = msg->params.position.line + 1; int target_column = msg->params.position.character + 1; - for (const UsrRef& ref : file->all_symbols) { + for (const UsrRef& ref : file->def.all_symbols) { if (ref.loc.start.line >= target_line && ref.loc.end.line <= target_line && ref.loc.start.column <= target_column && ref.loc.end.column >= target_column) { optional location = GetDefinitionSpellingOfUsr(db, ref.usr); @@ -542,8 +542,8 @@ void QueryDbMainLoop( break; } - std::cerr << "File outline size is " << file->outline.size() << std::endl; - for (UsrRef ref : file->outline) { + std::cerr << "File outline size is " << file->def.outline.size() << std::endl; + for (UsrRef ref : file->def.outline) { SymbolIdx symbol = db->usr_to_symbol[ref.usr]; lsSymbolInformation info; @@ -606,7 +606,7 @@ void QueryDbMainLoop( break; } - for (UsrRef ref : file->outline) { + for (UsrRef ref : file->def.outline) { // NOTE: We OffsetColumn so that the code lens always show up in a // predictable order. Otherwise, the client may randomize it. 
@@ -950,7 +950,7 @@ int main(int argc, char** argv) { //bool loop = true; //while (loop) // std::this_thread::sleep_for(std::chrono::milliseconds(10)); - //std::this_thread::sleep_for(std::chrono::seconds(3)); + std::this_thread::sleep_for(std::chrono::seconds(3)); PlatformInit(); RegisterMessageTypes(); diff --git a/src/query.cc b/src/query.cc index 01e42bb3..66eabd14 100644 --- a/src/query.cc +++ b/src/query.cc @@ -17,12 +17,6 @@ - -// NOTE: When not inside of a |def| object, there can be duplicates of the same -// information if that information is contributed from separate sources. -// If we need to avoid this duplication in the future, we will have to -// add a refcount. - template std::vector Transform(const std::vector& input, std::function op) { std::vector result; @@ -137,21 +131,15 @@ QueryableVarDef::DefUpdate MapIdToUsr(const IdCache& id_cache, const IndexedVarD return result; } -QueryableFile::QueryableFile(const IndexedFile& indexed) - : file_id(indexed.path) { +QueryableFile::Def BuildFileDef(const IndexedFile& indexed) { + QueryableFile::Def def; + def.usr = indexed.path; - // TODO: investigate this - //std::cerr << "Adding QueryableFile for " << indexed.path - // << ", file_path_to_file_id.size()=" << indexed.id_cache.file_path_to_file_id.size() << std::endl; - //for (auto& entry : indexed.id_cache.file_path_to_file_id) - // std::cerr << "-" << entry.first << std::endl; - //assert(indexed.id_cache.file_path_to_file_id.find(indexed.path) != - // indexed.id_cache.file_path_to_file_id.end()); - auto add_outline = [this, &indexed](Usr usr, Range range) { - outline.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range))); + auto add_outline = [&def, &indexed](Usr usr, Range range) { + def.outline.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range))); }; - auto add_all_symbols = [this, &indexed](Usr usr, Range range) { - all_symbols.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range))); + auto add_all_symbols = [&def, &indexed](Usr usr, 
Range range) { + def.all_symbols.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range))); }; for (const IndexedTypeDef& def : indexed.types) { @@ -183,14 +171,19 @@ QueryableFile::QueryableFile(const IndexedFile& indexed) add_all_symbols(def.def.usr, use); } - std::sort(outline.begin(), outline.end(), [](const UsrRef& a, const UsrRef& b) { + std::sort(def.outline.begin(), def.outline.end(), [](const UsrRef& a, const UsrRef& b) { return a.loc.start < b.loc.start; }); - std::sort(all_symbols.begin(), all_symbols.end(), [](const UsrRef& a, const UsrRef& b) { + std::sort(def.all_symbols.begin(), def.all_symbols.end(), [](const UsrRef& a, const UsrRef& b) { return a.loc.start < b.loc.start; }); + + return def; } +QueryableFile::QueryableFile(const IndexedFile& indexed) + : def(BuildFileDef(indexed)) {} + QueryableTypeDef::QueryableTypeDef(IdCache& id_cache, const IndexedTypeDef& indexed) : def(MapIdToUsr(id_cache, indexed.def)) { derived = MapIdToUsr(id_cache, indexed.derived); @@ -224,8 +217,6 @@ QueryableVarDef::QueryableVarDef(IdCache& id_cache, const IndexedVarDef& indexed // the pain of a global UsrDb less by // (parallel)clangindex -> (main)commit USRs to global -> (parallel)transfer IDs to global USRs -> (main)import -// TODO: remove GroupId concept. - struct CachedIndexedFile { // Path to the file indexed. std::string path; @@ -388,21 +379,50 @@ void CompareGroups( - +#if false IndexUpdate::IndexUpdate(IndexedFile& file) { - // TODO: Do not add empty data (ie, def has nothing but USR) + // TODO: use delta constructor with an empty file. 
- files_added.push_back(QueryableFile(file)); - for (const IndexedTypeDef& def : file.types) { - types_added.push_back(QueryableTypeDef(file.id_cache, def)); + auto file_def = BuildFileDef(file); + files_def_update.push_back(file_def); + + for (const IndexedTypeDef& indexed : file.types) { + QueryableTypeDef query(file.id_cache, indexed); + types_def_update.push_back(query.def); + types_derived.push_back(QueryableTypeDef::DerivedUpdate(query.def.usr, query.derived)); + types_instantiations.push_back(QueryableTypeDef::InstantiationsUpdate(query.def.usr, query.instantiations)); + types_uses.push_back(QueryableTypeDef::UsesUpdate(query.def.usr, query.uses)); } - for (const IndexedFuncDef& def : file.funcs) { - funcs_added.push_back(QueryableFuncDef(file.id_cache, def)); + + for (const IndexedFuncDef& indexed : file.funcs) { + QueryableFuncDef query(file.id_cache, indexed); + funcs_def_update.push_back(query.def); + funcs_declarations.push_back(QueryableFuncDef::DeclarationsUpdate(query.def.usr, query.declarations)); + funcs_derived.push_back(QueryableFuncDef::DerivedUpdate(query.def.usr, query.derived)); + funcs_callers.push_back(QueryableFuncDef::CallersUpdate(query.def.usr, query.callers)); + funcs_uses.push_back(QueryableFuncDef::UsesUpdate(query.def.usr, query.uses)); } - for (const IndexedVarDef& def : file.vars) { - vars_added.push_back(QueryableVarDef(file.id_cache, def)); + + for (const IndexedVarDef& indexed : file.vars) { + QueryableVarDef query(file.id_cache, indexed); + vars_def_update.push_back(query.def); + vars_uses.push_back(QueryableVarDef::UsesUpdate(query.def.usr, query.uses)); } } +#endif + +// static +IndexUpdate IndexUpdate::CreateImport(IndexedFile& file) { + // Return standard diff constructor but with an empty file so everything is + // added. 
+ IndexedFile previous(file.path); + return IndexUpdate(previous, file); +} + +// static +IndexUpdate IndexUpdate::CreateDelta(IndexedFile& current, IndexedFile& updated) { + return IndexUpdate(current, updated); +} IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file) { // |query_name| is the name of the variable on the query type. @@ -423,24 +443,8 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file) } \ } - // File - do { - // Outline is a special property and needs special handling, because it is a computed property - // of the IndexedFile (ie, to view it we need to construct a QueryableFile instance). - assert(previous_file.path == current_file.path); - QueryableFile previous_queryable_file(previous_file); - QueryableFile current_queryable_file(previous_file); - std::vector removed, added; - bool did_add = ComputeDifferenceForUpdate( - previous_queryable_file.outline, - current_queryable_file.outline, - &removed, &added); - if (did_add) { - std::cerr << "Adding mergeable update on outline (" << current_file.path << ")" << std::endl; - files_outline.push_back(MergeableUpdate(current_file.path, removed, added)); - } - } while (false); // do while false instead of just {} to appease Visual Studio code formatter. 
+ files_def_update.push_back(BuildFileDef(current_file)); // Types CompareGroups(previous_file.types, current_file.types, @@ -448,15 +452,20 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file) types_removed.push_back(def->def.usr); }, /*onAdded:*/[this, &current_file](IndexedTypeDef* def) { - types_added.push_back(QueryableTypeDef(current_file.id_cache, *def)); + QueryableTypeDef query(current_file.id_cache, *def); + types_def_update.push_back(query.def); + types_derived.push_back(QueryableTypeDef::DerivedUpdate(query.def.usr, query.derived)); + types_instantiations.push_back(QueryableTypeDef::InstantiationsUpdate(query.def.usr, query.instantiations)); + types_uses.push_back(QueryableTypeDef::UsesUpdate(query.def.usr, query.uses)); }, /*onFound:*/[this, &previous_file, &current_file](IndexedTypeDef* previous_def, IndexedTypeDef* current_def) { QueryableTypeDef::DefUpdate previous_remapped_def = MapIdToUsr(previous_file.id_cache, previous_def->def); QueryableTypeDef::DefUpdate current_remapped_def = MapIdToUsr(current_file.id_cache, current_def->def); if (previous_remapped_def != current_remapped_def) - types_def_changed.push_back(current_remapped_def); + types_def_update.push_back(current_remapped_def); PROCESS_UPDATE_DIFF(types_derived, derived, Usr); + PROCESS_UPDATE_DIFF(types_instantiations, instantiations, Usr); PROCESS_UPDATE_DIFF(types_uses, uses, QueryableRange); }); @@ -466,13 +475,18 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file) funcs_removed.push_back(def->def.usr); }, /*onAdded:*/[this, &current_file](IndexedFuncDef* def) { - funcs_added.push_back(QueryableFuncDef(current_file.id_cache, *def)); + QueryableFuncDef query(current_file.id_cache, *def); + funcs_def_update.push_back(query.def); + funcs_declarations.push_back(QueryableFuncDef::DeclarationsUpdate(query.def.usr, query.declarations)); + funcs_derived.push_back(QueryableFuncDef::DerivedUpdate(query.def.usr, query.derived)); + 
funcs_callers.push_back(QueryableFuncDef::CallersUpdate(query.def.usr, query.callers)); + funcs_uses.push_back(QueryableFuncDef::UsesUpdate(query.def.usr, query.uses)); }, /*onFound:*/[this, &previous_file, &current_file](IndexedFuncDef* previous_def, IndexedFuncDef* current_def) { QueryableFuncDef::DefUpdate previous_remapped_def = MapIdToUsr(previous_file.id_cache, previous_def->def); QueryableFuncDef::DefUpdate current_remapped_def = MapIdToUsr(current_file.id_cache, current_def->def); if (previous_remapped_def != current_remapped_def) - funcs_def_changed.push_back(current_remapped_def); + funcs_def_update.push_back(current_remapped_def); PROCESS_UPDATE_DIFF(funcs_declarations, declarations, QueryableRange); PROCESS_UPDATE_DIFF(funcs_derived, derived, Usr); @@ -486,13 +500,15 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file) vars_removed.push_back(def->def.usr); }, /*onAdded:*/[this, &current_file](IndexedVarDef* def) { - vars_added.push_back(QueryableVarDef(current_file.id_cache, *def)); + QueryableVarDef query(current_file.id_cache, *def); + vars_def_update.push_back(query.def); + vars_uses.push_back(QueryableVarDef::UsesUpdate(query.def.usr, query.uses)); }, /*onFound:*/[this, &previous_file, &current_file](IndexedVarDef* previous_def, IndexedVarDef* current_def) { QueryableVarDef::DefUpdate previous_remapped_def = MapIdToUsr(previous_file.id_cache, previous_def->def); QueryableVarDef::DefUpdate current_remapped_def = MapIdToUsr(current_file.id_cache, current_def->def); if (previous_remapped_def != current_remapped_def) - vars_def_changed.push_back(current_remapped_def); + vars_def_update.push_back(current_remapped_def); PROCESS_UPDATE_DIFF(vars_uses, uses, QueryableRange); }); @@ -505,28 +521,23 @@ void IndexUpdate::Merge(const IndexUpdate& update) { AddRange(&name, update.name); INDEX_UPDATE_MERGE(files_removed); - INDEX_UPDATE_MERGE(files_added); - INDEX_UPDATE_MERGE(files_outline); - INDEX_UPDATE_MERGE(files_all_symbols); + 
INDEX_UPDATE_MERGE(files_def_update); INDEX_UPDATE_MERGE(types_removed); - INDEX_UPDATE_MERGE(types_added); - INDEX_UPDATE_MERGE(types_def_changed); + INDEX_UPDATE_MERGE(types_def_update); INDEX_UPDATE_MERGE(types_derived); INDEX_UPDATE_MERGE(types_instantiations); INDEX_UPDATE_MERGE(types_uses); INDEX_UPDATE_MERGE(funcs_removed); - INDEX_UPDATE_MERGE(funcs_added); - INDEX_UPDATE_MERGE(funcs_def_changed); + INDEX_UPDATE_MERGE(funcs_def_update); INDEX_UPDATE_MERGE(funcs_declarations); INDEX_UPDATE_MERGE(funcs_derived); INDEX_UPDATE_MERGE(funcs_callers); INDEX_UPDATE_MERGE(funcs_uses); INDEX_UPDATE_MERGE(vars_removed); - INDEX_UPDATE_MERGE(vars_added); - INDEX_UPDATE_MERGE(vars_def_changed); + INDEX_UPDATE_MERGE(vars_def_update); INDEX_UPDATE_MERGE(vars_uses); #undef INDEX_UPDATE_MERGE @@ -552,8 +563,6 @@ void IndexUpdate::Merge(const IndexUpdate& update) { - - @@ -565,107 +574,93 @@ void QueryableDatabase::RemoveUsrs(const std::vector& to_remove) { // TODO: also remove from qualified_names? } -void QueryableDatabase::Import(const std::vector& defs) { - for (auto& def : defs) { - auto it = usr_to_symbol.find(def.file_id); +void QueryableDatabase::ImportOrUpdate(const std::vector& updates) { + for (auto& def : updates) { + auto it = usr_to_symbol.find(def.usr); if (it == usr_to_symbol.end()) { - qualified_names.push_back(def.file_id); + qualified_names.push_back(def.usr); symbols.push_back(SymbolIdx(SymbolKind::File, files.size())); - usr_to_symbol[def.file_id] = SymbolIdx(SymbolKind::File, files.size()); + usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::File, files.size()); - files.push_back(def); + QueryableFile query; + query.def = def; + files.push_back(query); } else { QueryableFile& existing = files[it->second.idx]; - // Replace the entire file. We don't ever want to merge files. 
- existing = def; + existing.def = def; } } } -void QueryableDatabase::Import(const std::vector& defs) { - for (auto& def : defs) { - auto it = usr_to_symbol.find(def.def.usr); +void QueryableDatabase::ImportOrUpdate(const std::vector& updates) { + for (auto& def : updates) { + auto it = usr_to_symbol.find(def.usr); if (it == usr_to_symbol.end()) { - qualified_names.push_back(def.def.qualified_name); + qualified_names.push_back(def.qualified_name); symbols.push_back(SymbolIdx(SymbolKind::Type, types.size())); - usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Type, types.size()); - types.push_back(def); + usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::Type, types.size()); + + QueryableTypeDef query; + query.def = def; + types.push_back(query); } else { QueryableTypeDef& existing = types[it->second.idx]; - if (def.def.definition_extent) - existing.def = def.def; - AddRange(&existing.derived, def.derived); - AddRange(&existing.uses, def.uses); + if (def.definition_extent) + existing.def = def; } } } -void QueryableDatabase::Import(const std::vector& defs) { - for (auto& def : defs) { - auto it = usr_to_symbol.find(def.def.usr); +void QueryableDatabase::ImportOrUpdate(const std::vector& updates) { + for (auto& def : updates) { + auto it = usr_to_symbol.find(def.usr); if (it == usr_to_symbol.end()) { - qualified_names.push_back(def.def.qualified_name); + qualified_names.push_back(def.qualified_name); symbols.push_back(SymbolIdx(SymbolKind::Func, funcs.size())); - usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Func, funcs.size()); - funcs.push_back(def); + usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::Func, funcs.size()); + + QueryableFuncDef query; + query.def = def; + funcs.push_back(query); } else { QueryableFuncDef& existing = funcs[it->second.idx]; - if (def.def.definition_extent) - existing.def = def.def; - AddRange(&existing.callers, def.callers); - AddRange(&existing.declarations, def.declarations); - AddRange(&existing.derived, def.derived); - 
AddRange(&existing.uses, def.uses); + if (def.definition_extent) + existing.def = def; } } } -void QueryableDatabase::Import(const std::vector& defs) { - for (auto& def : defs) { - auto it = usr_to_symbol.find(def.def.usr); +void QueryableDatabase::ImportOrUpdate(const std::vector& updates) { + for (auto& def : updates) { + auto it = usr_to_symbol.find(def.usr); if (it == usr_to_symbol.end()) { - qualified_names.push_back(def.def.qualified_name); + qualified_names.push_back(def.qualified_name); symbols.push_back(SymbolIdx(SymbolKind::Var, vars.size())); - usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Var, vars.size()); - vars.push_back(def); + usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::Var, vars.size()); + + QueryableVarDef query; + query.def = def; + vars.push_back(query); } else { QueryableVarDef& existing = vars[it->second.idx]; - if (def.def.definition_extent) - existing.def = def.def; - AddRange(&existing.uses, def.uses); + if (def.definition_extent) + existing.def = def; } } } -void QueryableDatabase::Update(const std::vector& updates) { - for (auto& def : updates) { - SymbolIdx idx = usr_to_symbol[def.usr]; - assert(idx.kind == SymbolKind::Type); - types[idx.idx].def = def; - } -} - -void QueryableDatabase::Update(const std::vector& updates) { - for (auto& def : updates) { - SymbolIdx idx = usr_to_symbol[def.usr]; - assert(idx.kind == SymbolKind::Func); - funcs[idx.idx].def = def; - } -} - -void QueryableDatabase::Update(const std::vector& updates) { - for (auto& def : updates) { - SymbolIdx idx = usr_to_symbol[def.usr]; - assert(idx.kind == SymbolKind::Var); - vars[idx.idx].def = def; - } -} - void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) { +#define HANDLE_REPLACEMENT(update_var_name, def_var_name, storage_name) \ + for (auto replacement_update : update->update_var_name) { \ + SymbolIdx index = usr_to_symbol[replacement_update.usr]; \ + auto* def = &storage_name[index.idx]; \ + def->def_var_name = replacement_update.entries; \ + } + 
#define HANDLE_MERGEABLE(update_var_name, def_var_name, storage_name) \ for (auto merge_update : update->update_var_name) { \ SymbolIdx index = usr_to_symbol[merge_update.usr]; \ @@ -675,26 +670,22 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) { } RemoveUsrs(update->files_removed); - Import(update->files_added); - HANDLE_MERGEABLE(files_outline, outline, files); + ImportOrUpdate(update->files_def_update); RemoveUsrs(update->types_removed); - Import(update->types_added); - Update(update->types_def_changed); + ImportOrUpdate(update->types_def_update); HANDLE_MERGEABLE(types_derived, derived, types); HANDLE_MERGEABLE(types_uses, uses, types); RemoveUsrs(update->funcs_removed); - Import(update->funcs_added); - Update(update->funcs_def_changed); + ImportOrUpdate(update->funcs_def_update); HANDLE_MERGEABLE(funcs_declarations, declarations, funcs); HANDLE_MERGEABLE(funcs_derived, derived, funcs); HANDLE_MERGEABLE(funcs_callers, callers, funcs); HANDLE_MERGEABLE(funcs_uses, uses, funcs); RemoveUsrs(update->vars_removed); - Import(update->vars_added); - Update(update->vars_def_changed); + ImportOrUpdate(update->vars_def_update); HANDLE_MERGEABLE(vars_uses, uses, vars); #undef HANDLE_MERGEABLE @@ -708,30 +699,6 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) { - -int main233(int argc, char** argv) { - IndexedFile indexed_file_a = Parse("full_tests/index_delta/a_v0.cc", {}); - std::cerr << indexed_file_a.ToString() << std::endl; - - std::cerr << std::endl; - IndexedFile indexed_file_b = Parse("full_tests/index_delta/a_v1.cc", {}); - std::cerr << indexed_file_b.ToString() << std::endl; - // TODO: We don't need to do ID remapping when computting a diff. Well, we need to do it for the IndexUpdate. 
- IndexUpdate import(indexed_file_a); - /* - dest_ids.Import(indexed_file_b.file_db, indexed_file_b.id_cache); - IndexUpdate update = ComputeDiff(indexed_file_a, indexed_file_b); - */ - QueryableDatabase db; - db.ApplyIndexUpdate(&import); - //db.ApplyIndexUpdate(&update); - - return 0; -} - - - - // TODO: Idea: when indexing and joining to the main db, allow many dbs that // are joined to. So that way even if the main db is busy we can // still be joining. Joining the partially joined db to the main diff --git a/src/query.h b/src/query.h index ed4d8f2b..224199cb 100644 --- a/src/query.h +++ b/src/query.h @@ -5,6 +5,9 @@ using Usr = std::string; +// TODO: in types, store refs separately from irefs. Then we can drop +// 'interesting' from location when that is cleaned up. + // TODO: Switch over to QueryableLocation. Figure out if there is // a good way to get the indexer using it. I don't think so // since we may discover more files while indexing a file. @@ -42,7 +45,6 @@ struct QueryableLocation { interesting < o.interesting; } }; -MAKE_REFLECT_STRUCT(QueryableLocation, path, line, column, interesting); struct QueryableRange { QueryableLocation start; @@ -64,7 +66,6 @@ struct QueryableRange { return start < o.start; } }; -MAKE_REFLECT_STRUCT(QueryableRange, start, end); struct UsrRef { Usr usr; @@ -81,7 +82,6 @@ struct UsrRef { return usr < other.usr && loc.start < other.loc.start; } }; -MAKE_REFLECT_STRUCT(UsrRef, usr, loc); // There are two sources of reindex updates: the (single) definition of a // symbol has changed, or one of many users of the symbol has changed. 
@@ -101,34 +101,39 @@ struct MergeableUpdate { std::vector to_add; std::vector to_remove; - MergeableUpdate() {} // For reflection + MergeableUpdate(Usr usr, const std::vector& to_add) + : usr(usr), to_add(to_add) {} MergeableUpdate(Usr usr, const std::vector& to_add, const std::vector& to_remove) : usr(usr), to_add(to_add), to_remove(to_remove) {} }; -template -void Reflect(TVisitor& visitor, MergeableUpdate& value) { - REFLECT_MEMBER_START(); - REFLECT_MEMBER(usr); - REFLECT_MEMBER(to_add); - REFLECT_MEMBER(to_remove); - REFLECT_MEMBER_END(); -} +template +struct ReplacementUpdate { + // The type/func/var whose entries are being replaced. + Usr usr; + // Replacement entries (read as |entries| by the constructor and by users of this update). + std::vector entries; + + ReplacementUpdate(Usr usr, const std::vector& entries) + : usr(usr), entries(entries) {} +}; struct QueryableFile { - using OutlineUpdate = MergeableUpdate; - using AllSymboslUpdate = MergeableUpdate; + struct Def { + Usr usr; + // Outline of the file (ie, for code lens). + std::vector outline; + // Every symbol found in the file (ie, for goto definition) + std::vector all_symbols; + }; - Usr file_id; - // Outline of the file (ie, for code lens). - std::vector outline; - // Every symbol found in the file (ie, for goto definition) - std::vector all_symbols; + using DefUpdate = Def; - QueryableFile() {} // For serialization. + DefUpdate def; + + QueryableFile() {} QueryableFile(const IndexedFile& indexed); }; -MAKE_REFLECT_STRUCT(QueryableFile, file_id, outline, all_symbols); struct QueryableTypeDef { using DefUpdate = TypeDefDefinitionData; @@ -141,10 +146,9 @@ struct QueryableTypeDef { std::vector instantiations; std::vector uses; - QueryableTypeDef() : def("") {} // For serialization. 
+ QueryableTypeDef() : def("") {} QueryableTypeDef(IdCache& id_cache, const IndexedTypeDef& indexed); }; -MAKE_REFLECT_STRUCT(QueryableTypeDef, def, derived, instantiations, uses); struct QueryableFuncDef { using DefUpdate = FuncDefDefinitionData; @@ -159,10 +163,9 @@ struct QueryableFuncDef { std::vector callers; std::vector uses; - QueryableFuncDef() : def("") {} // For serialization. + QueryableFuncDef() : def("") {} QueryableFuncDef(IdCache& id_cache, const IndexedFuncDef& indexed); }; -MAKE_REFLECT_STRUCT(QueryableFuncDef, def, declarations, derived, callers, uses); struct QueryableVarDef { using DefUpdate = VarDefDefinitionData; @@ -171,10 +174,9 @@ struct QueryableVarDef { DefUpdate def; std::vector uses; - QueryableVarDef() : def("") {} // For serialization. + QueryableVarDef() : def("") {} QueryableVarDef(IdCache& id_cache, const IndexedVarDef& indexed); }; -MAKE_REFLECT_STRUCT(QueryableVarDef, def, uses); enum class SymbolKind { Invalid, File, Type, Func, Var }; struct SymbolIdx { @@ -186,45 +188,29 @@ struct SymbolIdx { }; -// TODO: We need to control Usr, std::vector allocation to make sure it happens on shmem. That or we -// make IndexUpdate a POD type. -// TODO: Instead of all of that work above, we pipe the IndexUpdate across processes as JSON. -// We need to verify we need multiple processes first. Maybe libclang can run in a single process... -// TODO: Compute IndexUpdates in main process, off the blocking thread. Use separate process for running -// libclang. Solves memory worries. -// TODO: Instead of passing to/from json, we can probably bass the IndexedFile type almost directly as -// a raw memory dump - the type has almost zero pointers inside of it. We could do a little bit of fixup -// so that passing from a separate process to the main db is really fast (no need to go through JSON). 
-/* -namespace foo2 { - using Usr = size_t; - struct UsrTable { - size_t allocated; - size_t used; - const char* usrs[]; - }; -} -*/ - struct IndexUpdate { + // Creates a new IndexUpdate that will import |file|. + static IndexUpdate CreateImport(IndexedFile& file); + static IndexUpdate CreateDelta(IndexedFile& current, IndexedFile& updated); + + // Merge |update| into this update; this can reduce overhead / index update + // work can be parallelized. + void Merge(const IndexUpdate& update); + // File updates. std::vector files_removed; - std::vector files_added; - std::vector files_outline; - std::vector files_all_symbols; + std::vector files_def_update; // Type updates. std::vector types_removed; - std::vector types_added; - std::vector types_def_changed; + std::vector types_def_update; std::vector types_derived; std::vector types_instantiations; std::vector types_uses; // Function updates. std::vector funcs_removed; - std::vector funcs_added; - std::vector funcs_def_changed; + std::vector funcs_def_update; std::vector funcs_declarations; std::vector funcs_derived; std::vector funcs_callers; @@ -232,44 +218,15 @@ struct IndexUpdate { // Variable updates. std::vector vars_removed; - std::vector vars_added; - std::vector vars_def_changed; + std::vector vars_def_update; std::vector vars_uses; - IndexUpdate() {} - - // Creates a new IndexUpdate that will import |file|. - explicit IndexUpdate(IndexedFile& file); - + private: // Creates an index update assuming that |previous| is already // in the index, so only the delta between |previous| and |current| // will be applied. IndexUpdate(IndexedFile& previous, IndexedFile& current); - - // Merges the contents of |update| into this IndexUpdate instance. 
- void Merge(const IndexUpdate& update); }; -MAKE_REFLECT_STRUCT(IndexUpdate, - files_removed, - files_added, - files_outline, - files_all_symbols, - types_removed, - types_added, - types_def_changed, - types_derived, - types_uses, - funcs_removed, - funcs_added, - funcs_def_changed, - funcs_declarations, - funcs_derived, - funcs_callers, - funcs_uses, - vars_removed, - vars_added, - vars_def_changed, - vars_uses); // The query database is heavily optimized for fast queries. It is stored @@ -293,13 +250,10 @@ struct QueryableDatabase { void ApplyIndexUpdate(IndexUpdate* update); void RemoveUsrs(const std::vector& to_remove); - void Import(const std::vector& defs); - void Import(const std::vector& defs); - void Import(const std::vector& defs); - void Import(const std::vector& defs); - void Update(const std::vector& updates); - void Update(const std::vector& updates); - void Update(const std::vector& updates); + void ImportOrUpdate(const std::vector& updates); + void ImportOrUpdate(const std::vector& updates); + void ImportOrUpdate(const std::vector& updates); + void ImportOrUpdate(const std::vector& updates); }; diff --git a/src/string_db.cc b/src/string_db.cc new file mode 100644 index 00000000..0abb2720 --- /dev/null +++ b/src/string_db.cc @@ -0,0 +1,40 @@ +#if false +#include "string_db.h" + +StringStorage::~StringStorage() { + if (owns_str) { + free((void*)str); + str = nullptr; + owns_str= false; + } +} + +StringStorage StringStorage::CreateUnowned(const char* data, size_t len) { + StringStorage result; + result.str = data; + result.len = len; + result.owns_str = false; + return result; +} + +void StringStorage::Copy() const { + // Copy + char* new_str = (char*)malloc(len + 1); + strncpy(new_str, str, len + 1); + // Assign + str = new_str; + owns_str = true; +} + +bool StringStorage::operator==(const StringStorage& that) const { + return len == that.len && strcmp(str, that.str) == 0; +} + +bool StringStorage::operator!=(const StringStorage& that) const { + 
return !(*this == that); +} + +bool StringStorage::operator<(const StringStorage& that) const { + return strcmp(str, that.str) < 0; +} +#endif \ No newline at end of file diff --git a/src/string_db.h b/src/string_db.h new file mode 100644 index 00000000..14f1d403 --- /dev/null +++ b/src/string_db.h @@ -0,0 +1,154 @@ +#if false +#pragma once + +#include "buffer.h" +#include "utils.h" + +#include + +#include +#include +#include +#include +#include +#include + + +template +struct StringView { + StringView(); + StringView(const StringView& that); + StringView(const char* str, size_t len); + + bool operator==(const StringView& that) const; + bool operator!=(const StringView& that) const; + bool operator<(const StringView& that) const; + StringView& operator=(const StringView& that); + + std::string AsString() const; + + size_t len; + const char* str; +}; +// See MAKE_HASHABLE for QueryUsr, IndexUsr + +struct StringStorage { + ~StringStorage(); + + static StringStorage CreateUnowned(const char* str, size_t len); + void Copy() const; + + bool operator==(const StringStorage& that) const; + bool operator!=(const StringStorage& that) const; + bool operator<(const StringStorage& that) const; + + size_t len; + mutable const char* str; + mutable bool owns_str; +}; +MAKE_HASHABLE(StringStorage, t.len, t.str); + +template +struct StringDb { + TStringView GetString(const char* str, size_t len); + TStringView GetString(const std::string& str); + TStringView GetString(CXString cx_string); + + std::unordered_set data_; +}; + + + + + + + + +struct _DummyQueryType {}; +struct _DummyIndexType {}; + +using QueryUsr = StringView<_DummyQueryType>; +using QueryStringDb = StringDb<_DummyQueryType>; + +using IndexUsr = StringView<_DummyIndexType>; +using IndexStringDb = StringDb<_DummyIndexType>; + + + +// TODO: See if we can move this next to StringView definition. 
+MAKE_HASHABLE(QueryUsr, t.len, t.str); +MAKE_HASHABLE(IndexUsr, t.len, t.str); + + + + + + + + + + + + + +template +StringView::StringView() : len(0), str(nullptr) {} + +template +StringView::StringView(const StringView& that) : len(that.len), str(that.str) {} + +template +StringView::StringView(const char* str, size_t len) : len(len), str(str) {} + +template +bool StringView::operator==(const StringView& that) const { + return len == that.len && strcmp(str, that.str) == 0; +} + +template +bool StringView::operator!=(const StringView& that) const { + return !(*this == that); +} + +template +bool StringView::operator<(const StringView& that) const { + return strcmp(str, that.str) < 0; +} + +template +StringView& StringView::operator=(const StringView& that) { + len = that.len; + str = that.str; + return *this; +} + +template +std::string StringView::AsString() const { + return std::string(str, len); +} + +template +TStringView StringDb::GetString(const char* str, size_t len) { + StringStorage lookup = StringStorage::CreateUnowned(str, len); + + auto it = data_.insert(lookup); + if (it.second) + it.first->Copy(); + + return TStringView(it.first->str, it.first->len); +} + +template +TStringView StringDb::GetString(const std::string& str) { + return GetString(str.c_str(), str.length()); +} + +template +TStringView StringDb::GetString(CXString cx_string) { + assert(cx_string.data); + const char* str = clang_getCString(cx_string); + StringView result = GetString(str); + clang_disposeString(cx_string); + return result; +} +#endif \ No newline at end of file