some renames and add plan for lower mem usage

This commit is contained in:
Jacob Dufault 2017-04-06 23:57:26 -07:00
parent 443ccd24b9
commit 590797bcf2
4 changed files with 98 additions and 86 deletions

View File

@ -17,59 +17,59 @@ IndexedFile::IndexedFile(const std::string& path) : id_cache(path), path(path) {
}
// TODO: Optimize for const char*?
// Returns the type id registered for |usr|, creating one on first sight.
//
// On a cache miss the new id is the current size of |types|; a fresh
// IndexedTypeDef is appended and both directions of the mapping
// (usr -> id and id -> usr) are recorded in |id_cache|.
//
// NOTE: appending to |types| may reallocate its storage, so IndexedTypeDef*
// values obtained earlier from Resolve() should be re-fetched after calling
// this (assumes |types| is a std::vector - confirm in the header).
// NOTE: the IndexedTypeDef constructor asserts that |usr| is non-empty.
// NOTE(review): this text is a rendered diff; where two near-identical lines
// appear back to back, the first looks like the pre-rename (TypeId) form and
// the second the post-rename (IndexTypeId) form.
TypeId IndexedFile::ToTypeId(const std::string& usr) {
IndexTypeId IndexedFile::ToTypeId(const std::string& usr) {
// Fast path: |usr| has already been assigned an id.
auto it = id_cache.usr_to_type_id.find(usr);
if (it != id_cache.usr_to_type_id.end())
return it->second;
// Slow path: allocate the next index and register it in both maps.
TypeId id(types.size());
IndexTypeId id(types.size());
types.push_back(IndexedTypeDef(id, usr));
id_cache.usr_to_type_id[usr] = id;
id_cache.type_id_to_usr[id] = usr;
return id;
}
// Returns the function id registered for |usr|, creating one on first sight.
//
// On a cache miss the new id is the current size of |funcs|; a fresh
// IndexedFuncDef is appended and both usr -> id and id -> usr are recorded
// in |id_cache|.
//
// NOTE: appending to |funcs| may reallocate its storage, so IndexedFuncDef*
// values obtained earlier from Resolve() should be re-fetched after calling
// this (assumes |funcs| is a std::vector - confirm in the header).
FuncId IndexedFile::ToFuncId(const std::string& usr) {
IndexFuncId IndexedFile::ToFuncId(const std::string& usr) {
// Fast path: |usr| has already been assigned an id.
auto it = id_cache.usr_to_func_id.find(usr);
if (it != id_cache.usr_to_func_id.end())
return it->second;
// Slow path: allocate the next index and register it in both maps.
FuncId id(funcs.size());
IndexFuncId id(funcs.size());
funcs.push_back(IndexedFuncDef(id, usr));
id_cache.usr_to_func_id[usr] = id;
id_cache.func_id_to_usr[id] = usr;
return id;
}
// Returns the variable id registered for |usr|, creating one on first sight.
//
// On a cache miss the new id is the current size of |vars|; a fresh
// IndexedVarDef is appended and both usr -> id and id -> usr are recorded
// in |id_cache|.
//
// NOTE: appending to |vars| may reallocate its storage, so IndexedVarDef*
// values obtained earlier from Resolve() should be re-fetched after calling
// this (assumes |vars| is a std::vector - confirm in the header).
VarId IndexedFile::ToVarId(const std::string& usr) {
IndexVarId IndexedFile::ToVarId(const std::string& usr) {
// Fast path: |usr| has already been assigned an id.
auto it = id_cache.usr_to_var_id.find(usr);
if (it != id_cache.usr_to_var_id.end())
return it->second;
// Slow path: allocate the next index and register it in both maps.
VarId id(vars.size());
IndexVarId id(vars.size());
vars.push_back(IndexedVarDef(id, usr));
id_cache.usr_to_var_id[usr] = id;
id_cache.var_id_to_usr[id] = usr;
return id;
}
// Cursor overload: wraps |cursor| in clang::Cursor, reads its USR and forwards
// to the string overload of ToTypeId (which may create a new type entry).
TypeId IndexedFile::ToTypeId(const CXCursor& cursor) {
IndexTypeId IndexedFile::ToTypeId(const CXCursor& cursor) {
return ToTypeId(clang::Cursor(cursor).get_usr());
}
// Cursor overload: wraps |cursor| in clang::Cursor, reads its USR and forwards
// to the string overload of ToFuncId (which may create a new function entry).
FuncId IndexedFile::ToFuncId(const CXCursor& cursor) {
IndexFuncId IndexedFile::ToFuncId(const CXCursor& cursor) {
return ToFuncId(clang::Cursor(cursor).get_usr());
}
// Cursor overload: wraps |cursor| in clang::Cursor, reads its USR and forwards
// to the string overload of ToVarId (which may create a new variable entry).
VarId IndexedFile::ToVarId(const CXCursor& cursor) {
IndexVarId IndexedFile::ToVarId(const CXCursor& cursor) {
return ToVarId(clang::Cursor(cursor).get_usr());
}
// Returns a pointer to the type definition stored at index |id.id| in |types|.
// No bounds check is performed here. The pointer refers to an element of
// |types|, so it should be re-resolved after any call that can append a type
// (e.g. ToTypeId on an unseen USR).
IndexedTypeDef* IndexedFile::Resolve(TypeId id) {
IndexedTypeDef* IndexedFile::Resolve(IndexTypeId id) {
return &types[id.id];
}
// Returns a pointer to the function definition stored at index |id.id| in
// |funcs|. No bounds check is performed here. The pointer refers to an element
// of |funcs|, so it should be re-resolved after any call that can append a
// function (e.g. ToFuncId on an unseen USR).
IndexedFuncDef* IndexedFile::Resolve(FuncId id) {
IndexedFuncDef* IndexedFile::Resolve(IndexFuncId id) {
return &funcs[id.id];
}
// Returns a pointer to the variable definition stored at index |id.id| in
// |vars|. No bounds check is performed here. The pointer refers to an element
// of |vars|, so it should be re-resolved after any call that can append a
// variable (e.g. ToVarId on an unseen USR).
IndexedVarDef* IndexedFile::Resolve(VarId id) {
IndexedVarDef* IndexedFile::Resolve(IndexVarId id) {
return &vars[id.id];
}
@ -77,7 +77,7 @@ std::string IndexedFile::ToString() {
return Serialize(*this);
}
IndexedTypeDef::IndexedTypeDef(TypeId id, const std::string& usr)
IndexedTypeDef::IndexedTypeDef(IndexTypeId id, const std::string& usr)
: def(usr), id(id) {
assert(usr.size() > 0);
// std::cerr << "Creating type with usr " << usr << std::endl;
@ -355,7 +355,7 @@ struct VisitDeclForTypeUsageParam {
bool is_interesting;
int has_processed_any = false;
optional<clang::Cursor> previous_cursor;
optional<TypeId> initial_type;
optional<IndexTypeId> initial_type;
VisitDeclForTypeUsageParam(IndexedFile* db, bool is_interesting)
: db(db), is_interesting(is_interesting) {}
@ -374,7 +374,7 @@ void VisitDeclForTypeUsageVisitorHandler(clang::Cursor cursor,
if (referenced_usr == "")
return;
TypeId ref_type_id = db->ToTypeId(referenced_usr);
IndexTypeId ref_type_id = db->ToTypeId(referenced_usr);
if (!param->initial_type)
param->initial_type = ref_type_id;
@ -437,7 +437,7 @@ clang::VisiterResult VisitDeclForTypeUsageVisitor(
// strips
// qualifiers from |cursor| (ie, Foo* => Foo) and removes template arguments
// (ie, Foo<A,B> => Foo<*,*>).
optional<TypeId> ResolveToDeclarationType(IndexedFile* db,
optional<IndexTypeId> ResolveToDeclarationType(IndexedFile* db,
clang::Cursor cursor) {
clang::Cursor declaration =
cursor.get_type().strip_qualifiers().get_declaration();
@ -453,7 +453,7 @@ optional<TypeId> ResolveToDeclarationType(IndexedFile* db,
// useful if trying to figure out ie, what a using statement refers to. If
// trying to generally resolve a cursor to a type, use
// ResolveToDeclarationType, which works in more scenarios.
optional<TypeId> AddDeclTypeUsages(
optional<IndexTypeId> AddDeclTypeUsages(
IndexedFile* db,
clang::Cursor decl_cursor,
bool is_interesting,
@ -637,7 +637,7 @@ clang::VisiterResult AddDeclInitializerUsagesVisitor(clang::Cursor cursor,
// std::cerr << "Adding usage to id=" << ref_id.id << " usr=" << ref_usr
// << " at " << loc.ToString() << std::endl;
if (loc) {
VarId ref_id = db->ToVarId(ref_usr);
IndexVarId ref_id = db->ToVarId(ref_usr);
IndexedVarDef* ref_def = db->Resolve(ref_id);
AddUsage(ref_def->uses, loc.value());
}
@ -705,7 +705,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
std::string decl_usr = decl_cursor.get_usr();
VarId var_id = db->ToVarId(decl->entityInfo->USR);
IndexVarId var_id = db->ToVarId(decl->entityInfo->USR);
IndexedVarDef* var_def = db->Resolve(var_id);
// TODO: Eventually run with this if. Right now I want to iron out bugs
@ -744,7 +744,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// We don't need to assign declaring type multiple times if this variable
// has already been seen.
if (!decl->isRedeclaration) {
optional<TypeId> var_type = ResolveToDeclarationType(db, decl_cursor);
optional<IndexTypeId> var_type = ResolveToDeclarationType(db, decl_cursor);
if (var_type.has_value()) {
// Don't treat enum definition variables as instantiations.
bool is_enum_member = decl->semanticContainer && decl->semanticContainer->cursor.kind == CXCursor_EnumDecl;
@ -757,7 +757,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// TODO: Refactor handlers so more things are under 'if (!decl->isRedeclaration)'
if (decl->isDefinition && IsTypeDefinition(decl->semanticContainer)) {
TypeId declaring_type_id =
IndexTypeId declaring_type_id =
db->ToTypeId(decl->semanticContainer->cursor);
IndexedTypeDef* declaring_type_def = db->Resolve(declaring_type_id);
var_def->def.declaring_type = declaring_type_id;
@ -781,7 +781,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
clang::Cursor resolved =
decl_cursor.template_specialization_to_template_definition();
FuncId func_id = db->ToFuncId(resolved.cx_cursor);
IndexFuncId func_id = db->ToFuncId(resolved.cx_cursor);
IndexedFuncDef* func_def = db->Resolve(func_id);
AddUsage(func_def->uses, decl_loc_spelling.value());
@ -826,7 +826,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// ever
// be one of those in the entire program.
if (IsTypeDefinition(decl->semanticContainer)) {
TypeId declaring_type_id =
IndexTypeId declaring_type_id =
db->ToTypeId(decl->semanticContainer->cursor);
IndexedTypeDef* declaring_type_def = db->Resolve(declaring_type_id);
func_def->def.declaring_type = declaring_type_id;
@ -898,7 +898,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// TODO: How to handle multiple parent overrides??
for (unsigned int i = 0; i < num_overridden; ++i) {
clang::Cursor parent = overridden[i];
FuncId parent_id = db->ToFuncId(parent.get_usr());
IndexFuncId parent_id = db->ToFuncId(parent.get_usr());
IndexedFuncDef* parent_def = db->Resolve(parent_id);
func_def = db->Resolve(func_id); // ToFuncId invalidated func_def
@ -931,11 +931,11 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// Note we want to fetch the first TypeRef. Running
// ResolveCursorType(decl->cursor) would return
// the type of the typedef/using, not the type of the referenced type.
optional<TypeId> alias_of =
optional<IndexTypeId> alias_of =
AddDeclTypeUsages(db, decl->cursor, true /*is_interesting*/,
decl->semanticContainer, decl->lexicalContainer);
TypeId type_id = db->ToTypeId(decl->entityInfo->USR);
IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR);
IndexedTypeDef* type_def = db->Resolve(type_id);
if (alias_of)
@ -959,7 +959,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
if (!decl_loc_spelling)
break;
TypeId type_id = db->ToTypeId(decl->entityInfo->USR);
IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR);
IndexedTypeDef* type_def = db->Resolve(type_id);
// TODO: Eventually run with this if. Right now I want to iron out bugs
@ -1003,7 +1003,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
AddDeclTypeUsages(db, base_class->cursor, true /*is_interesting*/,
decl->semanticContainer, decl->lexicalContainer);
optional<TypeId> parent_type_id =
optional<IndexTypeId> parent_type_id =
ResolveToDeclarationType(db, base_class->cursor);
// type_def ptr could be invalidated by ResolveToDeclarationType.
IndexedTypeDef* type_def = db->Resolve(type_id);
@ -1095,7 +1095,7 @@ void indexEntityReference(CXClientData client_data,
clang::Cursor referenced = ref->referencedEntity->cursor;
referenced = referenced.template_specialization_to_template_definition();
VarId var_id = db->ToVarId(referenced.get_usr());
IndexVarId var_id = db->ToVarId(referenced.get_usr());
IndexedVarDef* var_def = db->Resolve(var_id);
AddUsage(var_def->uses, loc_spelling.value());
break;
@ -1127,9 +1127,9 @@ void indexEntityReference(CXClientData client_data,
param->last_func_usage_location = loc_spelling.value();
// Note: be careful, calling db->ToFuncId invalidates the FuncDef* ptrs.
FuncId called_id = db->ToFuncId(ref->referencedEntity->USR);
IndexFuncId called_id = db->ToFuncId(ref->referencedEntity->USR);
if (IsFunction(ref->container->cursor.kind)) {
FuncId caller_id = db->ToFuncId(ref->container->cursor);
IndexFuncId caller_id = db->ToFuncId(ref->container->cursor);
IndexedFuncDef* caller_def = db->Resolve(caller_id);
IndexedFuncDef* called_def = db->Resolve(called_id);
@ -1183,7 +1183,7 @@ void indexEntityReference(CXClientData client_data,
clang::Cursor referenced = ref->referencedEntity->cursor;
referenced = referenced.template_specialization_to_template_definition();
TypeId referenced_id = db->ToTypeId(referenced.get_usr());
IndexTypeId referenced_id = db->ToTypeId(referenced.get_usr());
IndexedTypeDef* referenced_def = db->Resolve(referenced_id);

View File

@ -50,10 +50,9 @@ bool operator==(const Id<T>& a, const Id<T>& b) {
return a.id == b.id;
}
// NOTE(review): empty tag type; no use of it is visible in this span, so its
// purpose cannot be confirmed from here. The name (leading underscore followed
// by an uppercase letter) is an identifier reserved to the implementation.
struct _FakeFileType {};
// Strongly typed ids for index-time definitions: each alias instantiates Id<>
// with the definition struct it identifies, so ids of different kinds are
// distinct types.
// NOTE(review): this text is a rendered diff; TypeId/FuncId/VarId appear to be
// the pre-rename spellings of IndexTypeId/IndexFuncId/IndexVarId.
using TypeId = Id<IndexedTypeDef>;
using FuncId = Id<IndexedFuncDef>;
using VarId = Id<IndexedVarDef>;
using IndexTypeId = Id<IndexedTypeDef>;
using IndexFuncId = Id<IndexedFuncDef>;
using IndexVarId = Id<IndexedVarDef>;
struct IdCache;
@ -167,18 +166,18 @@ void Reflect(TVisitor& visitor,
}
struct IndexedTypeDef {
using Def = TypeDefDefinitionData<TypeId, FuncId, VarId, Range>;
using Def = TypeDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, Range>;
Def def;
TypeId id;
IndexTypeId id;
// Immediate derived types.
std::vector<TypeId> derived;
std::vector<IndexTypeId> derived;
// Declared variables of this type.
// TODO: this needs a lot more work and lots of tests.
// TODO: add instantiation on ctor / dtor, do not add instantiation if type is ptr
std::vector<VarId> instantiations;
std::vector<IndexVarId> instantiations;
// Every usage, useful for things like renames.
// NOTE: Do not insert directly! Use AddUsage instead.
@ -186,7 +185,7 @@ struct IndexedTypeDef {
IndexedTypeDef() : def("") {} // For serialization
IndexedTypeDef(TypeId id, const std::string& usr);
IndexedTypeDef(IndexTypeId id, const std::string& usr);
bool HasInterestingState() const {
return
@ -273,16 +272,16 @@ void Reflect(
}
struct IndexedFuncDef {
using Def = FuncDefDefinitionData<TypeId, FuncId, VarId, FuncRef, Range>;
using Def = FuncDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, FuncRef, Range>;
Def def;
FuncId id;
IndexFuncId id;
// Places the function is forward-declared.
std::vector<Range> declarations;
// Methods which directly override this one.
std::vector<FuncId> derived;
std::vector<IndexFuncId> derived;
// Functions which call this one.
// TODO: Functions can get called outside of just functions - for example,
@ -296,7 +295,7 @@ struct IndexedFuncDef {
std::vector<Range> uses;
IndexedFuncDef() {} // For reflection.
IndexedFuncDef(FuncId id, const std::string& usr) : def(usr), id(id) {
IndexedFuncDef(IndexFuncId id, const std::string& usr) : def(usr), id(id) {
// assert(usr.size() > 0);
}
@ -376,17 +375,17 @@ void Reflect(TVisitor& visitor,
}
struct IndexedVarDef {
using Def = VarDefDefinitionData<TypeId, FuncId, VarId, Range>;
using Def = VarDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, Range>;
Def def;
VarId id;
IndexVarId id;
// Usages.
std::vector<Range> uses;
IndexedVarDef() : def("") {} // For serialization
IndexedVarDef(VarId id, const std::string& usr) : def(usr), id(id) {
IndexedVarDef(IndexVarId id, const std::string& usr) : def(usr), id(id) {
// assert(usr.size() > 0);
}
@ -405,12 +404,12 @@ MAKE_HASHABLE(IndexedVarDef, t.def.usr);
struct IdCache {
std::string primary_file;
std::unordered_map<std::string, TypeId> usr_to_type_id;
std::unordered_map<std::string, FuncId> usr_to_func_id;
std::unordered_map<std::string, VarId> usr_to_var_id;
std::unordered_map<TypeId, std::string> type_id_to_usr;
std::unordered_map<FuncId, std::string> func_id_to_usr;
std::unordered_map<VarId, std::string> var_id_to_usr;
std::unordered_map<std::string, IndexTypeId> usr_to_type_id;
std::unordered_map<std::string, IndexFuncId> usr_to_func_id;
std::unordered_map<std::string, IndexVarId> usr_to_var_id;
std::unordered_map<IndexTypeId, std::string> type_id_to_usr;
std::unordered_map<IndexFuncId, std::string> func_id_to_usr;
std::unordered_map<IndexVarId, std::string> var_id_to_usr;
IdCache(const std::string& primary_file);
@ -436,15 +435,15 @@ struct IndexedFile {
IndexedFile(const std::string& path);
TypeId ToTypeId(const std::string& usr);
FuncId ToFuncId(const std::string& usr);
VarId ToVarId(const std::string& usr);
TypeId ToTypeId(const CXCursor& usr);
FuncId ToFuncId(const CXCursor& usr);
VarId ToVarId(const CXCursor& usr);
IndexedTypeDef* Resolve(TypeId id);
IndexedFuncDef* Resolve(FuncId id);
IndexedVarDef* Resolve(VarId id);
IndexTypeId ToTypeId(const std::string& usr);
IndexFuncId ToFuncId(const std::string& usr);
IndexVarId ToVarId(const std::string& usr);
IndexTypeId ToTypeId(const CXCursor& usr);
IndexFuncId ToFuncId(const CXCursor& usr);
IndexVarId ToVarId(const CXCursor& usr);
IndexedTypeDef* Resolve(IndexTypeId id);
IndexedFuncDef* Resolve(IndexFuncId id);
IndexedVarDef* Resolve(IndexVarId id);
std::string ToString();
};

View File

@ -14,6 +14,21 @@
// TODO: Make all copy constructors explicit.
// Holds the state needed to translate ids that are local to one indexed file
// (Index*Id) into query-database ids (Query*Id).
//
// No constructor is declared, and |local_ids| is a reference member, so an
// instance must be initialized with an IdCache that outlives it.
struct IdGlobalizer {
// TODO threading model
// - [querydb] Create IdGlobalizer mapping from every id registered in local_ids
// - [indexer] Create IndexUpdate using IdGlobalizer cached state
// - [querydb] Apply IndexUpdate
//
// Then lookup in cached_* should *never* fail.
// Id cache of the indexed file whose ids are being translated (not owned).
const IdCache& local_ids;
// Query-side id of the file.
// NOTE(review): presumably the file that |local_ids| belongs to - confirm.
QueryFileId index_file_id;
// Per-kind translation tables from index-local id to query id.
std::unordered_map<IndexTypeId, QueryTypeId> cached_type_ids_;
std::unordered_map<IndexFuncId, QueryFuncId> cached_func_ids_;
std::unordered_map<IndexVarId, QueryVarId> cached_var_ids_;
};
@ -26,37 +41,36 @@ std::vector<Out> Transform(const std::vector<In>& input, std::function<Out(In)>
return result;
}
// TODO: These functions are failing. Investigate why.
// Returns the USR recorded for type |id| in |id_cache.type_id_to_usr|.
//
// Precondition: |id| is present in the map. This is only checked by assert;
// when asserts are compiled out (NDEBUG) a missing id dereferences end(),
// which is undefined behavior. The map is searched twice (once for the
// assert, once for the result).
Usr MapIdToUsr(const IdCache& id_cache, const TypeId& id) {
Usr MapIdToUsr(const IdCache& id_cache, const IndexTypeId& id) {
assert(id_cache.type_id_to_usr.find(id) != id_cache.type_id_to_usr.end());
return id_cache.type_id_to_usr.find(id)->second;
}
// Returns the USR recorded for function |id| in |id_cache.func_id_to_usr|.
//
// Precondition: |id| is present in the map. This is only checked by assert;
// when asserts are compiled out (NDEBUG) a missing id dereferences end(),
// which is undefined behavior. The map is searched twice (once for the
// assert, once for the result).
Usr MapIdToUsr(const IdCache& id_cache, const FuncId& id) {
Usr MapIdToUsr(const IdCache& id_cache, const IndexFuncId& id) {
assert(id_cache.func_id_to_usr.find(id) != id_cache.func_id_to_usr.end());
return id_cache.func_id_to_usr.find(id)->second;
}
// Returns the USR recorded for variable |id| in |id_cache.var_id_to_usr|.
//
// Precondition: |id| is present in the map. This is only checked by assert;
// when asserts are compiled out (NDEBUG) a missing id dereferences end(),
// which is undefined behavior. The map is searched twice (once for the
// assert, once for the result).
Usr MapIdToUsr(const IdCache& id_cache, const VarId& id) {
Usr MapIdToUsr(const IdCache& id_cache, const IndexVarId& id) {
assert(id_cache.var_id_to_usr.find(id) != id_cache.var_id_to_usr.end());
return id_cache.var_id_to_usr.find(id)->second;
}
// Range overload of MapIdToUsr: there is no id to look up here; the range is
// paired with the cache's primary file to build a QueryableLocation.
QueryableLocation MapIdToUsr(const IdCache& id_cache, const Range& range) {
  const std::string& file = id_cache.primary_file;
  return QueryableLocation(file, range);
}
// Maps each type id in |ids| to its USR and returns the resulting vector
// (built by Transform).
//
// Per element, the id must be present in |id_cache.type_id_to_usr|; this is
// only checked by assert, so with asserts compiled out a missing id
// dereferences end() (undefined behavior).
std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<TypeId>& ids) {
return Transform<TypeId, Usr>(ids, [&](TypeId id) {
std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<IndexTypeId>& ids) {
return Transform<IndexTypeId, Usr>(ids, [&](IndexTypeId id) {
assert(id_cache.type_id_to_usr.find(id) != id_cache.type_id_to_usr.end());
return id_cache.type_id_to_usr.find(id)->second;
});
}
// Maps each function id in |ids| to its USR and returns the resulting vector
// (built by Transform).
//
// Per element, the id must be present in |id_cache.func_id_to_usr|; this is
// only checked by assert, so with asserts compiled out a missing id
// dereferences end() (undefined behavior).
std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<FuncId>& ids) {
return Transform<FuncId, Usr>(ids, [&](FuncId id) {
std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<IndexFuncId>& ids) {
return Transform<IndexFuncId, Usr>(ids, [&](IndexFuncId id) {
assert(id_cache.func_id_to_usr.find(id) != id_cache.func_id_to_usr.end());
return id_cache.func_id_to_usr.find(id)->second;
});
}
// Maps each variable id in |ids| to its USR and returns the resulting vector
// (built by Transform).
//
// Per element, the id must be present in |id_cache.var_id_to_usr|; this is
// only checked by assert, so with asserts compiled out a missing id
// dereferences end() (undefined behavior).
std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<VarId>& ids) {
return Transform<VarId, Usr>(ids, [&](VarId id) {
std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<IndexVarId>& ids) {
return Transform<IndexVarId, Usr>(ids, [&](IndexVarId id) {
assert(id_cache.var_id_to_usr.find(id) != id_cache.var_id_to_usr.end());
return id_cache.var_id_to_usr.find(id)->second;
});
}
@ -299,7 +313,7 @@ bool ComputeDifferenceForUpdate(
previous.begin(), previous.end(),
current.begin(), current.end(),
std::back_inserter(*removed));
// Returns the elmeents in |current| that are not in |previous|.
// Returns the elements in |current| that are not in |previous|.
std::set_difference(
current.begin(), current.end(),
previous.begin(), previous.end(),
@ -321,13 +335,7 @@ void CompareGroups(
while (prev_it != previous_data.end() && curr_it != current_data.end()) {
// same id
if (prev_it->def.usr == curr_it->def.usr) {
//if (!prev_it->is_bad_def && !curr_it->is_bad_def)
on_found(&*prev_it, &*curr_it);
//else if (prev_it->is_bad_def)
// on_added(&*curr_it);
//else if (curr_it->is_bad_def)
// on_removed(&*curr_it);
++prev_it;
++curr_it;
}
@ -665,11 +673,6 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
// are joined to. So that way even if the main db is busy we can
// still be joining. Joining the partially joined db to the main
// db should be faster since we will have larger data lanes to use.
// TODO: I think we can run libclang multiple times in one process. So we might
// only need two processes. Still, for perf reasons it would be good if
// we could stay in one process. We could probably just use shared
// memory. May want to run libclang in separate process to protect from
// crashes/issues there.
// TODO: allow user to store configuration as json? file in home dir; also
// allow local overrides (scan up dirs)
// TODO: add opt to dump config when starting (--dump-config)

View File

@ -5,6 +5,16 @@
// A USR is represented as a plain std::string.
using Usr = std::string;
// Forward declarations of the query-side structs, so the id aliases below can
// name them before their definitions.
struct QueryableFile;
struct QueryableTypeDef;
struct QueryableFuncDef;
struct QueryableVarDef;
// Strongly typed ids for query-side entities: each alias instantiates Id<>
// with the struct it identifies, so ids of different kinds are distinct types.
using QueryFileId = Id<QueryableFile>;
using QueryTypeId = Id<QueryableTypeDef>;
using QueryFuncId = Id<QueryableFuncDef>;
using QueryVarId = Id<QueryableVarDef>;
// TODO: in types, store refs separately from irefs. Then we can drop
// 'interesting' from location when that is cleaned up.