rework-id

This commit is contained in:
Jacob Dufault 2017-02-26 11:45:59 -08:00
parent f3aa91d8db
commit aaa3542670
5 changed files with 376 additions and 170 deletions

View File

@ -2,47 +2,51 @@
#include "serializer.h" #include "serializer.h"
IndexedFile::IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db) IndexedFile::IndexedFile(IdCache* id_cache, FileDb* file_db)
: usr_to_id(usr_to_id), file_db(file_db) { : id_cache(id_cache), file_db(file_db) {
// TODO: Reconsider if we should still be reusing the same id_cache.
// Preallocate any existing resolved ids. // Preallocate any existing resolved ids.
for (const auto& entry : usr_to_id->usr_to_type_id) for (const auto& entry : id_cache->usr_to_type_id)
types.push_back(IndexedTypeDef(entry.second, entry.first)); types.push_back(IndexedTypeDef(entry.second, entry.first));
for (const auto& entry : usr_to_id->usr_to_func_id) for (const auto& entry : id_cache->usr_to_func_id)
funcs.push_back(IndexedFuncDef(entry.second, entry.first)); funcs.push_back(IndexedFuncDef(entry.second, entry.first));
for (const auto& entry : usr_to_id->usr_to_var_id) for (const auto& entry : id_cache->usr_to_var_id)
vars.push_back(IndexedVarDef(entry.second, entry.first)); vars.push_back(IndexedVarDef(entry.second, entry.first));
} }
// TODO: Optimize for const char*? // TODO: Optimize for const char*?
TypeId IndexedFile::ToTypeId(const std::string& usr) { TypeId IndexedFile::ToTypeId(const std::string& usr) {
auto it = usr_to_id->usr_to_type_id.find(usr); auto it = id_cache->usr_to_type_id.find(usr);
if (it != usr_to_id->usr_to_type_id.end()) if (it != id_cache->usr_to_type_id.end())
return it->second; return it->second;
TypeId id(usr_to_id->group, types.size()); TypeId id(id_cache->group, types.size());
types.push_back(IndexedTypeDef(id, usr)); types.push_back(IndexedTypeDef(id, usr));
usr_to_id->usr_to_type_id[usr] = id; id_cache->usr_to_type_id[usr] = id;
id_cache->type_id_to_usr[id] = usr;
return id; return id;
} }
FuncId IndexedFile::ToFuncId(const std::string& usr) { FuncId IndexedFile::ToFuncId(const std::string& usr) {
auto it = usr_to_id->usr_to_func_id.find(usr); auto it = id_cache->usr_to_func_id.find(usr);
if (it != usr_to_id->usr_to_func_id.end()) if (it != id_cache->usr_to_func_id.end())
return it->second; return it->second;
FuncId id(usr_to_id->group, funcs.size()); FuncId id(id_cache->group, funcs.size());
funcs.push_back(IndexedFuncDef(id, usr)); funcs.push_back(IndexedFuncDef(id, usr));
usr_to_id->usr_to_func_id[usr] = id; id_cache->usr_to_func_id[usr] = id;
id_cache->func_id_to_usr[id] = usr;
return id; return id;
} }
VarId IndexedFile::ToVarId(const std::string& usr) { VarId IndexedFile::ToVarId(const std::string& usr) {
auto it = usr_to_id->usr_to_var_id.find(usr); auto it = id_cache->usr_to_var_id.find(usr);
if (it != usr_to_id->usr_to_var_id.end()) if (it != id_cache->usr_to_var_id.end())
return it->second; return it->second;
VarId id(usr_to_id->group, vars.size()); VarId id(id_cache->group, vars.size());
vars.push_back(IndexedVarDef(id, usr)); vars.push_back(IndexedVarDef(id, usr));
usr_to_id->usr_to_var_id[usr] = id; id_cache->usr_to_var_id[usr] = id;
id_cache->var_id_to_usr[id] = usr;
return id; return id;
} }
@ -822,7 +826,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
static bool DUMP_AST = true; static bool DUMP_AST = true;
IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector<std::string> args) { IndexedFile Parse(IdCache* id_cache, FileDb* file_db, std::string filename, std::vector<std::string> args) {
clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/); clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/);
clang::TranslationUnit tu(index, filename, args); clang::TranslationUnit tu(index, filename, args);
@ -845,7 +849,7 @@ IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filen
*/ */
}; };
IndexedFile db(usr_to_id, file_db); IndexedFile db(id_cache, file_db);
NamespaceHelper ns; NamespaceHelper ns;
IndexParam param(&db, &ns); IndexParam param(&db, &ns);
clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks), clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks),
@ -986,9 +990,9 @@ int main55555(int argc, char** argv) {
// Run test. // Run test.
std::cout << "[START] " << path << std::endl; std::cout << "[START] " << path << std::endl;
UsrToIdResolver usr_to_id(1); IdCache id_cache(1);
FileDb file_db(1); FileDb file_db(1);
IndexedFile db = Parse(&usr_to_id, &file_db, path, {}); IndexedFile db = Parse(&id_cache, &file_db, path, {});
std::string actual_output = db.ToString(); std::string actual_output = db.ToString();
//WriteToFile("output.json", actual_output); //WriteToFile("output.json", actual_output);

View File

@ -295,6 +295,7 @@ using VarRef = Ref<IndexedVarDef>;
// TODO: Either eliminate the defs created as a by-product of cross-referencing, // TODO: Either eliminate the defs created as a by-product of cross-referencing,
// or do not emit things we don't have definitions for. // or do not emit things we don't have definitions for.
template<typename TypeId = TypeId, typename FuncId = FuncId, typename VarId = VarId>
struct TypeDefDefinitionData { struct TypeDefDefinitionData {
// General metadata. // General metadata.
TypeId id; TypeId id;
@ -327,7 +328,7 @@ struct TypeDefDefinitionData {
TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {} TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {}
bool operator==(const TypeDefDefinitionData& other) const { bool operator==(const TypeDefDefinitionData<TypeId, FuncId, VarId>& other) const {
return return
id == other.id && id == other.id &&
usr == other.usr && usr == other.usr &&
@ -341,13 +342,13 @@ struct TypeDefDefinitionData {
vars == other.vars; vars == other.vars;
} }
bool operator!=(const TypeDefDefinitionData& other) const { bool operator!=(const TypeDefDefinitionData<TypeId, FuncId, VarId>& other) const {
return !(*this == other); return !(*this == other);
} }
}; };
struct IndexedTypeDef { struct IndexedTypeDef {
TypeDefDefinitionData def; TypeDefDefinitionData<> def;
// Immediate derived types. // Immediate derived types.
std::vector<TypeId> derived; std::vector<TypeId> derived;
@ -375,6 +376,7 @@ namespace std {
}; };
} }
template<typename TypeId = TypeId, typename FuncId = FuncId, typename VarId = VarId, typename FuncRef = FuncRef>
struct FuncDefDefinitionData { struct FuncDefDefinitionData {
// General metadata. // General metadata.
FuncId id; FuncId id;
@ -399,7 +401,7 @@ struct FuncDefDefinitionData {
assert(usr.size() > 0); assert(usr.size() > 0);
} }
bool operator==(const FuncDefDefinitionData& other) const { bool operator==(const FuncDefDefinitionData<TypeId, FuncId, VarId, FuncRef>& other) const {
return return
id == other.id && id == other.id &&
usr == other.usr && usr == other.usr &&
@ -412,13 +414,13 @@ struct FuncDefDefinitionData {
callees == other.callees; callees == other.callees;
} }
bool operator!=(const FuncDefDefinitionData& other) const { bool operator!=(const FuncDefDefinitionData<TypeId, FuncId, VarId, FuncRef>& other) const {
return !(*this == other); return !(*this == other);
} }
}; };
struct IndexedFuncDef { struct IndexedFuncDef {
FuncDefDefinitionData def; FuncDefDefinitionData<> def;
// Places the function is forward-declared. // Places the function is forward-declared.
std::vector<Location> declarations; std::vector<Location> declarations;
@ -457,7 +459,7 @@ namespace std {
}; };
} }
template<typename TypeId = TypeId, typename FuncId = FuncId, typename VarId = VarId>
struct VarDefDefinitionData { struct VarDefDefinitionData {
// General metadata. // General metadata.
VarId id; VarId id;
@ -477,7 +479,7 @@ struct VarDefDefinitionData {
VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {} VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {}
bool operator==(const VarDefDefinitionData& other) const { bool operator==(const VarDefDefinitionData<TypeId, FuncId, VarId>& other) const {
return return
id == other.id && id == other.id &&
usr == other.usr && usr == other.usr &&
@ -489,13 +491,13 @@ struct VarDefDefinitionData {
declaring_type == other.declaring_type; declaring_type == other.declaring_type;
} }
bool operator!=(const VarDefDefinitionData& other) const { bool operator!=(const VarDefDefinitionData<TypeId, FuncId, VarId>& other) const {
return !(*this == other); return !(*this == other);
} }
}; };
struct IndexedVarDef { struct IndexedVarDef {
VarDefDefinitionData def; VarDefDefinitionData<> def;
// Usages. // Usages.
std::vector<Location> uses; std::vector<Location> uses;
@ -520,26 +522,29 @@ namespace std {
}; };
} }
struct UsrToIdResolver { struct IdCache {
// NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs // NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs
// to get fixed up when inserting into the real db. // to get fixed up when inserting into the real db.
GroupId group; GroupId group;
std::unordered_map<std::string, TypeId> usr_to_type_id; std::unordered_map<std::string, TypeId> usr_to_type_id;
std::unordered_map<std::string, FuncId> usr_to_func_id; std::unordered_map<std::string, FuncId> usr_to_func_id;
std::unordered_map<std::string, VarId> usr_to_var_id; std::unordered_map<std::string, VarId> usr_to_var_id;
std::unordered_map<TypeId, std::string> type_id_to_usr;
std::unordered_map<FuncId, std::string> func_id_to_usr;
std::unordered_map<VarId, std::string> var_id_to_usr;
UsrToIdResolver(GroupId group) : group(group) {} IdCache(GroupId group) : group(group) {}
}; };
struct IndexedFile { struct IndexedFile {
FileDb* file_db; FileDb* file_db;
UsrToIdResolver* usr_to_id; IdCache* id_cache;
std::vector<IndexedTypeDef> types; std::vector<IndexedTypeDef> types;
std::vector<IndexedFuncDef> funcs; std::vector<IndexedFuncDef> funcs;
std::vector<IndexedVarDef> vars; std::vector<IndexedVarDef> vars;
IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db); IndexedFile(IdCache* id_cache, FileDb* file_db);
TypeId ToTypeId(const std::string& usr); TypeId ToTypeId(const std::string& usr);
FuncId ToFuncId(const std::string& usr); FuncId ToFuncId(const std::string& usr);
@ -557,4 +562,4 @@ struct IndexedFile {
IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector<std::string> args); IndexedFile Parse(IdCache* id_cache, FileDb* file_db, std::string filename, std::vector<std::string> args);

399
query.cc
View File

@ -17,6 +17,7 @@
// TODO: Make all copy constructors explicit. // TODO: Make all copy constructors explicit.
#if false
struct IdMap { struct IdMap {
// TODO: id resolution is broken. We need to resolve same fundamental USR to same ID. Problem is that multiple USRs // TODO: id resolution is broken. We need to resolve same fundamental USR to same ID. Problem is that multiple USRs
// can have different source IDs. // can have different source IDs.
@ -164,13 +165,13 @@ struct IdMap {
FileId Remap(FileId from) { FileId Remap(FileId from) {
return GenericRemap<FileId>(&remap_file_id, &group_file_id_to_usr, &usr_to_file_id, &next_file_id, from); return GenericRemap<FileId>(&remap_file_id, &group_file_id_to_usr, &usr_to_file_id, &next_file_id, from);
} }
TypeId Remap(TypeId from) { Usr Remap(TypeId from) {
return GenericRemap(&remap_type_id, &group_type_id_to_usr, &usr_to_type_id, &next_type_id, from); return GenericRemap(&remap_type_id, &group_type_id_to_usr, &usr_to_type_id, &next_type_id, from);
} }
FuncId Remap(FuncId from) { Usr Remap(FuncId from) {
return GenericRemap(&remap_func_id, &group_func_id_to_usr, &usr_to_func_id, &next_func_id, from); return GenericRemap(&remap_func_id, &group_func_id_to_usr, &usr_to_func_id, &next_func_id, from);
} }
VarId Remap(VarId from) { Usr Remap(VarId from) {
return GenericRemap(&remap_var_id, &group_var_id_to_usr, &usr_to_var_id, &next_var_id, from); return GenericRemap(&remap_var_id, &group_var_id_to_usr, &usr_to_var_id, &next_var_id, from);
} }
Location Remap(Location from) { Location Remap(Location from) {
@ -179,12 +180,12 @@ struct IdMap {
from.raw_file_id = file.id; from.raw_file_id = file.id;
return from; return from;
} }
FuncRef Remap(FuncRef from) { UsrRef Remap(FuncRef from) {
from.id = Remap(from.id); from.id = Remap(from.id);
from.loc = Remap(from.loc); from.loc = Remap(from.loc);
return from; return from;
} }
TypeDefDefinitionData Remap(TypeDefDefinitionData def) { QueryableTypeDef::DefUpdate Remap(QueryableTypeDef::DefUpdate def) {
def.id = Remap(def.id); def.id = Remap(def.id);
if (def.definition) if (def.definition)
def.definition = Remap(def.definition.value()); def.definition = Remap(def.definition.value());
@ -196,7 +197,7 @@ struct IdMap {
def.vars = Remap(def.vars); def.vars = Remap(def.vars);
return def; return def;
} }
FuncDefDefinitionData Remap(FuncDefDefinitionData def) { QueryableFuncDef::DefUpdate Remap(QueryableFuncDef::DefUpdate def) {
def.id = Remap(def.id); def.id = Remap(def.id);
if (def.definition) if (def.definition)
def.definition = Remap(def.definition.value()); def.definition = Remap(def.definition.value());
@ -208,7 +209,7 @@ struct IdMap {
def.callees = Remap(def.callees); def.callees = Remap(def.callees);
return def; return def;
} }
VarDefDefinitionData Remap(VarDefDefinitionData def) { QueryableVarDef::DefUpdate Remap(QueryableVarDef::DefUpdate def) {
def.id = Remap(def.id); def.id = Remap(def.id);
if (def.declaration) if (def.declaration)
def.declaration = Remap(def.declaration.value()); def.declaration = Remap(def.declaration.value());
@ -267,61 +268,149 @@ struct IdMap {
result.push_back(Remap(l)); result.push_back(Remap(l));
return result; return result;
} }
std::vector<TypeId> Remap(const std::vector<TypeId>& from) { std::vector<Usr> Remap(const std::vector<TypeId>& from) {
return GenericVectorRemap(&remap_type_id, &next_type_id, from); return GenericVectorRemap(&remap_type_id, &next_type_id, from);
} }
std::vector<FuncId> Remap(const std::vector<FuncId>& from) { std::vector<Usr> Remap(const std::vector<FuncId>& from) {
return GenericVectorRemap(&remap_func_id, &next_func_id, from); return GenericVectorRemap(&remap_func_id, &next_func_id, from);
} }
std::vector<VarId> Remap(const std::vector<VarId>& from) { std::vector<Usr> Remap(const std::vector<VarId>& from) {
return GenericVectorRemap(&remap_var_id, &next_var_id, from); return GenericVectorRemap(&remap_var_id, &next_var_id, from);
} }
std::vector<FuncRef> Remap(const std::vector<FuncRef>& from) { std::vector<UsrRef> Remap(const std::vector<UsrRef>& from) {
std::vector<FuncRef> result; std::vector<UsrRef> result;
result.reserve(from.size()); result.reserve(from.size());
for (FuncRef r : from) for (FuncRef r : from)
result.push_back(Remap(r)); result.push_back(Remap(r));
return result; return result;
} }
}; };
#endif
// TODO: Switch over to QueryableLocation. Figure out whether there is
// a good way to get the indexer to use it. Probably not, since
// we may discover more files while indexing a file.
//
// We could also reuse the planned USR caching system for file
// paths.
// Holds a list of file names together with the integer id type used to refer
// to a file.
// NOTE(review): nothing visible here populates or reads |file_names|;
// presumably an Id is an index into it - confirm once this type is wired up.
struct CachedFileDb {
// Identifier type for a file; QueryableLocation stores one of these.
using Id = int64_t;
// File names stored by this db.
std::vector<std::string> file_names;
};
// A line/column position tagged with a CachedFileDb file id.
// Members have no default initializers, so a default-constructed instance
// holds indeterminate values until every field is assigned.
struct QueryableLocation {
// File this position belongs to, expressed as a CachedFileDb id.
CachedFileDb::Id id;
// Position inside the file.
// NOTE(review): whether these are 0- or 1-based is not visible here.
int line;
int column;
// NOTE(review): the meaning of "interesting" is decided by the producer of
// the location, which is outside this view.
bool is_interesting;
};
template<typename TId, typename TValue>
MergeableUpdate<TId, TValue> MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector<TValue>& removed, const std::vector<TValue>& added) {
MergeableUpdate<TId, TValue> update;
update.id = id_map->Remap(symbol_id);
update.to_remove = id_map->Remap(removed);
update.to_add = id_map->Remap(added);
return update;
}
// NOTE: When not inside of a |def| object, there can be duplicates of the same // NOTE: When not inside of a |def| object, there can be duplicates of the same
// information if that information is contributed from separate sources. // information if that information is contributed from separate sources.
// If we need to avoid this duplication in the future, we will have to // If we need to avoid this duplication in the future, we will have to
// add a refcount. // add a refcount.
template<typename In, typename Out>
QueryableTypeDef::QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed) std::vector<Out> Transform(const std::vector<In>& input, std::function<Out(In)> op) {
: def(id_map.Remap(indexed.def)) { std::vector<Out> result;
derived = id_map.Remap(indexed.derived); result.reserve(input.size());
uses = id_map.Remap(indexed.uses); for (const In& in : input)
result.push_back(op(in));
return result;
} }
QueryableFuncDef::QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed) Usr MapIdToUsr(IdCache& id_cache, TypeId& id) {
: def(id_map.Remap(indexed.def)) { return id_cache.type_id_to_usr[id];
declarations = id_map.Remap(indexed.declarations); }
derived = id_map.Remap(indexed.derived); Usr MapIdToUsr(IdCache& id_cache, FuncId& id) {
callers = id_map.Remap(indexed.callers); return id_cache.func_id_to_usr[id];
uses = id_map.Remap(indexed.uses); }
Usr MapIdToUsr(IdCache& id_cache, VarId& id) {
return id_cache.var_id_to_usr[id];
}
Location MapIdToUsr(IdCache& id_cache, Location& ids); // FIXME: We will need additional data to map locations.
// Translates each TypeId in |ids| to the value stored for it in
// |id_cache.type_id_to_usr|, keeping the input order.
//
// The lookup uses operator[], so an id that is not in the cache is inserted
// with a default-constructed value, and that value is what gets returned for it.
std::vector<Usr> MapIdToUsr(IdCache& id_cache, std::vector<TypeId>& ids) {
  std::vector<Usr> usrs;
  usrs.reserve(ids.size());
  for (const TypeId& type_id : ids)
    usrs.push_back(id_cache.type_id_to_usr[type_id]);
  return usrs;
}
// Translates each FuncId in |ids| to the value stored for it in
// |id_cache.func_id_to_usr|, keeping the input order.
//
// The lookup uses operator[], so an id that is not in the cache is inserted
// with a default-constructed value, and that value is what gets returned for it.
std::vector<Usr> MapIdToUsr(IdCache& id_cache, std::vector<FuncId>& ids) {
  std::vector<Usr> usrs;
  usrs.reserve(ids.size());
  for (const FuncId& func_id : ids)
    usrs.push_back(id_cache.func_id_to_usr[func_id]);
  return usrs;
}
// Translates each VarId in |ids| to the value stored for it in
// |id_cache.var_id_to_usr|, keeping the input order.
//
// The lookup uses operator[], so an id that is not in the cache is inserted
// with a default-constructed value, and that value is what gets returned for it.
std::vector<Usr> MapIdToUsr(IdCache& id_cache, std::vector<VarId>& ids) {
  std::vector<Usr> usrs;
  usrs.reserve(ids.size());
  for (const VarId& var_id : ids)
    usrs.push_back(id_cache.var_id_to_usr[var_id]);
  return usrs;
}
// Converts each FuncRef in |ids| into a UsrRef, keeping the input order.
// The function id is replaced by the value stored in
// |id_cache.func_id_to_usr| (looked up with operator[], so a missing id is
// inserted with a default value); the location is copied through unchanged.
std::vector<UsrRef> MapIdToUsr(IdCache& id_cache, std::vector<FuncRef>& ids) {
  std::vector<UsrRef> usr_refs;
  usr_refs.reserve(ids.size());
  for (const FuncRef& func_ref : ids) {
    UsrRef usr_ref;
    // FIXME: Patch proper location. Fix when fixing MapIdToUsr(Location). I'm
    // thinking we will have a GlobalLocation type.
    usr_ref.loc = func_ref.loc;
    usr_ref.usr = id_cache.func_id_to_usr[func_ref.id];
    usr_refs.push_back(usr_ref);
  }
  return usr_refs;
}
std::vector<Location> MapIdToUsr(IdCache& id_cache, std::vector<Location>& ids); // FIXME: We will need additional data to map locations.
// Builds the USR-keyed form of a type definition from its id-keyed form.
//
// |def.usr| is passed for both constructor arguments of the result. Optional
// fields (definition, alias_of) are translated only when present in |def|;
// the id lists (parents, types, funcs, vars) are always translated.
//
// NOTE(review): any other fields of |def| are not copied here - confirm
// whether that is intentional.
QueryableTypeDef::DefUpdate MapIdToUsr(IdCache& id_cache, TypeDefDefinitionData<>& def) {
  QueryableTypeDef::DefUpdate result(def.usr, def.usr);

  // Test the source |def|, not |result|: |result| was only just constructed
  // from the usr, so checking its optional fields would skip every copy.
  if (def.definition)
    result.definition = MapIdToUsr(id_cache, def.definition.value());
  if (def.alias_of)
    result.alias_of = MapIdToUsr(id_cache, def.alias_of.value());

  result.parents = MapIdToUsr(id_cache, def.parents);
  result.types = MapIdToUsr(id_cache, def.types);
  result.funcs = MapIdToUsr(id_cache, def.funcs);
  result.vars = MapIdToUsr(id_cache, def.vars);
  return result;
}
// Builds the USR-keyed form of a function definition from its id-keyed form.
//
// |def.usr| is passed for both constructor arguments of the result. Optional
// fields (definition, declaring_type, base) are translated only when present
// in |def|; locals and callees are always translated.
//
// NOTE(review): any other fields of |def| are not copied here - confirm
// whether that is intentional.
QueryableFuncDef::DefUpdate MapIdToUsr(IdCache& id_cache, FuncDefDefinitionData<>& def) {
  QueryableFuncDef::DefUpdate result(def.usr, def.usr);

  // Test the source |def|, not |result|: |result| was only just constructed
  // from the usr, so checking its optional fields would skip every copy.
  if (def.definition)
    result.definition = MapIdToUsr(id_cache, def.definition.value());
  if (def.declaring_type)
    result.declaring_type = MapIdToUsr(id_cache, def.declaring_type.value());
  if (def.base)
    result.base = MapIdToUsr(id_cache, def.base.value());

  result.locals = MapIdToUsr(id_cache, def.locals);
  result.callees = MapIdToUsr(id_cache, def.callees);
  return result;
}
// Builds the USR-keyed form of a variable definition from its id-keyed form.
//
// |def.usr| is passed for both constructor arguments of the result. Every
// optional field (declaration, definition, variable_type, declaring_type) is
// translated only when present in |def|.
//
// NOTE(review): any other fields of |def| are not copied here - confirm
// whether that is intentional.
QueryableVarDef::DefUpdate MapIdToUsr(IdCache& id_cache, VarDefDefinitionData<>& def) {
  QueryableVarDef::DefUpdate result(def.usr, def.usr);

  // Test the source |def|, not |result|: |result| was only just constructed
  // from the usr, so checking its optional fields would skip every copy.
  if (def.declaration)
    result.declaration = MapIdToUsr(id_cache, def.declaration.value());
  if (def.definition)
    result.definition = MapIdToUsr(id_cache, def.definition.value());
  if (def.variable_type)
    result.variable_type = MapIdToUsr(id_cache, def.variable_type.value());
  if (def.declaring_type)
    result.declaring_type = MapIdToUsr(id_cache, def.declaring_type.value());
  return result;
}
QueryableVarDef::QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed) QueryableTypeDef::QueryableTypeDef(IdCache& id_cache, IndexedTypeDef& indexed)
: def(id_map.Remap(indexed.def)) { : def(MapIdToUsr(id_cache, indexed.def)) {
uses = id_map.Remap(indexed.uses); derived = MapIdToUsr(id_cache, indexed.derived);
uses = MapIdToUsr(id_cache, indexed.uses);
}
QueryableFuncDef::QueryableFuncDef(IdCache& id_cache, IndexedFuncDef& indexed)
: def(MapIdToUsr(id_cache, indexed.def)) {
declarations = MapIdToUsr(id_cache, indexed.declarations);
derived = MapIdToUsr(id_cache, indexed.derived);
callers = MapIdToUsr(id_cache, indexed.callers);
uses = MapIdToUsr(id_cache, indexed.uses);
}
QueryableVarDef::QueryableVarDef(IdCache& id_cache, IndexedVarDef& indexed)
: def(MapIdToUsr(id_cache, indexed.def)) {
uses = MapIdToUsr(id_cache, indexed.uses);
} }
struct QueryableEntry { struct QueryableEntry {
@ -335,14 +424,16 @@ struct QueryableEntry {
// TODO: For space reasons, it may make sense to map Usr -> offset inside of global storage, but not for intermediate or on-disk storage.
// We can probably eliminate most of that pain by introducing our own UsrDb concept which interns the Usr strings. We can reduce
// the pain of a global UsrDb by staging the work as:
// (parallel) clang index -> (main) commit USRs to global -> (parallel) transfer IDs to global USRs -> (main) import
// TODO: Remove the GroupId concept.
struct CachedIndexedFile { struct CachedIndexedFile {
// Path to the file indexed. // Path to the file indexed.
std::string path; std::string path;
// GroupId of the indexed file.
GroupId group;
// TODO: Make sure that |previous_index| and |current_index| use the same id // TODO: Make sure that |previous_index| and |current_index| use the same id
// to USR mapping. This lets us greatly speed up difference computation. // to USR mapping. This lets us greatly speed up difference computation.
@ -352,8 +443,7 @@ struct CachedIndexedFile {
optional<IndexedFile> previous_index; optional<IndexedFile> previous_index;
IndexedFile current_index; IndexedFile current_index;
CachedIndexedFile(const IndexedFile& indexed) CachedIndexedFile(const IndexedFile& indexed) : current_index(indexed) {}
: group(indexed.usr_to_id->group), current_index(indexed) {}
}; };
template<typename T> template<typename T>
@ -372,18 +462,33 @@ void RemoveRange(std::vector<T>* dest, const std::vector<T>& to_remove) {
dest->erase(it); dest->erase(it);
} }
struct IndexUpdate {
IdMap* id_map;
struct IndexUpdate {
// Type updates. // Type updates.
std::vector<TypeId> types_removed; std::vector<Usr> types_removed;
std::vector<QueryableTypeDef> types_added; std::vector<QueryableTypeDef> types_added;
std::vector<QueryableTypeDef::DefUpdate> types_def_changed; std::vector<QueryableTypeDef::DefUpdate> types_def_changed;
std::vector<QueryableTypeDef::DerivedUpdate> types_derived; std::vector<QueryableTypeDef::DerivedUpdate> types_derived;
std::vector<QueryableTypeDef::UsesUpdate> types_uses; std::vector<QueryableTypeDef::UsesUpdate> types_uses;
// Function updates. // Function updates.
std::vector<FuncId> funcs_removed; std::vector<Usr> funcs_removed;
std::vector<QueryableFuncDef> funcs_added; std::vector<QueryableFuncDef> funcs_added;
std::vector<QueryableFuncDef::DefUpdate> funcs_def_changed; std::vector<QueryableFuncDef::DefUpdate> funcs_def_changed;
std::vector<QueryableFuncDef::DeclarationsUpdate> funcs_declarations; std::vector<QueryableFuncDef::DeclarationsUpdate> funcs_declarations;
@ -392,14 +497,14 @@ struct IndexUpdate {
std::vector<QueryableFuncDef::UsesUpdate> funcs_uses; std::vector<QueryableFuncDef::UsesUpdate> funcs_uses;
// Variable updates. // Variable updates.
std::vector<VarId> vars_removed; std::vector<Usr> vars_removed;
std::vector<QueryableVarDef> vars_added; std::vector<QueryableVarDef> vars_added;
std::vector<QueryableVarDef::DefUpdate> vars_def_changed; std::vector<QueryableVarDef::DefUpdate> vars_def_changed;
std::vector<QueryableVarDef::UsesUpdate> vars_uses; std::vector<QueryableVarDef::UsesUpdate> vars_uses;
IndexUpdate(IdMap* id_map) : id_map(id_map) {} IndexUpdate(IndexedFile& file);
IndexUpdate(IdMap* id_map, IndexedFile& file);
#if false
void Remap(IdMap* map) { void Remap(IdMap* map) {
id_map = map; id_map = map;
@ -427,7 +532,9 @@ struct IndexUpdate {
#undef INDEX_UPDATE_REMAP #undef INDEX_UPDATE_REMAP
} }
#endif
#if false
// Merges the contents of |update| into this IndexUpdate instance. // Merges the contents of |update| into this IndexUpdate instance.
void Merge(const IndexUpdate& update) { void Merge(const IndexUpdate& update) {
#define INDEX_UPDATE_MERGE(name) \ #define INDEX_UPDATE_MERGE(name) \
@ -454,19 +561,27 @@ struct IndexUpdate {
#undef INDEX_UPDATE_MERGE #undef INDEX_UPDATE_MERGE
} }
#endif
}; };
IndexUpdate::IndexUpdate(IdMap* id_map, IndexedFile& file) : id_map(id_map) { IndexUpdate::IndexUpdate(IndexedFile& file) {
id_map->Import(file.file_db, file.usr_to_id);
for (IndexedTypeDef& def : file.types) for (IndexedTypeDef& def : file.types)
types_added.push_back(QueryableTypeDef(*id_map, def)); types_added.push_back(QueryableTypeDef(*file.id_cache, def));
for (IndexedFuncDef& def : file.funcs) for (IndexedFuncDef& def : file.funcs)
funcs_added.push_back(QueryableFuncDef(*id_map, def)); funcs_added.push_back(QueryableFuncDef(*file.id_cache, def));
for (IndexedVarDef& def : file.vars) for (IndexedVarDef& def : file.vars)
vars_added.push_back(QueryableVarDef(*id_map, def)); vars_added.push_back(QueryableVarDef(*file.id_cache, def));
} }
#if false
// Builds a MergeableUpdate for |symbol_id| in which the id and both value
// lists have been passed through |id_map->Remap|.
//
//   id_map    - performs the remapping; it is called three times, in the
//               order id, removed, added.
//   symbol_id - id of the symbol the update applies to.
//   removed   - values that become |to_remove| after remapping.
//   added     - values that become |to_add| after remapping.
//
// NOTE(review): Remap may have side effects on |id_map|, so the call order
// below is deliberately left untouched.
template<typename TId, typename TValue>
MergeableUpdate<TId, TValue> MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector<TValue>& removed, const std::vector<TValue>& added) {
MergeableUpdate<TId, TValue> update;
update.id = id_map->Remap(symbol_id);
update.to_remove = id_map->Remap(removed);
update.to_add = id_map->Remap(added);
return update;
}
template<typename TValue> template<typename TValue>
TValue* TryFind(std::unordered_set<TValue*>& set, TValue* value) { TValue* TryFind(std::unordered_set<TValue*>& set, TValue* value) {
@ -639,6 +754,23 @@ IndexUpdate ComputeDiff(IdMap* id_map, IndexedFile& previous, IndexedFile& curre
#undef JOIN #undef JOIN
} }
#endif
@ -660,40 +792,106 @@ struct QueryableDatabase {
std::vector<QueryableFuncDef> funcs; std::vector<QueryableFuncDef> funcs;
std::vector<QueryableVarDef> vars; std::vector<QueryableVarDef> vars;
// TypeId to index in |types| (same for funcs, vars) // Lookup symbol based on a usr.
std::unordered_map<TypeId, int> type_id_to_index; std::unordered_map<Usr, SymbolIdx> usr_to_symbol;
std::unordered_map<FuncId, int> func_id_to_index;
std::unordered_map<VarId, int> var_id_to_index;
// |files| is indexed by FileId. Retrieve a FileId from a path using // |files| is indexed by FileId. Retrieve a FileId from a path using
// |file_db|. // |file_db|.
FileDb file_db; FileDb file_db;
std::vector<QueryableFile> files; std::vector<QueryableFile> files;
// When importing data into the global db we need to remap ids from an
// arbitrary group into the global group.
IdMap id_map;
QueryableDatabase(GroupId group); QueryableDatabase(GroupId group);
// Insert the contents of |update| into |db|. // Insert the contents of |update| into |db|.
void ApplyIndexUpdate(IndexUpdate* update); void ApplyIndexUpdate(IndexUpdate* update);
void RemoveUsrs(const std::vector<Usr>& to_remove);
void Import(const std::vector<QueryableTypeDef>& defs);
void Import(const std::vector<QueryableFuncDef>& defs);
void Import(const std::vector<QueryableVarDef>& defs);
void Update(const std::vector<QueryableTypeDef::DefUpdate>& updates);
void Update(const std::vector<QueryableFuncDef::DefUpdate>& updates);
void Update(const std::vector<QueryableVarDef::DefUpdate>& updates);
}; };
template<typename TDef, typename TId> void QueryableDatabase::RemoveUsrs(const std::vector<Usr>& to_remove) {
void RemoveAll(std::unordered_map<TId, int>* id_map, std::vector<TDef>* defs, const std::vector<TId>& ids_to_remove) { // TODO: Removing usrs is tricky because it means we will have to rebuild idx locations. I'm thinking we just nullify
auto to_erase = std::remove_if(defs->begin(), defs->end(), [&](const TDef& def) { // the entry instead of actually removing the data. The index could be massive.
/*
usr_to_symbol.erase(std::remove_if(usr_to_symbol.begin(), usr_to_symbol.end(), [&to_remove](const std::string& usr) {
// TODO: make ids_to_remove a set? // TODO: make ids_to_remove a set?
return std::find(ids_to_remove.begin(), ids_to_remove.end(), def.def.id) != ids_to_remove.end(); return std::find(to_remove.begin(), to_remove.end(), usr) != to_remove.end();
}); }), usr_to_symbol.end());
for (auto it = to_erase; it != defs->end(); ++it) {
id_map->erase(it->def.id); types.erase(std::remove_if(types.begin(), types.end(), [&to_remove](const QueryableTypeDef& def) {
return false;
}), types.end());
*/
/*
for (auto it = to_erase; it != usr_to_symbol.end(); ++it) {
switch (it->second.kind) {
case SymbolKind::Type:
types.erase(types.begin() + it->second.type_idx);
break;
case SymbolKind::Func:
funcs.erase(funcs.begin() + it->second.func_idx);
break;
case SymbolKind::Var:
vars.erase(vars.begin() + it->second.var_idx);
break;
}
} }
*/
defs->erase(to_erase, defs->end());
} }
void QueryableDatabase::Import(const std::vector<QueryableTypeDef>& defs) {
for (auto& def : defs) {
usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Type, types.size());
types.push_back(def);
}
}
void QueryableDatabase::Import(const std::vector<QueryableFuncDef>& defs) {
for (auto& def : defs) {
usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Func, funcs.size());
funcs.push_back(def);
}
}
void QueryableDatabase::Import(const std::vector<QueryableVarDef>& defs) {
for (auto& def : defs) {
usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Var, vars.size());
vars.push_back(def);
}
}
// Replaces the stored |def| of each already-imported type named in |updates|.
//
// Each update is matched to its storage slot through |usr_to_symbol|. An
// update for a usr that was never imported trips an assert in debug builds
// and is skipped otherwise. The lookup uses find() rather than operator[] so
// that a miss neither inserts a default SymbolIdx into the map nor writes
// through it into |types|.
void QueryableDatabase::Update(const std::vector<QueryableTypeDef::DefUpdate>& updates) {
  for (const auto& def : updates) {
    const auto it = usr_to_symbol.find(def.usr);
    assert(it != usr_to_symbol.end());
    if (it == usr_to_symbol.end())
      continue;

    const SymbolIdx idx = it->second;
    assert(idx.kind == SymbolKind::Type);
    types[idx.idx].def = def;
  }
}
// Replaces the stored |def| of each already-imported function named in
// |updates|.
//
// Each update is matched to its storage slot through |usr_to_symbol|. An
// update for a usr that was never imported trips an assert in debug builds
// and is skipped otherwise. The lookup uses find() rather than operator[] so
// that a miss neither inserts a default SymbolIdx into the map nor writes
// through it into |funcs|.
void QueryableDatabase::Update(const std::vector<QueryableFuncDef::DefUpdate>& updates) {
  for (const auto& def : updates) {
    const auto it = usr_to_symbol.find(def.usr);
    assert(it != usr_to_symbol.end());
    if (it == usr_to_symbol.end())
      continue;

    const SymbolIdx idx = it->second;
    assert(idx.kind == SymbolKind::Func);
    funcs[idx.idx].def = def;
  }
}
// Replaces the stored |def| of each already-imported variable named in
// |updates|.
//
// Each update is matched to its storage slot through |usr_to_symbol|. An
// update for a usr that was never imported trips an assert in debug builds
// and is skipped otherwise. The lookup uses find() rather than operator[] so
// that a miss neither inserts a default SymbolIdx into the map nor writes
// through it into |vars|.
void QueryableDatabase::Update(const std::vector<QueryableVarDef::DefUpdate>& updates) {
  for (const auto& def : updates) {
    const auto it = usr_to_symbol.find(def.usr);
    assert(it != usr_to_symbol.end());
    if (it == usr_to_symbol.end())
      continue;

    const SymbolIdx idx = it->second;
    assert(idx.kind == SymbolKind::Var);
    vars[idx.idx].def = def;
  }
}
template<typename TDef, typename TId> template<typename TDef, typename TId>
void AddAll(std::unordered_map<TId, int>* id_map, std::vector<TDef>* defs, const std::vector<TDef>& to_add) { void AddAll(std::unordered_map<TId, int>* id_map, std::vector<TDef>* defs, const std::vector<TDef>& to_add) {
for (const TDef& def : to_add) { for (const TDef& def : to_add) {
@ -711,40 +909,36 @@ void ApplyUpdates(std::unordered_map<TId, int>* id_map, std::vector<TDef>* defs,
} }
} }
QueryableDatabase::QueryableDatabase(GroupId group) : id_map(group), file_db(group) {} QueryableDatabase::QueryableDatabase(GroupId group) : file_db(group) {}
void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) { void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
id_map.Import(update->id_map);
#define JOIN(a, b) a##b #define JOIN(a, b) a##b
#define HANDLE_MERGEABLE(update_var_name, def_var_name, index_name, storage_name) \ #define HANDLE_MERGEABLE(update_var_name, def_var_name, storage_name) \
for (auto merge_update : JOIN(update->, update_var_name)) { \ for (auto merge_update : JOIN(update->, update_var_name)) { \
int index = JOIN(index_name, [merge_update.id]); \ SymbolIdx index = usr_to_symbol[merge_update.usr]; \
auto* def = &JOIN(storage_name, [index]); \ auto* def = &JOIN(storage_name, [index.idx]); \
AddRange(JOIN(&def->, def_var_name), merge_update.to_add); \ AddRange(JOIN(&def->, def_var_name), merge_update.to_add); \
RemoveRange(JOIN(&def->, def_var_name), merge_update.to_remove); \ RemoveRange(JOIN(&def->, def_var_name), merge_update.to_remove); \
} }
update->Remap(&id_map); RemoveUsrs(update->types_removed);
Import(update->types_added);
Update(update->types_def_changed);
HANDLE_MERGEABLE(types_derived, derived, types);
HANDLE_MERGEABLE(types_uses, uses, types);
RemoveAll(&type_id_to_index, &types, update->types_removed); RemoveUsrs(update->funcs_removed);
AddAll(&type_id_to_index, &types, update->types_added); Import(update->funcs_added);
ApplyUpdates(&type_id_to_index, &types, update->types_def_changed); Update(update->funcs_def_changed);
HANDLE_MERGEABLE(types_derived, derived, type_id_to_index, types); HANDLE_MERGEABLE(funcs_declarations, declarations, funcs);
HANDLE_MERGEABLE(types_uses, uses, type_id_to_index, types); HANDLE_MERGEABLE(funcs_derived, derived, funcs);
HANDLE_MERGEABLE(funcs_callers, callers, funcs);
HANDLE_MERGEABLE(funcs_uses, uses, funcs);
RemoveAll(&func_id_to_index, &funcs, update->funcs_removed); RemoveUsrs(update->vars_removed);
AddAll(&func_id_to_index, &funcs, update->funcs_added); Import(update->vars_added);
ApplyUpdates(&func_id_to_index, &funcs, update->funcs_def_changed); Update(update->vars_def_changed);
HANDLE_MERGEABLE(funcs_declarations, declarations, func_id_to_index, funcs); HANDLE_MERGEABLE(vars_uses, uses, vars);
HANDLE_MERGEABLE(funcs_derived, derived, func_id_to_index, funcs);
HANDLE_MERGEABLE(funcs_callers, callers, func_id_to_index, funcs);
HANDLE_MERGEABLE(funcs_uses, uses, func_id_to_index, funcs);
RemoveAll(&var_id_to_index, &vars, update->vars_removed);
AddAll(&var_id_to_index, &vars, update->vars_added);
ApplyUpdates(&var_id_to_index, &vars, update->vars_def_changed);
HANDLE_MERGEABLE(vars_uses, uses, var_id_to_index, vars);
#undef HANDLE_MERGEABLE #undef HANDLE_MERGEABLE
#undef JOIN #undef JOIN
@ -761,25 +955,24 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
int main(int argc, char** argv) { int main(int argc, char** argv) {
// TODO: Unify UserToIdResolver and FileDb // TODO: Unify UserToIdResolver and FileDb
UsrToIdResolver usr_to_id(1); IdCache id_cache(1);
FileDb file_db(1); FileDb file_db(1);
IndexedFile indexed_file_a = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v0.cc", {}); IndexedFile indexed_file_a = Parse(&id_cache, &file_db, "full_tests/index_delta/a_v0.cc", {});
std::cout << indexed_file_a.ToString() << std::endl; std::cout << indexed_file_a.ToString() << std::endl;
std::cout << std::endl; std::cout << std::endl;
IndexedFile indexed_file_b = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v1.cc", {}); IndexedFile indexed_file_b = Parse(&id_cache, &file_db, "full_tests/index_delta/a_v1.cc", {});
std::cout << indexed_file_b.ToString() << std::endl; std::cout << indexed_file_b.ToString() << std::endl;
// TODO: We don't need to do ID remapping when computing a diff. Well, we need to do it for the IndexUpdate. // TODO: We don't need to do ID remapping when computing a diff. Well, we need to do it for the IndexUpdate.
IdMap dest_ids(2); IndexUpdate import(indexed_file_a);
IndexUpdate import(&dest_ids, indexed_file_a); /*
dest_ids.Import(indexed_file_b.file_db, indexed_file_b.usr_to_id); dest_ids.Import(indexed_file_b.file_db, indexed_file_b.id_cache);
IndexUpdate update = ComputeDiff(&dest_ids, indexed_file_a, indexed_file_b); IndexUpdate update = ComputeDiff(indexed_file_a, indexed_file_b);
*/
QueryableDatabase db(5); QueryableDatabase db(5);
db.ApplyIndexUpdate(&import); db.ApplyIndexUpdate(&import);
db.ApplyIndexUpdate(&update); //db.ApplyIndexUpdate(&update);
return 0; return 0;
} }

62
query.h
View File

@ -23,8 +23,14 @@ enum class PreferredSymbolLocation {
}; };
using Usr = std::string; using Usr = std::string;
struct UsrRef {
Usr usr;
Location loc;
struct IdMap; bool operator==(const UsrRef& other) const {
return usr == other.usr && loc == other.loc;
}
};
// There are two sources of reindex updates: the (single) definition of a // There are two sources of reindex updates: the (single) definition of a
// symbol has changed, or one of many users of the symbol has changed. // symbol has changed, or one of many users of the symbol has changed.
@ -36,63 +42,61 @@ struct IdMap;
// that it can be merged with other updates before actually being applied to // that it can be merged with other updates before actually being applied to
// the main database. See |MergeableUpdate|. // the main database. See |MergeableUpdate|.
template<typename TId, typename TValue> template<typename TValue>
struct MergeableUpdate { struct MergeableUpdate {
// The type/func/var which is getting new usages. // The type/func/var which is getting new usages.
TId id; Usr usr;
// Entries to add and remove. // Entries to add and remove.
std::vector<TValue> to_add; std::vector<TValue> to_add;
std::vector<TValue> to_remove; std::vector<TValue> to_remove;
}; };
struct QueryableTypeDef { struct QueryableTypeDef {
TypeDefDefinitionData def; TypeDefDefinitionData<Usr, Usr, Usr> def;
std::vector<TypeId> derived; std::vector<Usr> derived;
std::vector<Location> uses; std::vector<Location> uses;
using DefUpdate = TypeDefDefinitionData; using DefUpdate = TypeDefDefinitionData<Usr, Usr, Usr>;
using DerivedUpdate = MergeableUpdate<TypeId, TypeId>; using DerivedUpdate = MergeableUpdate<Usr>;
using UsesUpdate = MergeableUpdate<TypeId, Location>; using UsesUpdate = MergeableUpdate<Location>;
QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed); QueryableTypeDef(IdCache& id_cache, IndexedTypeDef& indexed);
}; };
struct QueryableFuncDef { struct QueryableFuncDef {
FuncDefDefinitionData def; FuncDefDefinitionData<Usr, Usr, Usr, UsrRef> def;
std::vector<Location> declarations; std::vector<Location> declarations;
std::vector<FuncId> derived; std::vector<Usr> derived;
std::vector<FuncRef> callers; std::vector<UsrRef> callers;
std::vector<Location> uses; std::vector<Location> uses;
using DefUpdate = FuncDefDefinitionData; using DefUpdate = FuncDefDefinitionData<Usr, Usr, Usr, UsrRef>;
using DeclarationsUpdate = MergeableUpdate<FuncId, Location>; using DeclarationsUpdate = MergeableUpdate<Location>;
using DerivedUpdate = MergeableUpdate<FuncId, FuncId>; using DerivedUpdate = MergeableUpdate<Usr>;
using CallersUpdate = MergeableUpdate<FuncId, FuncRef>; using CallersUpdate = MergeableUpdate<UsrRef>;
using UsesUpdate = MergeableUpdate<FuncId, Location>; using UsesUpdate = MergeableUpdate<Location>;
QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed); QueryableFuncDef(IdCache& id_cache, IndexedFuncDef& indexed);
}; };
struct QueryableVarDef { struct QueryableVarDef {
VarDefDefinitionData def; VarDefDefinitionData<Usr, Usr, Usr> def;
std::vector<Location> uses; std::vector<Location> uses;
using DefUpdate = VarDefDefinitionData; using DefUpdate = VarDefDefinitionData<Usr, Usr, Usr>;
using UsesUpdate = MergeableUpdate<VarId, Location>; using UsesUpdate = MergeableUpdate<Location>;
QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed); QueryableVarDef(IdCache& id_cache, IndexedVarDef& indexed);
}; };
enum class SymbolKind { Type, Func, Var }; enum class SymbolKind { Invalid, Type, Func, Var };
struct SymbolIdx { struct SymbolIdx {
SymbolKind kind; SymbolKind kind;
union { uint64_t idx;
uint64_t type_idx;
uint64_t func_idx; SymbolIdx() : kind(SymbolKind::Invalid), idx(-1) {} // Default ctor needed by stdlib. Do not use.
uint64_t var_idx; SymbolIdx(SymbolKind kind, uint64_t idx) : kind(kind), idx(idx) {}
};
}; };

View File

@ -87,8 +87,8 @@ void Serialize(Writer& writer, const char* key, uint64_t value) {
} }
void Serialize(Writer& writer, IndexedFile* file) { void Serialize(Writer& writer, IndexedFile* file) {
auto it = file->usr_to_id->usr_to_type_id.find(""); auto it = file->id_cache->usr_to_type_id.find("");
if (it != file->usr_to_id->usr_to_type_id.end()) { if (it != file->id_cache->usr_to_type_id.end()) {
file->Resolve(it->second)->def.short_name = "<fundamental>"; file->Resolve(it->second)->def.short_name = "<fundamental>";
assert(file->Resolve(it->second)->uses.size() == 0); assert(file->Resolve(it->second)->uses.size() == 0);
} }