ccls/query.cc
Jacob Dufault 243ed8dfa5 wip
2017-02-25 17:08:05 -08:00

658 lines
21 KiB
C++

#include "query.h"
#include <cstdint>
#include <functional>
#include <unordered_set>
#include <unordered_map>
#include <string>
#include <iostream>
#include "function_output_iterator.hpp"
#include "compilation_database_loader.h"
#include "optional.h"
#include "indexer.h"
//#define CATCH_CONFIG_MAIN
//#include "catch.hpp"
// TODO: Make all copy constructors explicit.
struct IdMap {
// The first vector is indexed by TId::group.
// The second vector is indexed by TId::id.
template<typename TId>
using GroupMap = std::vector<std::unordered_map<TId, TId>>;
GroupId target_group;
int64_t next_file_id = 1;
int64_t next_type_id = 1;
int64_t next_func_id = 1;
int64_t next_var_id = 1;
GroupMap<FileId> remap_file_id;
GroupMap<TypeId> remap_type_id;
GroupMap<FuncId> remap_func_id;
GroupMap<VarId> remap_var_id;
IdMap(GroupId target_group) : target_group(target_group) {}
template<typename TId>
inline TId GenericRemap(GroupMap<TId>* map, int64_t* next_id, TId from) {
if (from.group == target_group)
return from;
// PERF: If this function is a hot-spot we can pull the group computation
// out, ie,
//
// IdMap id_map;
// GroupIdMap group_map = id_map.ResolveIdGroup(file.group)
// for (...)
// group_map.Remap(id)
// Find the group that |from| belongs to. Create groups if needed.
if (from.group >= map->size())
map->resize(from.group + 1);
// If the group doesn't have an ID already mapped out for |from|, map it.
/*
// TODO: The concern with this approach is that it going to waste huge
// amounts of memory, because the first 16k+ ids can be unused.
std::vector<TId>& group = (*map)[from.group];
if (from.id >= group.size()) {
group.reserve(from.id + 1);
for (size_t i = group.size(); i < from.id; ++i)
group.emplace_back(TId(target_group, (*next_id)++));
}
*/
std::unordered_map<TId, TId> group = (*map)[from.group];
// Lookup the id from the group or add it.
auto it = group.find(from);
if (it == group.end()) {
TId result(target_group, (*next_id)++);
group[from] = result;
return result;
}
return it->second;
}
template<typename TId>
inline std::vector<TId> GenericVectorRemap(GroupMap<TId>* map, int64_t* next_id, const std::vector<TId>& from) {
if (from.empty())
return {};
int group_id = from[0].group;
if (group_id >= map->size())
map->resize(group_id + 1);
std::unordered_map<TId, TId> group = (*map)[group_id];
std::vector<TId> result;
result.reserve(from.size());
for (TId id : from) {
if (id.group == target_group) {
result.push_back(id);
continue;
}
// Lookup the id from the group or add it.
auto it = group.find(id);
if (it == group.end()) {
TId new_id(target_group, (*next_id)++);
group[id] = new_id;
result.push_back(new_id);
}
else {
result.push_back(it->second);
}
}
return result;
}
FileId Remap(FileId from) {
return GenericRemap(&remap_file_id, &next_file_id, from);
}
Location Remap(Location from) {
FileId file = Remap(from.file_id());
from.raw_file_group = file.group;
from.raw_file_id = file.id;
return from;
}
TypeId Remap(TypeId from) {
return GenericRemap(&remap_type_id, &next_type_id, from);
}
FuncId Remap(FuncId from) {
return GenericRemap(&remap_func_id, &next_func_id, from);
}
VarId Remap(VarId from) {
return GenericRemap(&remap_var_id, &next_var_id, from);
}
FuncRef Remap(FuncRef from) {
from.id = Remap(from.id);
from.loc = Remap(from.loc);
return from;
}
TypeDefDefinitionData Remap(TypeDefDefinitionData def) {
def.id = Remap(def.id);
if (def.definition)
def.definition = Remap(def.definition.value());
if (def.alias_of)
def.alias_of = Remap(def.alias_of.value());
def.parents = Remap(def.parents);
def.types = Remap(def.types);
def.funcs = Remap(def.funcs);
def.vars = Remap(def.vars);
return def;
}
FuncDefDefinitionData Remap(FuncDefDefinitionData def) {
def.id = Remap(def.id);
if (def.definition)
def.definition = Remap(def.definition.value());
if (def.declaring_type)
def.declaring_type = Remap(def.declaring_type.value());
if (def.base)
def.base = Remap(def.base.value());
def.locals = Remap(def.locals);
def.callees = Remap(def.callees);
return def;
}
VarDefDefinitionData Remap(VarDefDefinitionData def) {
def.id = Remap(def.id);
if (def.declaration)
def.declaration = Remap(def.declaration.value());
if (def.definition)
def.definition = Remap(def.definition.value());
if (def.variable_type)
def.variable_type = Remap(def.variable_type.value());
if (def.declaring_type)
def.declaring_type = Remap(def.declaring_type.value());
return def;
}
//std::vector<FileId> Remap(const std::vector<FileId>& from) {
// return GenericVectorRemap(&remap_file_id, &next_file_id, from);
//}
std::vector<Location> Remap(const std::vector<Location>& from) {
std::vector<Location> result;
result.reserve(from.size());
for (Location l : from)
result.push_back(Remap(l));
return result;
}
std::vector<TypeId> Remap(const std::vector<TypeId>& from) {
return GenericVectorRemap(&remap_type_id, &next_type_id, from);
}
std::vector<FuncId> Remap(const std::vector<FuncId>& from) {
return GenericVectorRemap(&remap_func_id, &next_func_id, from);
}
std::vector<VarId> Remap(const std::vector<VarId>& from) {
return GenericVectorRemap(&remap_var_id, &next_var_id, from);
}
std::vector<FuncRef> Remap(const std::vector<FuncRef>& from) {
std::vector<FuncRef> result;
result.reserve(from.size());
for (FuncRef r : from)
result.push_back(Remap(r));
return result;
}
};
enum class SymbolKind { Type, Func, Var };
struct SymbolIdx {
SymbolKind kind;
union {
uint64_t type_idx;
uint64_t func_idx;
uint64_t var_idx;
};
};
// There are two sources of reindex updates: the (single) definition of a
// symbol has changed, or one of many users of the symbol has changed.
//
// For simplicitly, if the single definition has changed, we update all of the
// associated single-owner definition data. See |Update*DefId|.
//
// If one of the many symbol users submits an update, we store the update such
// that it can be merged with other updates before actually being applied to
// the main database. See |MergeableUpdate|.
template<typename TId, typename TValue>
struct MergeableUpdate {
// The type/func/var which is getting new usages.
TId id;
// Entries to add and remove.
std::vector<TValue> to_add;
std::vector<TValue> to_remove;
};
template<typename TId, typename TValue>
MergeableUpdate<TId, TValue> MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector<TValue>& removed, const std::vector<TValue>& added) {
MergeableUpdate<TId, TValue> update;
update.id = id_map->Remap(symbol_id);
update.to_remove = id_map->Remap(removed);
update.to_add = id_map->Remap(added);
return update;
}
// NOTE: When not inside of a |def| object, there can be duplicates of the same
// information if that information is contributed from separate sources.
// If we need to avoid this duplication in the future, we will have to
// add a refcount.
struct QueryableTypeDef {
TypeDefDefinitionData def;
std::vector<TypeId> derived;
std::vector<Location> uses;
using DefUpdate = TypeDefDefinitionData;
using DerivedUpdate = MergeableUpdate<TypeId, TypeId>;
using UsesUpdate = MergeableUpdate<TypeId, Location>;
QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed)
: def(id_map.Remap(indexed.def)) {
derived = id_map.Remap(indexed.derived);
uses = id_map.Remap(indexed.uses);
}
};
struct QueryableFuncDef {
FuncDefDefinitionData def;
std::vector<Location> declarations;
std::vector<FuncId> derived;
std::vector<FuncRef> callers;
std::vector<Location> uses;
using DefUpdate = FuncDefDefinitionData;
using DeclarationsUpdate = MergeableUpdate<FuncId, Location>;
using DerivedUpdate = MergeableUpdate<FuncId, FuncId>;
using CallersUpdate = MergeableUpdate<FuncId, FuncRef>;
using UsesUpdate = MergeableUpdate<FuncId, Location>;
QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed)
: def(id_map.Remap(indexed.def)) {
declarations = id_map.Remap(indexed.declarations);
derived = id_map.Remap(indexed.derived);
callers = id_map.Remap(indexed.callers);
uses = id_map.Remap(indexed.uses);
}
};
struct QueryableVarDef {
VarDefDefinitionData def;
std::vector<Location> uses;
using DefUpdate = VarDefDefinitionData;
using UsesUpdate = MergeableUpdate<VarId, Location>;
QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed)
: def(id_map.Remap(indexed.def)) {
uses = id_map.Remap(indexed.uses);
}
};
struct QueryableFile {
FileId file_id;
// Symbols declared in the file.
std::vector<SymbolIdx> declared_symbols;
// Symbols which have definitions in the file.
std::vector<SymbolIdx> defined_symbols;
};
struct QueryableEntry {
const char* const str;
};
// The query database is heavily optimized for fast queries. It is stored
// in-memory.
struct QueryableDatabase {
IdMap id_map;
// Indicies between lookup vectors are related to symbols, ie, index 5 in
// |qualified_names| matches index 5 in |symbols|.
std::vector<QueryableEntry> qualified_names;
std::vector<SymbolIdx> symbols;
// Raw data storage.
std::vector<QueryableTypeDef> types;
std::vector<QueryableFuncDef> funcs;
std::vector<QueryableVarDef> vars;
// |files| is indexed by FileId. Retrieve a FileId from a path using
// |file_db|.
FileDb file_db;
std::vector<QueryableFile> files;
// When importing data into the global db we need to remap ids from an
// arbitrary group into the global group.
IdMap local_id_group_to_global_id_group;
};
struct CachedIndexedFile {
// Path to the file indexed.
std::string path;
// GroupId of the indexed file.
GroupId group;
// TODO: Make sure that |previous_index| and |current_index| use the same id
// to USR mapping. This lets us greatly speed up difference computation.
// The previous index. This is used for index updates, so we only apply a
// an update diff when changing the global db.
optional<IndexedFile> previous_index;
IndexedFile current_index;
CachedIndexedFile(const IndexedFile& indexed)
: group(indexed.usr_to_id->group), current_index(indexed) {}
};
struct IndexUpdate {
IdMap* id_map;
// Type updates.
std::vector<TypeId> types_removed;
std::vector<QueryableTypeDef> types_added;
std::vector<QueryableTypeDef::DefUpdate> types_def_changed;
std::vector<QueryableTypeDef::DerivedUpdate> types_derived;
std::vector<QueryableTypeDef::UsesUpdate> types_uses;
// Function updates.
std::vector<FuncId> funcs_removed;
std::vector<QueryableFuncDef> funcs_added;
std::vector<QueryableFuncDef::DefUpdate> funcs_def_changed;
std::vector<QueryableFuncDef::DeclarationsUpdate> funcs_declarations;
std::vector<QueryableFuncDef::DerivedUpdate> funcs_derived;
std::vector<QueryableFuncDef::CallersUpdate> funcs_callers;
std::vector<QueryableFuncDef::UsesUpdate> funcs_uses;
// Variable updates.
std::vector<VarId> vars_removed;
std::vector<QueryableVarDef> vars_added;
std::vector<QueryableVarDef::DefUpdate> vars_def_changed;
std::vector<QueryableVarDef::UsesUpdate> vars_uses;
IndexUpdate(IdMap* id_map) : id_map(id_map) {}
};
template<typename TValue>
TValue* TryFind(std::unordered_set<TValue*>& set, TValue* value) {
// TODO: Make |value| a const ref?
auto it = set.find(value);
if (it == set.end())
return nullptr;
return *it;
}
template<typename T>
std::unordered_set<T*> CreateSet(std::vector<T>& elements) {
std::unordered_set<T*> result;
result.reserve(elements.size());
for (T& element : elements)
result.insert(&element);
return result;
}
// Compares |previous| and |current|, adding all elements that are
// in |previous| but not |current| to |removed|, and all elements
// that are in |current| but not |previous| to |added|.
//
// Returns true iff |removed| or |added| are non-empty.
template<typename T>
bool ComputeDifferenceForUpdate(
std::vector<T>& previous, std::vector<T>& current,
std::vector<T>* removed, std::vector<T>* added) {
// We need to sort to use std::set_difference.
std::sort(previous.begin(), previous.end());
std::sort(current.begin(), current.end());
// Returns the elements in |previous| that are not in |current|.
std::set_difference(
previous.begin(), previous.end(),
current.begin(), current.end(),
std::back_inserter(*removed));
// Returns the elmeents in |current| that are not in |previous|.
std::set_difference(
current.begin(), current.end(),
previous.begin(), previous.end(),
std::back_inserter(*added));
return !removed->empty() || !added->empty();
}
#if false
template<typename T>
void CompareGroups(
std::vector<T>& previous_data, std::vector<T>& current_data,
std::function<void(T*)> on_removed, std::function<void(T*)> on_added, std::function<void(T*, T*)> on_found) {
// TODO: It could be faster to use set_intersection and set_difference to
// compute these values. We will have to presort the input by ID, though.
// Precompute sets so we stay around O(3N) instead of O(N^2). Otherwise
// lookups for duplicate elements will be O(N) and we need them to be O(1).
std::unordered_set<T*> previous_set = CreateSet(previous_data);
std::unordered_set<T*> current_set = CreateSet(current_data);
// TODO: TryFind is just comparing pointers which obviously fails because they point to different memory...
for (T* current_entry : current_set) {
// Possibly updated.
if (T* previous_entry = TryFind(previous_set, current_entry))
on_found(previous_entry, current_entry);
// Added
else
on_added(current_entry);
}
for (T* previous_entry : previous_set) {
// Removed
if (!TryFind(current_set, previous_entry))
on_removed(previous_entry);
}
}
#endif
template<typename T>
void CompareGroups(
std::vector<T>& previous_data, std::vector<T>& current_data,
std::function<void(T*)> on_removed, std::function<void(T*)> on_added, std::function<void(T*, T*)> on_found) {
// TODO: It could be faster to use set_intersection and set_difference to
// compute these values. We will have to presort the input by ID, though.
std::sort(previous_data.begin(), previous_data.end());
std::sort(current_data.begin(), current_data.end());
/*
std::set_difference(
current_data.begin(), current_data.end(),
previous_data.begin(), previous_data.end(),
boost::make_function_output_iterator([](const T& val) {
}));
*/
auto prev_it = previous_data.begin();
auto curr_it = current_data.begin();
while (prev_it != previous_data.end() && curr_it != current_data.end()) {
// same id
if (prev_it->def.id == curr_it->def.id) {
if (!prev_it->is_bad_def && !curr_it->is_bad_def)
on_found(&*prev_it, &*curr_it);
else if (prev_it->is_bad_def)
on_added(&*curr_it);
else if (curr_it->is_bad_def)
on_removed(&*curr_it);
++prev_it;
++curr_it;
}
// prev_id is smaller - prev_it has data curr_it does not have.
else if (prev_it->def.id < curr_it->def.id) {
on_removed(&*prev_it);
++prev_it;
}
// prev_id is bigger - curr_it has data prev_it does not have.
else {
on_added(&*curr_it);
++curr_it;
}
}
// if prev_it still has data, that means it is not in curr_it and was removed.
while (prev_it != previous_data.end()) {
on_removed(&*prev_it);
++prev_it;
}
// if curr_it still has data, that means it is not in prev_it and was added.
while (curr_it != current_data.end()) {
on_added(&*curr_it);
++curr_it;
}
}
// TODO: make this const correct.
IndexUpdate ComputeDiff(IdMap* id_map, IndexedFile& previous, IndexedFile& current) {
#define JOIN(a, b) a##b
// |query_name| is the name of the variable on the query type.
// |index_name| is the name of the variable on the index type.
// |type| is the type of the variable.
#define PROCESS_UPDATE_DIFF(query_name, index_name, type) \
{ \
/* Check for changes. */ \
std::vector<type> removed, added; \
bool did_add = ComputeDifferenceForUpdate(JOIN(previous->, index_name), JOIN(current->, index_name), &removed, &added); \
if (did_add) {\
std::cout << "Adding mergeable update on " << current->def.short_name << " (" << current->def.usr << ") for field " << #index_name << std::endl; \
JOIN(update., query_name).push_back(MakeMergeableUpdate(id_map, current->def.id, removed, added)); \
} \
}
assert(previous.usr_to_id == current.usr_to_id);
assert(previous.file_db == current.file_db);
IndexUpdate update(id_map);
// Types
CompareGroups<IndexedTypeDef>(previous.types, current.types,
/*onRemoved:*/[&update, &id_map](IndexedTypeDef* def) {
update.types_removed.push_back(id_map->Remap(def->def.id));
},
/*onAdded:*/[&update, &id_map](IndexedTypeDef* def) {
update.types_added.push_back(QueryableTypeDef(*id_map, *def));
},
/*onChanged:*/[&update, &id_map](IndexedTypeDef* previous, IndexedTypeDef* current) {
if (previous->def != current->def)
update.types_def_changed.push_back(id_map->Remap(current->def));
PROCESS_UPDATE_DIFF(types_derived, derived, TypeId);
PROCESS_UPDATE_DIFF(types_uses, uses, Location);
});
// Functions
CompareGroups<IndexedFuncDef>(previous.funcs, current.funcs,
/*onRemoved:*/[&update, &id_map](IndexedFuncDef* def) {
update.funcs_removed.push_back(id_map->Remap(def->def.id));
},
/*onAdded:*/[&update, &id_map](IndexedFuncDef* def) {
update.funcs_added.push_back(QueryableFuncDef(*id_map, *def));
},
/*onChanged:*/[&update, &id_map](IndexedFuncDef* previous, IndexedFuncDef* current) {
if (previous->def != current->def)
update.funcs_def_changed.push_back(id_map->Remap(current->def));
PROCESS_UPDATE_DIFF(funcs_declarations, declarations, Location);
PROCESS_UPDATE_DIFF(funcs_derived, derived, FuncId);
PROCESS_UPDATE_DIFF(funcs_callers, callers, FuncRef);
PROCESS_UPDATE_DIFF(funcs_uses, uses, Location);
});
// Variables
CompareGroups<IndexedVarDef>(previous.vars, current.vars,
/*onRemoved:*/[&update, &id_map](IndexedVarDef* def) {
update.vars_removed.push_back(id_map->Remap(def->def.id));
},
/*onAdded:*/[&update, &id_map](IndexedVarDef* def) {
update.vars_added.push_back(QueryableVarDef(*id_map, *def));
},
/*onChanged:*/[&update, &id_map](IndexedVarDef* previous, IndexedVarDef* current) {
if (previous->def != current->def)
update.vars_def_changed.push_back(id_map->Remap(current->def));
PROCESS_UPDATE_DIFF(vars_uses, uses, Location);
});
return update;
#undef PROCESS_UPDATE_DIFF
#undef JOIN
}
// Merge the contents of |source| into |destination|.
void Merge(const IndexUpdate& source, IndexUpdate* destination) {
// TODO.
}
// Insert the contents of |update| into |db|.
void ApplyIndexUpdate(const IndexUpdate& update, QueryableDatabase* db) {
}
int main(int argc, char** argv) {
// TODO: Unify UserToIdResolver and FileDb
UsrToIdResolver usr_to_id(1);
FileDb file_db(1);
IndexedFile indexed_file_a = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v0.cc", {});
std::cout << indexed_file_a.ToString() << std::endl;
std::cout << std::endl;
IndexedFile indexed_file_b = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v1.cc", {});
std::cout << indexed_file_b.ToString() << std::endl;
// TODO: We don't need to do ID remapping when computting a diff. Well, we need to do it for the IndexUpdate.
IdMap dest_ids(2);
IndexUpdate update = ComputeDiff(&dest_ids, indexed_file_a, indexed_file_b);
return 0;
}
// TODO: Idea: when indexing and joining to the main db, allow many dbs that
// are joined to. So that way even if the main db is busy we can
// still be joining. Joining the partially joined db to the main
// db should be faster since we will have larger data lanes to use.
// TODO: I think we can run libclang multiple times in one process. So we might
// only need two processes. Still, for perf reasons it would be good if
// we could stay in one process. We could probably just use shared
// memory. May want to run libclang in separate process to protect from
// crashes/issues there.
// TODO: allow user to store configuration as json? file in home dir; also
// allow local overrides (scan up dirs)
// TODO: add opt to dump config when starting (--dump-config)
// TODO: allow user to decide some indexer choices, ie, do we mark prototype parameters as usages?