This commit is contained in:
Jacob Dufault 2017-02-25 15:59:09 -08:00
parent f3f72a0dfa
commit b7d9a0f815
9 changed files with 1096 additions and 405 deletions

184
command_line.cc Normal file
View File

@ -0,0 +1,184 @@
#include <iostream>
#include <string>
#include <unordered_map>
#include "compilation_database_loader.h"
#include "indexer.h"
#include "query.h"
// Parses |content| into a PreferredSymbolLocation.
//
// The accepted spellings are "declaration" and "definition"; the comparison is
// exact and case-sensitive. On a match the parsed value is written to |*obj|
// and true is returned. Otherwise |*obj| is left untouched and false is
// returned.
bool ParsePreferredSymbolLocation(const std::string& content, PreferredSymbolLocation* obj) {
  // |string| is already a string literal, so it is compared directly.
  // Stringizing it with # would embed the quote characters in the comparison
  // text and an unquoted option value could never match.
#define PARSE_AS(name, string) \
  if (content == string) {     \
    *obj = name;               \
    return true;               \
  }

  PARSE_AS(PreferredSymbolLocation::Declaration, "declaration");
  PARSE_AS(PreferredSymbolLocation::Definition, "definition");
  return false;

#undef PARSE_AS
}
// Parses |content| into a query Command.
//
// The accepted spellings are the command names listed below (for example
// "callees" or "find-all-usages"); the comparison is exact and case-sensitive.
// On a match the parsed value is written to |*obj| and true is returned.
// Otherwise |*obj| is left untouched and false is returned.
bool ParseCommand(const std::string& content, Command* obj) {
  // |string| is already a string literal, so it is compared directly.
  // Stringizing it with # would embed the quote characters in the comparison
  // text and an unquoted command name could never match.
#define PARSE_AS(name, string) \
  if (content == string) {     \
    *obj = name;               \
    return true;               \
  }

  PARSE_AS(Command::Callees, "callees");
  PARSE_AS(Command::Callers, "callers");
  PARSE_AS(Command::FindAllUsages, "find-all-usages");
  PARSE_AS(Command::FindInterestingUsages, "find-interesting-usages");
  PARSE_AS(Command::GotoReferenced, "goto-referenced");
  PARSE_AS(Command::Hierarchy, "hierarchy");
  PARSE_AS(Command::Outline, "outline");
  PARSE_AS(Command::Search, "search");
  return false;

#undef PARSE_AS
}
// Builds a switch -> value map from the process arguments.
//
// argv[0] is skipped. Every argument whose first character is '-' is recorded
// as a switch with an empty value. An argument that does not start with '-'
// becomes the value of the switch immediately before it. A value with no
// pending switch (for example a leading positional argument, or two values in
// a row) is reported on stderr and terminates the process with exit code 1.
std::unordered_map<std::string, std::string> ParseOptions(int argc, char** argv) {
  std::unordered_map<std::string, std::string> parsed;
  std::string pending_switch;

  for (int index = 1; index < argc; ++index) {
    const std::string token = argv[index];

    if (token[0] == '-') {
      // A new switch: register it with no value and remember it so a
      // following value can be attached.
      parsed[token] = "";
      pending_switch = token;
      continue;
    }

    // A value; it needs a switch to attach to.
    if (pending_switch.empty()) {
      std::cerr << "Invalid arguments; switches must start with -" << std::endl;
      exit(1);
    }
    parsed[pending_switch] = token;
    pending_switch.clear();
  }

  return parsed;
}
// Returns true when |option| is present as a key in |options|, regardless of
// whether a value was supplied for it.
bool HasOption(const std::unordered_map<std::string, std::string>& options, const std::string& option) {
  return options.count(option) != 0;
}
// Command-line front end.
//
// Parses the process arguments with ParseOptions and then handles, in order:
//   * no arguments or --help: print the general help text and exit(0).
//   * --help-commands:        print the list of query commands and exit(0).
//   * --project:              load the compilation entries for the given
//                             path, print "Parsing <file>" for each one, wait
//                             for a character on stdin and exit(0).
//   * --command:              validate the command name; an unknown name is
//                             reported through Fail().
// Anything else prints "Invalid arguments. Try --help." and calls exit(1).
int main2(int argc, char** argv) {
  std::unordered_map<std::string, std::string> options = ParseOptions(argc, argv);

  if (argc == 1 || HasOption(options, "--help")) {
    std::cout << R"help(clang-indexer help:
General:
--help Print this help information.
--help-commands
Print all available query commands.
--project Path to compile_commands.json. Needed for the server, and
optionally by clients if there are multiple servers running.
--print-config
Emit all configuration data this executable is using.
Server:
--server If present, this binary will run in server mode. The binary
will not return until killed or an exit is requested. The
server computes and caches an index of the entire program
which is then queried by short-lived client processes. A
client is created by running this binary with a --command
flag.
--cache-dir Directory to cache the index and other useful information. If
a previous cache is present, the database will try to reuse
it. If this flag is not present, the database will be
in-memory only.
--threads Number of threads to use for indexing and querying tasks.
This value is optional; a good estimate is computed by
default.
Client:
--command Execute a query command against the index. See
--help-commands for a listing of valid commands and a
description of what they do. Presence of this flag indicates
that the indexer is in client mode; this flag is mutually
exclusive with --server.
--location Location of the query. Some commands require only a file,
other require a line and column as well. Format is
filename[:line:column]. For example, "foobar.cc" and
"foobar.cc:1:10" are valid inputs.
--preferred-symbol-location
When looking up symbols, try to return either the
'declaration' or the 'definition'. Defaults to 'definition'.
)help";
    exit(0);
  }

  if (HasOption(options, "--help-commands")) {
    std::cout << R"(Available commands:
callees:
callers:
Emit all functions (with location) that this function calls ("callees") or
that call this function ("callers"). Requires a location.
find-all-usages:
Emit every usage of the given symbol. This is intended to support a rename
refactoring. This output contains many uninteresting usages of symbols;
prefer find-interesting-usages. Requires a location.
find-interesting-usages:
Emit only usages of the given symbol which are semantically interesting.
Requires a location.
goto-referenced:
Find an associated reference (either definition or declaration) for the
given symbol. Requires a location.
hierarchy:
List the type hierarchy (ie, inherited and derived members) for the given
method or type. Requires a location.
outline:
Emit a file outline, listing all of the symbols in the file.
search:
Search for a symbol by name.
)";
    exit(0);
  }

  if (HasOption(options, "--project")) {
    std::vector<CompilationEntry> entries = LoadCompilationEntriesFromDirectory(options["--project"]);

    // Only referenced by the parsing code that is currently commented out.
    std::vector<IndexedFile> dbs;
    for (const CompilationEntry& entry : entries) {
      std::cout << "Parsing " << entry.filename << std::endl;
      //IndexedFile db = Parse(2, entry.filename, entry.args);
      //dbs.emplace_back(db);
      //std::cout << db.ToString() << std::endl << std::endl;
    }

    // Block until a character arrives on stdin before exiting.
    std::cin.get();
    exit(0);
  }

  if (HasOption(options, "--command")) {
    Command command;
    if (!ParseCommand(options["--command"], &command))
      Fail("Unknown command \"" + options["--command"] + "\"; see --help-commands");
    // NOTE(review): a successfully parsed command is not executed here;
    // control falls through to the "Invalid arguments" path below. Presumably
    // command dispatch is still to be written - confirm.
  }

  std::cout << "Invalid arguments. Try --help.";
  exit(1);
  return 0;  // Not reached; exit() does not return.
}

View File

@ -9,4 +9,8 @@ struct CompilationEntry {
std::vector<std::string> args; std::vector<std::string> args;
}; };
// TODO: Add support for loading when there is no compile_commands.json file.
// We will just recursively scan the directory and support a global
// set of defines and include directories.
std::vector<CompilationEntry> LoadCompilationEntriesFromDirectory(const std::string& project_directory); std::vector<CompilationEntry> LoadCompilationEntriesFromDirectory(const std::string& project_directory);

View File

@ -0,0 +1,7 @@
void called();
void caller() {
}
void missing() {}

View File

@ -0,0 +1,7 @@
void called();
void caller() {
called();
}
void added() {}

View File

@ -0,0 +1,62 @@
// (C) Copyright Jeremy Siek 2001.
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
// Revision History:
// 27 Feb 2001 Jeremy Siek
// Initial checkin.
#ifndef BOOST_FUNCTION_OUTPUT_ITERATOR_HPP
#define BOOST_FUNCTION_OUTPUT_ITERATOR_HPP
#include <iterator>
namespace boost {
namespace iterators {

  // Output iterator adaptor that forwards every assigned value to a unary
  // function object instead of storing it.
  //
  // Dereferencing yields an output_proxy; assigning a value to that proxy
  // calls the wrapped function with the value. Both increment operators are
  // no-ops that return the iterator itself.
  template <class UnaryFunction>
  class function_output_iterator {
  public:
    using iterator_category = std::output_iterator_tag;
    using value_type = void;
    using difference_type = void;
    using pointer = void;
    using reference = void;

    // Default-constructs the wrapped function object.
    explicit function_output_iterator() {}

    // Stores a copy of |f|; every assignment through the iterator calls it.
    explicit function_output_iterator(const UnaryFunction& f)
      : m_func(f) {}

    // Assignment target returned by operator*. It holds a reference to the
    // iterator's function object and invokes it on assignment.
    struct output_proxy {
      output_proxy(UnaryFunction& f) : m_f(f) { }

      template <class T>
      output_proxy& operator=(const T& value) {
        m_f(value);
        return *this;
      }

      UnaryFunction& m_f;
    };

    output_proxy operator*() { return output_proxy(m_func); }

    // Incrementing does nothing; there is no position to advance.
    function_output_iterator& operator++() { return *this; }
    function_output_iterator& operator++(int) { return *this; }

  private:
    UnaryFunction m_func;
  };

  // Convenience factory that deduces UnaryFunction from its argument.
  template <class UnaryFunction>
  inline function_output_iterator<UnaryFunction>
  make_function_output_iterator(const UnaryFunction& f = UnaryFunction()) {
    return function_output_iterator<UnaryFunction>(f);
  }

} // namespace iterators

using iterators::function_output_iterator;
using iterators::make_function_output_iterator;

} // namespace boost
#endif // BOOST_FUNCTION_OUTPUT_ITERATOR_HPP

View File

@ -2,37 +2,47 @@
#include "serializer.h" #include "serializer.h"
IndexedFile::IndexedFile() {} IndexedFile::IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db)
: usr_to_id(usr_to_id), file_db(file_db) {
// Preallocate any existing resolved ids.
for (const auto& entry : usr_to_id->usr_to_type_id)
types.push_back(IndexedTypeDef(entry.second, entry.first));
for (const auto& entry : usr_to_id->usr_to_func_id)
funcs.push_back(IndexedFuncDef(entry.second, entry.first));
for (const auto& entry : usr_to_id->usr_to_var_id)
vars.push_back(IndexedVarDef(entry.second, entry.first));
}
// TODO: Optimize for const char*? // TODO: Optimize for const char*?
TypeId IndexedFile::ToTypeId(const std::string& usr) { TypeId IndexedFile::ToTypeId(const std::string& usr) {
auto it = usr_to_type_id.find(usr); auto it = usr_to_id->usr_to_type_id.find(usr);
if (it != usr_to_type_id.end()) if (it != usr_to_id->usr_to_type_id.end())
return it->second; return it->second;
TypeId id(types.size()); TypeId id(usr_to_id->group, types.size());
types.push_back(IndexedTypeDef(id, usr)); types.push_back(IndexedTypeDef(id, usr));
usr_to_type_id[usr] = id; usr_to_id->usr_to_type_id[usr] = id;
return id; return id;
} }
FuncId IndexedFile::ToFuncId(const std::string& usr) { FuncId IndexedFile::ToFuncId(const std::string& usr) {
auto it = usr_to_func_id.find(usr); auto it = usr_to_id->usr_to_func_id.find(usr);
if (it != usr_to_func_id.end()) if (it != usr_to_id->usr_to_func_id.end())
return it->second; return it->second;
FuncId id(funcs.size()); FuncId id(usr_to_id->group, funcs.size());
funcs.push_back(IndexedFuncDef(id, usr)); funcs.push_back(IndexedFuncDef(id, usr));
usr_to_func_id[usr] = id; usr_to_id->usr_to_func_id[usr] = id;
return id; return id;
} }
VarId IndexedFile::ToVarId(const std::string& usr) { VarId IndexedFile::ToVarId(const std::string& usr) {
auto it = usr_to_var_id.find(usr); auto it = usr_to_id->usr_to_var_id.find(usr);
if (it != usr_to_var_id.end()) if (it != usr_to_id->usr_to_var_id.end())
return it->second; return it->second;
VarId id(vars.size()); VarId id(usr_to_id->group, vars.size());
vars.push_back(IndexedVarDef(id, usr)); vars.push_back(IndexedVarDef(id, usr));
usr_to_var_id[usr] = id; usr_to_id->usr_to_var_id[usr] = id;
return id; return id;
} }
@ -50,13 +60,13 @@ VarId IndexedFile::ToVarId(const CXCursor& cursor) {
IndexedTypeDef* IndexedFile::Resolve(TypeId id) { IndexedTypeDef* IndexedFile::Resolve(TypeId id) {
return &types[id.local_id]; return &types[id.id];
} }
IndexedFuncDef* IndexedFile::Resolve(FuncId id) { IndexedFuncDef* IndexedFile::Resolve(FuncId id) {
return &funcs[id.local_id]; return &funcs[id.id];
} }
IndexedVarDef* IndexedFile::Resolve(VarId id) { IndexedVarDef* IndexedFile::Resolve(VarId id) {
return &vars[id.local_id]; return &vars[id.id];
} }
std::string IndexedFile::ToString() { std::string IndexedFile::ToString() {
@ -311,7 +321,7 @@ void VisitDeclForTypeUsageVisitorHandler(clang::Cursor cursor, VisitDeclForTypeU
if (param->is_interesting) { if (param->is_interesting) {
IndexedTypeDef* ref_type_def = db->Resolve(ref_type_id); IndexedTypeDef* ref_type_def = db->Resolve(ref_type_id);
Location loc = db->file_db.Resolve(cursor, true /*interesting*/); Location loc = db->file_db->Resolve(cursor, true /*interesting*/);
ref_type_def->AddUsage(loc); ref_type_def->AddUsage(loc);
} }
} }
@ -434,7 +444,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
var_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, var_def->def.short_name); var_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, var_def->def.short_name);
//} //}
Location decl_loc = db->file_db.Resolve(decl->loc, false /*interesting*/); Location decl_loc = db->file_db->Resolve(decl->loc, false /*interesting*/);
if (decl->isDefinition) if (decl->isDefinition)
var_def->def.definition = decl_loc; var_def->def.definition = decl_loc;
else else
@ -480,7 +490,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
func_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, func_def->def.short_name); func_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, func_def->def.short_name);
//} //}
Location decl_loc = db->file_db.Resolve(decl->loc, false /*interesting*/); Location decl_loc = db->file_db->Resolve(decl->loc, false /*interesting*/);
if (decl->isDefinition) if (decl->isDefinition)
func_def->def.definition = decl_loc; func_def->def.definition = decl_loc;
else else
@ -597,7 +607,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
type_def->def.short_name = decl->entityInfo->name; type_def->def.short_name = decl->entityInfo->name;
type_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, type_def->def.short_name); type_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, type_def->def.short_name);
Location decl_loc = db->file_db.Resolve(decl->loc, true /*interesting*/); Location decl_loc = db->file_db->Resolve(decl->loc, true /*interesting*/);
type_def->def.definition = decl_loc.WithInteresting(false); type_def->def.definition = decl_loc.WithInteresting(false);
type_def->AddUsage(decl_loc); type_def->AddUsage(decl_loc);
break; break;
@ -631,7 +641,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// } // }
assert(decl->isDefinition); assert(decl->isDefinition);
Location decl_loc = db->file_db.Resolve(decl->loc, true /*interesting*/); Location decl_loc = db->file_db->Resolve(decl->loc, true /*interesting*/);
type_def->def.definition = decl_loc.WithInteresting(false); type_def->def.definition = decl_loc.WithInteresting(false);
type_def->AddUsage(decl_loc); type_def->AddUsage(decl_loc);
@ -660,7 +670,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
} }
default: default:
std::cout << "!! Unhandled indexDeclaration: " << clang::Cursor(decl->cursor).ToString() << " at " << db->file_db.Resolve(decl->loc, false /*interesting*/).ToString() << std::endl; std::cout << "!! Unhandled indexDeclaration: " << clang::Cursor(decl->cursor).ToString() << " at " << db->file_db->Resolve(decl->loc, false /*interesting*/).ToString() << std::endl;
std::cout << " entityInfo->kind = " << decl->entityInfo->kind << std::endl; std::cout << " entityInfo->kind = " << decl->entityInfo->kind << std::endl;
std::cout << " entityInfo->USR = " << decl->entityInfo->USR << std::endl; std::cout << " entityInfo->USR = " << decl->entityInfo->USR << std::endl;
if (decl->declAsContainer) if (decl->declAsContainer)
@ -696,7 +706,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
{ {
VarId var_id = db->ToVarId(ref->referencedEntity->cursor); VarId var_id = db->ToVarId(ref->referencedEntity->cursor);
IndexedVarDef* var_def = db->Resolve(var_id); IndexedVarDef* var_def = db->Resolve(var_id);
var_def->uses.push_back(db->file_db.Resolve(ref->loc, false /*interesting*/)); var_def->uses.push_back(db->file_db->Resolve(ref->loc, false /*interesting*/));
break; break;
} }
@ -718,7 +728,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
// Don't report duplicate usages. // Don't report duplicate usages.
// TODO: search full history? // TODO: search full history?
Location loc = db->file_db.Resolve(ref->loc, false /*interesting*/); Location loc = db->file_db->Resolve(ref->loc, false /*interesting*/);
if (param->last_func_usage_location == loc) break; if (param->last_func_usage_location == loc) break;
param->last_func_usage_location = loc; param->last_func_usage_location = loc;
@ -746,8 +756,8 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
if (ref->referencedEntity->kind == CXIdxEntity_CXXConstructor || if (ref->referencedEntity->kind == CXIdxEntity_CXXConstructor ||
ref->referencedEntity->kind == CXIdxEntity_CXXDestructor) { ref->referencedEntity->kind == CXIdxEntity_CXXDestructor) {
Location parent_loc = db->file_db.Resolve(ref->parentEntity->cursor, true /*interesting*/); Location parent_loc = db->file_db->Resolve(ref->parentEntity->cursor, true /*interesting*/);
Location our_loc = db->file_db.Resolve(ref->loc, true /*is_interesting*/); Location our_loc = db->file_db->Resolve(ref->loc, true /*is_interesting*/);
if (!parent_loc.IsEqualTo(our_loc)) { if (!parent_loc.IsEqualTo(our_loc)) {
IndexedFuncDef* called_def = db->Resolve(called_id); IndexedFuncDef* called_def = db->Resolve(called_id);
assert(called_def->def.declaring_type.has_value()); assert(called_def->def.declaring_type.has_value());
@ -783,16 +793,16 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
// Foo f; // Foo f;
// } // }
// //
referenced_def->AddUsage(db->file_db.Resolve(ref->loc, false /*interesting*/)); referenced_def->AddUsage(db->file_db->Resolve(ref->loc, false /*interesting*/));
break; break;
} }
default: default:
std::cout << "!! Unhandled indexEntityReference: " << cursor.ToString() << " at " << db->file_db.Resolve(ref->loc, false /*interesting*/).ToString() << std::endl; std::cout << "!! Unhandled indexEntityReference: " << cursor.ToString() << " at " << db->file_db->Resolve(ref->loc, false /*interesting*/).ToString() << std::endl;
std::cout << " ref->referencedEntity->kind = " << ref->referencedEntity->kind << std::endl; std::cout << " ref->referencedEntity->kind = " << ref->referencedEntity->kind << std::endl;
if (ref->parentEntity) if (ref->parentEntity)
std::cout << " ref->parentEntity->kind = " << ref->parentEntity->kind << std::endl; std::cout << " ref->parentEntity->kind = " << ref->parentEntity->kind << std::endl;
std::cout << " ref->loc = " << db->file_db.Resolve(ref->loc, false /*interesting*/).ToString() << std::endl; std::cout << " ref->loc = " << db->file_db->Resolve(ref->loc, false /*interesting*/).ToString() << std::endl;
std::cout << " ref->kind = " << ref->kind << std::endl; std::cout << " ref->kind = " << ref->kind << std::endl;
if (ref->parentEntity) if (ref->parentEntity)
std::cout << " parentEntity = " << clang::Cursor(ref->parentEntity->cursor).ToString() << std::endl; std::cout << " parentEntity = " << clang::Cursor(ref->parentEntity->cursor).ToString() << std::endl;
@ -807,7 +817,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
static bool DUMP_AST = true; static bool DUMP_AST = true;
IndexedFile Parse(std::string filename, std::vector<std::string> args) { IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector<std::string> args) {
clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/); clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/);
clang::TranslationUnit tu(index, filename, args); clang::TranslationUnit tu(index, filename, args);
@ -830,7 +840,7 @@ IndexedFile Parse(std::string filename, std::vector<std::string> args) {
*/ */
}; };
IndexedFile db; IndexedFile db(usr_to_id, file_db);
NamespaceHelper ns; NamespaceHelper ns;
IndexParam param(&db, &ns); IndexParam param(&db, &ns);
clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks), clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks),
@ -971,7 +981,9 @@ int main(int argc, char** argv) {
// Run test. // Run test.
std::cout << "[START] " << path << std::endl; std::cout << "[START] " << path << std::endl;
IndexedFile db = Parse(path, {}); UsrToIdResolver usr_to_id(1);
FileDb file_db(1);
IndexedFile db = Parse(&usr_to_id, &file_db, path, {});
std::string actual_output = db.ToString(); std::string actual_output = db.ToString();
//WriteToFile("output.json", actual_output); //WriteToFile("output.json", actual_output);

331
indexer.h
View File

@ -23,25 +23,154 @@ struct IndexedTypeDef;
struct IndexedFuncDef; struct IndexedFuncDef;
struct IndexedVarDef; struct IndexedVarDef;
using FileId = int64_t;
using namespace std::experimental; using namespace std::experimental;
using GroupId = int;

// Identifier made of a group plus a numeric id within that group.
//
// T is only a tag that keeps ids of different kinds (types, functions,
// variables, files) from being mixed up; no T value is stored. Ids may only be
// compared with ids from the same group; this is checked with assert.
template<typename T>
struct Id {
  GroupId group;
  uint64_t id;

  // Needed for containers. Do not use directly.
  // Both fields are zeroed so that comparing a default-constructed Id never
  // reads an indeterminate |group|.
  Id() : group(0), id(0) {}
  Id(GroupId group, uint64_t id) : group(group), id(id) {}

  // Equality on |id|; both operands must belong to the same group.
  bool operator==(const Id<T>& other) const {
    assert(group == other.group && "Cannot compare Ids from different groups");
    return id == other.id;
  }

  // Ordering on |id|; both operands must belong to the same group.
  bool operator<(const Id<T>& other) const {
    assert(group == other.group);
    return id < other.id;
  }
};
namespace std {
  // Hash support for Id<T>. Combines the hash of the numeric id with the hash
  // of the group.
  template<typename T>
  struct hash<Id<T>> {
    size_t operator()(const Id<T>& k) const {
      const size_t id_bits = hash<uint64_t>()(k.id);
      const size_t group_bits = hash<int>()(k.group);
      return (id_bits ^ (group_bits << 1)) >> 1;
    }
  };
}
// Free-function equality for Id<T>: asserts that both ids are in the same
// group, then compares the numeric ids.
template<typename T>
bool operator==(const Id<T>& a, const Id<T>& b) {
  assert(a.group == b.group && "Cannot compare Ids from different groups");
  const bool same_id = (a.id == b.id);
  return same_id;
}
// Empty tag type. It exists only so that FileId is a distinct Id<>
// instantiation from the type/function/variable ids below.
struct _FakeFileType {};
// Id aliases, one per kind of indexed entity. The template argument is a tag;
// ids of different kinds are different C++ types.
using FileId = Id<_FakeFileType>;
using TypeId = Id<IndexedTypeDef>;
using FuncId = Id<IndexedFuncDef>;
using VarId = Id<IndexedVarDef>;
// A position in a source file (file group + file id, line, column) together
// with an |interesting| flag.
//
// Equality (IsEqualTo / operator==) ignores |interesting|; ordering
// (operator<) takes it into account as the most significant key.
struct Location {
  bool interesting;
  int raw_file_group;
  int raw_file_id;
  int line;
  int column;

  // Creates an unset location: not interesting, every numeric field is -1.
  Location() {
    interesting = false;
    raw_file_group = -1;
    raw_file_id = -1;
    line = -1;
    column = -1;
  }

  // Creates a location inside |file| at |line|:|column|.
  Location(bool interesting, FileId file, uint32_t line, uint32_t column) {
    this->interesting = interesting;
    this->raw_file_group = file.group;
    this->raw_file_id = file.id;
    this->line = line;
    this->column = column;
  }

  // Rebuilds the FileId this location refers to. The Id constructor takes
  // (group, id), in that order.
  FileId file_id() const {
    return FileId(raw_file_group, raw_file_id);
  }

  std::string ToString() const {
    // Output looks like this:
    //
    //  *1:2:3
    //
    // * => interesting
    // 1 => file id
    // 2 => line
    // 3 => column

    std::string result;
    if (interesting)
      result += '*';
    result += std::to_string(raw_file_id);
    result += ':';
    result += std::to_string(line);
    result += ':';
    result += std::to_string(column);
    return result;
  }

  // Compare two Locations and check if they are equal. Ignores the value of
  // |interesting|.
  // Original author's note: operator== doesn't seem to work properly...
  bool IsEqualTo(const Location& o) const {
    // When comparing, ignore the value of |interesting|.
    return
      raw_file_group == o.raw_file_group &&
      raw_file_id == o.raw_file_id &&
      line == o.line &&
      column == o.column;
  }

  bool operator==(const Location& o) const {
    return IsEqualTo(o);
  }

  // Lexicographic ordering over (interesting, file group, file id, line,
  // column). Each key is only consulted when all earlier keys are equal,
  // which makes this a valid strict weak ordering for sorting and ordered
  // containers.
  bool operator<(const Location& o) const {
    if (interesting != o.interesting)
      return interesting < o.interesting;
    if (raw_file_group != o.raw_file_group)
      return raw_file_group < o.raw_file_group;
    if (raw_file_id != o.raw_file_id)
      return raw_file_id < o.raw_file_id;
    if (line != o.line)
      return line < o.line;
    return column < o.column;
  }

  // Returns a copy of this location with |interesting| replaced.
  Location WithInteresting(bool interesting) const {
    Location result = *this;
    result.interesting = interesting;
    return result;
  }
};
#if false
// TODO: Move off of this weird wrapper, use struct with custom wrappers // TODO: Move off of this weird wrapper, use struct with custom wrappers
// directly. // directly.
BEGIN_BITFIELD_TYPE(Location, uint64_t) BEGIN_BITFIELD_TYPE(Location, uint64_t)
ADD_BITFIELD_MEMBER(interesting, /*start:*/ 0, /*len:*/ 1); // 2 values ADD_BITFIELD_MEMBER(interesting, /*start:*/ 0, /*len:*/ 1); // 2 values
ADD_BITFIELD_MEMBER(file_id, /*start:*/ 1, /*len:*/ 29); // 536,870,912 values ADD_BITFIELD_MEMBER(raw_file_group, /*start:*/ 1, /*len:*/ 4); // 16 values, ok if they wrap around.
ADD_BITFIELD_MEMBER(line, /*start:*/ 30, /*len:*/ 20); // 1,048,576 values ADD_BITFIELD_MEMBER(raw_file_id, /*start:*/ 5, /*len:*/ 25); // 33,554,432 values
ADD_BITFIELD_MEMBER(column, /*start:*/ 50, /*len:*/ 14); // 16,384 values ADD_BITFIELD_MEMBER(line, /*start:*/ 30, /*len:*/ 20); // 1,048,576 values
ADD_BITFIELD_MEMBER(column, /*start:*/ 50, /*len:*/ 14); // 16,384 values
Location(bool interesting, FileId file_id, uint32_t line, uint32_t column) { Location(bool interesting, FileId file, uint32_t line, uint32_t column) {
this->interesting = interesting; this->interesting = interesting;
this->file_id = file_id; this->raw_file_group = file.group;
this->raw_file_id = file.id;
this->line = line; this->line = line;
this->column = column; this->column = column;
} }
FileId file_id() {
return FileId(raw_file_id, raw_file_group);
}
std::string ToString() { std::string ToString() {
// Output looks like this: // Output looks like this:
// //
@ -55,7 +184,7 @@ std::string ToString() {
std::string result; std::string result;
if (interesting) if (interesting)
result += '*'; result += '*';
result += std::to_string(file_id); result += std::to_string(raw_file_id);
result += ':'; result += ':';
result += std::to_string(line); result += std::to_string(line);
result += ':'; result += ':';
@ -78,15 +207,17 @@ Location WithInteresting(bool interesting) {
} }
END_BITFIELD_TYPE() END_BITFIELD_TYPE()
#endif
struct IndexedFileDb { struct FileDb {
GroupId group;
std::unordered_map<std::string, FileId> file_path_to_file_id; std::unordered_map<std::string, FileId> file_path_to_file_id;
std::unordered_map<FileId, std::string> file_id_to_file_path; std::unordered_map<FileId, std::string> file_id_to_file_path;
IndexedFileDb() { FileDb(GroupId group) : group(group) {
// Reserve id 0 for unfound. // Reserve id 0 for unfound.
file_path_to_file_id[""] = 0; file_path_to_file_id[""] = FileId(group, 0);
file_id_to_file_path[0] = ""; file_id_to_file_path[FileId(group, 0)] = "";
} }
Location Resolve(const CXSourceLocation& cx_loc, bool interesting) { Location Resolve(const CXSourceLocation& cx_loc, bool interesting) {
@ -103,7 +234,7 @@ struct IndexedFileDb {
file_id = it->second; file_id = it->second;
} }
else { else {
file_id = file_path_to_file_id.size(); file_id = FileId(group, file_path_to_file_id.size());
file_path_to_file_id[path] = file_id; file_path_to_file_id[path] = file_id;
file_id_to_file_path[file_id] = path; file_id_to_file_path[file_id] = path;
} }
@ -128,34 +259,32 @@ struct IndexedFileDb {
template<typename T> template<typename T>
struct LocalId { struct Ref {
uint64_t local_id; Id<T> id;
Location loc;
LocalId() : local_id(0) {} // Needed for containers. Do not use directly. Ref(Id<T> id, Location loc) : id(id), loc(loc) {}
explicit LocalId(uint64_t local_id) : local_id(local_id) {}
bool operator==(const LocalId<T>& other) { bool operator==(const Ref<T>& other) {
return local_id == other.local_id; return id == other.id && loc == other.loc;
}
bool operator!=(const Ref<T>& other) {
return !(*this == other);
}
// Lexicographic ordering: by |id| first, then by |loc| when the ids are
// equivalent. Requiring both members to be smaller at once would not be a
// strict weak ordering.
bool operator<(const Ref<T>& other) const {
  if (id < other.id)
    return true;
  if (other.id < id)
    return false;
  return loc < other.loc;
}
}; };
template<typename T> template<typename T>
bool operator==(const LocalId<T>& a, const LocalId<T>& b) { bool operator==(const Ref<T>& a, const Ref<T>& b) {
return a.local_id == b.local_id; return a.id == b.id && a.loc == b.loc;
}
template<typename T>
bool operator!=(const Ref<T>& a, const Ref<T>& b) {
return !(a == b);
} }
using TypeId = LocalId<IndexedTypeDef>;
using FuncId = LocalId<IndexedFuncDef>;
using VarId = LocalId<IndexedVarDef>;
template<typename T>
struct Ref {
LocalId<T> id;
Location loc;
Ref(LocalId<T> id, Location loc) : id(id), loc(loc) {}
};
using TypeRef = Ref<IndexedTypeDef>; using TypeRef = Ref<IndexedTypeDef>;
using FuncRef = Ref<IndexedFuncDef>; using FuncRef = Ref<IndexedFuncDef>;
using VarRef = Ref<IndexedVarDef>; using VarRef = Ref<IndexedVarDef>;
@ -197,6 +326,24 @@ struct TypeDefDefinitionData {
std::vector<VarId> vars; std::vector<VarId> vars;
TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {} TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {}
// Member-wise equality. Members are compared in declaration order and the
// comparison stops at the first mismatch.
bool operator==(const TypeDefDefinitionData& other) const {
  if (!(id == other.id)) return false;
  if (!(usr == other.usr)) return false;
  if (!(short_name == other.short_name)) return false;
  if (!(qualified_name == other.qualified_name)) return false;
  if (!(definition == other.definition)) return false;
  if (!(alias_of == other.alias_of)) return false;
  if (!(parents == other.parents)) return false;
  if (!(types == other.types)) return false;
  if (!(funcs == other.funcs)) return false;
  return vars == other.vars;
}
// Negation of operator==.
bool operator!=(const TypeDefDefinitionData& other) const {
  const bool equal = (*this == other);
  return !equal;
}
}; };
struct IndexedTypeDef { struct IndexedTypeDef {
@ -213,8 +360,21 @@ struct IndexedTypeDef {
IndexedTypeDef(TypeId id, const std::string& usr); IndexedTypeDef(TypeId id, const std::string& usr);
void AddUsage(Location loc, bool insert_if_not_present = true); void AddUsage(Location loc, bool insert_if_not_present = true);
// Orders type definitions by their id.
bool operator<(const IndexedTypeDef& other) const {
  const auto& lhs_id = def.id;
  const auto& rhs_id = other.def.id;
  return lhs_id < rhs_id;
}
}; };
namespace std {
  // Hashes an IndexedTypeDef by the USR string stored in its definition data.
  template <>
  struct hash<IndexedTypeDef> {
    size_t operator()(const IndexedTypeDef& k) const {
      hash<string> usr_hasher;
      return usr_hasher(k.def.usr);
    }
  };
}
struct FuncDefDefinitionData { struct FuncDefDefinitionData {
// General metadata. // General metadata.
FuncId id; FuncId id;
@ -238,6 +398,23 @@ struct FuncDefDefinitionData {
FuncDefDefinitionData(FuncId id, const std::string& usr) : id(id), usr(usr) { FuncDefDefinitionData(FuncId id, const std::string& usr) : id(id), usr(usr) {
assert(usr.size() > 0); assert(usr.size() > 0);
} }
// Member-wise equality. Members are compared in the order listed and the
// comparison stops at the first mismatch.
bool operator==(const FuncDefDefinitionData& other) const {
  if (!(id == other.id)) return false;
  if (!(usr == other.usr)) return false;
  if (!(short_name == other.short_name)) return false;
  if (!(qualified_name == other.qualified_name)) return false;
  if (!(definition == other.definition)) return false;
  if (!(declaring_type == other.declaring_type)) return false;
  if (!(base == other.base)) return false;
  if (!(locals == other.locals)) return false;
  return callees == other.callees;
}
// Negation of operator==.
bool operator!=(const FuncDefDefinitionData& other) const {
  const bool equal = (*this == other);
  return !equal;
}
}; };
struct IndexedFuncDef { struct IndexedFuncDef {
@ -265,8 +442,22 @@ struct IndexedFuncDef {
IndexedFuncDef(FuncId id, const std::string& usr) : def(id, usr) { IndexedFuncDef(FuncId id, const std::string& usr) : def(id, usr) {
assert(usr.size() > 0); assert(usr.size() > 0);
} }
// Orders function definitions by their id.
bool operator<(const IndexedFuncDef& other) const {
  const auto& lhs_id = def.id;
  const auto& rhs_id = other.def.id;
  return lhs_id < rhs_id;
}
}; };
namespace std {
  // Hashes an IndexedFuncDef by the USR string stored in its definition data.
  template <>
  struct hash<IndexedFuncDef> {
    size_t operator()(const IndexedFuncDef& k) const {
      hash<string> usr_hasher;
      return usr_hasher(k.def.usr);
    }
  };
}
struct VarDefDefinitionData { struct VarDefDefinitionData {
// General metadata. // General metadata.
VarId id; VarId id;
@ -285,6 +476,22 @@ struct VarDefDefinitionData {
optional<TypeId> declaring_type; optional<TypeId> declaring_type;
VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {} VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {}
// Member-wise equality. Members are compared in the order listed and the
// comparison stops at the first mismatch.
bool operator==(const VarDefDefinitionData& other) const {
  if (!(id == other.id)) return false;
  if (!(usr == other.usr)) return false;
  if (!(short_name == other.short_name)) return false;
  if (!(qualified_name == other.qualified_name)) return false;
  if (!(declaration == other.declaration)) return false;
  if (!(definition == other.definition)) return false;
  if (!(variable_type == other.variable_type)) return false;
  return declaring_type == other.declaring_type;
}
// Negation of operator==.
bool operator!=(const VarDefDefinitionData& other) const {
  const bool equal = (*this == other);
  return !equal;
}
}; };
struct IndexedVarDef { struct IndexedVarDef {
@ -292,29 +499,47 @@ struct IndexedVarDef {
// Usages. // Usages.
std::vector<Location> uses; std::vector<Location> uses;
bool is_system_def = false; bool is_system_def = false;
IndexedVarDef(VarId id, const std::string& usr) : def(id, usr) { IndexedVarDef(VarId id, const std::string& usr) : def(id, usr) {
assert(usr.size() > 0); assert(usr.size() > 0);
} }
// Orders variable definitions by their id.
bool operator<(const IndexedVarDef& other) const {
  const auto& lhs_id = def.id;
  const auto& rhs_id = other.def.id;
  return lhs_id < rhs_id;
}
}; };
namespace std {
  // Hashes an IndexedVarDef by the USR string stored in its definition data.
  template <>
  struct hash<IndexedVarDef> {
    size_t operator()(const IndexedVarDef& k) const {
      hash<string> usr_hasher;
      return usr_hasher(k.def.usr);
    }
  };
}
struct IndexedFile { struct UsrToIdResolver {
// NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs // NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs
// to get fixed up when inserting into the real db. // to get fixed up when inserting into the real db.
GroupId group;
std::unordered_map<std::string, TypeId> usr_to_type_id; std::unordered_map<std::string, TypeId> usr_to_type_id;
std::unordered_map<std::string, FuncId> usr_to_func_id; std::unordered_map<std::string, FuncId> usr_to_func_id;
std::unordered_map<std::string, VarId> usr_to_var_id; std::unordered_map<std::string, VarId> usr_to_var_id;
UsrToIdResolver(GroupId group) : group(group) {}
};
struct IndexedFile {
FileDb* file_db;
UsrToIdResolver* usr_to_id;
std::vector<IndexedTypeDef> types; std::vector<IndexedTypeDef> types;
std::vector<IndexedFuncDef> funcs; std::vector<IndexedFuncDef> funcs;
std::vector<IndexedVarDef> vars; std::vector<IndexedVarDef> vars;
IndexedFileDb file_db; IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db);
IndexedFile();
TypeId ToTypeId(const std::string& usr); TypeId ToTypeId(const std::string& usr);
FuncId ToFuncId(const std::string& usr); FuncId ToFuncId(const std::string& usr);
@ -332,32 +557,4 @@ struct IndexedFile {
// TODO: Maybe instead of clearing/adding diffs, we should just clear out the IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector<std::string> args);
// entire previous index and readd the new one? That would be simpler.
// TODO: ^^^ I don't think we can do this. It will probably stall the main
// indexer for far too long since we will have to iterate over tons of
// data.
// TODO: Idea: when indexing and joining to the main db, allow many dbs that
// are joined to. So that way even if the main db is busy we can
// still be joining. Joining the partially joined db to the main
// db should be faster since we will have larger data lanes to use.
struct IndexedTypeDefDiff {};
struct IndexedFuncDefDiff {};
struct IndexedVarDefDiff {};
struct IndexedFileDiff {
std::vector<IndexedTypeDefDiff> removed_types;
std::vector<IndexedFuncDefDiff> removed_funcs;
std::vector<IndexedVarDefDiff> removed_vars;
std::vector<IndexedTypeDefDiff> added_types;
std::vector<IndexedFuncDefDiff> added_funcs;
std::vector<IndexedVarDefDiff> added_vars;
// TODO: Instead of change, maybe we just remove and then add again? not sure.
std::vector<IndexedTypeDefDiff> changed_types;
std::vector<IndexedFuncDefDiff> changed_funcs;
std::vector<IndexedVarDefDiff> changed_vars;
};
IndexedFile Parse(std::string filename, std::vector<std::string> args);

710
query.cc
View File

@ -1,19 +1,200 @@
#include "query.h" #include "query.h"
#include <cstdint> #include <cstdint>
#include <functional>
#include <unordered_set>
#include <unordered_map> #include <unordered_map>
#include <string> #include <string>
#include <iostream> #include <iostream>
#include "function_output_iterator.hpp"
#include "compilation_database_loader.h" #include "compilation_database_loader.h"
#include "optional.h" #include "optional.h"
#include "indexer.h" #include "indexer.h"
struct FileDatabase { //#define CATCH_CONFIG_MAIN
std::unordered_map<std::string, FileId> filename_to_file_id; //#include "catch.hpp"
std::unordered_map<FileId, std::string> file_id_to_filename;
// TODO: Make all copy constructors explicit.
// Translates ids that live in arbitrary source groups into ids that belong to
// |target_group|. A source id is assigned a fresh target id the first time it
// is seen; later lookups of the same source id return that same target id.
struct IdMap {
  // The outer vector is indexed by TId::group. Each entry maps a source id
  // from that group to the id allocated for it in |target_group|.
  template<typename TId>
  using GroupMap = std::vector<std::unordered_map<TId, TId>>;

  // Group that every remapped id is placed into.
  GroupId target_group;

  // Next unallocated id for each id kind. Allocation starts at 1.
  int64_t next_file_id = 1;
  int64_t next_type_id = 1;
  int64_t next_func_id = 1;
  int64_t next_var_id = 1;

  // Source-id -> target-id tables, one per id kind.
  GroupMap<FileId> remap_file_id;
  GroupMap<TypeId> remap_type_id;
  GroupMap<FuncId> remap_func_id;
  GroupMap<VarId> remap_var_id;

  IdMap(GroupId target_group) : target_group(target_group) {}

  // Returns the id in |target_group| that corresponds to |from|, allocating a
  // new one from |*next_id| if |from| has not been seen before.
  template<typename TId>
  inline TId GenericRemap(GroupMap<TId>* map, int64_t* next_id, TId from) {
    // PERF: If this function is a hot-spot we can pull the group computation
    //       out, ie,
    //
    //    IdMap id_map;
    //    GroupIdMap group_map = id_map.ResolveIdGroup(file.group)
    //    for (...)
    //      group_map.Remap(id)

    // Find the group that |from| belongs to. Create groups if needed.
    if (from.group >= map->size())
      map->resize(from.group + 1);

    // If the group doesn't have an ID already mapped out for |from|, map it.
    /*
    // TODO: The concern with this approach is that it going to waste huge
    // amounts of memory, because the first 16k+ ids can be unused.
    std::vector<TId>& group = (*map)[from.group];
    if (from.id >= group.size()) {
      group.reserve(from.id + 1);
      for (size_t i = group.size(); i < from.id; ++i)
        group.emplace_back(TId(target_group, (*next_id)++));
    }
    */

    // Must be a reference: inserting into a copy would discard the mapping
    // and hand out a different id for |from| on every call.
    std::unordered_map<TId, TId>& group = (*map)[from.group];

    // Lookup the id from the group or add it.
    auto it = group.find(from);
    if (it == group.end()) {
      TId result(target_group, (*next_id)++);
      group[from] = result;
      return result;
    }
    return it->second;
  }

  // Remaps every id in |from|, preserving order. The group table is looked up
  // once using the group of the first element, so all ids in |from| are
  // expected to share that group.
  template<typename TId>
  inline std::vector<TId> GenericVectorRemap(GroupMap<TId>* map, int64_t* next_id, const std::vector<TId>& from) {
    if (from.empty())
      return {};

    const auto group_id = from[0].group;
    if (group_id >= map->size())
      map->resize(group_id + 1);

    // Must be a reference so that newly allocated ids are remembered.
    std::unordered_map<TId, TId>& group = (*map)[group_id];

    std::vector<TId> result;
    result.reserve(from.size());
    for (TId id : from) {
      // Lookup the id from the group or add it.
      auto it = group.find(id);
      if (it == group.end()) {
        TId new_id(target_group, (*next_id)++);
        group[id] = new_id;
        result.push_back(new_id);
      }
      else {
        result.push_back(it->second);
      }
    }

    return result;
  }

  FileId Remap(FileId from) {
    return GenericRemap(&remap_file_id, &next_file_id, from);
  }
  // Remaps only the file portion of the location; the other fields of |from|
  // are returned unchanged.
  Location Remap(Location from) {
    FileId file = Remap(from.file_id());
    from.raw_file_group = file.group;
    from.raw_file_id = file.id;
    return from;
  }
  TypeId Remap(TypeId from) {
    return GenericRemap(&remap_type_id, &next_type_id, from);
  }
  FuncId Remap(FuncId from) {
    return GenericRemap(&remap_func_id, &next_func_id, from);
  }
  VarId Remap(VarId from) {
    return GenericRemap(&remap_var_id, &next_var_id, from);
  }
  FuncRef Remap(FuncRef from) {
    from.id = Remap(from.id);
    from.loc = Remap(from.loc);
    return from;
  }
  // The definition-data overloads remap every id/location field in place on a
  // by-value copy; optional fields are remapped only when they hold a value.
  TypeDefDefinitionData Remap(TypeDefDefinitionData def) {
    def.id = Remap(def.id);
    if (def.definition)
      def.definition = Remap(def.definition.value());
    if (def.alias_of)
      def.alias_of = Remap(def.alias_of.value());
    def.parents = Remap(def.parents);
    def.types = Remap(def.types);
    def.funcs = Remap(def.funcs);
    def.vars = Remap(def.vars);
    return def;
  }
  FuncDefDefinitionData Remap(FuncDefDefinitionData def) {
    def.id = Remap(def.id);
    if (def.definition)
      def.definition = Remap(def.definition.value());
    if (def.declaring_type)
      def.declaring_type = Remap(def.declaring_type.value());
    if (def.base)
      def.base = Remap(def.base.value());
    def.locals = Remap(def.locals);
    def.callees = Remap(def.callees);
    return def;
  }
  VarDefDefinitionData Remap(VarDefDefinitionData def) {
    def.id = Remap(def.id);
    if (def.declaration)
      def.declaration = Remap(def.declaration.value());
    if (def.definition)
      def.definition = Remap(def.definition.value());
    if (def.variable_type)
      def.variable_type = Remap(def.variable_type.value());
    if (def.declaring_type)
      def.declaring_type = Remap(def.declaring_type.value());
    return def;
  }

  //std::vector<FileId> Remap(const std::vector<FileId>& from) {
  //  return GenericVectorRemap(&remap_file_id, &next_file_id, from);
  //}
  std::vector<Location> Remap(const std::vector<Location>& from) {
    std::vector<Location> result;
    result.reserve(from.size());
    for (Location l : from)
      result.push_back(Remap(l));
    return result;
  }
  std::vector<TypeId> Remap(const std::vector<TypeId>& from) {
    return GenericVectorRemap(&remap_type_id, &next_type_id, from);
  }
  std::vector<FuncId> Remap(const std::vector<FuncId>& from) {
    return GenericVectorRemap(&remap_func_id, &next_func_id, from);
  }
  std::vector<VarId> Remap(const std::vector<VarId>& from) {
    return GenericVectorRemap(&remap_var_id, &next_var_id, from);
  }
  std::vector<FuncRef> Remap(const std::vector<FuncRef>& from) {
    std::vector<FuncRef> result;
    result.reserve(from.size());
    for (FuncRef r : from)
      result.push_back(Remap(r));
    return result;
  }
};
enum class SymbolKind { Type, Func, Var }; enum class SymbolKind { Type, Func, Var };
struct SymbolIdx { struct SymbolIdx {
SymbolKind kind; SymbolKind kind;
@ -45,6 +226,20 @@ struct MergeableUpdate {
std::vector<TValue> to_remove; std::vector<TValue> to_remove;
}; };
// Builds a MergeableUpdate for |symbol_id| whose id and whose removed/added
// value lists have all been passed through |id_map|. The remaps run in the
// order id, removed, added.
template<typename TId, typename TValue>
MergeableUpdate<TId, TValue> MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector<TValue>& removed, const std::vector<TValue>& added) {
  IdMap& ids = *id_map;

  MergeableUpdate<TId, TValue> remapped;
  remapped.id = ids.Remap(symbol_id);
  remapped.to_remove = ids.Remap(removed);
  remapped.to_add = ids.Remap(added);
  return remapped;
}
// NOTE: When not inside of a |def| object, there can be duplicates of the same
// information if that information is contributed from separate sources.
// If we need to avoid this duplication in the future, we will have to
// add a refcount.
struct QueryableTypeDef { struct QueryableTypeDef {
TypeDefDefinitionData def; TypeDefDefinitionData def;
std::vector<TypeId> derived; std::vector<TypeId> derived;
@ -53,6 +248,12 @@ struct QueryableTypeDef {
using DefUpdate = TypeDefDefinitionData; using DefUpdate = TypeDefDefinitionData;
using DerivedUpdate = MergeableUpdate<TypeId, TypeId>; using DerivedUpdate = MergeableUpdate<TypeId, TypeId>;
using UsesUpdate = MergeableUpdate<TypeId, Location>; using UsesUpdate = MergeableUpdate<TypeId, Location>;
QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed)
: def(id_map.Remap(indexed.def)) {
derived = id_map.Remap(indexed.derived);
uses = id_map.Remap(indexed.uses);
}
}; };
struct QueryableFuncDef { struct QueryableFuncDef {
@ -67,6 +268,14 @@ struct QueryableFuncDef {
using DerivedUpdate = MergeableUpdate<FuncId, FuncId>; using DerivedUpdate = MergeableUpdate<FuncId, FuncId>;
using CallersUpdate = MergeableUpdate<FuncId, FuncRef>; using CallersUpdate = MergeableUpdate<FuncId, FuncRef>;
using UsesUpdate = MergeableUpdate<FuncId, Location>; using UsesUpdate = MergeableUpdate<FuncId, Location>;
QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed)
: def(id_map.Remap(indexed.def)) {
declarations = id_map.Remap(indexed.declarations);
derived = id_map.Remap(indexed.derived);
callers = id_map.Remap(indexed.callers);
uses = id_map.Remap(indexed.uses);
}
}; };
struct QueryableVarDef { struct QueryableVarDef {
@ -75,9 +284,16 @@ struct QueryableVarDef {
using DefUpdate = VarDefDefinitionData; using DefUpdate = VarDefDefinitionData;
using UsesUpdate = MergeableUpdate<VarId, Location>; using UsesUpdate = MergeableUpdate<VarId, Location>;
QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed)
: def(id_map.Remap(indexed.def)) {
uses = id_map.Remap(indexed.uses);
}
}; };
struct QueryableFile { struct QueryableFile {
FileId file_id;
// Symbols declared in the file. // Symbols declared in the file.
std::vector<SymbolIdx> declared_symbols; std::vector<SymbolIdx> declared_symbols;
// Symbols which have definitions in the file. // Symbols which have definitions in the file.
@ -91,6 +307,8 @@ struct QueryableEntry {
// The query database is heavily optimized for fast queries. It is stored // The query database is heavily optimized for fast queries. It is stored
// in-memory. // in-memory.
struct QueryableDatabase { struct QueryableDatabase {
IdMap id_map;
// Indicies between lookup vectors are related to symbols, ie, index 5 in // Indicies between lookup vectors are related to symbols, ie, index 5 in
// |qualified_names| matches index 5 in |symbols|. // |qualified_names| matches index 5 in |symbols|.
std::vector<QueryableEntry> qualified_names; std::vector<QueryableEntry> qualified_names;
@ -102,220 +320,324 @@ struct QueryableDatabase {
std::vector<QueryableVarDef> vars; std::vector<QueryableVarDef> vars;
// |files| is indexed by FileId. Retrieve a FileId from a path using // |files| is indexed by FileId. Retrieve a FileId from a path using
// |file_locator|. // |file_db|.
FileDatabase file_locator; FileDb file_db;
std::vector<QueryableFile> files; std::vector<QueryableFile> files;
// When importing data into the global db we need to remap ids from an
// arbitrary group into the global group.
IdMap local_id_group_to_global_id_group;
}; };
struct Query {
};
struct CachedIndexedFile { struct CachedIndexedFile {
// Path to the file indexed. // Path to the file indexed.
std::string path; std::string path;
// GroupId of the indexed file.
// Full in-memory storage for the index. Empty if not loaded into memory. GroupId group;
// |path| can be used to fetch the index from disk.
optional<rapidjson::Document> index; // TODO: Make sure that |previous_index| and |current_index| use the same id
// to USR mapping. This lets us greatly speed up difference computation.
// The previous index. This is used for index updates, so we only apply a
// an update diff when changing the global db.
optional<IndexedFile> previous_index;
IndexedFile current_index;
CachedIndexedFile(const IndexedFile& indexed)
: group(indexed.usr_to_id->group), current_index(indexed) {}
}; };
struct DocumentDiff {
struct IndexUpdate {
IdMap* id_map;
// Type updates.
std::vector<TypeId> types_removed;
std::vector<QueryableTypeDef> types_added;
std::vector<QueryableTypeDef::DefUpdate> types_def_changed;
std::vector<QueryableTypeDef::DerivedUpdate> types_derived;
std::vector<QueryableTypeDef::UsesUpdate> types_uses;
// Function updates.
std::vector<FuncId> funcs_removed;
std::vector<QueryableFuncDef> funcs_added;
std::vector<QueryableFuncDef::DefUpdate> funcs_def_changed;
std::vector<QueryableFuncDef::DeclarationsUpdate> funcs_declarations;
std::vector<QueryableFuncDef::DerivedUpdate> funcs_derived;
std::vector<QueryableFuncDef::CallersUpdate> funcs_callers;
std::vector<QueryableFuncDef::UsesUpdate> funcs_uses;
// Variable updates.
std::vector<VarId> vars_removed;
std::vector<QueryableVarDef> vars_added;
std::vector<QueryableVarDef::DefUpdate> vars_def_changed;
std::vector<QueryableVarDef::UsesUpdate> vars_uses;
IndexUpdate(IdMap* id_map) : id_map(id_map) {}
}; };
// Compute a diff between |original| and |updated|.
//rapidjson::Document DiffIndex(rapidjson::Document original, rapidjson::Document updated) {
//}
template<typename TValue>
TValue* TryFind(std::unordered_set<TValue*>& set, TValue* value) {
bool ParsePreferredSymbolLocation(const std::string& content, PreferredSymbolLocation* obj) { // TODO: Make |value| a const ref?
#define PARSE_AS(name, string) \ auto it = set.find(value);
if (content == #string) { \ if (it == set.end())
*obj = name; \ return nullptr;
return true; \ return *it;
}
PARSE_AS(PreferredSymbolLocation::Declaration, "declaration");
PARSE_AS(PreferredSymbolLocation::Definition, "definition");
return false;
#undef PARSE_AS
} }
bool ParseCommand(const std::string& content, Command* obj) { template<typename T>
#define PARSE_AS(name, string) \ std::unordered_set<T*> CreateSet(std::vector<T>& elements) {
if (content == #string) { \ std::unordered_set<T*> result;
*obj = name; \ result.reserve(elements.size());
return true; \ for (T& element : elements)
} result.insert(&element);
return result;
PARSE_AS(Command::Callees, "callees");
PARSE_AS(Command::Callers, "callers");
PARSE_AS(Command::FindAllUsages, "find-all-usages");
PARSE_AS(Command::FindInterestingUsages, "find-interesting-usages");
PARSE_AS(Command::GotoReferenced, "goto-referenced");
PARSE_AS(Command::Hierarchy, "hierarchy");
PARSE_AS(Command::Outline, "outline");
PARSE_AS(Command::Search, "search");
return false;
#undef PARSE_AS
} }
// Diffs two value lists.
//
// Every element found in |previous| but not in |current| is appended to
// |removed|; every element found in |current| but not in |previous| is
// appended to |added|. Both input vectors are sorted in place as a side
// effect.
//
// Returns true iff |removed| or |added| are non-empty afterwards.
template<typename T>
bool ComputeDifferenceForUpdate(
    std::vector<T>& previous, std::vector<T>& current,
    std::vector<T>* removed, std::vector<T>* added) {
  // std::set_difference only works on sorted ranges.
  std::sort(previous.begin(), previous.end());
  std::sort(current.begin(), current.end());

  // Appends to |out| the elements of |lhs| that are missing from |rhs|.
  const auto append_only_in = [](const std::vector<T>& lhs, const std::vector<T>& rhs, std::vector<T>* out) {
    std::set_difference(
        lhs.begin(), lhs.end(),
        rhs.begin(), rhs.end(),
        std::back_inserter(*out));
  };

  append_only_in(previous, current, removed);
  append_only_in(current, previous, added);

  const bool nothing_changed = removed->empty() && added->empty();
  return !nothing_changed;
}
#if false
// Disabled hash-set based variant of CompareGroups, kept for reference. It
// builds sets of element *pointers* for both inputs and looks entries up by
// pointer, which cannot match entries across the two vectors (see the TODO
// above the first loop).
template<typename T>
void CompareGroups(
std::vector<T>& previous_data, std::vector<T>& current_data,
std::function<void(T*)> on_removed, std::function<void(T*)> on_added, std::function<void(T*, T*)> on_found) {
// TODO: It could be faster to use set_intersection and set_difference to
// compute these values. We will have to presort the input by ID, though.
// Precompute sets so we stay around O(3N) instead of O(N^2). Otherwise
// lookups for duplicate elements will be O(N) and we need them to be O(1).
std::unordered_set<T*> previous_set = CreateSet(previous_data);
std::unordered_set<T*> current_set = CreateSet(current_data);
// TODO: TryFind is just comparing pointers which obviously fails because they point to different memory...
for (T* current_entry : current_set) {
// Possibly updated.
if (T* previous_entry = TryFind(previous_set, current_entry))
on_found(previous_entry, current_entry);
// Added
else
on_added(current_entry);
}
for (T* previous_entry : previous_set) {
// Removed
if (!TryFind(current_set, previous_entry))
on_removed(previous_entry);
}
}
#endif
// Walks |previous_data| and |current_data| in lock-step, matching entries by
// |def.id|, and reports each entry through exactly one callback:
//
//   on_removed(prev)       - id exists only in |previous_data|
//   on_added(curr)         - id exists only in |current_data|
//   on_found(prev, curr)   - id exists in both
//
// Both vectors are sorted in place by |def.id| as a side effect. Pointers
// handed to the callbacks point into the (sorted) input vectors.
template<typename T>
void CompareGroups(
    std::vector<T>& previous_data, std::vector<T>& current_data,
    std::function<void(T*)> on_removed, std::function<void(T*)> on_added, std::function<void(T*, T*)> on_found) {
  // TODO: It could be faster to use set_intersection and set_difference to
  // compute these values (e.g. std::set_difference writing into a function
  // output iterator).

  // The merge below compares |def.id|, so sort on exactly that key instead of
  // relying on T::operator< happening to order by id.
  const auto by_id = [](const T& lhs, const T& rhs) {
    return lhs.def.id < rhs.def.id;
  };
  std::sort(previous_data.begin(), previous_data.end(), by_id);
  std::sort(current_data.begin(), current_data.end(), by_id);

  auto prev_it = previous_data.begin();
  auto curr_it = current_data.begin();
  while (prev_it != previous_data.end() && curr_it != current_data.end()) {
    // same id
    if (prev_it->def.id == curr_it->def.id) {
      on_found(&*prev_it, &*curr_it);
      ++prev_it;
      ++curr_it;
    }
    // prev_id is smaller - prev_it has data curr_it does not have.
    else if (prev_it->def.id < curr_it->def.id) {
      on_removed(&*prev_it);
      ++prev_it;
    }
    // prev_id is bigger - curr_it has data prev_it does not have.
    else {
      on_added(&*curr_it);
      ++curr_it;
    }
  }

  // if prev_it still has data, that means it is not in curr_it and was removed.
  for (; prev_it != previous_data.end(); ++prev_it)
    on_removed(&*prev_it);

  // if curr_it still has data, that means it is not in prev_it and was added.
  for (; curr_it != current_data.end(); ++curr_it)
    on_added(&*curr_it);
}
// TODO: make this const correct.
IndexUpdate ComputeDiff(IdMap* id_map, IndexedFile& previous, IndexedFile& current) {
#define JOIN(a, b) a##b
// |query_name| is the name of the variable on the query type.
// |index_name| is the name of the variable on the index type.
// |type| is the type of the variable.
#define PROCESS_UPDATE_DIFF(query_name, index_name, type) \
{ \
/* Check for changes. */ \
std::vector<type> removed, added; \
bool did_add = ComputeDifferenceForUpdate(JOIN(previous->, index_name), JOIN(current->, index_name), &removed, &added); \
if (did_add) {\
std::cout << "Adding mergeable update on " << current->def.short_name << " (" << current->def.usr << ") for field " << #index_name << std::endl; \
JOIN(update., query_name).push_back(MakeMergeableUpdate(id_map, current->def.id, removed, added)); \
} \
}
assert(previous.usr_to_id == current.usr_to_id);
assert(previous.file_db == current.file_db);
IndexUpdate update(id_map);
// Types
CompareGroups<IndexedTypeDef>(previous.types, current.types,
/*onRemoved:*/[&update, &id_map](IndexedTypeDef* def) {
update.types_removed.push_back(id_map->Remap(def->def.id));
},
/*onAdded:*/[&update, &id_map](IndexedTypeDef* def) {
update.types_added.push_back(QueryableTypeDef(*id_map, *def));
},
/*onChanged:*/[&update, &id_map](IndexedTypeDef* previous, IndexedTypeDef* current) {
if (previous->def != current->def)
update.types_def_changed.push_back(id_map->Remap(current->def));
PROCESS_UPDATE_DIFF(types_derived, derived, TypeId);
PROCESS_UPDATE_DIFF(types_uses, uses, Location);
});
// Functions
CompareGroups<IndexedFuncDef>(previous.funcs, current.funcs,
/*onRemoved:*/[&update, &id_map](IndexedFuncDef* def) {
update.funcs_removed.push_back(id_map->Remap(def->def.id));
},
/*onAdded:*/[&update, &id_map](IndexedFuncDef* def) {
update.funcs_added.push_back(QueryableFuncDef(*id_map, *def));
},
/*onChanged:*/[&update, &id_map](IndexedFuncDef* previous, IndexedFuncDef* current) {
if (previous->def != current->def)
update.funcs_def_changed.push_back(id_map->Remap(current->def));
PROCESS_UPDATE_DIFF(funcs_declarations, declarations, Location);
PROCESS_UPDATE_DIFF(funcs_derived, derived, FuncId);
PROCESS_UPDATE_DIFF(funcs_callers, callers, FuncRef);
PROCESS_UPDATE_DIFF(funcs_uses, uses, Location);
});
// Variables
CompareGroups<IndexedVarDef>(previous.vars, current.vars,
/*onRemoved:*/[&update, &id_map](IndexedVarDef* def) {
update.vars_removed.push_back(id_map->Remap(def->def.id));
},
/*onAdded:*/[&update, &id_map](IndexedVarDef* def) {
update.vars_added.push_back(QueryableVarDef(*id_map, *def));
},
/*onChanged:*/[&update, &id_map](IndexedVarDef* previous, IndexedVarDef* current) {
if (previous->def != current->def)
update.vars_def_changed.push_back(id_map->Remap(current->def));
PROCESS_UPDATE_DIFF(vars_uses, uses, Location);
});
return update;
#undef PROCESS_UPDATE_DIFF
#undef JOIN
}
// Merge the contents of |source| into |destination|.
//
// NOTE: Not implemented yet. The body is empty, so calling this currently
// leaves |destination| untouched.
void Merge(const IndexUpdate& source, IndexUpdate* destination) {
// TODO.
}
// Insert the contents of |update| into |db|.
//
// NOTE: Not implemented yet. The body is empty, so calling this currently
// leaves |db| untouched.
void ApplyIndexUpdate(const IndexUpdate& update, QueryableDatabase* db) {
}
// Manual driver for the index-delta code path: parses two versions of the
// same test file with a shared UsrToIdResolver/FileDb, prints both indexes,
// and computes the diff between them. The computed update is not used.
// NOTE(review): the name is not `main`, so presumably this is parked scratch
// code rather than the program entry point - confirm before relying on it.
int ma333in(int argc, char** argv) {
// TODO: Unify UsrToIdResolver and FileDb
UsrToIdResolver usr_to_id(1);
FileDb file_db(1);
IndexedFile indexed_file_a = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v0.cc", {});
std::cout << indexed_file_a.ToString() << std::endl;
std::cout << std::endl;
IndexedFile indexed_file_b = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v1.cc", {});
std::cout << indexed_file_b.ToString() << std::endl;
// TODO: We don't need to do ID remapping when computing a diff. Well, we need to do it for the IndexUpdate.
// Ids in the resulting update are remapped into group 2.
IdMap dest_ids(2);
IndexUpdate update = ComputeDiff(&dest_ids, indexed_file_a, indexed_file_b);
return 0;
}
// TODO: Idea: when indexing and joining to the main db, allow many dbs that
// are joined to. So that way even if the main db is busy we can
// still be joining. Joining the partially joined db to the main
// db should be faster since we will have larger data lanes to use.
// TODO: I think we can run libclang multiple times in one process. So we might // TODO: I think we can run libclang multiple times in one process. So we might
// only need two processes. Still, for perf reasons it would be good if // only need two processes. Still, for perf reasons it would be good if
// we could stay in one process. // we could stay in one process. We could probably just use shared
// memory. May want to run libclang in separate process to protect from
// crashes/issues there.
// TODO: allow user to store configuration as json? file in home dir; also // TODO: allow user to store configuration as json? file in home dir; also
// allow local overrides (scan up dirs) // allow local overrides (scan up dirs)
// TODO: add opt to dump config when starting (--dump-config) // TODO: add opt to dump config when starting (--dump-config)
// TODO: allow user to decide some indexer choices, ie, do we define // TODO: allow user to decide some indexer choices, ie, do we mark prototype parameters as usages?
// TODO: may want to run indexer in separate process to avoid indexer/compiler crashes?
// Turns the command line into a switch -> value map.
//
// Any argument that begins with '-' is a switch and is stored with an empty
// value. An argument that does not begin with '-' becomes the value of the
// switch immediately before it. A value with no preceding switch (or a second
// value in a row) prints an error to stderr and terminates the process with
// exit code 1. argv[0] is ignored.
std::unordered_map<std::string, std::string> ParseOptions(int argc, char** argv) {
  std::unordered_map<std::string, std::string> parsed;

  // Switch still waiting for its value; empty when there is none.
  std::string pending_switch;

  for (int index = 1; index < argc; ++index) {
    const std::string token = argv[index];

    const bool is_switch = token[0] == '-';
    if (is_switch) {
      parsed[token] = "";
      pending_switch = token;
      continue;
    }

    if (pending_switch.empty()) {
      std::cerr << "Invalid arguments; switches must start with -" << std::endl;
      exit(1);
    }

    parsed[pending_switch] = token;
    pending_switch.clear();
  }

  return parsed;
}
// Returns true when |option| is a key of |options|, whatever its value.
bool HasOption(const std::unordered_map<std::string, std::string>& options, const std::string& option) {
  return options.count(option) > 0;
}
// Command-line front end.
//
// Parses the switches in |argv| and handles them in this order:
//   - no arguments or --help: print general help and exit(0)
//   - --help-commands:        print the list of query commands and exit(0)
//   - --project:              parse every compilation entry found under the
//                             given path, print each index, wait for a key
//                             press and exit(0)
//   - --command:              validate the command name (Fail on unknown)
// Anything that reaches the end prints "Invalid arguments" and exit(1).
int main2(int argc, char** argv) {
  std::unordered_map<std::string, std::string> options = ParseOptions(argc, argv);

  if (argc == 1 || HasOption(options, "--help")) {
    std::cout << R"help(clang-indexer help:
General:
--help Print this help information.
--help-commands
Print all available query commands.
--project Path to compile_commands.json. Needed for the server, and
optionally by clients if there are multiple servers running.
--print-config
Emit all configuration data this executable is using.
Server:
--server If present, this binary will run in server mode. The binary
will not return until killed or an exit is requested. The
server computes and caches an index of the entire program
which is then queried by short-lived client processes. A
client is created by running this binary with a --command
flag.
--cache-dir Directory to cache the index and other useful information. If
a previous cache is present, the database will try to reuse
it. If this flag is not present, the database will be
in-memory only.
--threads Number of threads to use for indexing and querying tasks.
This value is optional; a good estimate is computed by
default.
Client:
--command Execute a query command against the index. See
--help-commands for a listing of valid commands and a
description of what they do. Presence of this flag indicates
that the indexer is in client mode; this flag is mutually
exclusive with --server.
--location Location of the query. Some commands require only a file,
other require a line and column as well. Format is
filename[:line:column]. For example, "foobar.cc" and
"foobar.cc:1:10" are valid inputs.
--preferred-symbol-location
When looking up symbols, try to return either the
'declaration' or the 'definition'. Defaults to 'definition'.
)help";
    exit(0);
  }

  if (HasOption(options, "--help-commands")) {
    std::cout << R"(Available commands:
callees:
callers:
Emit all functions (with location) that this function calls ("callees") or
that call this function ("callers"). Requires a location.
find-all-usages:
Emit every usage of the given symbol. This is intended to support a rename
refactoring. This output contains many uninteresting usages of symbols;
prefer find-interesting-usages. Requires a location.
find-interesting-usages:
Emit only usages of the given symbol which are semantically interesting.
Requires a location.
goto-referenced:
Find an associated reference (either definition or declaration) for the
given symbol. Requires a location.
hierarchy:
List the type hierarchy (ie, inherited and derived members) for the given
method or type. Requires a location.
outline:
Emit a file outline, listing all of the symbols in the file.
search:
Search for a symbol by name.
)";
    exit(0);
  }

  if (HasOption(options, "--project")) {
    std::vector<CompilationEntry> entries = LoadCompilationEntriesFromDirectory(options["--project"]);

    std::vector<IndexedFile> dbs;
    for (const CompilationEntry& entry : entries) {
      std::cout << "Parsing " << entry.filename << std::endl;
      IndexedFile db = Parse(entry.filename, entry.args);

      dbs.emplace_back(db);
      std::cout << db.ToString() << std::endl << std::endl;
    }

    // Keep the console open until the user presses a key.
    std::cin.get();
    exit(0);
  }

  if (HasOption(options, "--command")) {
    Command command;
    if (!ParseCommand(options["--command"], &command))
      Fail("Unknown command \"" + options["--command"] + "\"; see --help-commands");
    // NOTE: The parsed command is not executed yet; control falls through to
    // the "Invalid arguments" path below.
  }

  std::cout << "Invalid arguments. Try --help.";
  exit(1);
  return 0;
}

View File

@ -2,110 +2,6 @@
#include "indexer.h" #include "indexer.h"
#if false
template<typename T>
void Emit(Reader& a, const char* key, T& v) {
static_assert(false); // Must be specialized.
}
template<typename T>
void Emit(Writer& a, const char* key, T& v) {
static_assert(false); // Must be specialized.
}
template<>
void Emit(Reader& r, const char* key, int& v) {
v = r[key].GetInt();
}
template<>
void Emit(Writer& w, const char* key, int &v) {
w.Key(key);
w.Int(v);
}
void StartObject(Reader& r) {}
void StartObject(Writer& w) {
w.StartObject();
}
void EndObject(Reader& r) {}
void EndObject(Writer& w) {
w.EndObject();
}
void StartArray(Reader& r) {}
void StartArray(Writer& w) {
w.StartArray();
}
void EndArray(Reader& r) {}
void EndArray(Writer& w) {
w.EndArray();
}
struct Object {
//Location l;
int a = 0, b = 0, c = 0;
};
/*
void EmitKey(Reader& r, const char* key) {
w.Key(key);
}
void EmitKey(Writer& w, const char* key) {
w = w[key];
}
*/
// Reads or writes |obj| through |stream|, depending on whether |stream| is a
// Reader or a Writer. Each member is emitted under a key equal to its own
// name so that a written object can be read back field-for-field.
template<typename S>
void Serialize(S& stream, Object& obj) {
  StartObject(stream);
  Emit(stream, "a", obj.a);
  Emit(stream, "b", obj.b);
  // |c| needs its own key; reusing "b" would emit a duplicate key and load
  // |c| from |b|'s value.
  Emit(stream, "c", obj.c);
  EndObject(stream);
}
/*
template <typename C, typename T>
C& operator&(C& stream, T& t) {
t.serialize(stream);
}
*/
int main(int argc, char** argv) {
rapidjson::StringBuffer output;
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(output);
writer.SetFormatOptions(
rapidjson::PrettyFormatOptions::kFormatSingleLineArray);
writer.SetIndent(' ', 2);
Object foo;
foo.a = 10;
Serialize(writer, foo);
std::cout << output.GetString() << std::endl;
std::cout << "----" << std::endl;
rapidjson::Document doc;
//doc = doc["foo"];
doc.Parse(output.GetString());
Object foo2;
Serialize(doc, foo2);
std::cin.get();
//Reader r;
//foo.Serialize(r);
return 0;
}
#endif
@ -134,26 +30,26 @@ void Serialize(Writer& writer, const char* key, const std::vector<Location>& loc
} }
template<typename T> template<typename T>
void Serialize(Writer& writer, const char* key, LocalId<T> id) { void Serialize(Writer& writer, const char* key, Id<T> id) {
if (key) writer.Key(key); if (key) writer.Key(key);
writer.Uint64(id.local_id); writer.Uint64(id.id);
} }
template<typename T> template<typename T>
void Serialize(Writer& writer, const char* key, optional<LocalId<T>> id) { void Serialize(Writer& writer, const char* key, optional<Id<T>> id) {
if (id) { if (id) {
Serialize(writer, key, id.value()); Serialize(writer, key, id.value());
} }
} }
template<typename T> template<typename T>
void Serialize(Writer& writer, const char* key, const std::vector<LocalId<T>>& ids) { void Serialize(Writer& writer, const char* key, const std::vector<Id<T>>& ids) {
if (ids.size() == 0) if (ids.size() == 0)
return; return;
if (key) writer.Key(key); if (key) writer.Key(key);
writer.StartArray(); writer.StartArray();
for (LocalId<T> id : ids) for (Id<T> id : ids)
Serialize(writer, nullptr, id); Serialize(writer, nullptr, id);
writer.EndArray(); writer.EndArray();
} }
@ -161,7 +57,7 @@ void Serialize(Writer& writer, const char* key, const std::vector<LocalId<T>>& i
template<typename T> template<typename T>
void Serialize(Writer& writer, const char* key, Ref<T> ref) { void Serialize(Writer& writer, const char* key, Ref<T> ref) {
if (key) writer.Key(key); if (key) writer.Key(key);
std::string s = std::to_string(ref.id.local_id) + "@" + ref.loc.ToString(); std::string s = std::to_string(ref.id.id) + "@" + ref.loc.ToString();
writer.String(s.c_str()); writer.String(s.c_str());
} }
@ -191,8 +87,8 @@ void Serialize(Writer& writer, const char* key, uint64_t value) {
} }
void Serialize(Writer& writer, IndexedFile* file) { void Serialize(Writer& writer, IndexedFile* file) {
auto it = file->usr_to_type_id.find(""); auto it = file->usr_to_id->usr_to_type_id.find("");
if (it != file->usr_to_type_id.end()) { if (it != file->usr_to_id->usr_to_type_id.end()) {
file->Resolve(it->second)->def.short_name = "<fundamental>"; file->Resolve(it->second)->def.short_name = "<fundamental>";
assert(file->Resolve(it->second)->uses.size() == 0); assert(file->Resolve(it->second)->uses.size() == 0);
} }