Optimize import pipeline.

Previous implementation was slower at reindexing if loading from cache because primary cc files would be reindexed for every dependency that changed.
This commit is contained in:
Jacob Dufault 2017-04-23 18:01:51 -07:00
parent 1b2f5896dc
commit 91b5614c7e
8 changed files with 149 additions and 91 deletions

View File

@ -744,7 +744,6 @@ struct Index_DoIndex {
// of the dependencies. The main cc will then be parsed, which will include // of the dependencies. The main cc will then be parsed, which will include
// updates to all dependencies. // updates to all dependencies.
ImportOnly,
ImportThenParse, ImportThenParse,
Parse, Parse,
Freshen, Freshen,
@ -762,10 +761,13 @@ struct Index_DoIdMap {
std::unique_ptr<IndexedFile> previous; std::unique_ptr<IndexedFile> previous;
std::unique_ptr<IndexedFile> current; std::unique_ptr<IndexedFile> current;
explicit Index_DoIdMap(std::unique_ptr<IndexedFile> current)
: current(std::move(current)) {}
explicit Index_DoIdMap(std::unique_ptr<IndexedFile> previous, explicit Index_DoIdMap(std::unique_ptr<IndexedFile> previous,
std::unique_ptr<IndexedFile> current) std::unique_ptr<IndexedFile> current)
: previous(std::move(previous)), : previous(std::move(previous)),
current(std::move(current)) {} current(std::move(current)) {}
}; };
struct Index_OnIdMapped { struct Index_OnIdMapped {
@ -913,85 +915,83 @@ void RegisterMessageTypes() {
// NOTE(review): this span is a side-by-side diff rendering. Each line fuses the
// removed text (left) with the added text (right). The left side holds the
// removed DispatchDependencyImports helper and the old `void ImportCachedIndex`;
// the right side holds the new `bool ImportCachedIndex`.
//
// New ImportCachedIndex: loads the cached index for |tu_path|, enqueues the
// cached index of every dependency and of |tu_path| itself onto
// |queue_do_id_map|, and marks each file whose on-disk modification time equals
// the cached one in |file_consumer_shared|. Returns true when no cache could be
// loaded for |tu_path| or when any modification time differs (a reparse is
// needed); returns false otherwise.
void DispatchDependencyImports(Index_DoIndexQueue* queue_do_index, bool ImportCachedIndex(IndexerConfig* config,
Index_DoIndex::Type request_type, FileConsumer::SharedState* file_consumer_shared,
const std::vector<std::string>& dependencies) {
// Import all dependencies.
for (auto& dependency_path : dependencies) {
std::cerr << "- Dispatching dependency import " << dependency_path << std::endl;
queue_do_index->PriorityEnqueue(Index_DoIndex(request_type, dependency_path, nullopt));
}
}
void ImportCachedIndex(IndexerConfig* config,
Index_DoIndexQueue* queue_do_index,
Index_DoIdMapQueue* queue_do_id_map, Index_DoIdMapQueue* queue_do_id_map,
const std::string path, const std::string& tu_path) {
int64_t* last_modification_time) { // TODO: only load cache if command line arguments are the same.
*last_modification_time = 0;
Timer time; Timer time;
std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, path); std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, tu_path);
time.ResetAndPrint("Reading cached index from disk " + path); time.ResetAndPrint("Reading cached index from disk " + tu_path);
if (!cache) if (!cache)
return; return true;
DispatchDependencyImports(queue_do_index, Index_DoIndex::Type::ImportOnly, cache->dependencies); bool needs_reparse = false;
*last_modification_time = cache->last_modification_time; // Import all dependencies.
Index_DoIdMap response(nullptr, std::move(cache)); for (auto& dependency_path : cache->dependencies) {
queue_do_id_map->Enqueue(std::move(response)); std::cerr << "- Got dependency " << dependency_path << std::endl;
// NOTE(review): (new side) the |cache| declared on the next line shadows the
// translation-unit |cache| loaded above. Unlike that one, it is dereferenced
// immediately without a null check. Confirm LoadCachedIndex cannot return null
// for a dependency path, or add a guard.
std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, dependency_path);
if (GetLastModificationTime(cache->path) == cache->last_modification_time)
file_consumer_shared->Mark(cache->path);
else
needs_reparse = true;
queue_do_id_map->Enqueue(Index_DoIdMap(std::move(cache)));
}
// Import primary file.
// The |cache| used from here on is the outer, translation-unit cache again; the
// loop-local one went out of scope with the loop body.
if (GetLastModificationTime(tu_path) == cache->last_modification_time)
file_consumer_shared->Mark(tu_path);
else
needs_reparse = true;
queue_do_id_map->Enqueue(Index_DoIdMap(std::move(cache)));
return needs_reparse;
} }
void ParseFile(IndexerConfig* config, void ParseFile(IndexerConfig* config,
FileConsumer::SharedState* file_consumer_shared, FileConsumer::SharedState* file_consumer_shared,
Index_DoIdMapQueue* queue_do_id_map, Index_DoIdMapQueue* queue_do_id_map,
const std::string& path, const std::string& tu_or_dep_path,
const optional<std::vector<std::string>>& args, const optional<std::vector<std::string>>& args) {
std::vector<std::string>* opt_out_dependencies) {
Timer time; Timer time;
// Parse request and send a response. std::unique_ptr<IndexedFile> cache_for_args = LoadCachedIndex(config, tu_or_dep_path);
std::unique_ptr<IndexedFile> cached_path_index = LoadCachedIndex(config, path);
if (cached_path_index) {
// Give the user dependencies if requested.
if (opt_out_dependencies)
*opt_out_dependencies = cached_path_index->dependencies;
// Skip index if file modification time didn't change.
int64_t modification_time = GetLastModificationTime(path);
if (modification_time == cached_path_index->last_modification_time) {
time.ResetAndPrint("Skipping index update on " + path + " since file modification time has not changed");
return;
}
else {
time.ResetAndPrint("Modification time on " + path + " has changed from " + std::to_string(cached_path_index->last_modification_time) + " to " + std::to_string(modification_time));
}
}
std::string tu_path = cache_for_args ? cache_for_args->import_file : tu_or_dep_path;
// TODO: Replace checking cache for arguments by guessing arguments on via directory structure. That will also work better for new files.
const std::vector<std::string>& tu_args = args ? *args : cache_for_args ? cache_for_args->args : kEmptyArgs;
std::vector<std::unique_ptr<IndexedFile>> indexes = Parse( std::vector<std::unique_ptr<IndexedFile>> indexes = Parse(
config, file_consumer_shared, config, file_consumer_shared,
path, cached_path_index ? cached_path_index->import_file : path, tu_path, tu_args);
args ? *args : cached_path_index ? cached_path_index->args : kEmptyArgs); time.ResetAndPrint("Parsing/indexing " + tu_path + " with args " + StringJoin(tu_args));
time.ResetAndPrint("Parsing/indexing " + path);
for (std::unique_ptr<IndexedFile>& new_index : indexes) { for (std::unique_ptr<IndexedFile>& new_index : indexes) {
std::cerr << "Got index for " << new_index->path << std::endl; std::cerr << "Got index for " << new_index->path << std::endl;
// Load the cached index. // Load the cached index.
std::unique_ptr<IndexedFile> cached_index; std::unique_ptr<IndexedFile> cached_index;
if (new_index->path == path) if (cache_for_args && new_index->path == cache_for_args->path)
cached_index = std::move(cached_path_index); cached_index = std::move(cache_for_args);
else else
cached_index = LoadCachedIndex(config, new_index->path); cached_index = LoadCachedIndex(config, new_index->path);
// TODO: Enable this assert when we are no longer forcibly indexing the primary file.
//assert(!cached_index || GetLastModificationTime(new_index->path) != cached_index->last_modification_time);
time.ResetAndPrint("Loading cached index"); time.ResetAndPrint("Loading cached index");
// Update dependencies on |new_index|, since they won't get reparsed if we // Add any existing dependencies to |new_index| that were there before,
// have parsed them once before. // because we will not reparse them if they haven't changed.
if (cached_index) // TODO: indexer should always include dependencies. This doesn't let us remove old dependencies.
AddRange(&new_index->dependencies, cached_index->dependencies); if (cached_index) {
for (auto& dep : cached_index->dependencies) {
if (std::find(new_index->dependencies.begin(), new_index->dependencies.end(), dep) == new_index->dependencies.end())
new_index->dependencies.push_back(dep);
}
}
// Cache the newly indexed file. This replaces the existing cache. // Cache the newly indexed file. This replaces the existing cache.
// TODO: Run this as another import pipeline stage. // TODO: Run this as another import pipeline stage.
@ -1005,6 +1005,40 @@ void ParseFile(IndexerConfig* config,
} }
// Determines whether the translation unit at |tu_path| must be reparsed by
// comparing on-disk modification times against those stored in the cached
// indexes of the translation unit and of each dependency it recorded.
//
// Side effects on |file_consumer_shared|:
//   - every dependency whose modification time differs from its cached value
//     (or whose cached index cannot be loaded) is Reset;
//   - |tu_path| is Marked when its cache is missing or its modification time
//     differs from the cached value.
//
// Returns true when a reparse is needed: the translation unit has no cached
// index, or at least one checked file is stale.
bool ResetStaleFiles(IndexerConfig* config,
                     FileConsumer::SharedState* file_consumer_shared,
                     const std::string& tu_path) {
  Timer time;
  std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, tu_path);
  time.ResetAndPrint("Reading cached index from disk " + tu_path);
  if (!cache) {
    std::cerr << "[indexer] Unable to load existing index from file when freshening (dependences will not be freshened)" << std::endl;
    file_consumer_shared->Mark(tu_path);
    return true;
  }

  bool needs_reparse = false;

  // Check dependencies
  for (auto& dependency_path : cache->dependencies) {
    std::cerr << "- Got dependency " << dependency_path << std::endl;

    // A distinct name avoids shadowing the translation-unit |cache| above.
    std::unique_ptr<IndexedFile> dependency_cache = LoadCachedIndex(config, dependency_path);

    // LoadCachedIndex can fail (see the null check for |tu_path| above). With
    // no cached index for the dependency there is nothing to compare against,
    // so treat it as stale instead of dereferencing a null pointer.
    if (!dependency_cache) {
      needs_reparse = true;
      file_consumer_shared->Reset(dependency_path);
      continue;
    }

    if (GetLastModificationTime(dependency_cache->path) != dependency_cache->last_modification_time) {
      needs_reparse = true;
      file_consumer_shared->Reset(dependency_cache->path);
    }
  }

  // Check primary file
  if (GetLastModificationTime(tu_path) != cache->last_modification_time) {
    needs_reparse = true;
    file_consumer_shared->Mark(tu_path);
  }

  return needs_reparse;
}
bool IndexMain_DoIndex(IndexerConfig* config, bool IndexMain_DoIndex(IndexerConfig* config,
FileConsumer::SharedState* file_consumer_shared, FileConsumer::SharedState* file_consumer_shared,
Project* project, Project* project,
@ -1017,18 +1051,14 @@ bool IndexMain_DoIndex(IndexerConfig* config,
Timer time; Timer time;
switch (index_request->type) { switch (index_request->type) {
case Index_DoIndex::Type::ImportOnly: {
int64_t cache_modification_time;
ImportCachedIndex(config, queue_do_index, queue_do_id_map, index_request->path, &cache_modification_time);
break;
}
case Index_DoIndex::Type::ImportThenParse: { case Index_DoIndex::Type::ImportThenParse: {
int64_t cache_modification_time; // This assumes index_request->path is a cc or translation unit file (ie,
ImportCachedIndex(config, queue_do_index, queue_do_id_map, index_request->path, &cache_modification_time); // it is in compile_commands.json).
bool needs_reparse = ImportCachedIndex(config, file_consumer_shared, queue_do_id_map, index_request->path);
// If the file has been updated, we need to reparse it. // If the file has been updated, we need to reparse it.
if (GetLastModificationTime(index_request->path) > cache_modification_time) { if (needs_reparse) {
// Instead of parsing the file immediately, we push the request to the // Instead of parsing the file immediately, we push the request to the
// back of the queue so we will finish all of the Import requests // back of the queue so we will finish all of the Import requests
// before starting to run actual index jobs. This gives the user a // before starting to run actual index jobs. This gives the user a
@ -1040,14 +1070,19 @@ bool IndexMain_DoIndex(IndexerConfig* config,
} }
case Index_DoIndex::Type::Parse: { case Index_DoIndex::Type::Parse: {
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args, nullptr); // index_request->path can be a cc/tu or a dependency path.
file_consumer_shared->Reset(index_request->path);
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args);
break; break;
} }
case Index_DoIndex::Type::Freshen: { case Index_DoIndex::Type::Freshen: {
std::vector<std::string> dependencies; // This assumes index_request->path is a cc or translation unit file (ie,
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args, &dependencies); // it is in compile_commands.json).
DispatchDependencyImports(queue_do_index, Index_DoIndex::Type::Freshen, dependencies);
bool needs_reparse = ResetStaleFiles(config, file_consumer_shared, index_request->path);
if (needs_reparse)
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args);
break; break;
} }
} }

View File

@ -18,6 +18,18 @@ bool operator==(const CXFileUniqueID& a, const CXFileUniqueID& b) {
return a.data[0] == b.data[0] && a.data[1] == b.data[1] && a.data[2] == b.data[2]; return a.data[0] == b.data[0] && a.data[1] == b.data[1] && a.data[2] == b.data[2];
} }
// Adds |file| to the shared set of used files while holding the state's mutex.
// Returns true when |file| was newly inserted, false when it was already there.
bool FileConsumer::SharedState::Mark(const std::string& file) {
  std::lock_guard<std::mutex> guard(mutex);
  const auto insertion = files.insert(file);
  return insertion.second;
}
// Removes |file| from the shared set of used files while holding the state's
// mutex. Does nothing when |file| is not in the set.
void FileConsumer::SharedState::Reset(const std::string& file) {
  std::lock_guard<std::mutex> guard(mutex);
  // Erasing by key is a no-op for a missing element, matching find + erase.
  files.erase(file);
}
FileConsumer::FileConsumer(SharedState* shared_state) : shared_(shared_state) {} FileConsumer::FileConsumer(SharedState* shared_state) : shared_(shared_state) {}
IndexedFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) { IndexedFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) {
@ -39,11 +51,7 @@ IndexedFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership)
std::string file_name = FileName(file); std::string file_name = FileName(file);
// No result in local; we need to query global. // No result in local; we need to query global.
bool did_insert = false; bool did_insert = shared_->Mark(file_name);
{
std::lock_guard<std::mutex> lock(shared_->mutex);
did_insert = shared_->files.insert(file_name).second;
}
*is_first_ownership = did_insert; *is_first_ownership = did_insert;
local_[file_id] = did_insert ? MakeUnique<IndexedFile>(file_name) : nullptr; local_[file_id] = did_insert ? MakeUnique<IndexedFile>(file_name) : nullptr;
return local_[file_id].get(); return local_[file_id].get();

View File

@ -26,6 +26,11 @@ struct FileConsumer {
// NOTE(review): side-by-side diff rendering; unchanged lines appear twice
// (old text followed by new text) and the two member functions are additions.
//
// State shared between FileConsumer instances: the set of file paths currently
// marked as used, plus the mutex that Mark/Reset hold while touching that set.
struct SharedState { struct SharedState {
// Paths currently marked as used.
mutable std::unordered_set<std::string> files; mutable std::unordered_set<std::string> files;
// Locked by Mark and Reset around every access to |files|.
mutable std::mutex mutex; mutable std::mutex mutex;
// Mark the file as used. Returns true if the file was not previously used.
bool Mark(const std::string& file);
// Reset the used state (ie, mark the file as unused).
void Reset(const std::string& file);
}; };
FileConsumer(SharedState* shared_state); FileConsumer(SharedState* shared_state);

View File

@ -1341,20 +1341,19 @@ void indexEntityReference(CXClientData client_data,
std::vector<std::unique_ptr<IndexedFile>> Parse( std::vector<std::unique_ptr<IndexedFile>> Parse(
IndexerConfig* config, FileConsumer::SharedState* file_consumer_shared, IndexerConfig* config, FileConsumer::SharedState* file_consumer_shared,
std::string desired_index_file, std::string import_file, std::string file,
std::vector<std::string> args, std::vector<std::string> args,
bool dump_ast) { bool dump_ast) {
if (!config->enableIndexing) if (!config->enableIndexing)
return {}; return {};
desired_index_file = NormalizePath(desired_index_file); file = NormalizePath(file);
import_file = NormalizePath(import_file);
clang::Index index(0 /*excludeDeclarationsFromPCH*/, clang::Index index(0 /*excludeDeclarationsFromPCH*/,
0 /*displayDiagnostics*/); 0 /*displayDiagnostics*/);
std::vector<CXUnsavedFile> unsaved_files; std::vector<CXUnsavedFile> unsaved_files;
clang::TranslationUnit tu(config, index, import_file, args, unsaved_files, CXTranslationUnit_KeepGoing); clang::TranslationUnit tu(config, index, file, args, unsaved_files, CXTranslationUnit_KeepGoing);
if (dump_ast) if (dump_ast)
Dump(tu.document_cursor()); Dump(tu.document_cursor());
@ -1369,21 +1368,18 @@ std::vector<std::unique_ptr<IndexedFile>> Parse(
FileConsumer file_consumer(file_consumer_shared); FileConsumer file_consumer(file_consumer_shared);
IndexParam param(&file_consumer); IndexParam param(&file_consumer);
CXFile file = clang_getFile(tu.cx_tu, desired_index_file.c_str()); // TODO: There is no real reason why we need |ForceLocal|. Remove it when we
param.primary_file = file_consumer.ForceLocal(file); // have argument guessing.
if (desired_index_file != import_file) CXFile cx_file = clang_getFile(tu.cx_tu, file.c_str());
param.primary_file = nullptr; param.primary_file = file_consumer.ForceLocal(cx_file);
if (desired_index_file != import_file) std::cerr << "!! [START] Indexing " << file << std::endl;
std::cerr << "!! [START] Indexing desired_index_file=" << desired_index_file << ", import_file=" << import_file << std::endl;
else
std::cerr << "!! [START] Indexing " << desired_index_file << std::endl;
CXIndexAction index_action = clang_IndexAction_create(index.cx_index); CXIndexAction index_action = clang_IndexAction_create(index.cx_index);
clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks), clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks),
CXIndexOpt_IndexFunctionLocalSymbols | CXIndexOpt_SkipParsedBodiesInSession | CXIndexOpt_IndexImplicitTemplateInstantiations, CXIndexOpt_IndexFunctionLocalSymbols | CXIndexOpt_SkipParsedBodiesInSession | CXIndexOpt_IndexImplicitTemplateInstantiations,
tu.cx_tu); tu.cx_tu);
clang_IndexAction_dispose(index_action); clang_IndexAction_dispose(index_action);
std::cerr << "!! [END] Indexing " << desired_index_file << std::endl; std::cerr << "!! [END] Indexing " << file << std::endl;
auto result = param.file_consumer->TakeLocalState(); auto result = param.file_consumer->TakeLocalState();
for (auto& entry : result) { for (auto& entry : result) {
@ -1395,7 +1391,7 @@ std::vector<std::unique_ptr<IndexedFile>> Parse(
entry->id_cache.primary_file = entry->path; entry->id_cache.primary_file = entry->path;
entry->last_modification_time = GetLastModificationTime(entry->path); entry->last_modification_time = GetLastModificationTime(entry->path);
entry->import_file = import_file; entry->import_file = file;
entry->args = args; entry->args = args;
} }
@ -1403,21 +1399,21 @@ std::vector<std::unique_ptr<IndexedFile>> Parse(
for (auto& entry : result) { for (auto& entry : result) {
for (auto& type : entry->types) { for (auto& type : entry->types) {
if (!type.HasInterestingState()) { if (!type.HasInterestingState()) {
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << desired_index_file << std::endl; std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << file << std::endl;
std::cerr << "!!!! USR " << type.def.usr << std::endl; std::cerr << "!!!! USR " << type.def.usr << std::endl;
assert(false); assert(false);
} }
} }
for (auto& func : entry->funcs) { for (auto& func : entry->funcs) {
if (!func.HasInterestingState()) { if (!func.HasInterestingState()) {
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << desired_index_file << std::endl; std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << file << std::endl;
std::cerr << "!!!! USR " << func.def.usr << std::endl; std::cerr << "!!!! USR " << func.def.usr << std::endl;
assert(false); assert(false);
} }
} }
for (auto& var : entry->vars) { for (auto& var : entry->vars) {
if (!var.HasInterestingState()) { if (!var.HasInterestingState()) {
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << desired_index_file << std::endl; std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << file << std::endl;
std::cerr << "!!!! USR " << var.def.usr << std::endl; std::cerr << "!!!! USR " << var.def.usr << std::endl;
assert(false); assert(false);
} }

View File

@ -503,7 +503,7 @@ struct IndexedFile {
// |dependencies| are the existing dependencies of |import_file| if this is a reparse. // |dependencies| are the existing dependencies of |import_file| if this is a reparse.
std::vector<std::unique_ptr<IndexedFile>> Parse( std::vector<std::unique_ptr<IndexedFile>> Parse(
IndexerConfig* config, FileConsumer::SharedState* file_consumer_shared, IndexerConfig* config, FileConsumer::SharedState* file_consumer_shared,
std::string desired_index_file, std::string import_file, std::string file,
std::vector<std::string> args, std::vector<std::string> args,
bool dump_ast = false); bool dump_ast = false);
void IndexInit(); void IndexInit();

View File

@ -139,7 +139,7 @@ void RunTests() {
std::cout << "[START] " << path << std::endl; std::cout << "[START] " << path << std::endl;
std::vector<std::unique_ptr<IndexedFile>> dbs = Parse( std::vector<std::unique_ptr<IndexedFile>> dbs = Parse(
&config, &file_consumer_shared, &config, &file_consumer_shared,
path, path, path,
{ {
"-xc++", "-xc++",
"-std=c++11", "-std=c++11",

View File

@ -66,6 +66,18 @@ std::string ReplaceAll(const std::string& source, const std::string& from, const
return result; return result;
} }
// Concatenates |values| into one string, placing ", " between consecutive
// entries. An empty vector yields an empty string; a single entry is returned
// unchanged.
std::string StringJoin(const std::vector<std::string>& values) {
  std::string joined;

  auto it = values.begin();
  if (it == values.end())
    return joined;

  // Emit the first entry bare, then prefix every later one with the separator.
  joined += *it;
  for (++it; it != values.end(); ++it) {
    joined += ", ";
    joined += *it;
  }
  return joined;
}
static std::vector<std::string> GetFilesInFolderHelper(std::string folder, bool recursive, std::string output_prefix) { static std::vector<std::string> GetFilesInFolderHelper(std::string folder, bool recursive, std::string output_prefix) {
std::vector<std::string> result; std::vector<std::string> result;

View File

@ -18,6 +18,8 @@ bool StartsWith(const std::string& value, const std::string& start);
bool EndsWith(const std::string& value, const std::string& ending); bool EndsWith(const std::string& value, const std::string& ending);
std::string ReplaceAll(const std::string& source, const std::string& from, const std::string& to); std::string ReplaceAll(const std::string& source, const std::string& from, const std::string& to);
std::string StringJoin(const std::vector<std::string>& values);
// Finds all files in the given folder. This is recursive. // Finds all files in the given folder. This is recursive.
std::vector<std::string> GetFilesInFolder(std::string folder, bool recursive, bool add_folder_to_path); std::vector<std::string> GetFilesInFolder(std::string folder, bool recursive, bool add_folder_to_path);
optional<std::string> ReadContent(const std::string& filename); optional<std::string> ReadContent(const std::string& filename);