mirror of
https://github.com/MaskRay/ccls.git
synced 2025-02-20 23:49:28 +00:00
Optimize import pipeline.
Previous implementation was slower at reindexing if loading from cache because primary cc files would be reindexed for every dependency that changed.
This commit is contained in:
parent
1b2f5896dc
commit
91b5614c7e
@ -744,7 +744,6 @@ struct Index_DoIndex {
|
||||
// of the dependencies. The main cc will then be parsed, which will include
|
||||
// updates to all dependencies.
|
||||
|
||||
ImportOnly,
|
||||
ImportThenParse,
|
||||
Parse,
|
||||
Freshen,
|
||||
@ -762,10 +761,13 @@ struct Index_DoIdMap {
|
||||
std::unique_ptr<IndexedFile> previous;
|
||||
std::unique_ptr<IndexedFile> current;
|
||||
|
||||
explicit Index_DoIdMap(std::unique_ptr<IndexedFile> current)
|
||||
: current(std::move(current)) {}
|
||||
|
||||
explicit Index_DoIdMap(std::unique_ptr<IndexedFile> previous,
|
||||
std::unique_ptr<IndexedFile> current)
|
||||
std::unique_ptr<IndexedFile> current)
|
||||
: previous(std::move(previous)),
|
||||
current(std::move(current)) {}
|
||||
current(std::move(current)) {}
|
||||
};
|
||||
|
||||
struct Index_OnIdMapped {
|
||||
@ -913,85 +915,83 @@ void RegisterMessageTypes() {
|
||||
|
||||
|
||||
|
||||
// Enqueues one priority index request per entry of |dependencies|.
//
// Each request is built from |request_type|, the dependency path, and nullopt
// as the third constructor argument, and is handed to
// |queue_do_index->PriorityEnqueue|. A line is logged to stderr for every
// dependency that gets dispatched.
void DispatchDependencyImports(Index_DoIndexQueue* queue_do_index,
                               Index_DoIndex::Type request_type,
                               const std::vector<std::string>& dependencies) {
  for (auto it = dependencies.begin(); it != dependencies.end(); ++it) {
    const std::string& path = *it;
    std::cerr << "- Dispatching dependency import " << path << std::endl;
    queue_do_index->PriorityEnqueue(Index_DoIndex(request_type, path, nullopt));
  }
}
|
||||
|
||||
void ImportCachedIndex(IndexerConfig* config,
|
||||
Index_DoIndexQueue* queue_do_index,
|
||||
bool ImportCachedIndex(IndexerConfig* config,
|
||||
FileConsumer::SharedState* file_consumer_shared,
|
||||
Index_DoIdMapQueue* queue_do_id_map,
|
||||
const std::string path,
|
||||
int64_t* last_modification_time) {
|
||||
*last_modification_time = 0;
|
||||
const std::string& tu_path) {
|
||||
// TODO: only load cache if command line arguments are the same.
|
||||
|
||||
Timer time;
|
||||
|
||||
std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, path);
|
||||
time.ResetAndPrint("Reading cached index from disk " + path);
|
||||
std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, tu_path);
|
||||
time.ResetAndPrint("Reading cached index from disk " + tu_path);
|
||||
if (!cache)
|
||||
return;
|
||||
return true;
|
||||
|
||||
DispatchDependencyImports(queue_do_index, Index_DoIndex::Type::ImportOnly, cache->dependencies);
|
||||
bool needs_reparse = false;
|
||||
|
||||
*last_modification_time = cache->last_modification_time;
|
||||
Index_DoIdMap response(nullptr, std::move(cache));
|
||||
queue_do_id_map->Enqueue(std::move(response));
|
||||
// Import all dependencies.
|
||||
for (auto& dependency_path : cache->dependencies) {
|
||||
std::cerr << "- Got dependency " << dependency_path << std::endl;
|
||||
std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, dependency_path);
|
||||
if (GetLastModificationTime(cache->path) == cache->last_modification_time)
|
||||
file_consumer_shared->Mark(cache->path);
|
||||
else
|
||||
needs_reparse = true;
|
||||
queue_do_id_map->Enqueue(Index_DoIdMap(std::move(cache)));
|
||||
}
|
||||
|
||||
// Import primary file.
|
||||
if (GetLastModificationTime(tu_path) == cache->last_modification_time)
|
||||
file_consumer_shared->Mark(tu_path);
|
||||
else
|
||||
needs_reparse = true;
|
||||
queue_do_id_map->Enqueue(Index_DoIdMap(std::move(cache)));
|
||||
|
||||
return needs_reparse;
|
||||
}
|
||||
|
||||
void ParseFile(IndexerConfig* config,
|
||||
FileConsumer::SharedState* file_consumer_shared,
|
||||
Index_DoIdMapQueue* queue_do_id_map,
|
||||
const std::string& path,
|
||||
const optional<std::vector<std::string>>& args,
|
||||
std::vector<std::string>* opt_out_dependencies) {
|
||||
const std::string& tu_or_dep_path,
|
||||
const optional<std::vector<std::string>>& args) {
|
||||
Timer time;
|
||||
|
||||
// Parse request and send a response.
|
||||
std::unique_ptr<IndexedFile> cached_path_index = LoadCachedIndex(config, path);
|
||||
std::unique_ptr<IndexedFile> cache_for_args = LoadCachedIndex(config, tu_or_dep_path);
|
||||
|
||||
if (cached_path_index) {
|
||||
// Give the user dependencies if requested.
|
||||
if (opt_out_dependencies)
|
||||
*opt_out_dependencies = cached_path_index->dependencies;
|
||||
|
||||
// Skip index if file modification time didn't change.
|
||||
int64_t modification_time = GetLastModificationTime(path);
|
||||
if (modification_time == cached_path_index->last_modification_time) {
|
||||
time.ResetAndPrint("Skipping index update on " + path + " since file modification time has not changed");
|
||||
return;
|
||||
}
|
||||
else {
|
||||
time.ResetAndPrint("Modification time on " + path + " has changed from " + std::to_string(cached_path_index->last_modification_time) + " to " + std::to_string(modification_time));
|
||||
}
|
||||
}
|
||||
|
||||
std::string tu_path = cache_for_args ? cache_for_args->import_file : tu_or_dep_path;
|
||||
// TODO: Replace checking cache for arguments by guessing arguments via directory structure. That will also work better for new files.
|
||||
const std::vector<std::string>& tu_args = args ? *args : cache_for_args ? cache_for_args->args : kEmptyArgs;
|
||||
std::vector<std::unique_ptr<IndexedFile>> indexes = Parse(
|
||||
config, file_consumer_shared,
|
||||
path, cached_path_index ? cached_path_index->import_file : path,
|
||||
args ? *args : cached_path_index ? cached_path_index->args : kEmptyArgs);
|
||||
time.ResetAndPrint("Parsing/indexing " + path);
|
||||
tu_path, tu_args);
|
||||
time.ResetAndPrint("Parsing/indexing " + tu_path + " with args " + StringJoin(tu_args));
|
||||
|
||||
for (std::unique_ptr<IndexedFile>& new_index : indexes) {
|
||||
std::cerr << "Got index for " << new_index->path << std::endl;
|
||||
|
||||
// Load the cached index.
|
||||
std::unique_ptr<IndexedFile> cached_index;
|
||||
if (new_index->path == path)
|
||||
cached_index = std::move(cached_path_index);
|
||||
if (cache_for_args && new_index->path == cache_for_args->path)
|
||||
cached_index = std::move(cache_for_args);
|
||||
else
|
||||
cached_index = LoadCachedIndex(config, new_index->path);
|
||||
// TODO: Enable this assert when we are no longer forcibly indexing the primary file.
|
||||
//assert(!cached_index || GetLastModificationTime(new_index->path) != cached_index->last_modification_time);
|
||||
|
||||
time.ResetAndPrint("Loading cached index");
|
||||
|
||||
// Update dependencies on |new_index|, since they won't get reparsed if we
|
||||
// have parsed them once before.
|
||||
if (cached_index)
|
||||
AddRange(&new_index->dependencies, cached_index->dependencies);
|
||||
// Add any existing dependencies to |new_index| that were there before,
|
||||
// because we will not reparse them if they haven't changed.
|
||||
// TODO: indexer should always include dependencies. This doesn't let us remove old dependencies.
|
||||
if (cached_index) {
|
||||
for (auto& dep : cached_index->dependencies) {
|
||||
if (std::find(new_index->dependencies.begin(), new_index->dependencies.end(), dep) == new_index->dependencies.end())
|
||||
new_index->dependencies.push_back(dep);
|
||||
}
|
||||
}
|
||||
|
||||
// Cache the newly indexed file. This replaces the existing cache.
|
||||
// TODO: Run this as another import pipeline stage.
|
||||
@ -1005,6 +1005,40 @@ void ParseFile(IndexerConfig* config,
|
||||
|
||||
}
|
||||
|
||||
// Determines whether the translation unit at |tu_path| must be reparsed and
// releases stale dependencies so a later parse can index them again.
//
// The cached index for |tu_path| is loaded first. If it cannot be loaded,
// |tu_path| is marked in |file_consumer_shared| and true is returned (the
// dependencies are unknown, so none of them are freshened).
//
// Otherwise every dependency listed in the cached index is checked:
//   - if the dependency has no loadable cached index, it is treated as stale;
//   - if its on-disk modification time differs from the cached one, it is
//     stale.
// Stale dependencies are Reset() in |file_consumer_shared|. Finally, if the
// modification time of |tu_path| itself differs from its cache, |tu_path| is
// marked in |file_consumer_shared|.
//
// Returns true when the primary file or at least one dependency is stale.
bool ResetStaleFiles(IndexerConfig* config,
                     FileConsumer::SharedState* file_consumer_shared,
                     const std::string& tu_path) {
  Timer time;

  std::unique_ptr<IndexedFile> cache = LoadCachedIndex(config, tu_path);
  time.ResetAndPrint("Reading cached index from disk " + tu_path);
  if (!cache) {
    std::cerr << "[indexer] Unable to load existing index from file when freshening (dependences will not be freshened)" << std::endl;
    file_consumer_shared->Mark(tu_path);
    return true;
  }

  bool needs_reparse = false;

  // Check dependencies.
  for (const auto& dependency_path : cache->dependencies) {
    std::cerr << "- Got dependency " << dependency_path << std::endl;

    // Use a distinct name so the translation unit's |cache| is not shadowed.
    std::unique_ptr<IndexedFile> dependency_cache = LoadCachedIndex(config, dependency_path);

    // LoadCachedIndex can return null (see the check above). Without a cached
    // index there is nothing to compare against, so treat the dependency as
    // stale instead of dereferencing a null pointer.
    if (!dependency_cache) {
      needs_reparse = true;
      file_consumer_shared->Reset(dependency_path);
      continue;
    }

    if (GetLastModificationTime(dependency_cache->path) != dependency_cache->last_modification_time) {
      needs_reparse = true;
      file_consumer_shared->Reset(dependency_cache->path);
    }
  }

  // Check primary file.
  if (GetLastModificationTime(tu_path) != cache->last_modification_time) {
    needs_reparse = true;
    file_consumer_shared->Mark(tu_path);
  }

  return needs_reparse;
}
|
||||
|
||||
bool IndexMain_DoIndex(IndexerConfig* config,
|
||||
FileConsumer::SharedState* file_consumer_shared,
|
||||
Project* project,
|
||||
@ -1017,18 +1051,14 @@ bool IndexMain_DoIndex(IndexerConfig* config,
|
||||
Timer time;
|
||||
|
||||
switch (index_request->type) {
|
||||
case Index_DoIndex::Type::ImportOnly: {
|
||||
int64_t cache_modification_time;
|
||||
ImportCachedIndex(config, queue_do_index, queue_do_id_map, index_request->path, &cache_modification_time);
|
||||
break;
|
||||
}
|
||||
|
||||
case Index_DoIndex::Type::ImportThenParse: {
|
||||
int64_t cache_modification_time;
|
||||
ImportCachedIndex(config, queue_do_index, queue_do_id_map, index_request->path, &cache_modification_time);
|
||||
// This assumes index_request->path is a cc or translation unit file (ie,
|
||||
// it is in compile_commands.json).
|
||||
|
||||
bool needs_reparse = ImportCachedIndex(config, file_consumer_shared, queue_do_id_map, index_request->path);
|
||||
|
||||
// If the file has been updated, we need to reparse it.
|
||||
if (GetLastModificationTime(index_request->path) > cache_modification_time) {
|
||||
if (needs_reparse) {
|
||||
// Instead of parsing the file immediately, we push the request to the
|
||||
// back of the queue so we will finish all of the Import requests
|
||||
// before starting to run actual index jobs. This gives the user a
|
||||
@ -1040,14 +1070,19 @@ bool IndexMain_DoIndex(IndexerConfig* config,
|
||||
}
|
||||
|
||||
case Index_DoIndex::Type::Parse: {
|
||||
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args, nullptr);
|
||||
// index_request->path can be a cc/tu or a dependency path.
|
||||
file_consumer_shared->Reset(index_request->path);
|
||||
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args);
|
||||
break;
|
||||
}
|
||||
|
||||
case Index_DoIndex::Type::Freshen: {
|
||||
std::vector<std::string> dependencies;
|
||||
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args, &dependencies);
|
||||
DispatchDependencyImports(queue_do_index, Index_DoIndex::Type::Freshen, dependencies);
|
||||
// This assumes index_request->path is a cc or translation unit file (ie,
|
||||
// it is in compile_commands.json).
|
||||
|
||||
bool needs_reparse = ResetStaleFiles(config, file_consumer_shared, index_request->path);
|
||||
if (needs_reparse)
|
||||
ParseFile(config, file_consumer_shared, queue_do_id_map, index_request->path, index_request->args);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -18,6 +18,18 @@ bool operator==(const CXFileUniqueID& a, const CXFileUniqueID& b) {
|
||||
return a.data[0] == b.data[0] && a.data[1] == b.data[1] && a.data[2] == b.data[2];
|
||||
}
|
||||
|
||||
bool FileConsumer::SharedState::Mark(const std::string& file) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
return files.insert(file).second;
|
||||
}
|
||||
|
||||
void FileConsumer::SharedState::Reset(const std::string& file) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
auto it = files.find(file);
|
||||
if (it != files.end())
|
||||
files.erase(it);
|
||||
}
|
||||
|
||||
FileConsumer::FileConsumer(SharedState* shared_state) : shared_(shared_state) {}
|
||||
|
||||
IndexedFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) {
|
||||
@ -39,11 +51,7 @@ IndexedFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership)
|
||||
std::string file_name = FileName(file);
|
||||
|
||||
// No result in local; we need to query global.
|
||||
bool did_insert = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shared_->mutex);
|
||||
did_insert = shared_->files.insert(file_name).second;
|
||||
}
|
||||
bool did_insert = shared_->Mark(file_name);
|
||||
*is_first_ownership = did_insert;
|
||||
local_[file_id] = did_insert ? MakeUnique<IndexedFile>(file_name) : nullptr;
|
||||
return local_[file_id].get();
|
||||
|
@ -26,6 +26,11 @@ struct FileConsumer {
|
||||
// State shared between FileConsumer instances: the set of file paths that
// have been marked as used, plus the mutex that Mark() and Reset() hold while
// touching that set.
struct SharedState {
  // Paths currently marked as used.
  mutable std::unordered_set<std::string> files;
  // Guards |files|.
  mutable std::mutex mutex;

  // Marks |file| as used. Returns true if |file| was not previously marked.
  bool Mark(const std::string& file);
  // Clears the used state of |file| (ie, marks the file as unused).
  void Reset(const std::string& file);
};
|
||||
|
||||
FileConsumer(SharedState* shared_state);
|
||||
|
@ -1341,20 +1341,19 @@ void indexEntityReference(CXClientData client_data,
|
||||
|
||||
std::vector<std::unique_ptr<IndexedFile>> Parse(
|
||||
IndexerConfig* config, FileConsumer::SharedState* file_consumer_shared,
|
||||
std::string desired_index_file, std::string import_file,
|
||||
std::string file,
|
||||
std::vector<std::string> args,
|
||||
bool dump_ast) {
|
||||
|
||||
if (!config->enableIndexing)
|
||||
return {};
|
||||
|
||||
desired_index_file = NormalizePath(desired_index_file);
|
||||
import_file = NormalizePath(import_file);
|
||||
file = NormalizePath(file);
|
||||
|
||||
clang::Index index(0 /*excludeDeclarationsFromPCH*/,
|
||||
0 /*displayDiagnostics*/);
|
||||
std::vector<CXUnsavedFile> unsaved_files;
|
||||
clang::TranslationUnit tu(config, index, import_file, args, unsaved_files, CXTranslationUnit_KeepGoing);
|
||||
clang::TranslationUnit tu(config, index, file, args, unsaved_files, CXTranslationUnit_KeepGoing);
|
||||
|
||||
if (dump_ast)
|
||||
Dump(tu.document_cursor());
|
||||
@ -1369,21 +1368,18 @@ std::vector<std::unique_ptr<IndexedFile>> Parse(
|
||||
FileConsumer file_consumer(file_consumer_shared);
|
||||
IndexParam param(&file_consumer);
|
||||
|
||||
CXFile file = clang_getFile(tu.cx_tu, desired_index_file.c_str());
|
||||
param.primary_file = file_consumer.ForceLocal(file);
|
||||
if (desired_index_file != import_file)
|
||||
param.primary_file = nullptr;
|
||||
// TODO: There is no real reason why we need |ForceLocal|. Remove it when we
|
||||
// have argument guessing.
|
||||
CXFile cx_file = clang_getFile(tu.cx_tu, file.c_str());
|
||||
param.primary_file = file_consumer.ForceLocal(cx_file);
|
||||
|
||||
if (desired_index_file != import_file)
|
||||
std::cerr << "!! [START] Indexing desired_index_file=" << desired_index_file << ", import_file=" << import_file << std::endl;
|
||||
else
|
||||
std::cerr << "!! [START] Indexing " << desired_index_file << std::endl;
|
||||
std::cerr << "!! [START] Indexing " << file << std::endl;
|
||||
CXIndexAction index_action = clang_IndexAction_create(index.cx_index);
|
||||
clang_indexTranslationUnit(index_action, ¶m, callbacks, sizeof(callbacks),
|
||||
CXIndexOpt_IndexFunctionLocalSymbols | CXIndexOpt_SkipParsedBodiesInSession | CXIndexOpt_IndexImplicitTemplateInstantiations,
|
||||
tu.cx_tu);
|
||||
clang_IndexAction_dispose(index_action);
|
||||
std::cerr << "!! [END] Indexing " << desired_index_file << std::endl;
|
||||
std::cerr << "!! [END] Indexing " << file << std::endl;
|
||||
|
||||
auto result = param.file_consumer->TakeLocalState();
|
||||
for (auto& entry : result) {
|
||||
@ -1395,7 +1391,7 @@ std::vector<std::unique_ptr<IndexedFile>> Parse(
|
||||
entry->id_cache.primary_file = entry->path;
|
||||
|
||||
entry->last_modification_time = GetLastModificationTime(entry->path);
|
||||
entry->import_file = import_file;
|
||||
entry->import_file = file;
|
||||
entry->args = args;
|
||||
}
|
||||
|
||||
@ -1403,21 +1399,21 @@ std::vector<std::unique_ptr<IndexedFile>> Parse(
|
||||
for (auto& entry : result) {
|
||||
for (auto& type : entry->types) {
|
||||
if (!type.HasInterestingState()) {
|
||||
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << desired_index_file << std::endl;
|
||||
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << file << std::endl;
|
||||
std::cerr << "!!!! USR " << type.def.usr << std::endl;
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
for (auto& func : entry->funcs) {
|
||||
if (!func.HasInterestingState()) {
|
||||
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << desired_index_file << std::endl;
|
||||
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << file << std::endl;
|
||||
std::cerr << "!!!! USR " << func.def.usr << std::endl;
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
for (auto& var : entry->vars) {
|
||||
if (!var.HasInterestingState()) {
|
||||
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << desired_index_file << std::endl;
|
||||
std::cerr << "!!!! NO INTERESTING STATE FOR " << entry->path << " of !!! " << file << std::endl;
|
||||
std::cerr << "!!!! USR " << var.def.usr << std::endl;
|
||||
assert(false);
|
||||
}
|
||||
|
@ -503,7 +503,7 @@ struct IndexedFile {
|
||||
// |dependencies| are the existing dependencies of |import_file| if this is a reparse.
|
||||
std::vector<std::unique_ptr<IndexedFile>> Parse(
|
||||
IndexerConfig* config, FileConsumer::SharedState* file_consumer_shared,
|
||||
std::string desired_index_file, std::string import_file,
|
||||
std::string file,
|
||||
std::vector<std::string> args,
|
||||
bool dump_ast = false);
|
||||
void IndexInit();
|
||||
|
@ -139,7 +139,7 @@ void RunTests() {
|
||||
std::cout << "[START] " << path << std::endl;
|
||||
std::vector<std::unique_ptr<IndexedFile>> dbs = Parse(
|
||||
&config, &file_consumer_shared,
|
||||
path, path,
|
||||
path,
|
||||
{
|
||||
"-xc++",
|
||||
"-std=c++11",
|
||||
|
12
src/utils.cc
12
src/utils.cc
@ -66,6 +66,18 @@ std::string ReplaceAll(const std::string& source, const std::string& from, const
|
||||
return result;
|
||||
}
|
||||
|
||||
// Concatenates |values| into a single string, placing ", " between
// consecutive entries. Returns an empty string when |values| is empty; empty
// entries are kept (two empty entries yield ", ").
std::string StringJoin(const std::vector<std::string>& values) {
  static const char kSeparator[] = ", ";
  const size_t kSeparatorLength = sizeof(kSeparator) - 1;

  std::string result;
  if (values.empty())
    return result;

  // Compute the exact output length up front so the result buffer is
  // allocated once instead of growing on every append.
  size_t total_length = kSeparatorLength * (values.size() - 1);
  for (const std::string& entry : values)
    total_length += entry.size();
  result.reserve(total_length);

  bool first = true;
  for (const std::string& entry : values) {
    if (!first)
      result += kSeparator;
    first = false;
    result += entry;
  }
  return result;
}
|
||||
|
||||
static std::vector<std::string> GetFilesInFolderHelper(std::string folder, bool recursive, std::string output_prefix) {
|
||||
std::vector<std::string> result;
|
||||
|
||||
|
@ -18,6 +18,8 @@ bool StartsWith(const std::string& value, const std::string& start);
|
||||
bool EndsWith(const std::string& value, const std::string& ending);
|
||||
std::string ReplaceAll(const std::string& source, const std::string& from, const std::string& to);
|
||||
|
||||
std::string StringJoin(const std::vector<std::string>& values);
|
||||
|
||||
// Finds all files in the given folder. This is recursive.
|
||||
std::vector<std::string> GetFilesInFolder(std::string folder, bool recursive, bool add_folder_to_path);
|
||||
optional<std::string> ReadContent(const std::string& filename);
|
||||
|
Loading…
Reference in New Issue
Block a user