From 786ac0bc4fbfe96c8c3a4e70569f804ad49325d2 Mon Sep 17 00:00:00 2001 From: Jacob Dufault Date: Wed, 10 Jan 2018 21:16:46 -0800 Subject: [PATCH] Merge FileContents and FileContentsWithOffsets. Also try to more aggressively load FileContents when indexing to increase reliability. --- src/cache_manager.cc | 4 +-- src/file_consumer.cc | 37 ++++++++++++++++---- src/file_consumer.h | 13 +++++-- src/file_contents.cc | 29 ++++++++++++++++ src/file_contents.h | 23 +++++++++++++ src/iindexer.cc | 6 ++-- src/indexer.cc | 58 ++++++-------------------------- src/indexer.h | 24 ++----------- src/messages/cquery_call_tree.cc | 8 ++--- src/query.cc | 22 ++++++------ src/serializer.cc | 4 +-- 11 files changed, 130 insertions(+), 98 deletions(-) create mode 100644 src/file_contents.cc create mode 100644 src/file_contents.h diff --git a/src/cache_manager.cc b/src/cache_manager.cc index 8eb2fa6e..41ac12a7 100644 --- a/src/cache_manager.cc +++ b/src/cache_manager.cc @@ -23,12 +23,12 @@ struct RealCacheManager : ICacheManager { std::string cache_path = GetCachePath(file.path); - if (file.file_contents_.empty()) { + if (!file.file_contents_.has_value()) { LOG_S(ERROR) << "No cached file contents; performing potentially stale " << "file-copy for " << file.path; CopyFileTo(cache_path, file.path); } else { - WriteToFile(cache_path, file.file_contents_); + WriteToFile(cache_path, *file.file_contents_); } std::string indexed_content = Serialize(config_->cacheFormat, file); diff --git a/src/file_consumer.cc b/src/file_consumer.cc index 9a05b12e..20f94e5a 100644 --- a/src/file_consumer.cc +++ b/src/file_consumer.cc @@ -7,6 +7,22 @@ #include +namespace { + +optional GetFileContents(const std::string& path, + FileContentsMap* file_contents) { + auto it = file_contents->find(path); + if (it == file_contents->end()) { + optional content = ReadContent(path); + if (content) + (*file_contents)[path] = FileContents(path, *content); + return content; + } + return it->second.content; +} + +} // namespace + bool operator==(const CXFileUniqueID& a, const CXFileUniqueID& b) { return a.data[0] == b.data[0] && a.data[1] == b.data[1] && a.data[2] == b.data[2]; @@ -28,7 +44,9 @@ FileConsumer::FileConsumer(FileConsumerSharedState* shared_state, const std::string& parse_file) : shared_(shared_state), parse_file_(parse_file) {} -IndexFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) { +IndexFile* FileConsumer::TryConsumeFile(CXFile file, + bool* is_first_ownership, + FileContentsMap* file_contents) { assert(is_first_ownership); CXFileUniqueID file_id; @@ -49,16 +67,20 @@ IndexFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) { // No result in local; we need to query global. bool did_insert = shared_->Mark(file_name); *is_first_ownership = did_insert; - local_[file_id] = did_insert ? MakeUnique(file_name) : nullptr; + local_[file_id] = + did_insert ? MakeUnique( + file_name, GetFileContents(file_name, file_contents)) + : nullptr; return local_[file_id].get(); } -IndexFile* FileConsumer::ForceLocal(CXFile file) { +IndexFile* FileConsumer::ForceLocal(CXFile file, + FileContentsMap* file_contents) { // Try to fetch the file using the normal system, which will insert the file // usage into global storage. { bool is_first; - IndexFile* cache = TryConsumeFile(file, &is_first); + IndexFile* cache = TryConsumeFile(file, &is_first, file_contents); if (cache) return cache; } @@ -71,8 +93,11 @@ IndexFile* FileConsumer::ForceLocal(CXFile file) { } auto it = local_.find(file_id); - if (it == local_.end() || !it->second) - local_[file_id] = MakeUnique(FileName(file)); + if (it == local_.end() || !it->second) { + std::string file_name = FileName(file); + local_[file_id] = MakeUnique( + file_name, GetFileContents(file_name, file_contents)); + } assert(local_.find(file_id) != local_.end()); return local_[file_id].get(); } diff --git a/src/file_consumer.h b/src/file_consumer.h index 85cb7644..12e067a3 100644 --- a/src/file_consumer.h +++ b/src/file_consumer.h @@ -1,5 +1,6 @@ #pragma once +#include "file_contents.h" #include "utils.h" #include @@ -42,10 +43,18 @@ struct FileConsumer { // Returns IndexFile for the file or nullptr. |is_first_ownership| is set // to true iff the function just took ownership over the file. Otherwise it // is set to false. - IndexFile* TryConsumeFile(CXFile file, bool* is_first_ownership); + // + // note: file_contents is passed as a parameter instead of as a member + // variable since it is large and we do not want to copy it. + IndexFile* TryConsumeFile(CXFile file, + bool* is_first_ownership, + FileContentsMap* file_contents); // Forcibly create a local file, even if it has already been parsed. - IndexFile* ForceLocal(CXFile file); + // + // note: file_contents is passed as a parameter instead of as a member + // variable since it is large and we do not want to copy it. + IndexFile* ForceLocal(CXFile file, FileContentsMap* file_contents); // Returns and passes ownership of all local state. std::vector> TakeLocalState(); diff --git a/src/file_contents.cc b/src/file_contents.cc new file mode 100644 index 00000000..09ec8c75 --- /dev/null +++ b/src/file_contents.cc @@ -0,0 +1,29 @@ +#include "file_contents.h" + +FileContents::FileContents() : line_offsets_{0} {} + +FileContents::FileContents(const std::string& path, const std::string& content) + : path(path), content(content) { + line_offsets_.push_back(0); + for (size_t i = 0; i < content.size(); i++) { + if (content[i] == '\n') + line_offsets_.push_back(i + 1); + } +} + +optional FileContents::ToOffset(Position p) const { + if (0 < p.line && size_t(p.line) <= line_offsets_.size()) { + int ret = line_offsets_[p.line - 1] + p.column - 1; + if (size_t(ret) <= content.size()) + return ret; + } + return nullopt; +} + +optional FileContents::ContentsInRange(Range range) const { + optional start_offset = ToOffset(range.start), + end_offset = ToOffset(range.end); + if (start_offset && end_offset && *start_offset < *end_offset) + return content.substr(*start_offset, *end_offset - *start_offset); + return nullopt; +} \ No newline at end of file diff --git a/src/file_contents.h b/src/file_contents.h new file mode 100644 index 00000000..5394a198 --- /dev/null +++ b/src/file_contents.h @@ -0,0 +1,23 @@ +#pragma once + +#include "position.h" + +#include "optional.h" + +#include +#include + +struct FileContents { + FileContents(); + FileContents(const std::string& path, const std::string& content); + + optional ToOffset(Position p) const; + optional ContentsInRange(Range range) const; + + std::string path; + std::string content; + // {0, 1 + position of first newline, 1 + position of second newline, ...} + std::vector line_offsets_; +}; + +using FileContentsMap = std::unordered_map; diff --git a/src/iindexer.cc b/src/iindexer.cc index 1753f23b..9c700c03 100644 --- a/src/iindexer.cc +++ b/src/iindexer.cc @@ -36,10 +36,10 @@ struct TestIndexer : IIndexer { std::vector> indexes; if (entry.num_indexes > 0) - indexes.push_back(MakeUnique(entry.path)); + indexes.push_back(MakeUnique(entry.path, nullopt)); for (int i = 1; i < entry.num_indexes; ++i) { - indexes.push_back(MakeUnique(entry.path + "_extra_" + - std::to_string(i) + ".h")); + indexes.push_back(MakeUnique( + entry.path + "_extra_" + std::to_string(i) + ".h", nullopt)); } result->indexes.insert(std::make_pair(entry.path, std::move(indexes))); diff --git a/src/indexer.cc b/src/indexer.cc index 480ad5b1..b59d1fd5 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -213,7 +213,7 @@ struct ConstructorCache { struct IndexParam { std::unordered_set seen_cx_files; std::vector seen_files; - std::unordered_map file_contents; + FileContentsMap file_contents; std::unordered_map file_modification_times; // Only use this when strictly needed (ie, primary translation unit is @@ -236,8 +236,8 @@ struct IndexParam { IndexFile* ConsumeFile(IndexParam* param, CXFile file) { bool is_first_ownership = false; - IndexFile* db = - param->file_consumer->TryConsumeFile(file, &is_first_ownership); + IndexFile* db = param->file_consumer->TryConsumeFile( + file, &is_first_ownership, ¶m->file_contents); // If this is the first time we have seen the file (ignoring if we are // generating an index for it): @@ -262,7 +262,7 @@ IndexFile* ConsumeFile(IndexParam* param, CXFile file) { if (db && !param->file_contents.count(file_name)) { optional content = ReadContent(file_name); if (content) - param->file_contents.emplace(file_name, *content); + param->file_contents[file_name] = FileContents(file_name, *content); else LOG_S(ERROR) << "[indexer] Failed to read file content for " << file_name; @@ -474,7 +474,9 @@ void OnIndexReference_Function(IndexFile* db, // static int IndexFile::kCurrentVersion = 8; -IndexFile::IndexFile(const std::string& path) : id_cache(path), path(path) { +IndexFile::IndexFile(const std::string& path, + const optional& contents) + : id_cache(path), path(path), file_contents_(contents) { // TODO: Reconsider if we should still be reusing the same id_cache. // Preallocate any existing resolved ids. for (const auto& entry : id_cache.usr_to_type_id) @@ -1516,16 +1518,16 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { // ... } foo;` https://github.com/jacobdufault/cquery/issues/29 if (extent.end.line - extent.start.line < kMaxLinesDisplayTypeAliasDeclarations) { - FileContentsWithOffsets& fc = param->file_contents[db->path]; + FileContents& fc = param->file_contents[db->path]; optional extent_start = fc.ToOffset(extent.start), spell_start = fc.ToOffset(spell.start), spell_end = fc.ToOffset(spell.end), extent_end = fc.ToOffset(extent.end); if (extent_start && spell_start && spell_end && extent_end) { type->def.hover = - fc.contents.substr(*extent_start, *spell_start - *extent_start) + + fc.content.substr(*extent_start, *spell_start - *extent_start) + type->def.detailed_name + - fc.contents.substr(*spell_end, *extent_end - *spell_end); + fc.content.substr(*spell_end, *extent_end - *spell_end); } } @@ -1875,37 +1877,6 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) { } } -FileContents::FileContents(const std::string& path, const std::string& content) - : path(path), content(content) {} - -FileContentsWithOffsets::FileContentsWithOffsets() : line_offsets_{0} {} - -FileContentsWithOffsets::FileContentsWithOffsets(std::string s) { - contents = s; - line_offsets_.push_back(0); - for (size_t i = 0; i < s.size(); i++) - if (s[i] == '\n') - line_offsets_.push_back(i + 1); -} - -optional FileContentsWithOffsets::ToOffset(Position p) const { - if (0 < p.line && size_t(p.line) <= line_offsets_.size()) { - int ret = line_offsets_[p.line - 1] + p.column - 1; - if (size_t(ret) <= contents.size()) - return {ret}; - } - return nullopt; -} - -optional FileContentsWithOffsets::ContentsInRange( - Range range) const { - optional start_offset = ToOffset(range.start), - end_offset = ToOffset(range.end); - if (start_offset && end_offset && *start_offset < *end_offset) - return {contents.substr(*start_offset, *end_offset - *start_offset)}; - return nullopt; -} - std::vector> Parse( Config* config, FileConsumerSharedState* file_consumer_shared, @@ -1972,7 +1943,7 @@ std::vector> ParseWithTu( FileConsumer file_consumer(file_consumer_shared, file); IndexParam param(tu, &file_consumer); for (const CXUnsavedFile& contents : file_contents) { - param.file_contents.emplace( + param.file_contents[contents.Filename] = FileContents( contents.Filename, std::string(contents.Contents, contents.Length)); } @@ -2034,7 +2005,6 @@ std::vector> ParseWithTu( } // Update file contents and modification time. - entry->file_contents_ = param.file_contents[entry->path].contents; entry->last_modification_time = param.file_modification_times[entry->path]; // Update dependencies for the file. Do not include the file in its own @@ -2044,12 +2014,6 @@ std::vector> ParseWithTu( std::remove(entry->dependencies.begin(), entry->dependencies.end(), entry->path), entry->dependencies.end()); - - // Make sure we are using correct file contents. - for (const CXUnsavedFile& contents : file_contents) { - if (entry->path == contents.Filename) - entry->file_contents_ = std::string(contents.Contents, contents.Length); - } } return result; diff --git a/src/indexer.h b/src/indexer.h index 7731d043..9ecdebee 100644 --- a/src/indexer.h +++ b/src/indexer.h @@ -4,6 +4,7 @@ #include "clang_translation_unit.h" #include "clang_utils.h" #include "file_consumer.h" +#include "file_contents.h" #include "language_server_api.h" #include "performance.h" #include "position.h" @@ -539,9 +540,9 @@ struct IndexFile { // Diagnostics found when indexing this file. Not serialized. std::vector diagnostics_; // File contents at the time of index. Not serialized. - std::string file_contents_; + optional file_contents_; - IndexFile(const std::string& path); + IndexFile(const std::string& path, const optional& contents); IndexTypeId ToTypeId(const std::string& usr); IndexFuncId ToFuncId(const std::string& usr); @@ -556,25 +557,6 @@ struct IndexFile { std::string ToString(); }; -struct FileContents { - std::string path; - std::string content; - - FileContents(const std::string& path, const std::string& content); -}; - -struct FileContentsWithOffsets { - std::string contents; - // {0, 1 + position of first newline, 1 + position of second newline, ...} - std::vector line_offsets_; - - FileContentsWithOffsets(); - FileContentsWithOffsets(std::string s); - - optional ToOffset(Position p) const; - optional ContentsInRange(Range range) const; -}; - struct NamespaceHelper { std::unordered_map container_cursor_to_qualified_name; diff --git a/src/messages/cquery_call_tree.cc b/src/messages/cquery_call_tree.cc index 679982f0..914f9d35 100644 --- a/src/messages/cquery_call_tree.cc +++ b/src/messages/cquery_call_tree.cc @@ -101,10 +101,10 @@ std::vector BuildExpandCallTree( // TODO: REMOVE |seen_locations| once we fix the querydb update bugs // TODO: REMOVE |seen_locations| once we fix the querydb update bugs // TODO: basically, querydb gets duplicate references inserted into it. - if (!seen_locations.insert(caller.loc).second) { - LOG_S(ERROR) << "!!!! FIXME DUPLICATE REFERENCE IN QUERYDB" << std::endl; - return; - } + // if (!seen_locations.insert(caller.loc).second) { + // LOG_S(ERROR) << "!!!! FIXME DUPLICATE REFERENCE IN QUERYDB" << + // std::endl; return; + //} if (caller.has_id()) { QueryFunc& call_func = db->funcs[caller.id_.id]; diff --git a/src/query.cc b/src/query.cc index d0c7a911..d8820b81 100644 --- a/src/query.cc +++ b/src/query.cc @@ -442,7 +442,7 @@ IndexUpdate IndexUpdate::CreateDelta(const IdMap* previous_id_map, if (!previous_id_map) { assert(!previous); - IndexFile empty(current->path); + IndexFile empty(current->path, nullopt); return IndexUpdate(*current_id_map, *current_id_map, empty, *current); } return IndexUpdate(*previous_id_map, *current_id_map, *previous, *current); @@ -882,8 +882,8 @@ TEST_SUITE("query") { } TEST_CASE("remove defs") { - IndexFile previous("foo.cc"); - IndexFile current("foo.cc"); + IndexFile previous("foo.cc", nullopt); + IndexFile current("foo.cc", nullopt); previous.Resolve(previous.ToTypeId("usr1"))->def.definition_spelling = Range(Position(1, 0)); @@ -900,8 +900,8 @@ TEST_SUITE("query") { } TEST_CASE("do not remove ref-only defs") { - IndexFile previous("foo.cc"); - IndexFile current("foo.cc"); + IndexFile previous("foo.cc", nullopt); + IndexFile current("foo.cc", nullopt); previous.Resolve(previous.ToTypeId("usr1")) ->uses.push_back(Range(Position(1, 0))); @@ -919,8 +919,8 @@ TEST_SUITE("query") { } TEST_CASE("func callers") { - IndexFile previous("foo.cc"); - IndexFile current("foo.cc"); + IndexFile previous("foo.cc", nullopt); + IndexFile current("foo.cc", nullopt); IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr")); IndexFunc* cf = current.Resolve(current.ToFuncId("usr")); @@ -944,8 +944,8 @@ TEST_SUITE("query") { } TEST_CASE("type usages") { - IndexFile previous("foo.cc"); - IndexFile current("foo.cc"); + IndexFile previous("foo.cc", nullopt); + IndexFile current("foo.cc", nullopt); IndexType* pt = previous.Resolve(previous.ToTypeId("usr")); IndexType* ct = current.Resolve(current.ToTypeId("usr")); @@ -965,8 +965,8 @@ TEST_SUITE("query") { } TEST_CASE("apply delta") { - IndexFile previous("foo.cc"); - IndexFile current("foo.cc"); + IndexFile previous("foo.cc", nullopt); + IndexFile current("foo.cc", nullopt); IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr")); IndexFunc* cf = current.Resolve(current.ToFuncId("usr")); diff --git a/src/serializer.cc b/src/serializer.cc index bc95fc54..062adb12 100644 --- a/src/serializer.cc +++ b/src/serializer.cc @@ -270,7 +270,7 @@ std::unique_ptr Deserialize(SerializeFormat format, } } - file = MakeUnique(path); + file = MakeUnique(path, nullopt); JsonReader json_reader{&reader}; Reflect(json_reader, *file); break; @@ -284,7 +284,7 @@ std::unique_ptr Deserialize(SerializeFormat format, upk.reserve_buffer(serialized.size()); memcpy(upk.buffer(), serialized.data(), serialized.size()); upk.buffer_consumed(serialized.size()); - file = MakeUnique(path); + file = MakeUnique(path, nullopt); MessagePackReader reader(&upk); Reflect(reader, *file); if (file->version != expected_version)