Merge FileContents and FileContentsWithOffsets.

Also try to more aggressively load FileContents when indexing to increase reliability.
This commit is contained in:
Jacob Dufault 2018-01-10 21:16:46 -08:00
parent 7de2a733c8
commit 786ac0bc4f
11 changed files with 130 additions and 98 deletions

View File

@ -23,12 +23,12 @@ struct RealCacheManager : ICacheManager {
std::string cache_path = GetCachePath(file.path); std::string cache_path = GetCachePath(file.path);
if (file.file_contents_.empty()) { if (!file.file_contents_.has_value()) {
LOG_S(ERROR) << "No cached file contents; performing potentially stale " LOG_S(ERROR) << "No cached file contents; performing potentially stale "
<< "file-copy for " << file.path; << "file-copy for " << file.path;
CopyFileTo(cache_path, file.path); CopyFileTo(cache_path, file.path);
} else { } else {
WriteToFile(cache_path, file.file_contents_); WriteToFile(cache_path, *file.file_contents_);
} }
std::string indexed_content = Serialize(config_->cacheFormat, file); std::string indexed_content = Serialize(config_->cacheFormat, file);

View File

@ -7,6 +7,22 @@
#include <loguru.hpp> #include <loguru.hpp>
namespace {

// Returns the content recorded for |path| in |file_contents|. When the map has
// no entry for |path|, the content is obtained through ReadContent (defined
// elsewhere; presumably reads the file from disk - confirm) and, if that
// succeeds, stored in the map before being returned. Yields nullopt only when
// the path is not in the map and ReadContent produced nothing.
optional<std::string> GetFileContents(const std::string& path,
                                      FileContentsMap* file_contents) {
  const auto cached = file_contents->find(path);
  if (cached != file_contents->end())
    return cached->second.content;

  // Not in the map yet: load it and remember the result on success.
  optional<std::string> loaded = ReadContent(path);
  if (loaded)
    (*file_contents)[path] = FileContents(path, *loaded);
  return loaded;
}

}  // namespace
bool operator==(const CXFileUniqueID& a, const CXFileUniqueID& b) { bool operator==(const CXFileUniqueID& a, const CXFileUniqueID& b) {
return a.data[0] == b.data[0] && a.data[1] == b.data[1] && return a.data[0] == b.data[0] && a.data[1] == b.data[1] &&
a.data[2] == b.data[2]; a.data[2] == b.data[2];
@ -28,7 +44,9 @@ FileConsumer::FileConsumer(FileConsumerSharedState* shared_state,
const std::string& parse_file) const std::string& parse_file)
: shared_(shared_state), parse_file_(parse_file) {} : shared_(shared_state), parse_file_(parse_file) {}
IndexFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) { IndexFile* FileConsumer::TryConsumeFile(CXFile file,
bool* is_first_ownership,
FileContentsMap* file_contents) {
assert(is_first_ownership); assert(is_first_ownership);
CXFileUniqueID file_id; CXFileUniqueID file_id;
@ -49,16 +67,20 @@ IndexFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) {
// No result in local; we need to query global. // No result in local; we need to query global.
bool did_insert = shared_->Mark(file_name); bool did_insert = shared_->Mark(file_name);
*is_first_ownership = did_insert; *is_first_ownership = did_insert;
local_[file_id] = did_insert ? MakeUnique<IndexFile>(file_name) : nullptr; local_[file_id] =
did_insert ? MakeUnique<IndexFile>(
file_name, GetFileContents(file_name, file_contents))
: nullptr;
return local_[file_id].get(); return local_[file_id].get();
} }
IndexFile* FileConsumer::ForceLocal(CXFile file) { IndexFile* FileConsumer::ForceLocal(CXFile file,
FileContentsMap* file_contents) {
// Try to fetch the file using the normal system, which will insert the file // Try to fetch the file using the normal system, which will insert the file
// usage into global storage. // usage into global storage.
{ {
bool is_first; bool is_first;
IndexFile* cache = TryConsumeFile(file, &is_first); IndexFile* cache = TryConsumeFile(file, &is_first, file_contents);
if (cache) if (cache)
return cache; return cache;
} }
@ -71,8 +93,11 @@ IndexFile* FileConsumer::ForceLocal(CXFile file) {
} }
auto it = local_.find(file_id); auto it = local_.find(file_id);
if (it == local_.end() || !it->second) if (it == local_.end() || !it->second) {
local_[file_id] = MakeUnique<IndexFile>(FileName(file)); std::string file_name = FileName(file);
local_[file_id] = MakeUnique<IndexFile>(
file_name, GetFileContents(file_name, file_contents));
}
assert(local_.find(file_id) != local_.end()); assert(local_.find(file_id) != local_.end());
return local_[file_id].get(); return local_[file_id].get();
} }

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "file_contents.h"
#include "utils.h" #include "utils.h"
#include <clang-c/Index.h> #include <clang-c/Index.h>
@ -42,10 +43,18 @@ struct FileConsumer {
// Returns IndexFile for the file or nullptr. |is_first_ownership| is set // Returns IndexFile for the file or nullptr. |is_first_ownership| is set
// to true iff the function just took ownership over the file. Otherwise it // to true iff the function just took ownership over the file. Otherwise it
// is set to false. // is set to false.
IndexFile* TryConsumeFile(CXFile file, bool* is_first_ownership); //
// note: file_contents is passed as a parameter instead of as a member
// variable since it is large and we do not want to copy it.
IndexFile* TryConsumeFile(CXFile file,
bool* is_first_ownership,
FileContentsMap* file_contents);
// Forcibly create a local file, even if it has already been parsed. // Forcibly create a local file, even if it has already been parsed.
IndexFile* ForceLocal(CXFile file); //
// note: file_contents is passed as a parameter instead of as a member
// variable since it is large and we do not want to copy it.
IndexFile* ForceLocal(CXFile file, FileContentsMap* file_contents);
// Returns and passes ownership of all local state. // Returns and passes ownership of all local state.
std::vector<std::unique_ptr<IndexFile>> TakeLocalState(); std::vector<std::unique_ptr<IndexFile>> TakeLocalState();

29
src/file_contents.cc Normal file
View File

@ -0,0 +1,29 @@
#include "file_contents.h"
// Creates an instance with empty path and content. The offset table starts
// with a single entry, 0: the first line begins at offset 0.
FileContents::FileContents() : line_offsets_(1, 0) {}
// Stores |path| and |content| and builds the table of line-start offsets:
// entry 0 is always 0, and every '\n' in |content| contributes the offset of
// the character immediately after it.
FileContents::FileContents(const std::string& path, const std::string& content)
    : path(path), content(content), line_offsets_(1, 0) {
  for (size_t newline = content.find('\n'); newline != std::string::npos;
       newline = content.find('\n', newline + 1)) {
    line_offsets_.push_back(static_cast<int>(newline + 1));
  }
}
// Translates |p| into an offset into |content|. Line and column are treated
// as 1-based: line 1 maps to the first entry of |line_offsets_| and column 1
// to the first character of that line. Returns nullopt when the line is
// outside the offset table or the computed offset falls outside |content|;
// an offset equal to content.size() (one past the last character) is
// accepted.
optional<int> FileContents::ToOffset(Position p) const {
  if (p.line <= 0 || size_t(p.line) > line_offsets_.size())
    return nullopt;

  int offset = line_offsets_[p.line - 1] + p.column - 1;
  // The unsigned cast also rejects a negative offset, which wraps to a huge
  // value.
  if (size_t(offset) > content.size())
    return nullopt;
  return offset;
}
// Returns the text of |content| between |range.start| (inclusive) and
// |range.end| (exclusive). Returns nullopt when either endpoint cannot be
// translated by ToOffset, or when the start offset is not strictly before the
// end offset (empty or inverted range).
optional<std::string> FileContents::ContentsInRange(Range range) const {
  const optional<int> begin = ToOffset(range.start);
  const optional<int> end = ToOffset(range.end);
  if (!begin || !end || *begin >= *end)
    return nullopt;
  return content.substr(*begin, *end - *begin);
}

23
src/file_contents.h Normal file
View File

@ -0,0 +1,23 @@
#pragma once
#include "position.h"
#include "optional.h"
#include <string>
#include <unordered_map>
#include <vector>
// The text of a single file together with a table of line-start offsets, so
// that (line, column) positions can be translated into offsets into
// |content|.
struct FileContents {
// Creates an instance with empty path and content; |line_offsets_| starts
// as {0}.
FileContents();
// Stores |path| and |content| and records where each line of |content|
// begins.
FileContents(const std::string& path, const std::string& content);
// Translates |p| into an offset into |content|. Returns nullopt when |p|
// does not map to an offset inside |content|.
optional<int> ToOffset(Position p) const;
// Returns the text between |range.start| and |range.end|. Returns nullopt
// when either endpoint cannot be translated or the start offset is not
// strictly before the end offset.
optional<std::string> ContentsInRange(Range range) const;
// Path this content belongs to.
std::string path;
// Text of the file.
std::string content;
// Offsets into |content| at which each line begins:
// {0, 1 + position of first newline, 1 + position of second newline, ...}
std::vector<int> line_offsets_;
};
// Lookup table from a string key to FileContents; the key is presumably the
// file path (verify against callers).
using FileContentsMap = std::unordered_map<std::string, FileContents>;

View File

@ -36,10 +36,10 @@ struct TestIndexer : IIndexer {
std::vector<std::unique_ptr<IndexFile>> indexes; std::vector<std::unique_ptr<IndexFile>> indexes;
if (entry.num_indexes > 0) if (entry.num_indexes > 0)
indexes.push_back(MakeUnique<IndexFile>(entry.path)); indexes.push_back(MakeUnique<IndexFile>(entry.path, nullopt));
for (int i = 1; i < entry.num_indexes; ++i) { for (int i = 1; i < entry.num_indexes; ++i) {
indexes.push_back(MakeUnique<IndexFile>(entry.path + "_extra_" + indexes.push_back(MakeUnique<IndexFile>(
std::to_string(i) + ".h")); entry.path + "_extra_" + std::to_string(i) + ".h", nullopt));
} }
result->indexes.insert(std::make_pair(entry.path, std::move(indexes))); result->indexes.insert(std::make_pair(entry.path, std::move(indexes)));

View File

@ -213,7 +213,7 @@ struct ConstructorCache {
struct IndexParam { struct IndexParam {
std::unordered_set<CXFile> seen_cx_files; std::unordered_set<CXFile> seen_cx_files;
std::vector<std::string> seen_files; std::vector<std::string> seen_files;
std::unordered_map<std::string, FileContentsWithOffsets> file_contents; FileContentsMap file_contents;
std::unordered_map<std::string, int64_t> file_modification_times; std::unordered_map<std::string, int64_t> file_modification_times;
// Only use this when strictly needed (ie, primary translation unit is // Only use this when strictly needed (ie, primary translation unit is
@ -236,8 +236,8 @@ struct IndexParam {
IndexFile* ConsumeFile(IndexParam* param, CXFile file) { IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
bool is_first_ownership = false; bool is_first_ownership = false;
IndexFile* db = IndexFile* db = param->file_consumer->TryConsumeFile(
param->file_consumer->TryConsumeFile(file, &is_first_ownership); file, &is_first_ownership, &param->file_contents);
// If this is the first time we have seen the file (ignoring if we are // If this is the first time we have seen the file (ignoring if we are
// generating an index for it): // generating an index for it):
@ -262,7 +262,7 @@ IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
if (db && !param->file_contents.count(file_name)) { if (db && !param->file_contents.count(file_name)) {
optional<std::string> content = ReadContent(file_name); optional<std::string> content = ReadContent(file_name);
if (content) if (content)
param->file_contents.emplace(file_name, *content); param->file_contents[file_name] = FileContents(file_name, *content);
else else
LOG_S(ERROR) << "[indexer] Failed to read file content for " LOG_S(ERROR) << "[indexer] Failed to read file content for "
<< file_name; << file_name;
@ -474,7 +474,9 @@ void OnIndexReference_Function(IndexFile* db,
// static // static
int IndexFile::kCurrentVersion = 8; int IndexFile::kCurrentVersion = 8;
IndexFile::IndexFile(const std::string& path) : id_cache(path), path(path) { IndexFile::IndexFile(const std::string& path,
const optional<std::string>& contents)
: id_cache(path), path(path), file_contents_(contents) {
// TODO: Reconsider if we should still be reusing the same id_cache. // TODO: Reconsider if we should still be reusing the same id_cache.
// Preallocate any existing resolved ids. // Preallocate any existing resolved ids.
for (const auto& entry : id_cache.usr_to_type_id) for (const auto& entry : id_cache.usr_to_type_id)
@ -1516,16 +1518,16 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
// ... } foo;` https://github.com/jacobdufault/cquery/issues/29 // ... } foo;` https://github.com/jacobdufault/cquery/issues/29
if (extent.end.line - extent.start.line < if (extent.end.line - extent.start.line <
kMaxLinesDisplayTypeAliasDeclarations) { kMaxLinesDisplayTypeAliasDeclarations) {
FileContentsWithOffsets& fc = param->file_contents[db->path]; FileContents& fc = param->file_contents[db->path];
optional<int> extent_start = fc.ToOffset(extent.start), optional<int> extent_start = fc.ToOffset(extent.start),
spell_start = fc.ToOffset(spell.start), spell_start = fc.ToOffset(spell.start),
spell_end = fc.ToOffset(spell.end), spell_end = fc.ToOffset(spell.end),
extent_end = fc.ToOffset(extent.end); extent_end = fc.ToOffset(extent.end);
if (extent_start && spell_start && spell_end && extent_end) { if (extent_start && spell_start && spell_end && extent_end) {
type->def.hover = type->def.hover =
fc.contents.substr(*extent_start, *spell_start - *extent_start) + fc.content.substr(*extent_start, *spell_start - *extent_start) +
type->def.detailed_name + type->def.detailed_name +
fc.contents.substr(*spell_end, *extent_end - *spell_end); fc.content.substr(*spell_end, *extent_end - *spell_end);
} }
} }
@ -1875,37 +1877,6 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
} }
} }
FileContents::FileContents(const std::string& path, const std::string& content)
: path(path), content(content) {}
FileContentsWithOffsets::FileContentsWithOffsets() : line_offsets_{0} {}
FileContentsWithOffsets::FileContentsWithOffsets(std::string s) {
contents = s;
line_offsets_.push_back(0);
for (size_t i = 0; i < s.size(); i++)
if (s[i] == '\n')
line_offsets_.push_back(i + 1);
}
optional<int> FileContentsWithOffsets::ToOffset(Position p) const {
if (0 < p.line && size_t(p.line) <= line_offsets_.size()) {
int ret = line_offsets_[p.line - 1] + p.column - 1;
if (size_t(ret) <= contents.size())
return {ret};
}
return nullopt;
}
optional<std::string> FileContentsWithOffsets::ContentsInRange(
Range range) const {
optional<int> start_offset = ToOffset(range.start),
end_offset = ToOffset(range.end);
if (start_offset && end_offset && *start_offset < *end_offset)
return {contents.substr(*start_offset, *end_offset - *start_offset)};
return nullopt;
}
std::vector<std::unique_ptr<IndexFile>> Parse( std::vector<std::unique_ptr<IndexFile>> Parse(
Config* config, Config* config,
FileConsumerSharedState* file_consumer_shared, FileConsumerSharedState* file_consumer_shared,
@ -1972,7 +1943,7 @@ std::vector<std::unique_ptr<IndexFile>> ParseWithTu(
FileConsumer file_consumer(file_consumer_shared, file); FileConsumer file_consumer(file_consumer_shared, file);
IndexParam param(tu, &file_consumer); IndexParam param(tu, &file_consumer);
for (const CXUnsavedFile& contents : file_contents) { for (const CXUnsavedFile& contents : file_contents) {
param.file_contents.emplace( param.file_contents[contents.Filename] = FileContents(
contents.Filename, std::string(contents.Contents, contents.Length)); contents.Filename, std::string(contents.Contents, contents.Length));
} }
@ -2034,7 +2005,6 @@ std::vector<std::unique_ptr<IndexFile>> ParseWithTu(
} }
// Update file contents and modification time. // Update file contents and modification time.
entry->file_contents_ = param.file_contents[entry->path].contents;
entry->last_modification_time = param.file_modification_times[entry->path]; entry->last_modification_time = param.file_modification_times[entry->path];
// Update dependencies for the file. Do not include the file in its own // Update dependencies for the file. Do not include the file in its own
@ -2044,12 +2014,6 @@ std::vector<std::unique_ptr<IndexFile>> ParseWithTu(
std::remove(entry->dependencies.begin(), entry->dependencies.end(), std::remove(entry->dependencies.begin(), entry->dependencies.end(),
entry->path), entry->path),
entry->dependencies.end()); entry->dependencies.end());
// Make sure we are using correct file contents.
for (const CXUnsavedFile& contents : file_contents) {
if (entry->path == contents.Filename)
entry->file_contents_ = std::string(contents.Contents, contents.Length);
}
} }
return result; return result;

View File

@ -4,6 +4,7 @@
#include "clang_translation_unit.h" #include "clang_translation_unit.h"
#include "clang_utils.h" #include "clang_utils.h"
#include "file_consumer.h" #include "file_consumer.h"
#include "file_contents.h"
#include "language_server_api.h" #include "language_server_api.h"
#include "performance.h" #include "performance.h"
#include "position.h" #include "position.h"
@ -539,9 +540,9 @@ struct IndexFile {
// Diagnostics found when indexing this file. Not serialized. // Diagnostics found when indexing this file. Not serialized.
std::vector<lsDiagnostic> diagnostics_; std::vector<lsDiagnostic> diagnostics_;
// File contents at the time of index. Not serialized. // File contents at the time of index. Not serialized.
std::string file_contents_; optional<std::string> file_contents_;
IndexFile(const std::string& path); IndexFile(const std::string& path, const optional<std::string>& contents);
IndexTypeId ToTypeId(const std::string& usr); IndexTypeId ToTypeId(const std::string& usr);
IndexFuncId ToFuncId(const std::string& usr); IndexFuncId ToFuncId(const std::string& usr);
@ -556,25 +557,6 @@ struct IndexFile {
std::string ToString(); std::string ToString();
}; };
struct FileContents {
std::string path;
std::string content;
FileContents(const std::string& path, const std::string& content);
};
struct FileContentsWithOffsets {
std::string contents;
// {0, 1 + position of first newline, 1 + position of second newline, ...}
std::vector<int> line_offsets_;
FileContentsWithOffsets();
FileContentsWithOffsets(std::string s);
optional<int> ToOffset(Position p) const;
optional<std::string> ContentsInRange(Range range) const;
};
struct NamespaceHelper { struct NamespaceHelper {
std::unordered_map<ClangCursor, std::string> std::unordered_map<ClangCursor, std::string>
container_cursor_to_qualified_name; container_cursor_to_qualified_name;

View File

@ -101,10 +101,10 @@ std::vector<Out_CqueryCallTree::CallEntry> BuildExpandCallTree(
// TODO: REMOVE |seen_locations| once we fix the querydb update bugs // TODO: REMOVE |seen_locations| once we fix the querydb update bugs
// TODO: REMOVE |seen_locations| once we fix the querydb update bugs // TODO: REMOVE |seen_locations| once we fix the querydb update bugs
// TODO: basically, querydb gets duplicate references inserted into it. // TODO: basically, querydb gets duplicate references inserted into it.
if (!seen_locations.insert(caller.loc).second) { // if (!seen_locations.insert(caller.loc).second) {
LOG_S(ERROR) << "!!!! FIXME DUPLICATE REFERENCE IN QUERYDB" << std::endl; // LOG_S(ERROR) << "!!!! FIXME DUPLICATE REFERENCE IN QUERYDB" <<
return; // std::endl; return;
} //}
if (caller.has_id()) { if (caller.has_id()) {
QueryFunc& call_func = db->funcs[caller.id_.id]; QueryFunc& call_func = db->funcs[caller.id_.id];

View File

@ -442,7 +442,7 @@ IndexUpdate IndexUpdate::CreateDelta(const IdMap* previous_id_map,
if (!previous_id_map) { if (!previous_id_map) {
assert(!previous); assert(!previous);
IndexFile empty(current->path); IndexFile empty(current->path, nullopt);
return IndexUpdate(*current_id_map, *current_id_map, empty, *current); return IndexUpdate(*current_id_map, *current_id_map, empty, *current);
} }
return IndexUpdate(*previous_id_map, *current_id_map, *previous, *current); return IndexUpdate(*previous_id_map, *current_id_map, *previous, *current);
@ -882,8 +882,8 @@ TEST_SUITE("query") {
} }
TEST_CASE("remove defs") { TEST_CASE("remove defs") {
IndexFile previous("foo.cc"); IndexFile previous("foo.cc", nullopt);
IndexFile current("foo.cc"); IndexFile current("foo.cc", nullopt);
previous.Resolve(previous.ToTypeId("usr1"))->def.definition_spelling = previous.Resolve(previous.ToTypeId("usr1"))->def.definition_spelling =
Range(Position(1, 0)); Range(Position(1, 0));
@ -900,8 +900,8 @@ TEST_SUITE("query") {
} }
TEST_CASE("do not remove ref-only defs") { TEST_CASE("do not remove ref-only defs") {
IndexFile previous("foo.cc"); IndexFile previous("foo.cc", nullopt);
IndexFile current("foo.cc"); IndexFile current("foo.cc", nullopt);
previous.Resolve(previous.ToTypeId("usr1")) previous.Resolve(previous.ToTypeId("usr1"))
->uses.push_back(Range(Position(1, 0))); ->uses.push_back(Range(Position(1, 0)));
@ -919,8 +919,8 @@ TEST_SUITE("query") {
} }
TEST_CASE("func callers") { TEST_CASE("func callers") {
IndexFile previous("foo.cc"); IndexFile previous("foo.cc", nullopt);
IndexFile current("foo.cc"); IndexFile current("foo.cc", nullopt);
IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr")); IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr"));
IndexFunc* cf = current.Resolve(current.ToFuncId("usr")); IndexFunc* cf = current.Resolve(current.ToFuncId("usr"));
@ -944,8 +944,8 @@ TEST_SUITE("query") {
} }
TEST_CASE("type usages") { TEST_CASE("type usages") {
IndexFile previous("foo.cc"); IndexFile previous("foo.cc", nullopt);
IndexFile current("foo.cc"); IndexFile current("foo.cc", nullopt);
IndexType* pt = previous.Resolve(previous.ToTypeId("usr")); IndexType* pt = previous.Resolve(previous.ToTypeId("usr"));
IndexType* ct = current.Resolve(current.ToTypeId("usr")); IndexType* ct = current.Resolve(current.ToTypeId("usr"));
@ -965,8 +965,8 @@ TEST_SUITE("query") {
} }
TEST_CASE("apply delta") { TEST_CASE("apply delta") {
IndexFile previous("foo.cc"); IndexFile previous("foo.cc", nullopt);
IndexFile current("foo.cc"); IndexFile current("foo.cc", nullopt);
IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr")); IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr"));
IndexFunc* cf = current.Resolve(current.ToFuncId("usr")); IndexFunc* cf = current.Resolve(current.ToFuncId("usr"));

View File

@ -270,7 +270,7 @@ std::unique_ptr<IndexFile> Deserialize(SerializeFormat format,
} }
} }
file = MakeUnique<IndexFile>(path); file = MakeUnique<IndexFile>(path, nullopt);
JsonReader json_reader{&reader}; JsonReader json_reader{&reader};
Reflect(json_reader, *file); Reflect(json_reader, *file);
break; break;
@ -284,7 +284,7 @@ std::unique_ptr<IndexFile> Deserialize(SerializeFormat format,
upk.reserve_buffer(serialized.size()); upk.reserve_buffer(serialized.size());
memcpy(upk.buffer(), serialized.data(), serialized.size()); memcpy(upk.buffer(), serialized.data(), serialized.size());
upk.buffer_consumed(serialized.size()); upk.buffer_consumed(serialized.size());
file = MakeUnique<IndexFile>(path); file = MakeUnique<IndexFile>(path, nullopt);
MessagePackReader reader(&upk); MessagePackReader reader(&upk);
Reflect(reader, *file); Reflect(reader, *file);
if (file->version != expected_version) if (file->version != expected_version)