Merge FileContents and FileContentsWithOffsets.

Also try to more aggressively load FileContents when indexing to increase reliability.
2026-02-06 10:47:49 +00:00 · 2018-01-10 21:16:46 -08:00 · 2018-01-10 21:16:46 -08:00 · 786ac0bc4f
commit 786ac0bc4f
parent 7de2a733c8
11 changed files with 130 additions and 98 deletions
--- a/src/cache_manager.cc
+++ b/src/cache_manager.cc
@ -23,12 +23,12 @@ struct RealCacheManager : ICacheManager {
    std::string cache_path = GetCachePath(file.path);
-    if (file.file_contents_.empty()) {
+    if (!file.file_contents_.has_value()) {
      LOG_S(ERROR) << "No cached file contents; performing potentially stale "
                   << "file-copy for " << file.path;
      CopyFileTo(cache_path, file.path);
    } else {
-      WriteToFile(cache_path, file.file_contents_);
+      WriteToFile(cache_path, *file.file_contents_);
    }
    std::string indexed_content = Serialize(config_->cacheFormat, file);
--- a/src/file_consumer.cc
+++ b/src/file_consumer.cc
@ -7,6 +7,22 @@
 #include <loguru.hpp>
 namespace {
 optional<std::string> GetFileContents(const std::string& path,
                                      FileContentsMap* file_contents) {
  auto it = file_contents->find(path);
  if (it == file_contents->end()) {
    optional<std::string> content = ReadContent(path);
    if (content)
      (*file_contents)[path] = FileContents(path, *content);
    return content;
  }
  return it->second.content;
 }
 }  // namespace
 bool operator==(const CXFileUniqueID& a, const CXFileUniqueID& b) {
  return a.data[0] == b.data[0] && a.data[1] == b.data[1] &&
         a.data[2] == b.data[2];
@ -28,7 +44,9 @@ FileConsumer::FileConsumer(FileConsumerSharedState* shared_state,
                           const std::string& parse_file)
    : shared_(shared_state), parse_file_(parse_file) {}
-IndexFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) {
+IndexFile* FileConsumer::TryConsumeFile(CXFile file,
                                        bool* is_first_ownership,
                                        FileContentsMap* file_contents) {
  assert(is_first_ownership);
  CXFileUniqueID file_id;
@ -49,16 +67,20 @@ IndexFile* FileConsumer::TryConsumeFile(CXFile file, bool* is_first_ownership) {
  // No result in local; we need to query global.
  bool did_insert = shared_->Mark(file_name);
  *is_first_ownership = did_insert;
-  local_[file_id] = did_insert ? MakeUnique<IndexFile>(file_name) : nullptr;
+  local_[file_id] =
      did_insert ? MakeUnique<IndexFile>(
                       file_name, GetFileContents(file_name, file_contents))
                 : nullptr;
  return local_[file_id].get();
 }
-IndexFile* FileConsumer::ForceLocal(CXFile file) {
+IndexFile* FileConsumer::ForceLocal(CXFile file,
                                    FileContentsMap* file_contents) {
  // Try to fetch the file using the normal system, which will insert the file
  // usage into global storage.
  {
    bool is_first;
-    IndexFile* cache = TryConsumeFile(file, &is_first);
+    IndexFile* cache = TryConsumeFile(file, &is_first, file_contents);
    if (cache)
      return cache;
  }
@ -71,8 +93,11 @@ IndexFile* FileConsumer::ForceLocal(CXFile file) {
  }
  auto it = local_.find(file_id);
-  if (it == local_.end() || !it->second)
+  if (it == local_.end() || !it->second) {
-    local_[file_id] = MakeUnique<IndexFile>(FileName(file));
+    std::string file_name = FileName(file);
    local_[file_id] = MakeUnique<IndexFile>(
        file_name, GetFileContents(file_name, file_contents));
  }
  assert(local_.find(file_id) != local_.end());
  return local_[file_id].get();
 }
--- a/src/file_consumer.h
+++ b/src/file_consumer.h
@ -1,5 +1,6 @@
 #pragma once
 #include "file_contents.h"
 #include "utils.h"
 #include <clang-c/Index.h>
@ -42,10 +43,18 @@ struct FileConsumer {
  // Returns IndexFile for the file or nullptr. |is_first_ownership| is set
  // to true iff the function just took ownership over the file. Otherwise it
  // is set to false.
-  IndexFile* TryConsumeFile(CXFile file, bool* is_first_ownership);
+  //
  // note: file_contents is passed as a parameter instead of as a member
  // variable since it is large and we do not want to copy it.
  IndexFile* TryConsumeFile(CXFile file,
                            bool* is_first_ownership,
                            FileContentsMap* file_contents);
  // Forcibly create a local file, even if it has already been parsed.
-  IndexFile* ForceLocal(CXFile file);
+  //
  // note: file_contents is passed as a parameter instead of as a member
  // variable since it is large and we do not want to copy it.
  IndexFile* ForceLocal(CXFile file, FileContentsMap* file_contents);
  // Returns and passes ownership of all local state.
  std::vector<std::unique_ptr<IndexFile>> TakeLocalState();
--- a/src/file_contents.cc
+++ b/src/file_contents.cc
@ -0,0 +1,29 @@
 #include "file_contents.h"
 FileContents::FileContents() : line_offsets_{0} {}
 FileContents::FileContents(const std::string& path, const std::string& content)
    : path(path), content(content) {
  line_offsets_.push_back(0);
  for (size_t i = 0; i < content.size(); i++) {
    if (content[i] == '\n')
      line_offsets_.push_back(i + 1);
  }
 }
 optional<int> FileContents::ToOffset(Position p) const {
  if (0 < p.line && size_t(p.line) <= line_offsets_.size()) {
    int ret = line_offsets_[p.line - 1] + p.column - 1;
    if (size_t(ret) <= content.size())
      return ret;
  }
  return nullopt;
 }
 optional<std::string> FileContents::ContentsInRange(Range range) const {
  optional<int> start_offset = ToOffset(range.start),
                end_offset = ToOffset(range.end);
  if (start_offset && end_offset && *start_offset < *end_offset)
    return content.substr(*start_offset, *end_offset - *start_offset);
  return nullopt;
 }
--- a/src/file_contents.h
+++ b/src/file_contents.h
@ -0,0 +1,23 @@
 #pragma once
 #include "position.h"
 #include "optional.h"
 #include <string>
 #include <vector>
 struct FileContents {
  FileContents();
  FileContents(const std::string& path, const std::string& content);
  optional<int> ToOffset(Position p) const;
  optional<std::string> ContentsInRange(Range range) const;
  std::string path;
  std::string content;
  // {0, 1 + position of first newline, 1 + position of second newline, ...}
  std::vector<int> line_offsets_;
 };
 using FileContentsMap = std::unordered_map<std::string, FileContents>;
--- a/src/iindexer.cc
+++ b/src/iindexer.cc
@ -36,10 +36,10 @@ struct TestIndexer : IIndexer {
      std::vector<std::unique_ptr<IndexFile>> indexes;
      if (entry.num_indexes > 0)
-        indexes.push_back(MakeUnique<IndexFile>(entry.path));
+        indexes.push_back(MakeUnique<IndexFile>(entry.path, nullopt));
      for (int i = 1; i < entry.num_indexes; ++i) {
-        indexes.push_back(MakeUnique<IndexFile>(entry.path + "_extra_" +
+        indexes.push_back(MakeUnique<IndexFile>(
-                                                std::to_string(i) + ".h"));
+            entry.path + "_extra_" + std::to_string(i) + ".h", nullopt));
      }
      result->indexes.insert(std::make_pair(entry.path, std::move(indexes)));
--- a/src/indexer.cc
+++ b/src/indexer.cc
@ -213,7 +213,7 @@ struct ConstructorCache {
 struct IndexParam {
  std::unordered_set<CXFile> seen_cx_files;
  std::vector<std::string> seen_files;
-  std::unordered_map<std::string, FileContentsWithOffsets> file_contents;
+  FileContentsMap file_contents;
  std::unordered_map<std::string, int64_t> file_modification_times;
  // Only use this when strictly needed (ie, primary translation unit is
@ -236,8 +236,8 @@ struct IndexParam {
 IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
  bool is_first_ownership = false;
-  IndexFile* db =
+  IndexFile* db = param->file_consumer->TryConsumeFile(
-      param->file_consumer->TryConsumeFile(file, &is_first_ownership);
+      file, &is_first_ownership, &param->file_contents);
  // If this is the first time we have seen the file (ignoring if we are
  // generating an index for it):
@ -262,7 +262,7 @@ IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
      if (db && !param->file_contents.count(file_name)) {
        optional<std::string> content = ReadContent(file_name);
        if (content)
-          param->file_contents.emplace(file_name, *content);
+          param->file_contents[file_name] = FileContents(file_name, *content);
        else
          LOG_S(ERROR) << "[indexer] Failed to read file content for "
                       << file_name;
@ -474,7 +474,9 @@ void OnIndexReference_Function(IndexFile* db,
 // static
 int IndexFile::kCurrentVersion = 8;
-IndexFile::IndexFile(const std::string& path) : id_cache(path), path(path) {
+IndexFile::IndexFile(const std::string& path,
                     const optional<std::string>& contents)
    : id_cache(path), path(path), file_contents_(contents) {
  // TODO: Reconsider if we should still be reusing the same id_cache.
  // Preallocate any existing resolved ids.
  for (const auto& entry : id_cache.usr_to_type_id)
@ -1516,16 +1518,16 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
      // ... } foo;` https://github.com/jacobdufault/cquery/issues/29
      if (extent.end.line - extent.start.line <
          kMaxLinesDisplayTypeAliasDeclarations) {
-        FileContentsWithOffsets& fc = param->file_contents[db->path];
+        FileContents& fc = param->file_contents[db->path];
        optional<int> extent_start = fc.ToOffset(extent.start),
                      spell_start = fc.ToOffset(spell.start),
                      spell_end = fc.ToOffset(spell.end),
                      extent_end = fc.ToOffset(extent.end);
        if (extent_start && spell_start && spell_end && extent_end) {
          type->def.hover =
-              fc.contents.substr(*extent_start, *spell_start - *extent_start) +
+              fc.content.substr(*extent_start, *spell_start - *extent_start) +
              type->def.detailed_name +
-              fc.contents.substr(*spell_end, *extent_end - *spell_end);
+              fc.content.substr(*spell_end, *extent_end - *spell_end);
        }
      }
@ -1875,37 +1877,6 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
  }
 }
 FileContents::FileContents(const std::string& path, const std::string& content)
    : path(path), content(content) {}
 FileContentsWithOffsets::FileContentsWithOffsets() : line_offsets_{0} {}
 FileContentsWithOffsets::FileContentsWithOffsets(std::string s) {
  contents = s;
  line_offsets_.push_back(0);
  for (size_t i = 0; i < s.size(); i++)
    if (s[i] == '\n')
      line_offsets_.push_back(i + 1);
 }
 optional<int> FileContentsWithOffsets::ToOffset(Position p) const {
  if (0 < p.line && size_t(p.line) <= line_offsets_.size()) {
    int ret = line_offsets_[p.line - 1] + p.column - 1;
    if (size_t(ret) <= contents.size())
      return {ret};
  }
  return nullopt;
 }
 optional<std::string> FileContentsWithOffsets::ContentsInRange(
    Range range) const {
  optional<int> start_offset = ToOffset(range.start),
                end_offset = ToOffset(range.end);
  if (start_offset && end_offset && *start_offset < *end_offset)
    return {contents.substr(*start_offset, *end_offset - *start_offset)};
  return nullopt;
 }
 std::vector<std::unique_ptr<IndexFile>> Parse(
    Config* config,
    FileConsumerSharedState* file_consumer_shared,
@ -1972,7 +1943,7 @@ std::vector<std::unique_ptr<IndexFile>> ParseWithTu(
  FileConsumer file_consumer(file_consumer_shared, file);
  IndexParam param(tu, &file_consumer);
  for (const CXUnsavedFile& contents : file_contents) {
-    param.file_contents.emplace(
+    param.file_contents[contents.Filename] = FileContents(
        contents.Filename, std::string(contents.Contents, contents.Length));
  }
@ -2034,7 +2005,6 @@ std::vector<std::unique_ptr<IndexFile>> ParseWithTu(
    }
    // Update file contents and modification time.
    entry->file_contents_ = param.file_contents[entry->path].contents;
    entry->last_modification_time = param.file_modification_times[entry->path];
    // Update dependencies for the file. Do not include the file in its own
@ -2044,12 +2014,6 @@ std::vector<std::unique_ptr<IndexFile>> ParseWithTu(
        std::remove(entry->dependencies.begin(), entry->dependencies.end(),
                    entry->path),
        entry->dependencies.end());
    // Make sure we are using correct file contents.
    for (const CXUnsavedFile& contents : file_contents) {
      if (entry->path == contents.Filename)
        entry->file_contents_ = std::string(contents.Contents, contents.Length);
    }
  }
  return result;
--- a/src/indexer.h
+++ b/src/indexer.h
@ -4,6 +4,7 @@
 #include "clang_translation_unit.h"
 #include "clang_utils.h"
 #include "file_consumer.h"
 #include "file_contents.h"
 #include "language_server_api.h"
 #include "performance.h"
 #include "position.h"
@ -539,9 +540,9 @@ struct IndexFile {
  // Diagnostics found when indexing this file. Not serialized.
  std::vector<lsDiagnostic> diagnostics_;
  // File contents at the time of index. Not serialized.
-  std::string file_contents_;
+  optional<std::string> file_contents_;
-  IndexFile(const std::string& path);
+  IndexFile(const std::string& path, const optional<std::string>& contents);
  IndexTypeId ToTypeId(const std::string& usr);
  IndexFuncId ToFuncId(const std::string& usr);
@ -556,25 +557,6 @@ struct IndexFile {
  std::string ToString();
 };
 struct FileContents {
  std::string path;
  std::string content;
  FileContents(const std::string& path, const std::string& content);
 };
 struct FileContentsWithOffsets {
  std::string contents;
  // {0, 1 + position of first newline, 1 + position of second newline, ...}
  std::vector<int> line_offsets_;
  FileContentsWithOffsets();
  FileContentsWithOffsets(std::string s);
  optional<int> ToOffset(Position p) const;
  optional<std::string> ContentsInRange(Range range) const;
 };
 struct NamespaceHelper {
  std::unordered_map<ClangCursor, std::string>
      container_cursor_to_qualified_name;
--- a/src/messages/cquery_call_tree.cc
+++ b/src/messages/cquery_call_tree.cc
@ -101,10 +101,10 @@ std::vector<Out_CqueryCallTree::CallEntry> BuildExpandCallTree(
    // TODO: REMOVE |seen_locations| once we fix the querydb update bugs
    // TODO: REMOVE |seen_locations| once we fix the querydb update bugs
    // TODO: basically, querydb gets duplicate references inserted into it.
-    if (!seen_locations.insert(caller.loc).second) {
+    // if (!seen_locations.insert(caller.loc).second) {
-      LOG_S(ERROR) << "!!!! FIXME DUPLICATE REFERENCE IN QUERYDB" << std::endl;
+    //  LOG_S(ERROR) << "!!!! FIXME DUPLICATE REFERENCE IN QUERYDB" <<
-      return;
+    //  std::endl; return;
-    }
+    //}
    if (caller.has_id()) {
      QueryFunc& call_func = db->funcs[caller.id_.id];
--- a/src/query.cc
+++ b/src/query.cc
@ -442,7 +442,7 @@ IndexUpdate IndexUpdate::CreateDelta(const IdMap* previous_id_map,
  if (!previous_id_map) {
    assert(!previous);
-    IndexFile empty(current->path);
+    IndexFile empty(current->path, nullopt);
    return IndexUpdate(*current_id_map, *current_id_map, empty, *current);
  }
  return IndexUpdate(*previous_id_map, *current_id_map, *previous, *current);
@ -882,8 +882,8 @@ TEST_SUITE("query") {
  }
  TEST_CASE("remove defs") {
-    IndexFile previous("foo.cc");
+    IndexFile previous("foo.cc", nullopt);
-    IndexFile current("foo.cc");
+    IndexFile current("foo.cc", nullopt);
    previous.Resolve(previous.ToTypeId("usr1"))->def.definition_spelling =
        Range(Position(1, 0));
@ -900,8 +900,8 @@ TEST_SUITE("query") {
  }
  TEST_CASE("do not remove ref-only defs") {
-    IndexFile previous("foo.cc");
+    IndexFile previous("foo.cc", nullopt);
-    IndexFile current("foo.cc");
+    IndexFile current("foo.cc", nullopt);
    previous.Resolve(previous.ToTypeId("usr1"))
        ->uses.push_back(Range(Position(1, 0)));
@ -919,8 +919,8 @@ TEST_SUITE("query") {
  }
  TEST_CASE("func callers") {
-    IndexFile previous("foo.cc");
+    IndexFile previous("foo.cc", nullopt);
-    IndexFile current("foo.cc");
+    IndexFile current("foo.cc", nullopt);
    IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr"));
    IndexFunc* cf = current.Resolve(current.ToFuncId("usr"));
@ -944,8 +944,8 @@ TEST_SUITE("query") {
  }
  TEST_CASE("type usages") {
-    IndexFile previous("foo.cc");
+    IndexFile previous("foo.cc", nullopt);
-    IndexFile current("foo.cc");
+    IndexFile current("foo.cc", nullopt);
    IndexType* pt = previous.Resolve(previous.ToTypeId("usr"));
    IndexType* ct = current.Resolve(current.ToTypeId("usr"));
@ -965,8 +965,8 @@ TEST_SUITE("query") {
  }
  TEST_CASE("apply delta") {
-    IndexFile previous("foo.cc");
+    IndexFile previous("foo.cc", nullopt);
-    IndexFile current("foo.cc");
+    IndexFile current("foo.cc", nullopt);
    IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr"));
    IndexFunc* cf = current.Resolve(current.ToFuncId("usr"));
--- a/src/serializer.cc
+++ b/src/serializer.cc
@ -270,7 +270,7 @@ std::unique_ptr<IndexFile> Deserialize(SerializeFormat format,
        }
      }
-      file = MakeUnique<IndexFile>(path);
+      file = MakeUnique<IndexFile>(path, nullopt);
      JsonReader json_reader{&reader};
      Reflect(json_reader, *file);
      break;
@ -284,7 +284,7 @@ std::unique_ptr<IndexFile> Deserialize(SerializeFormat format,
        upk.reserve_buffer(serialized.size());
        memcpy(upk.buffer(), serialized.data(), serialized.size());
        upk.buffer_consumed(serialized.size());
-        file = MakeUnique<IndexFile>(path);
+        file = MakeUnique<IndexFile>(path, nullopt);
        MessagePackReader reader(&upk);
        Reflect(reader, *file);
        if (file->version != expected_version)