Rename index id types (TypeId/FuncId/VarId -> IndexTypeId/IndexFuncId/IndexVarId) and add plan for lower memory usage

2026-02-03 17:27:40 +00:00 · 2017-04-06 23:57:26 -07:00 · 2017-04-06 23:57:26 -07:00 · 590797bcf2
commit 590797bcf2
parent 443ccd24b9
4 changed files with 98 additions and 86 deletions
--- a/src/indexer.cc
+++ b/src/indexer.cc
@ -17,59 +17,59 @@ IndexedFile::IndexedFile(const std::string& path) : id_cache(path), path(path) {
 }

 // TODO: Optimize for const char*?
-TypeId IndexedFile::ToTypeId(const std::string& usr) {
+IndexTypeId IndexedFile::ToTypeId(const std::string& usr) {
  auto it = id_cache.usr_to_type_id.find(usr);
  if (it != id_cache.usr_to_type_id.end())
    return it->second;

-  TypeId id(types.size());
+  IndexTypeId id(types.size());
  types.push_back(IndexedTypeDef(id, usr));
  id_cache.usr_to_type_id[usr] = id;
  id_cache.type_id_to_usr[id] = usr;
  return id;
 }
-FuncId IndexedFile::ToFuncId(const std::string& usr) {
+IndexFuncId IndexedFile::ToFuncId(const std::string& usr) {
  auto it = id_cache.usr_to_func_id.find(usr);
  if (it != id_cache.usr_to_func_id.end())
    return it->second;

-  FuncId id(funcs.size());
+  IndexFuncId id(funcs.size());
  funcs.push_back(IndexedFuncDef(id, usr));
  id_cache.usr_to_func_id[usr] = id;
  id_cache.func_id_to_usr[id] = usr;
  return id;
 }
-VarId IndexedFile::ToVarId(const std::string& usr) {
+IndexVarId IndexedFile::ToVarId(const std::string& usr) {
  auto it = id_cache.usr_to_var_id.find(usr);
  if (it != id_cache.usr_to_var_id.end())
    return it->second;

-  VarId id(vars.size());
+  IndexVarId id(vars.size());
  vars.push_back(IndexedVarDef(id, usr));
  id_cache.usr_to_var_id[usr] = id;
  id_cache.var_id_to_usr[id] = usr;
  return id;
 }

-TypeId IndexedFile::ToTypeId(const CXCursor& cursor) {
+IndexTypeId IndexedFile::ToTypeId(const CXCursor& cursor) {
  return ToTypeId(clang::Cursor(cursor).get_usr());
 }

-FuncId IndexedFile::ToFuncId(const CXCursor& cursor) {
+IndexFuncId IndexedFile::ToFuncId(const CXCursor& cursor) {
  return ToFuncId(clang::Cursor(cursor).get_usr());
 }

-VarId IndexedFile::ToVarId(const CXCursor& cursor) {
+IndexVarId IndexedFile::ToVarId(const CXCursor& cursor) {
  return ToVarId(clang::Cursor(cursor).get_usr());
 }

-IndexedTypeDef* IndexedFile::Resolve(TypeId id) {
+IndexedTypeDef* IndexedFile::Resolve(IndexTypeId id) {
  return &types[id.id];
 }
-IndexedFuncDef* IndexedFile::Resolve(FuncId id) {
+IndexedFuncDef* IndexedFile::Resolve(IndexFuncId id) {
  return &funcs[id.id];
 }
-IndexedVarDef* IndexedFile::Resolve(VarId id) {
+IndexedVarDef* IndexedFile::Resolve(IndexVarId id) {
  return &vars[id.id];
 }

@ -77,7 +77,7 @@ std::string IndexedFile::ToString() {
  return Serialize(*this);
 }

-IndexedTypeDef::IndexedTypeDef(TypeId id, const std::string& usr)
+IndexedTypeDef::IndexedTypeDef(IndexTypeId id, const std::string& usr)
    : def(usr), id(id) {
  assert(usr.size() > 0);
  // std::cerr << "Creating type with usr " << usr << std::endl;
@ -355,7 +355,7 @@ struct VisitDeclForTypeUsageParam {
  bool is_interesting;
  int has_processed_any = false;
  optional<clang::Cursor> previous_cursor;
-  optional<TypeId> initial_type;
+  optional<IndexTypeId> initial_type;

  VisitDeclForTypeUsageParam(IndexedFile* db, bool is_interesting)
      : db(db), is_interesting(is_interesting) {}
@ -374,7 +374,7 @@ void VisitDeclForTypeUsageVisitorHandler(clang::Cursor cursor,
  if (referenced_usr == "")
    return;

-  TypeId ref_type_id = db->ToTypeId(referenced_usr);
+  IndexTypeId ref_type_id = db->ToTypeId(referenced_usr);

  if (!param->initial_type)
    param->initial_type = ref_type_id;
@ -437,7 +437,7 @@ clang::VisiterResult VisitDeclForTypeUsageVisitor(
 // strips
 // qualifiers from |cursor| (ie, Foo* => Foo) and removes template arguments
 // (ie, Foo<A,B> => Foo<*,*>).
-optional<TypeId> ResolveToDeclarationType(IndexedFile* db,
+optional<IndexTypeId> ResolveToDeclarationType(IndexedFile* db,
                                          clang::Cursor cursor) {
  clang::Cursor declaration =
      cursor.get_type().strip_qualifiers().get_declaration();
@ -453,7 +453,7 @@ optional<TypeId> ResolveToDeclarationType(IndexedFile* db,
 // useful if trying to figure out ie, what a using statement refers to. If
 // trying to generally resolve a cursor to a type, use
 // ResolveToDeclarationType, which works in more scenarios.
-optional<TypeId> AddDeclTypeUsages(
+optional<IndexTypeId> AddDeclTypeUsages(
    IndexedFile* db,
    clang::Cursor decl_cursor,
    bool is_interesting,
@ -637,7 +637,7 @@ clang::VisiterResult AddDeclInitializerUsagesVisitor(clang::Cursor cursor,
      // std::cerr << "Adding usage to id=" << ref_id.id << " usr=" << ref_usr
      // << " at " << loc.ToString() << std::endl;
      if (loc) {
-        VarId ref_id = db->ToVarId(ref_usr);
+        IndexVarId ref_id = db->ToVarId(ref_usr);
        IndexedVarDef* ref_def = db->Resolve(ref_id);
        AddUsage(ref_def->uses, loc.value());
      }
@ -705,7 +705,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {

      std::string decl_usr = decl_cursor.get_usr();

-      VarId var_id = db->ToVarId(decl->entityInfo->USR);
+      IndexVarId var_id = db->ToVarId(decl->entityInfo->USR);
      IndexedVarDef* var_def = db->Resolve(var_id);

      // TODO: Eventually run with this if. Right now I want to iron out bugs
@ -744,7 +744,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
      // We don't need to assign declaring type multiple times if this variable
      // has already been seen.
      if (!decl->isRedeclaration) {
-        optional<TypeId> var_type = ResolveToDeclarationType(db, decl_cursor);
+        optional<IndexTypeId> var_type = ResolveToDeclarationType(db, decl_cursor);
        if (var_type.has_value()) {
          // Don't treat enum definition variables as instantiations.
          bool is_enum_member = decl->semanticContainer && decl->semanticContainer->cursor.kind == CXCursor_EnumDecl;
@ -757,7 +757,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {

      // TODO: Refactor handlers so more things are under 'if (!decl->isRedeclaration)'
      if (decl->isDefinition && IsTypeDefinition(decl->semanticContainer)) {
-        TypeId declaring_type_id =
+        IndexTypeId declaring_type_id =
          db->ToTypeId(decl->semanticContainer->cursor);
        IndexedTypeDef* declaring_type_def = db->Resolve(declaring_type_id);
        var_def->def.declaring_type = declaring_type_id;
@ -781,7 +781,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
      clang::Cursor resolved =
          decl_cursor.template_specialization_to_template_definition();

-      FuncId func_id = db->ToFuncId(resolved.cx_cursor);
+      IndexFuncId func_id = db->ToFuncId(resolved.cx_cursor);
      IndexedFuncDef* func_def = db->Resolve(func_id);

      AddUsage(func_def->uses, decl_loc_spelling.value());
@ -826,7 +826,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
        // ever
        // be one of those in the entire program.
        if (IsTypeDefinition(decl->semanticContainer)) {
-          TypeId declaring_type_id =
+          IndexTypeId declaring_type_id =
              db->ToTypeId(decl->semanticContainer->cursor);
          IndexedTypeDef* declaring_type_def = db->Resolve(declaring_type_id);
          func_def->def.declaring_type = declaring_type_id;
@ -898,7 +898,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
            // TODO: How to handle multiple parent overrides??
            for (unsigned int i = 0; i < num_overridden; ++i) {
              clang::Cursor parent = overridden[i];
-              FuncId parent_id = db->ToFuncId(parent.get_usr());
+              IndexFuncId parent_id = db->ToFuncId(parent.get_usr());
              IndexedFuncDef* parent_def = db->Resolve(parent_id);
              func_def = db->Resolve(func_id);  // ToFuncId invalidated func_def

@ -931,11 +931,11 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
      // Note we want to fetch the first TypeRef. Running
      // ResolveCursorType(decl->cursor) would return
      // the type of the typedef/using, not the type of the referenced type.
-      optional<TypeId> alias_of =
+      optional<IndexTypeId> alias_of =
          AddDeclTypeUsages(db, decl->cursor, true /*is_interesting*/,
                            decl->semanticContainer, decl->lexicalContainer);

-      TypeId type_id = db->ToTypeId(decl->entityInfo->USR);
+      IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR);
      IndexedTypeDef* type_def = db->Resolve(type_id);

      if (alias_of)
@ -959,7 +959,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
      if (!decl_loc_spelling)
        break;

-      TypeId type_id = db->ToTypeId(decl->entityInfo->USR);
+      IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR);
      IndexedTypeDef* type_def = db->Resolve(type_id);

      // TODO: Eventually run with this if. Right now I want to iron out bugs
@ -1003,7 +1003,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {

          AddDeclTypeUsages(db, base_class->cursor, true /*is_interesting*/,
                            decl->semanticContainer, decl->lexicalContainer);
-          optional<TypeId> parent_type_id =
+          optional<IndexTypeId> parent_type_id =
              ResolveToDeclarationType(db, base_class->cursor);
          // type_def ptr could be invalidated by ResolveToDeclarationType.
          IndexedTypeDef* type_def = db->Resolve(type_id);
@ -1095,7 +1095,7 @@ void indexEntityReference(CXClientData client_data,
      clang::Cursor referenced = ref->referencedEntity->cursor;
      referenced = referenced.template_specialization_to_template_definition();

-      VarId var_id = db->ToVarId(referenced.get_usr());
+      IndexVarId var_id = db->ToVarId(referenced.get_usr());
      IndexedVarDef* var_def = db->Resolve(var_id);
      AddUsage(var_def->uses, loc_spelling.value());
      break;
@ -1127,9 +1127,9 @@ void indexEntityReference(CXClientData client_data,
      param->last_func_usage_location = loc_spelling.value();

      // Note: be careful, calling db->ToFuncId invalidates the FuncDef* ptrs.
-      FuncId called_id = db->ToFuncId(ref->referencedEntity->USR);
+      IndexFuncId called_id = db->ToFuncId(ref->referencedEntity->USR);
      if (IsFunction(ref->container->cursor.kind)) {
-        FuncId caller_id = db->ToFuncId(ref->container->cursor);
+        IndexFuncId caller_id = db->ToFuncId(ref->container->cursor);
        IndexedFuncDef* caller_def = db->Resolve(caller_id);
        IndexedFuncDef* called_def = db->Resolve(called_id);

@ -1183,7 +1183,7 @@ void indexEntityReference(CXClientData client_data,

      clang::Cursor referenced = ref->referencedEntity->cursor;
      referenced = referenced.template_specialization_to_template_definition();
-      TypeId referenced_id = db->ToTypeId(referenced.get_usr());
+      IndexTypeId referenced_id = db->ToTypeId(referenced.get_usr());

      IndexedTypeDef* referenced_def = db->Resolve(referenced_id);

--- a/src/indexer.h
+++ b/src/indexer.h
@ -50,10 +50,9 @@ bool operator==(const Id<T>& a, const Id<T>& b) {
  return a.id == b.id;
 }

-struct _FakeFileType {};
-using TypeId = Id<IndexedTypeDef>;
-using FuncId = Id<IndexedFuncDef>;
-using VarId = Id<IndexedVarDef>;
+using IndexTypeId = Id<IndexedTypeDef>;
+using IndexFuncId = Id<IndexedFuncDef>;
+using IndexVarId = Id<IndexedVarDef>;

 struct IdCache;

@ -167,18 +166,18 @@ void Reflect(TVisitor& visitor,
 }

 struct IndexedTypeDef {
-  using Def = TypeDefDefinitionData<TypeId, FuncId, VarId, Range>;
+  using Def = TypeDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, Range>;
  Def def;

-  TypeId id;
+  IndexTypeId id;

  // Immediate derived types.
-  std::vector<TypeId> derived;
+  std::vector<IndexTypeId> derived;

  // Declared variables of this type.
  // TODO: this needs a lot more work and lots of tests.
  // TODO: add instantiation on ctor / dtor, do not add instantiation if type is ptr
-  std::vector<VarId> instantiations;
+  std::vector<IndexVarId> instantiations;

  // Every usage, useful for things like renames.
  // NOTE: Do not insert directly! Use AddUsage instead.
@ -186,7 +185,7 @@ struct IndexedTypeDef {

  IndexedTypeDef() : def("") {}  // For serialization

-  IndexedTypeDef(TypeId id, const std::string& usr);
+  IndexedTypeDef(IndexTypeId id, const std::string& usr);

  bool HasInterestingState() const {
    return
@ -273,16 +272,16 @@ void Reflect(
 }

 struct IndexedFuncDef {
-  using Def = FuncDefDefinitionData<TypeId, FuncId, VarId, FuncRef, Range>;
+  using Def = FuncDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, FuncRef, Range>;
  Def def;

-  FuncId id;
+  IndexFuncId id;

  // Places the function is forward-declared.
  std::vector<Range> declarations;

  // Methods which directly override this one.
-  std::vector<FuncId> derived;
+  std::vector<IndexFuncId> derived;

  // Functions which call this one.
  // TODO: Functions can get called outside of just functions - for example,
@ -296,7 +295,7 @@ struct IndexedFuncDef {
  std::vector<Range> uses;

  IndexedFuncDef() {}  // For reflection.
-  IndexedFuncDef(FuncId id, const std::string& usr) : def(usr), id(id) {
+  IndexedFuncDef(IndexFuncId id, const std::string& usr) : def(usr), id(id) {
    // assert(usr.size() > 0);
  }

@ -376,17 +375,17 @@ void Reflect(TVisitor& visitor,
 }

 struct IndexedVarDef {
-  using Def = VarDefDefinitionData<TypeId, FuncId, VarId, Range>;
+  using Def = VarDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, Range>;
  Def def;

-  VarId id;
+  IndexVarId id;

  // Usages.
  std::vector<Range> uses;

  IndexedVarDef() : def("") {}  // For serialization

-  IndexedVarDef(VarId id, const std::string& usr) : def(usr), id(id) {
+  IndexedVarDef(IndexVarId id, const std::string& usr) : def(usr), id(id) {
    // assert(usr.size() > 0);
  }

@ -405,12 +404,12 @@ MAKE_HASHABLE(IndexedVarDef, t.def.usr);

 struct IdCache {
  std::string primary_file;
-  std::unordered_map<std::string, TypeId> usr_to_type_id;
-  std::unordered_map<std::string, FuncId> usr_to_func_id;
-  std::unordered_map<std::string, VarId> usr_to_var_id;
-  std::unordered_map<TypeId, std::string> type_id_to_usr;
-  std::unordered_map<FuncId, std::string> func_id_to_usr;
-  std::unordered_map<VarId, std::string> var_id_to_usr;
+  std::unordered_map<std::string, IndexTypeId> usr_to_type_id;
+  std::unordered_map<std::string, IndexFuncId> usr_to_func_id;
+  std::unordered_map<std::string, IndexVarId> usr_to_var_id;
+  std::unordered_map<IndexTypeId, std::string> type_id_to_usr;
+  std::unordered_map<IndexFuncId, std::string> func_id_to_usr;
+  std::unordered_map<IndexVarId, std::string> var_id_to_usr;

  IdCache(const std::string& primary_file);

@ -436,15 +435,15 @@ struct IndexedFile {

  IndexedFile(const std::string& path);

-  TypeId ToTypeId(const std::string& usr);
-  FuncId ToFuncId(const std::string& usr);
-  VarId ToVarId(const std::string& usr);
-  TypeId ToTypeId(const CXCursor& usr);
-  FuncId ToFuncId(const CXCursor& usr);
-  VarId ToVarId(const CXCursor& usr);
-  IndexedTypeDef* Resolve(TypeId id);
-  IndexedFuncDef* Resolve(FuncId id);
-  IndexedVarDef* Resolve(VarId id);
+  IndexTypeId ToTypeId(const std::string& usr);
+  IndexFuncId ToFuncId(const std::string& usr);
+  IndexVarId ToVarId(const std::string& usr);
+  IndexTypeId ToTypeId(const CXCursor& usr);
+  IndexFuncId ToFuncId(const CXCursor& usr);
+  IndexVarId ToVarId(const CXCursor& usr);
+  IndexedTypeDef* Resolve(IndexTypeId id);
+  IndexedFuncDef* Resolve(IndexFuncId id);
+  IndexedVarDef* Resolve(IndexVarId id);

  std::string ToString();
 };
--- a/src/query.cc
+++ b/src/query.cc
@ -14,6 +14,21 @@
 // TODO: Make all copy constructors explicit.


+struct IdGlobalizer {
+  // TODO threading model
+  //  - [querydb] Create IdGlobalizer mapping from every id registered in local_ids
+  //  - [indexer] Create IndexUpdate using IdGlobalizer cached state
+  //  - [querydb] Apply IndexUpdate
+  //
+  // Then lookup in cached_* should *never* fail.
+
+  const IdCache& local_ids;
+  QueryFileId index_file_id;
+  std::unordered_map<IndexTypeId, QueryTypeId> cached_type_ids_;
+  std::unordered_map<IndexFuncId, QueryFuncId> cached_func_ids_;
+  std::unordered_map<IndexVarId, QueryVarId> cached_var_ids_;
+};
+



@ -26,37 +41,36 @@ std::vector<Out> Transform(const std::vector<In>& input, std::function<Out(In)>
  return result;
 }

-// TODO: These functions are failing. Investigate why.
-Usr MapIdToUsr(const IdCache& id_cache, const TypeId& id) {
+
+Usr MapIdToUsr(const IdCache& id_cache, const IndexTypeId& id) {
  assert(id_cache.type_id_to_usr.find(id) != id_cache.type_id_to_usr.end());
  return id_cache.type_id_to_usr.find(id)->second;
 }
-Usr MapIdToUsr(const IdCache& id_cache, const FuncId& id) {
+Usr MapIdToUsr(const IdCache& id_cache, const IndexFuncId& id) {
  assert(id_cache.func_id_to_usr.find(id) != id_cache.func_id_to_usr.end());
  return id_cache.func_id_to_usr.find(id)->second;
 }
-Usr MapIdToUsr(const IdCache& id_cache, const VarId& id) {
+Usr MapIdToUsr(const IdCache& id_cache, const IndexVarId& id) {
  assert(id_cache.var_id_to_usr.find(id) != id_cache.var_id_to_usr.end());
  return id_cache.var_id_to_usr.find(id)->second;
 }
 QueryableLocation MapIdToUsr(const IdCache& id_cache, const Range& range) {
  return QueryableLocation(id_cache.primary_file, range);
 }
-
-std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<TypeId>& ids) {
-  return Transform<TypeId, Usr>(ids, [&](TypeId id) {
+std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<IndexTypeId>& ids) {
+  return Transform<IndexTypeId, Usr>(ids, [&](IndexTypeId id) {
    assert(id_cache.type_id_to_usr.find(id) != id_cache.type_id_to_usr.end());
    return id_cache.type_id_to_usr.find(id)->second;
  });
 }
-std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<FuncId>& ids) {
-  return Transform<FuncId, Usr>(ids, [&](FuncId id) {
+std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<IndexFuncId>& ids) {
+  return Transform<IndexFuncId, Usr>(ids, [&](IndexFuncId id) {
    assert(id_cache.func_id_to_usr.find(id) != id_cache.func_id_to_usr.end());
    return id_cache.func_id_to_usr.find(id)->second;
  });
 }
-std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<VarId>& ids) {
-  return Transform<VarId, Usr>(ids, [&](VarId id) {
+std::vector<Usr> MapIdToUsr(const IdCache& id_cache, const std::vector<IndexVarId>& ids) {
+  return Transform<IndexVarId, Usr>(ids, [&](IndexVarId id) {
    assert(id_cache.var_id_to_usr.find(id) != id_cache.var_id_to_usr.end());
    return id_cache.var_id_to_usr.find(id)->second;
  });
@ -299,7 +313,7 @@ bool ComputeDifferenceForUpdate(
    previous.begin(), previous.end(),
    current.begin(), current.end(),
    std::back_inserter(*removed));
-  // Returns the elmeents in |current| that are not in |previous|.
+  // Returns the elements in |current| that are not in |previous|.
  std::set_difference(
    current.begin(), current.end(),
    previous.begin(), previous.end(),
@ -321,13 +335,7 @@ void CompareGroups(
  while (prev_it != previous_data.end() && curr_it != current_data.end()) {
    // same id
    if (prev_it->def.usr == curr_it->def.usr) {
-      //if (!prev_it->is_bad_def && !curr_it->is_bad_def)
      on_found(&*prev_it, &*curr_it);
-      //else if (prev_it->is_bad_def)
-      //  on_added(&*curr_it);
-      //else if (curr_it->is_bad_def)
-      //  on_removed(&*curr_it);
-
      ++prev_it;
      ++curr_it;
    }
@ -665,11 +673,6 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
 //             are joined to. So that way even if the main db is busy we can
 //             still be joining. Joining the partially joined db to the main
 //             db should be faster since we will have larger data lanes to use.
-// TODO: I think we can run libclang multiple times in one process. So we might
-//       only need two processes. Still, for perf reasons it would be good if
-//       we could stay in one process. We could probably just use shared
-//       memory. May want to run libclang in separate process to protect from
-//       crashes/issues there.
 // TODO: allow user to store configuration as json? file in home dir; also
 //       allow local overrides (scan up dirs)
 // TODO: add opt to dump config when starting (--dump-config)
--- a/src/query.h
+++ b/src/query.h
@ -5,6 +5,16 @@

 using Usr = std::string;

+struct QueryableFile;
+struct QueryableTypeDef;
+struct QueryableFuncDef;
+struct QueryableVarDef;
+
+using QueryFileId = Id<QueryableFile>;
+using QueryTypeId = Id<QueryableTypeDef>;
+using QueryFuncId = Id<QueryableFuncDef>;
+using QueryVarId = Id<QueryableVarDef>;
+
 // TODO: in types, store refs separately from irefs. Then we can drop
 // 'interesting' from location when that is cleaned up.