From 69b1dcadaa2c4c11d75fc9b937a2f4f0e833e553 Mon Sep 17 00:00:00 2001
From: Jacob Dufault <jacobdufault@gmail.com>
Date: Thu, 6 Apr 2017 22:42:57 -0700
Subject: [PATCH] clean up some query code to prepare for space optimizations

---
 src/command_line.cc |  18 +--
 src/query.cc        | 289 ++++++++++++++++++++------------------------
 src/query.h         | 138 +++++++--------------
 src/string_db.cc    |  40 ++++++
 src/string_db.h     | 154 +++++++++++++++++++++++
 5 files changed, 377 insertions(+), 262 deletions(-)
 create mode 100644 src/string_db.cc
 create mode 100644 src/string_db.h
diff --git a/src/command_line.cc b/src/command_line.cc
index 4f36b80e..0c7e3a95 100644
--- a/src/command_line.cc
+++ b/src/command_line.cc
@@ -272,7 +272,7 @@ void IndexMain(IndexRequestQueue* requests, IndexResponseQueue* responses) {
       // from the primary file though, so that should be ok. We need to cleanup indexer output.
       optional<IndexedFile> old_index = LoadCachedFile(request->path);
       if (old_index.has_value()) {
-        IndexUpdate update(old_index.value());
+        IndexUpdate update = IndexUpdate::CreateImport(old_index.value());
         IndexTranslationUnitResponse response(update);
         responses->Enqueue(response);
         time.ResetAndPrint("Loading cached index");
@@ -296,7 +296,7 @@ void IndexMain(IndexRequestQueue* requests, IndexResponseQueue* responses) {
     time.ResetAndPrint("Loading previous index");
     if (old_index) {
       // Apply delta update.
-      IndexUpdate update(old_index.value(), new_index);
+      IndexUpdate update = IndexUpdate::CreateDelta(old_index.value(), new_index);
       IndexTranslationUnitResponse response(update);
       time.ResetAndPrint("Creating delta index update/response");
       responses->Enqueue(response);
@@ -304,7 +304,7 @@ void IndexMain(IndexRequestQueue* requests, IndexResponseQueue* responses) {
     }
     else {
       // Apply full update.
-      IndexUpdate update(new_index);
+      IndexUpdate update = IndexUpdate::CreateImport(new_index);
       IndexTranslationUnitResponse response(update);
       time.ResetAndPrint("Creating index update/response");
       responses->Enqueue(response);
@@ -322,7 +322,7 @@ QueryableFile* FindFile(QueryableDatabase* db, const std::string& filename) {
   // TODO: hashmap lookup.
   for (auto& file : db->files) {
     // std::cerr << " - Have file " << file.file_id << std::endl;
-    if (file.file_id == filename) {
+    if (file.def.usr == filename) {
       //std::cerr << "Found file " << filename << std::endl;
       return &file;
     }
@@ -516,7 +516,7 @@ void QueryDbMainLoop(
       int target_line = msg->params.position.line + 1;
       int target_column = msg->params.position.character + 1;
 
-      for (const UsrRef& ref : file->all_symbols) {
+      for (const UsrRef& ref : file->def.all_symbols) {
         if (ref.loc.start.line >= target_line && ref.loc.end.line <= target_line &&
             ref.loc.start.column <= target_column && ref.loc.end.column >= target_column) {
           optional<QueryableRange> location = GetDefinitionSpellingOfUsr(db, ref.usr);
@@ -542,8 +542,8 @@ void QueryDbMainLoop(
         break;
       }
 
-      std::cerr << "File outline size is " << file->outline.size() << std::endl;
-      for (UsrRef ref : file->outline) {
+      std::cerr << "File outline size is " << file->def.outline.size() << std::endl;
+      for (UsrRef ref : file->def.outline) {
         SymbolIdx symbol = db->usr_to_symbol[ref.usr];
 
         lsSymbolInformation info;
@@ -606,7 +606,7 @@ void QueryDbMainLoop(
         break;
       }
 
-      for (UsrRef ref : file->outline) {
+      for (UsrRef ref : file->def.outline) {
         // NOTE: We OffsetColumn so that the code lens always show up in a
         // predictable order. Otherwise, the client may randomize it.
 
@@ -950,7 +950,7 @@ int main(int argc, char** argv) {
   //bool loop = true;
   //while (loop)
   //  std::this_thread::sleep_for(std::chrono::milliseconds(10));
-  //std::this_thread::sleep_for(std::chrono::seconds(3));
+  std::this_thread::sleep_for(std::chrono::seconds(3));
 
   PlatformInit();
   RegisterMessageTypes();
diff --git a/src/query.cc b/src/query.cc
index 01e42bb3..66eabd14 100644
--- a/src/query.cc
+++ b/src/query.cc
@@ -17,12 +17,6 @@
 
 
 
-
-// NOTE: When not inside of a |def| object, there can be duplicates of the same
-//       information if that information is contributed from separate sources.
-//       If we need to avoid this duplication in the future, we will have to
-//       add a refcount.
-
 template<typename In, typename Out>
 std::vector<Out> Transform(const std::vector<In>& input, std::function<Out(In)> op) {
   std::vector<Out> result;
@@ -137,21 +131,15 @@ QueryableVarDef::DefUpdate MapIdToUsr(const IdCache& id_cache, const IndexedVarD
   return result;
 }
 
-QueryableFile::QueryableFile(const IndexedFile& indexed)
-  : file_id(indexed.path) {
+QueryableFile::Def BuildFileDef(const IndexedFile& indexed) {
+  QueryableFile::Def def;
+  def.usr = indexed.path;
 
-  // TODO: investigate this
-  //std::cerr << "Adding QueryableFile for " << indexed.path
-  //          << ", file_path_to_file_id.size()=" << indexed.id_cache.file_path_to_file_id.size() << std::endl;
-  //for (auto& entry : indexed.id_cache.file_path_to_file_id)
-  //  std::cerr << "-" << entry.first << std::endl;
-  //assert(indexed.id_cache.file_path_to_file_id.find(indexed.path) !=
-  //       indexed.id_cache.file_path_to_file_id.end());
-  auto add_outline = [this, &indexed](Usr usr, Range range) {
-    outline.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range)));
+  auto add_outline = [&def, &indexed](Usr usr, Range range) {
+    def.outline.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range)));
   };
-  auto add_all_symbols = [this, &indexed](Usr usr, Range range) {
-    all_symbols.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range)));
+  auto add_all_symbols = [&def, &indexed](Usr usr, Range range) {
+    def.all_symbols.push_back(UsrRef(usr, MapIdToUsr(indexed.id_cache, range)));
   };
 
   for (const IndexedTypeDef& def : indexed.types) {
@@ -183,14 +171,19 @@ QueryableFile::QueryableFile(const IndexedFile& indexed)
       add_all_symbols(def.def.usr, use);
   }
 
-  std::sort(outline.begin(), outline.end(), [](const UsrRef& a, const UsrRef& b) {
+  std::sort(def.outline.begin(), def.outline.end(), [](const UsrRef& a, const UsrRef& b) {
     return a.loc.start < b.loc.start;
   });
-  std::sort(all_symbols.begin(), all_symbols.end(), [](const UsrRef& a, const UsrRef& b) {
+  std::sort(def.all_symbols.begin(), def.all_symbols.end(), [](const UsrRef& a, const UsrRef& b) {
     return a.loc.start < b.loc.start;
   });
+
+  return def;
 }
 
+QueryableFile::QueryableFile(const IndexedFile& indexed)
+  : def(BuildFileDef(indexed)) {}
+
 QueryableTypeDef::QueryableTypeDef(IdCache& id_cache, const IndexedTypeDef& indexed)
   : def(MapIdToUsr(id_cache, indexed.def)) {
   derived = MapIdToUsr(id_cache, indexed.derived);
@@ -224,8 +217,6 @@ QueryableVarDef::QueryableVarDef(IdCache& id_cache, const IndexedVarDef& indexed
 //       the pain of a global UsrDb less by
 //          (parallel)clangindex -> (main)commit USRs to global -> (parallel)transfer IDs to global USRs -> (main)import
 
-// TODO: remove GroupId concept.
-
 struct CachedIndexedFile {
   // Path to the file indexed.
   std::string path;
@@ -388,21 +379,50 @@ void CompareGroups(
 
 
 
-
+#if false
 IndexUpdate::IndexUpdate(IndexedFile& file) {
-  // TODO: Do not add empty data (ie, def has nothing but USR)
+  // TODO: use delta constructor with an empty file.
 
-  files_added.push_back(QueryableFile(file));
-  for (const IndexedTypeDef& def : file.types) {
-    types_added.push_back(QueryableTypeDef(file.id_cache, def));
+  auto file_def = BuildFileDef(file);
+  files_def_update.push_back(file_def);
+
+  for (const IndexedTypeDef& indexed : file.types) {
+    QueryableTypeDef query(file.id_cache, indexed);
+    types_def_update.push_back(query.def);
+    types_derived.push_back(QueryableTypeDef::DerivedUpdate(query.def.usr, query.derived));
+    types_instantiations.push_back(QueryableTypeDef::InstantiationsUpdate(query.def.usr, query.instantiations));
+    types_uses.push_back(QueryableTypeDef::UsesUpdate(query.def.usr, query.uses));
   }
-  for (const IndexedFuncDef& def : file.funcs) {
-    funcs_added.push_back(QueryableFuncDef(file.id_cache, def));
+
+  for (const IndexedFuncDef& indexed : file.funcs) {
+    QueryableFuncDef query(file.id_cache, indexed);
+    funcs_def_update.push_back(query.def);
+    funcs_declarations.push_back(QueryableFuncDef::DeclarationsUpdate(query.def.usr, query.declarations));
+    funcs_derived.push_back(QueryableFuncDef::DerivedUpdate(query.def.usr, query.derived));
+    funcs_callers.push_back(QueryableFuncDef::CallersUpdate(query.def.usr, query.callers));
+    funcs_uses.push_back(QueryableFuncDef::UsesUpdate(query.def.usr, query.uses));
   }
-  for (const IndexedVarDef& def : file.vars) {
-    vars_added.push_back(QueryableVarDef(file.id_cache, def));
+
+  for (const IndexedVarDef& indexed : file.vars) {
+    QueryableVarDef query(file.id_cache, indexed);
+    vars_def_update.push_back(query.def);
+    vars_uses.push_back(QueryableVarDef::UsesUpdate(query.def.usr, query.uses));
   }
 }
+#endif
+
+// static
+IndexUpdate IndexUpdate::CreateImport(IndexedFile& file) {
+  // Diff |file| against an empty IndexedFile that has the same path; the delta
+  // constructor then treats everything in |file| as newly added.
+  IndexedFile previous(file.path);
+  return IndexUpdate(previous, file);
+}
+
+// static. |current| is forwarded as the constructor's |previous_file| and |updated| as its |current_file|.
+IndexUpdate IndexUpdate::CreateDelta(IndexedFile& current, IndexedFile& updated) {
+  return IndexUpdate(current, updated);
+}
 
 IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file) {
   // |query_name| is the name of the variable on the query type.
@@ -423,24 +443,8 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file)
     } \
   }
 
-
   // File
-  do {
-    // Outline is a special property and needs special handling, because it is a computed property
-    // of the IndexedFile (ie, to view it we need to construct a QueryableFile instance).
-    assert(previous_file.path == current_file.path);
-    QueryableFile previous_queryable_file(previous_file);
-    QueryableFile current_queryable_file(previous_file);
-    std::vector<UsrRef> removed, added;
-    bool did_add = ComputeDifferenceForUpdate(
-      previous_queryable_file.outline,
-      current_queryable_file.outline,
-      &removed, &added);
-    if (did_add) {
-      std::cerr << "Adding mergeable update on outline (" << current_file.path << ")" << std::endl;
-      files_outline.push_back(MergeableUpdate<UsrRef>(current_file.path, removed, added));
-    }
-  } while (false); // do while false instead of just {} to appease Visual Studio code formatter.
+  files_def_update.push_back(BuildFileDef(current_file));
 
   // Types
   CompareGroups<IndexedTypeDef>(previous_file.types, current_file.types,
@@ -448,15 +452,20 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file)
     types_removed.push_back(def->def.usr);
   },
     /*onAdded:*/[this, &current_file](IndexedTypeDef* def) {
-    types_added.push_back(QueryableTypeDef(current_file.id_cache, *def));
+    QueryableTypeDef query(current_file.id_cache, *def);
+    types_def_update.push_back(query.def);
+    types_derived.push_back(QueryableTypeDef::DerivedUpdate(query.def.usr, query.derived));
+    types_instantiations.push_back(QueryableTypeDef::InstantiationsUpdate(query.def.usr, query.instantiations));
+    types_uses.push_back(QueryableTypeDef::UsesUpdate(query.def.usr, query.uses));
   },
     /*onFound:*/[this, &previous_file, &current_file](IndexedTypeDef* previous_def, IndexedTypeDef* current_def) {
     QueryableTypeDef::DefUpdate previous_remapped_def = MapIdToUsr(previous_file.id_cache, previous_def->def);
     QueryableTypeDef::DefUpdate current_remapped_def = MapIdToUsr(current_file.id_cache, current_def->def);
     if (previous_remapped_def != current_remapped_def)
-      types_def_changed.push_back(current_remapped_def);
+      types_def_update.push_back(current_remapped_def);
 
     PROCESS_UPDATE_DIFF(types_derived, derived, Usr);
+    PROCESS_UPDATE_DIFF(types_instantiations, instantiations, Usr);
     PROCESS_UPDATE_DIFF(types_uses, uses, QueryableRange);
   });
 
@@ -466,13 +475,18 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file)
     funcs_removed.push_back(def->def.usr);
   },
     /*onAdded:*/[this, &current_file](IndexedFuncDef* def) {
-    funcs_added.push_back(QueryableFuncDef(current_file.id_cache, *def));
+    QueryableFuncDef query(current_file.id_cache, *def);
+    funcs_def_update.push_back(query.def);
+    funcs_declarations.push_back(QueryableFuncDef::DeclarationsUpdate(query.def.usr, query.declarations));
+    funcs_derived.push_back(QueryableFuncDef::DerivedUpdate(query.def.usr, query.derived));
+    funcs_callers.push_back(QueryableFuncDef::CallersUpdate(query.def.usr, query.callers));
+    funcs_uses.push_back(QueryableFuncDef::UsesUpdate(query.def.usr, query.uses));
   },
     /*onFound:*/[this, &previous_file, &current_file](IndexedFuncDef* previous_def, IndexedFuncDef* current_def) {
     QueryableFuncDef::DefUpdate previous_remapped_def = MapIdToUsr(previous_file.id_cache, previous_def->def);
     QueryableFuncDef::DefUpdate current_remapped_def = MapIdToUsr(current_file.id_cache, current_def->def);
     if (previous_remapped_def != current_remapped_def)
-      funcs_def_changed.push_back(current_remapped_def);
+      funcs_def_update.push_back(current_remapped_def);
 
     PROCESS_UPDATE_DIFF(funcs_declarations, declarations, QueryableRange);
     PROCESS_UPDATE_DIFF(funcs_derived, derived, Usr);
@@ -486,13 +500,15 @@ IndexUpdate::IndexUpdate(IndexedFile& previous_file, IndexedFile& current_file)
     vars_removed.push_back(def->def.usr);
   },
     /*onAdded:*/[this, &current_file](IndexedVarDef* def) {
-    vars_added.push_back(QueryableVarDef(current_file.id_cache, *def));
+    QueryableVarDef query(current_file.id_cache, *def);
+    vars_def_update.push_back(query.def);
+    vars_uses.push_back(QueryableVarDef::UsesUpdate(query.def.usr, query.uses));
   },
     /*onFound:*/[this, &previous_file, &current_file](IndexedVarDef* previous_def, IndexedVarDef* current_def) {
     QueryableVarDef::DefUpdate previous_remapped_def = MapIdToUsr(previous_file.id_cache, previous_def->def);
     QueryableVarDef::DefUpdate current_remapped_def = MapIdToUsr(current_file.id_cache, current_def->def);
     if (previous_remapped_def != current_remapped_def)
-      vars_def_changed.push_back(current_remapped_def);
+      vars_def_update.push_back(current_remapped_def);
 
     PROCESS_UPDATE_DIFF(vars_uses, uses, QueryableRange);
   });
@@ -505,28 +521,23 @@ void IndexUpdate::Merge(const IndexUpdate& update) {
     AddRange(&name, update.name);
 
   INDEX_UPDATE_MERGE(files_removed);
-  INDEX_UPDATE_MERGE(files_added);
-  INDEX_UPDATE_MERGE(files_outline);
-  INDEX_UPDATE_MERGE(files_all_symbols);
+  INDEX_UPDATE_MERGE(files_def_update);
 
   INDEX_UPDATE_MERGE(types_removed);
-  INDEX_UPDATE_MERGE(types_added);
-  INDEX_UPDATE_MERGE(types_def_changed);
+  INDEX_UPDATE_MERGE(types_def_update);
   INDEX_UPDATE_MERGE(types_derived);
   INDEX_UPDATE_MERGE(types_instantiations);
   INDEX_UPDATE_MERGE(types_uses);
 
   INDEX_UPDATE_MERGE(funcs_removed);
-  INDEX_UPDATE_MERGE(funcs_added);
-  INDEX_UPDATE_MERGE(funcs_def_changed);
+  INDEX_UPDATE_MERGE(funcs_def_update);
   INDEX_UPDATE_MERGE(funcs_declarations);
   INDEX_UPDATE_MERGE(funcs_derived);
   INDEX_UPDATE_MERGE(funcs_callers);
   INDEX_UPDATE_MERGE(funcs_uses);
 
   INDEX_UPDATE_MERGE(vars_removed);
-  INDEX_UPDATE_MERGE(vars_added);
-  INDEX_UPDATE_MERGE(vars_def_changed);
+  INDEX_UPDATE_MERGE(vars_def_update);
   INDEX_UPDATE_MERGE(vars_uses);
 
 #undef INDEX_UPDATE_MERGE
@@ -552,8 +563,6 @@ void IndexUpdate::Merge(const IndexUpdate& update) {
 
 
 
-
-
 
 
 
@@ -565,107 +574,93 @@ void QueryableDatabase::RemoveUsrs(const std::vector<Usr>& to_remove) {
   // TODO: also remove from qualified_names?
 }
 
-void QueryableDatabase::Import(const std::vector<QueryableFile>& defs) {
-  for (auto& def : defs) {
-    auto it = usr_to_symbol.find(def.file_id);
+void QueryableDatabase::ImportOrUpdate(const std::vector<QueryableFile::DefUpdate>& updates) {
+  for (auto& def : updates) {
+    auto it = usr_to_symbol.find(def.usr);
     if (it == usr_to_symbol.end()) {
-      qualified_names.push_back(def.file_id);
+      qualified_names.push_back(def.usr);
       symbols.push_back(SymbolIdx(SymbolKind::File, files.size()));
-      usr_to_symbol[def.file_id] = SymbolIdx(SymbolKind::File, files.size());
+      usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::File, files.size());
 
-      files.push_back(def);
+      QueryableFile query;
+      query.def = def;
+      files.push_back(query);
     }
     else {
       QueryableFile& existing = files[it->second.idx];
-      // Replace the entire file. We don't ever want to merge files.
-      existing = def;
+      existing.def = def;
     }
   }
 }
 
-void QueryableDatabase::Import(const std::vector<QueryableTypeDef>& defs) {
-  for (auto& def : defs) {
-    auto it = usr_to_symbol.find(def.def.usr);
+void QueryableDatabase::ImportOrUpdate(const std::vector<QueryableTypeDef::DefUpdate>& updates) {
+  for (auto& def : updates) {
+    auto it = usr_to_symbol.find(def.usr);
     if (it == usr_to_symbol.end()) {
-      qualified_names.push_back(def.def.qualified_name);
+      qualified_names.push_back(def.qualified_name);
       symbols.push_back(SymbolIdx(SymbolKind::Type, types.size()));
-      usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Type, types.size());
-      types.push_back(def);
+      usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::Type, types.size());
+
+      QueryableTypeDef query;
+      query.def = def;
+      types.push_back(query);
     }
     else {
       QueryableTypeDef& existing = types[it->second.idx];
-      if (def.def.definition_extent)
-        existing.def = def.def;
-      AddRange(&existing.derived, def.derived);
-      AddRange(&existing.uses, def.uses);
+      if (def.definition_extent)
+        existing.def = def;
     }
   }
 }
 
-void QueryableDatabase::Import(const std::vector<QueryableFuncDef>& defs) {
-  for (auto& def : defs) {
-    auto it = usr_to_symbol.find(def.def.usr);
+void QueryableDatabase::ImportOrUpdate(const std::vector<QueryableFuncDef::DefUpdate>& updates) {
+  for (auto& def : updates) {
+    auto it = usr_to_symbol.find(def.usr);
     if (it == usr_to_symbol.end()) {
-      qualified_names.push_back(def.def.qualified_name);
+      qualified_names.push_back(def.qualified_name);
       symbols.push_back(SymbolIdx(SymbolKind::Func, funcs.size()));
-      usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Func, funcs.size());
-      funcs.push_back(def);
+      usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::Func, funcs.size());
+
+      QueryableFuncDef query;
+      query.def = def;
+      funcs.push_back(query);
     }
     else {
       QueryableFuncDef& existing = funcs[it->second.idx];
-      if (def.def.definition_extent)
-        existing.def = def.def;
-      AddRange(&existing.callers, def.callers);
-      AddRange(&existing.declarations, def.declarations);
-      AddRange(&existing.derived, def.derived);
-      AddRange(&existing.uses, def.uses);
+      if (def.definition_extent)
+        existing.def = def;
     }
   }
 }
 
-void QueryableDatabase::Import(const std::vector<QueryableVarDef>& defs) {
-  for (auto& def : defs) {
-    auto it = usr_to_symbol.find(def.def.usr);
+void QueryableDatabase::ImportOrUpdate(const std::vector<QueryableVarDef::DefUpdate>& updates) {
+  for (auto& def : updates) {
+    auto it = usr_to_symbol.find(def.usr);
     if (it == usr_to_symbol.end()) {
-      qualified_names.push_back(def.def.qualified_name);
+      qualified_names.push_back(def.qualified_name);
       symbols.push_back(SymbolIdx(SymbolKind::Var, vars.size()));
-      usr_to_symbol[def.def.usr] = SymbolIdx(SymbolKind::Var, vars.size());
-      vars.push_back(def);
+      usr_to_symbol[def.usr] = SymbolIdx(SymbolKind::Var, vars.size());
+
+      QueryableVarDef query;
+      query.def = def;
+      vars.push_back(query);
     }
     else {
       QueryableVarDef& existing = vars[it->second.idx];
-      if (def.def.definition_extent)
-        existing.def = def.def;
-      AddRange(&existing.uses, def.uses);
+      if (def.definition_extent)
+        existing.def = def;
     }
   }
 }
 
-void QueryableDatabase::Update(const std::vector<QueryableTypeDef::DefUpdate>& updates) {
-  for (auto& def : updates) {
-    SymbolIdx idx = usr_to_symbol[def.usr];
-    assert(idx.kind == SymbolKind::Type);
-    types[idx.idx].def = def;
-  }
-}
-
-void QueryableDatabase::Update(const std::vector<QueryableFuncDef::DefUpdate>& updates) {
-  for (auto& def : updates) {
-    SymbolIdx idx = usr_to_symbol[def.usr];
-    assert(idx.kind == SymbolKind::Func);
-    funcs[idx.idx].def = def;
-  }
-}
-
-void QueryableDatabase::Update(const std::vector<QueryableVarDef::DefUpdate>& updates) {
-  for (auto& def : updates) {
-    SymbolIdx idx = usr_to_symbol[def.usr];
-    assert(idx.kind == SymbolKind::Var);
-    vars[idx.idx].def = def;
-  }
-}
-
 void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
+#define HANDLE_REPLACEMENT(update_var_name, def_var_name, storage_name) \
+  for (auto replacement_update : update->update_var_name) { \
+    SymbolIdx index = usr_to_symbol[replacement_update.usr]; \
+    auto* def = &storage_name[index.idx]; \
+    def->def_var_name = replacement_update.entries; \
+  }
+
 #define HANDLE_MERGEABLE(update_var_name, def_var_name, storage_name) \
   for (auto merge_update : update->update_var_name) { \
     SymbolIdx index = usr_to_symbol[merge_update.usr]; \
@@ -675,26 +670,22 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
   }
 
   RemoveUsrs(update->files_removed);
-  Import(update->files_added);
-  HANDLE_MERGEABLE(files_outline, outline, files);
+  ImportOrUpdate(update->files_def_update);
 
   RemoveUsrs(update->types_removed);
-  Import(update->types_added);
-  Update(update->types_def_changed);
+  ImportOrUpdate(update->types_def_update);
   HANDLE_MERGEABLE(types_derived, derived, types);
   HANDLE_MERGEABLE(types_uses, uses, types);
 
   RemoveUsrs(update->funcs_removed);
-  Import(update->funcs_added);
-  Update(update->funcs_def_changed);
+  ImportOrUpdate(update->funcs_def_update);
   HANDLE_MERGEABLE(funcs_declarations, declarations, funcs);
   HANDLE_MERGEABLE(funcs_derived, derived, funcs);
   HANDLE_MERGEABLE(funcs_callers, callers, funcs);
   HANDLE_MERGEABLE(funcs_uses, uses, funcs);
 
   RemoveUsrs(update->vars_removed);
-  Import(update->vars_added);
-  Update(update->vars_def_changed);
+  ImportOrUpdate(update->vars_def_update);
   HANDLE_MERGEABLE(vars_uses, uses, vars);
 
 #undef HANDLE_MERGEABLE
@@ -708,30 +699,6 @@ void QueryableDatabase::ApplyIndexUpdate(IndexUpdate* update) {
 
 
 
-
-int main233(int argc, char** argv) {
-  IndexedFile indexed_file_a = Parse("full_tests/index_delta/a_v0.cc", {});
-  std::cerr << indexed_file_a.ToString() << std::endl;
-
-  std::cerr << std::endl;
-  IndexedFile indexed_file_b = Parse("full_tests/index_delta/a_v1.cc", {});
-  std::cerr << indexed_file_b.ToString() << std::endl;
-  // TODO: We don't need to do ID remapping when computting a diff. Well, we need to do it for the IndexUpdate.
-  IndexUpdate import(indexed_file_a);
-  /*
-  dest_ids.Import(indexed_file_b.file_db, indexed_file_b.id_cache);
-  IndexUpdate update = ComputeDiff(indexed_file_a, indexed_file_b);
-  */
-  QueryableDatabase db;
-  db.ApplyIndexUpdate(&import);
-  //db.ApplyIndexUpdate(&update);
-
-  return 0;
-}
-
-
-
-
 // TODO: Idea: when indexing and joining to the main db, allow many dbs that
 //             are joined to. So that way even if the main db is busy we can
 //             still be joining. Joining the partially joined db to the main
diff --git a/src/query.h b/src/query.h
index ed4d8f2b..224199cb 100644
--- a/src/query.h
+++ b/src/query.h
@@ -5,6 +5,9 @@
 
 using Usr = std::string;
 
+// TODO: in types, store refs separately from irefs. Then we can drop
+// 'interesting' from location when that is cleaned up.
+
 // TODO: Switch over to QueryableLocation. Figure out if there is
 //       a good way to get the indexer using it. I don't think so
 //       since we may discover more files while indexing a file.
@@ -42,7 +45,6 @@ struct QueryableLocation {
       interesting < o.interesting;
   }
 };
-MAKE_REFLECT_STRUCT(QueryableLocation, path, line, column, interesting);
 
 struct QueryableRange {
   QueryableLocation start;
@@ -64,7 +66,6 @@ struct QueryableRange {
     return start < o.start;
   }
 };
-MAKE_REFLECT_STRUCT(QueryableRange, start, end);
 
 struct UsrRef {
   Usr usr;
@@ -81,7 +82,6 @@ struct UsrRef {
     return usr < other.usr && loc.start < other.loc.start;
   }
 };
-MAKE_REFLECT_STRUCT(UsrRef, usr, loc);
 
 // There are two sources of reindex updates: the (single) definition of a
 // symbol has changed, or one of many users of the symbol has changed.
@@ -101,34 +101,39 @@ struct MergeableUpdate {
   std::vector<TValue> to_add;
   std::vector<TValue> to_remove;
 
-  MergeableUpdate() {} // For reflection
+  MergeableUpdate(Usr usr, const std::vector<TValue>& to_add)
+    : usr(usr), to_add(to_add) {}
   MergeableUpdate(Usr usr, const std::vector<TValue>& to_add, const std::vector<TValue>& to_remove)
     : usr(usr), to_add(to_add), to_remove(to_remove) {}
 };
 
-template<typename TVisitor, typename TValue>
-void Reflect(TVisitor& visitor, MergeableUpdate<TValue>& value) {
-  REFLECT_MEMBER_START();
-  REFLECT_MEMBER(usr);
-  REFLECT_MEMBER(to_add);
-  REFLECT_MEMBER(to_remove);
-  REFLECT_MEMBER_END();
-}
+template<typename TValue>
+struct ReplacementUpdate {
+  // The type/func/var whose entries are being replaced.
+  Usr usr;
+  // The complete new set of entries (named |entries| to match the constructor).
+  std::vector<TValue> entries;
+
+  ReplacementUpdate(Usr usr, const std::vector<TValue>& entries)
+    : usr(usr), entries(entries) {}
+};
 
 struct QueryableFile {
-  using OutlineUpdate = MergeableUpdate<UsrRef>;
-  using AllSymboslUpdate = MergeableUpdate<UsrRef>;
+  struct Def {
+    Usr usr;
+    // Outline of the file (ie, for code lens).
+    std::vector<UsrRef> outline;
+    // Every symbol found in the file (ie, for goto definition)
+    std::vector<UsrRef> all_symbols;
+  };
 
-  Usr file_id;
-  // Outline of the file (ie, for code lens).
-  std::vector<UsrRef> outline;
-  // Every symbol found in the file (ie, for goto definition)
-  std::vector<UsrRef> all_symbols;
+  using DefUpdate = Def;
 
-  QueryableFile() {} // For serialization.
+  DefUpdate def;
+
+  QueryableFile() {}
   QueryableFile(const IndexedFile& indexed);
 };
-MAKE_REFLECT_STRUCT(QueryableFile, file_id, outline, all_symbols);
 
 struct QueryableTypeDef {
   using DefUpdate = TypeDefDefinitionData<Usr, Usr, Usr, QueryableRange, QueryableRange>;
@@ -141,10 +146,9 @@ struct QueryableTypeDef {
   std::vector<Usr> instantiations;
   std::vector<QueryableRange> uses;
 
-  QueryableTypeDef() : def("") {} // For serialization.
+  QueryableTypeDef() : def("") {}
   QueryableTypeDef(IdCache& id_cache, const IndexedTypeDef& indexed);
 };
-MAKE_REFLECT_STRUCT(QueryableTypeDef, def, derived, instantiations, uses);
 
 struct QueryableFuncDef {
   using DefUpdate = FuncDefDefinitionData<Usr, Usr, Usr, UsrRef, QueryableRange, QueryableRange>;
@@ -159,10 +163,9 @@ struct QueryableFuncDef {
   std::vector<UsrRef> callers;
   std::vector<QueryableRange> uses;
 
-  QueryableFuncDef() : def("") {} // For serialization.
+  QueryableFuncDef() : def("") {}
   QueryableFuncDef(IdCache& id_cache, const IndexedFuncDef& indexed);
 };
-MAKE_REFLECT_STRUCT(QueryableFuncDef, def, declarations, derived, callers, uses);
 
 struct QueryableVarDef {
   using DefUpdate = VarDefDefinitionData<Usr, Usr, Usr, QueryableRange, QueryableRange>;
@@ -171,10 +174,9 @@ struct QueryableVarDef {
   DefUpdate def;
   std::vector<QueryableRange> uses;
 
-  QueryableVarDef() : def("") {} // For serialization.
+  QueryableVarDef() : def("") {}
   QueryableVarDef(IdCache& id_cache, const IndexedVarDef& indexed);
 };
-MAKE_REFLECT_STRUCT(QueryableVarDef, def, uses);
 
 enum class SymbolKind { Invalid, File, Type, Func, Var };
 struct SymbolIdx {
@@ -186,45 +188,29 @@ struct SymbolIdx {
 };
 
 
-// TODO: We need to control Usr, std::vector allocation to make sure it happens on shmem. That or we
-// make IndexUpdate a POD type.
-// TODO: Instead of all of that work above, we pipe the IndexUpdate across processes as JSON.
-//       We need to verify we need multiple processes first. Maybe libclang can run in a single process...
-// TODO: Compute IndexUpdates in main process, off the blocking thread. Use separate process for running
-//       libclang. Solves memory worries.
-// TODO: Instead of passing to/from json, we can probably bass the IndexedFile type almost directly as
-// a raw memory dump - the type has almost zero pointers inside of it. We could do a little bit of fixup
-// so that passing from a separate process to the main db is really fast (no need to go through JSON).
-/*
-namespace foo2 {
-  using Usr = size_t;
-  struct UsrTable {
-    size_t allocated;
-    size_t used;
-    const char* usrs[];
-  };
-}
-*/
-
 struct IndexUpdate {
+  // Creates a new IndexUpdate that will import |file|.
+  static IndexUpdate CreateImport(IndexedFile& file);
+  static IndexUpdate CreateDelta(IndexedFile& current, IndexedFile& updated);
+
+  // Merges |update| into this update. Combining updates can reduce overhead,
+  // and it allows index update work to be parallelized.
+  void Merge(const IndexUpdate& update);
+
   // File updates.
   std::vector<Usr> files_removed;
-  std::vector<QueryableFile> files_added;
-  std::vector<QueryableFile::OutlineUpdate> files_outline;
-  std::vector<QueryableFile::AllSymboslUpdate> files_all_symbols;
+  std::vector<QueryableFile::DefUpdate> files_def_update;
 
   // Type updates.
   std::vector<Usr> types_removed;
-  std::vector<QueryableTypeDef> types_added;
-  std::vector<QueryableTypeDef::DefUpdate> types_def_changed;
+  std::vector<QueryableTypeDef::DefUpdate> types_def_update;
   std::vector<QueryableTypeDef::DerivedUpdate> types_derived;
   std::vector<QueryableTypeDef::InstantiationsUpdate> types_instantiations;
   std::vector<QueryableTypeDef::UsesUpdate> types_uses;
 
   // Function updates.
   std::vector<Usr> funcs_removed;
-  std::vector<QueryableFuncDef> funcs_added;
-  std::vector<QueryableFuncDef::DefUpdate> funcs_def_changed;
+  std::vector<QueryableFuncDef::DefUpdate> funcs_def_update;
   std::vector<QueryableFuncDef::DeclarationsUpdate> funcs_declarations;
   std::vector<QueryableFuncDef::DerivedUpdate> funcs_derived;
   std::vector<QueryableFuncDef::CallersUpdate> funcs_callers;
@@ -232,44 +218,15 @@ struct IndexUpdate {
 
   // Variable updates.
   std::vector<Usr> vars_removed;
-  std::vector<QueryableVarDef> vars_added;
-  std::vector<QueryableVarDef::DefUpdate> vars_def_changed;
+  std::vector<QueryableVarDef::DefUpdate> vars_def_update;
   std::vector<QueryableVarDef::UsesUpdate> vars_uses;
 
-  IndexUpdate() {}
-
-  // Creates a new IndexUpdate that will import |file|.
-  explicit IndexUpdate(IndexedFile& file);
-
+ private:
   // Creates an index update assuming that |previous| is already
   // in the index, so only the delta between |previous| and |current|
   // will be applied.
   IndexUpdate(IndexedFile& previous, IndexedFile& current);
-
-  // Merges the contents of |update| into this IndexUpdate instance.
-  void Merge(const IndexUpdate& update);
 };
-MAKE_REFLECT_STRUCT(IndexUpdate,
-  files_removed,
-  files_added,
-  files_outline,
-  files_all_symbols,
-  types_removed,
-  types_added,
-  types_def_changed,
-  types_derived,
-  types_uses,
-  funcs_removed,
-  funcs_added,
-  funcs_def_changed,
-  funcs_declarations,
-  funcs_derived,
-  funcs_callers,
-  funcs_uses,
-  vars_removed,
-  vars_added,
-  vars_def_changed,
-  vars_uses);
 
 
 // The query database is heavily optimized for fast queries. It is stored
@@ -293,13 +250,10 @@ struct QueryableDatabase {
   void ApplyIndexUpdate(IndexUpdate* update);
 
   void RemoveUsrs(const std::vector<Usr>& to_remove);
-  void Import(const std::vector<QueryableFile>& defs);
-  void Import(const std::vector<QueryableTypeDef>& defs);
-  void Import(const std::vector<QueryableFuncDef>& defs);
-  void Import(const std::vector<QueryableVarDef>& defs);
-  void Update(const std::vector<QueryableTypeDef::DefUpdate>& updates);
-  void Update(const std::vector<QueryableFuncDef::DefUpdate>& updates);
-  void Update(const std::vector<QueryableVarDef::DefUpdate>& updates);
+  void ImportOrUpdate(const std::vector<QueryableFile::DefUpdate>& updates);
+  void ImportOrUpdate(const std::vector<QueryableTypeDef::DefUpdate>& updates);
+  void ImportOrUpdate(const std::vector<QueryableFuncDef::DefUpdate>& updates);
+  void ImportOrUpdate(const std::vector<QueryableVarDef::DefUpdate>& updates);
 };
 
 
diff --git a/src/string_db.cc b/src/string_db.cc
new file mode 100644
index 00000000..0abb2720
--- /dev/null
+++ b/src/string_db.cc
@@ -0,0 +1,40 @@
+#if false
+#include "string_db.h"
+
+StringStorage::~StringStorage() {
+  if (!owns_str)
+    return;  // Borrowed buffers are never released here.
+  free(const_cast<char*>(str));
+  str = nullptr;
+  owns_str = false;
+}
+
+StringStorage StringStorage::CreateUnowned(const char* data, size_t len) {
+  StringStorage unowned;  // Borrows |data|; nothing is copied here.
+  unowned.owns_str = false;
+  unowned.len = len;
+  unowned.str = data;
+  return unowned;
+}
+
+void StringStorage::Copy() const {
+  // Duplicate exactly |len| bytes without relying on a terminator at str[len].
+  char* new_str = (char*)malloc(len + 1);
+  memcpy(new_str, str, len);
+  new_str[len] = '\0';  // Keep the owned copy usable with strcmp().
+  str = new_str;
+  owns_str = true;
+}
+
+bool StringStorage::operator==(const StringStorage& that) const {  // Length-bounded compare.
+  return len == that.len && (len == 0 || memcmp(str, that.str, len) == 0);
+}
+
+bool StringStorage::operator!=(const StringStorage& that) const {
+  return !operator==(that);  // Defined purely as the negation of operator==.
+}
+
+bool StringStorage::operator<(const StringStorage& that) const {
+  return strcmp(str, that.str) < 0;  // strcmp() yields <0 / 0 / >0, not a bool.
+}
+#endif
\ No newline at end of file
diff --git a/src/string_db.h b/src/string_db.h
new file mode 100644
index 00000000..14f1d403
--- /dev/null
+++ b/src/string_db.h
@@ -0,0 +1,154 @@
+#if false
+#pragma once
+
+#include "buffer.h"
+#include "utils.h"
+
+#include <clang-c/Index.h>
+
+#include <cassert>
+#include <cstring>
+#include <cstdint>
+#include <functional>
+#include <unordered_set>
+#include <string>
+
+
+template <typename T>
+struct StringView {  // Pointer + length pair; |T| is not used by any member.
+  StringView();  // Empty view: len == 0, str == nullptr.
+  StringView(const StringView& that);
+  StringView(const char* str, size_t len);  // Stores |str| as-is; no copy is made.
+
+  bool operator==(const StringView& that) const;
+  bool operator!=(const StringView& that) const;
+  bool operator<(const StringView& that) const;
+  StringView& operator=(const StringView& that);
+
+  std::string AsString() const;  // Builds a std::string from |len| bytes of |str|.
+
+  size_t len;
+  const char* str;
+};
+// See MAKE_HASHABLE for QueryUsr, IndexUsr
+
+struct StringStorage {
+  ~StringStorage();  // Frees |str| only when |owns_str| is set. NOTE(review): copies are implicit, so copying an owning instance would free twice.
+
+  static StringStorage CreateUnowned(const char* str, size_t len);  // Borrows |str|.
+  void Copy() const;  // Replaces |str| with a malloc'd copy and sets |owns_str|.
+
+  bool operator==(const StringStorage& that) const;
+  bool operator!=(const StringStorage& that) const;
+  bool operator<(const StringStorage& that) const;
+
+  size_t len;
+  mutable const char* str;  // mutable so the const Copy() can swap the buffer.
+  mutable bool owns_str;
+};
+MAKE_HASHABLE(StringStorage, t.len, t.str);  // NOTE(review): presumably hashes the |str| pointer value, not the characters -- confirm in utils.h.
+
+template <typename T>
+struct StringDb {  // Hands out StringView<T>s that point at strings held in |data_|.
+  StringView<T> GetString(const char* str, size_t len);
+  StringView<T> GetString(const std::string& str);
+  StringView<T> GetString(CXString cx_string);  // Disposes |cx_string|.
+
+  std::unordered_set<StringStorage> data_;
+};
+
+
+
+
+
+
+
+
+struct _DummyQueryType {};  // Empty tag type used as a template argument below. NOTE(review): leading underscore + capital is a reserved identifier.
+struct _DummyIndexType {};  // Empty tag type used as a template argument below.
+
+using QueryUsr = StringView<_DummyQueryType>;
+using QueryStringDb = StringDb<_DummyQueryType>;
+
+using IndexUsr = StringView<_DummyIndexType>;  // A different type from QueryUsr.
+using IndexStringDb = StringDb<_DummyIndexType>;
+
+
+
+// TODO: See if we can move this next to StringView definition.
+MAKE_HASHABLE(QueryUsr, t.len, t.str);
+MAKE_HASHABLE(IndexUsr, t.len, t.str);
+
+
+
+
+
+
+
+
+
+
+
+
+
+template <typename T>
+StringView<T>::StringView() : len{0}, str{nullptr} {}  // Empty view.
+
+template <typename T>
+StringView<T>::StringView(const StringView& that) : len{that.len}, str{that.str} {}  // Shallow copy.
+
+template <typename T>
+StringView<T>::StringView(const char* str, size_t len) : len{len}, str{str} {}  // Keeps the pointer; no copy.
+
+template <typename T>
+bool StringView<T>::operator==(const StringView& that) const {  // Length-bounded compare.
+  return len == that.len && (len == 0 || memcmp(str, that.str, len) == 0);
+}
+
+template <typename T>
+bool StringView<T>::operator!=(const StringView& that) const {
+  return !this->operator==(that);  // Defined purely as the negation of operator==.
+}
+
+template <typename T>
+bool StringView<T>::operator<(const StringView& that) const {
+  return strcmp(str, that.str) < 0;  // strcmp() yields <0 / 0 / >0, not a bool.
+}
+
+template <typename T>
+StringView<T>& StringView<T>::operator=(const StringView& that) {
+  len = that.len;  // Shallow copy: both views refer to the same characters.
+  str = that.str;
+  return *this;
+}
+
+template <typename T>
+std::string StringView<T>::AsString() const {
+  return std::string(str, str + len);  // Materializes the [str, str + len) range.
+}
+
+template <typename T>
+StringView<T> StringDb<T>::GetString(const char* str, size_t len) {
+  StringStorage lookup = StringStorage::CreateUnowned(str, len);
+  // Insert the borrowed entry first; it only takes its own copy when it is new.
+  auto it = data_.insert(lookup);
+  if (it.second)
+    it.first->Copy();
+  // NOTE(review): Copy() rewrites |str| of a stored key -- confirm the hash tolerates that.
+  return StringView<T>(it.first->str, it.first->len);
+}
+
+template <typename T>
+StringView<T> StringDb<T>::GetString(const std::string& str) {
+  return GetString(str.c_str(), str.size());  // A one-argument call would re-enter this overload.
+}
+
+template <typename T>
+StringView<T> StringDb<T>::GetString(CXString cx_string) {
+  assert(cx_string.data);
+  const char* str = clang_getCString(cx_string);
+  StringView<T> result = GetString(str, strlen(str));  // Look up before |cx_string| is disposed.
+  clang_disposeString(cx_string);
+  return result;
+}
+#endif
\ No newline at end of file