wip

2025-12-16 04:03:25 +00:00 · 2017-02-25 15:59:09 -08:00 · 2017-02-25 15:59:09 -08:00 · b7d9a0f815
commit b7d9a0f815
parent f3f72a0dfa
9 changed files with 1096 additions and 405 deletions
--- a/command_line.cc
+++ b/command_line.cc
@ -0,0 +1,184 @@
+#include <iostream>
+#include <string>
+#include <unordered_map>
+
+#include "compilation_database_loader.h"
+#include "indexer.h"
+#include "query.h"
+
+// Parses |content| into a PreferredSymbolLocation.
+//
+// Accepts exactly "declaration" or "definition". On a match |*obj| is written
+// and true is returned; otherwise |*obj| is left untouched and false is
+// returned.
+bool ParsePreferredSymbolLocation(const std::string& content, PreferredSymbolLocation* obj) {
+  // Compare against the plain spelling. The previous macro stringized an
+  // already-quoted literal, so the comparison text contained the quote
+  // characters themselves and a normal argument could never match.
+  if (content == "declaration") {
+    *obj = PreferredSymbolLocation::Declaration;
+    return true;
+  }
+  if (content == "definition") {
+    *obj = PreferredSymbolLocation::Definition;
+    return true;
+  }
+
+  return false;
+}
+
+// Parses |content| into a Command.
+//
+// |content| must be one of the command names listed in the table below. On a
+// match |*obj| is written and true is returned; otherwise |*obj| is left
+// untouched and false is returned.
+bool ParseCommand(const std::string& content, Command* obj) {
+  // Plain name -> value table. The previous macro stringized an
+  // already-quoted literal, so the comparison text contained the quote
+  // characters themselves and a normal argument could never match.
+  static const struct {
+    const char* name;
+    Command value;
+  } kCommands[] = {
+    { "callees", Command::Callees },
+    { "callers", Command::Callers },
+    { "find-all-usages", Command::FindAllUsages },
+    { "find-interesting-usages", Command::FindInterestingUsages },
+    { "goto-referenced", Command::GotoReferenced },
+    { "hierarchy", Command::Hierarchy },
+    { "outline", Command::Outline },
+    { "search", Command::Search },
+  };
+
+  for (const auto& entry : kCommands) {
+    if (content == entry.name) {
+      *obj = entry.value;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
+// Collects command-line switches into a switch -> value map.
+//
+// Every argument that starts with '-' is recorded as a switch with an empty
+// value. An argument that does not start with '-' becomes the value of the
+// switch immediately before it; if there is no such switch an error is
+// printed and the process exits with status 1. argv[0] is skipped.
+std::unordered_map<std::string, std::string> ParseOptions(int argc, char** argv) {
+  std::unordered_map<std::string, std::string> parsed;
+  std::string pending_switch;
+
+  for (int index = 1; index < argc; ++index) {
+    const std::string token = argv[index];
+
+    if (token[0] == '-') {
+      // A new switch; it has no value until the next argument says otherwise.
+      parsed[token] = "";
+      pending_switch = token;
+      continue;
+    }
+
+    if (pending_switch.empty()) {
+      std::cerr << "Invalid arguments; switches must start with -" << std::endl;
+      exit(1);
+    }
+
+    // Attach the value to the waiting switch; a switch takes one value only.
+    parsed[pending_switch] = token;
+    pending_switch.clear();
+  }
+
+  return parsed;
+}
+
+// Returns true when |option| is a key of |options|, whatever its value.
+bool HasOption(const std::unordered_map<std::string, std::string>& options, const std::string& option) {
+  const bool present = options.count(option) != 0;
+  return present;
+}
+
+// Command-line entry point (work in progress).
+//
+// Parses the switches in |argv| and dispatches on them:
+//   * no arguments or --help  -> prints general help and exits with 0.
+//   * --help-commands         -> prints the query command list and exits with 0.
+//   * --project               -> loads the compilation entries, prints each
+//                                filename, waits for a key press, exits with 0.
+//   * --command               -> validates the command name; Fail() is called
+//                                for an unknown one.
+// Anything else prints "Invalid arguments" and exits with 1.
+int main2(int argc, char** argv) {
+  std::unordered_map<std::string, std::string> options = ParseOptions(argc, argv);
+
+  if (argc == 1 || HasOption(options, "--help")) {
+    std::cout << R"help(clang-indexer help:
+
+  General:
+    --help        Print this help information.
+    --help-commands
+                  Print all available query commands.
+    --project     Path to compile_commands.json. Needed for the server, and
+                  optionally by clients if there are multiple servers running.
+    --print-config
+                  Emit all configuration data this executable is using.
+    
+
+  Server:
+    --server      If present, this binary will run in server mode. The binary
+                  will not return until killed or an exit is requested. The
+                  server computes and caches an index of the entire program
+                  which is then queried by short-lived client processes. A
+                  client is created by running this binary with a --command
+                  flag.
+    --cache-dir   Directory to cache the index and other useful information. If
+                  a previous cache is present, the database will try to reuse
+                  it. If this flag is not present, the database will be
+                  in-memory only.
+    --threads     Number of threads to use for indexing and querying tasks.
+                  This value is optional; a good estimate is computed by
+                  default.
+
+                  
+  Client:
+    --command     Execute a query command against the index. See
+                  --help-commands for a listing of valid commands and a
+                  description of what they do. Presence of this flag indicates
+                  that the indexer is in client mode; this flag is mutually
+                  exclusive with --server.
+    --location    Location of the query. Some commands require only a file,
+                  other require a line and column as well. Format is
+                  filename[:line:column]. For example, "foobar.cc" and
+                  "foobar.cc:1:10" are valid inputs.
+    --preferred-symbol-location
+                  When looking up symbols, try to return either the
+                  'declaration' or the 'definition'. Defaults to 'definition'.
+)help";
+    exit(0);
+  }
+
+  if (HasOption(options, "--help-commands")) {
+    std::cout << R"(Available commands:
+
+  callees:
+  callers:
+    Emit all functions (with location) that this function calls ("callees") or
+    that call this function ("callers"). Requires a location.
+
+  find-all-usages:
+    Emit every usage of the given symbol. This is intended to support a rename
+    refactoring. This output contains many uninteresting usages of symbols;
+    prefer find-interesting-usages. Requires a location.
+
+  find-interesting-usages:
+    Emit only usages of the given symbol which are semantically interesting.
+    Requires a location.
+
+  goto-referenced:
+    Find an associated reference (either definition or declaration) for the
+    given symbol. Requires a location.
+
+  hierarchy:
+    List the type hierarchy (ie, inherited and derived members) for the given
+    method or type. Requires a location.
+
+  outline:
+    Emit a file outline, listing all of the symbols in the file.
+
+  search:
+    Search for a symbol by name.
+)";
+    exit(0);
+  }
+
+  if (HasOption(options, "--project")) {
+    std::vector<CompilationEntry> entries = LoadCompilationEntriesFromDirectory(options["--project"]);
+
+
+    // Indexing itself is still disabled; for now only the filenames are
+    // listed.
+    std::vector<IndexedFile> dbs;
+    for (const CompilationEntry& entry : entries) {
+      std::cout << "Parsing " << entry.filename << std::endl;
+      //IndexedFile db = Parse(2, entry.filename, entry.args);
+      //dbs.emplace_back(db);
+      //std::cout << db.ToString() << std::endl << std::endl;
+    }
+
+    // Keep the console open until a key is pressed.
+    std::cin.get();
+    exit(0);
+  }
+
+  if (HasOption(options, "--command")) {
+    Command command;
+    if (!ParseCommand(options["--command"], &command))
+      Fail("Unknown command \"" + options["--command"] + "\"; see --help-commands");
+
+    // TODO: executing the command is not implemented yet, so a valid command
+    //       currently falls through to the "Invalid arguments" exit below.
+
+  }
+
+  std::cout << "Invalid arguments. Try --help.";
+  exit(1);
+  return 0;
+}
--- a/compilation_database_loader.h
+++ b/compilation_database_loader.h
@ -9,4 +9,8 @@ struct CompilationEntry {
  std::vector<std::string> args;
 };

+// TODO: Add support for loading when there is no compile_commands.json
+//       file. We will just recursively scan the directory and support a global
+//       set of defines and include directories.
+
 std::vector<CompilationEntry> LoadCompilationEntriesFromDirectory(const std::string& project_directory);
--- a/full_tests/index_delta/a_v0.cc
+++ b/full_tests/index_delta/a_v0.cc
@ -0,0 +1,7 @@
+void called();
+
+void caller() {
+
+}
+
+void missing() {}
--- a/full_tests/index_delta/a_v1.cc
+++ b/full_tests/index_delta/a_v1.cc
@ -0,0 +1,7 @@
+void called();
+
+void caller() {
+  called();
+}
+
+void added() {}
--- a/function_output_iterator.hpp
+++ b/function_output_iterator.hpp
@ -0,0 +1,62 @@
+// (C) Copyright Jeremy Siek 2001.
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// Revision History:
+
+// 27 Feb 2001   Jeremy Siek
+//      Initial checkin.
+
+#ifndef BOOST_FUNCTION_OUTPUT_ITERATOR_HPP
+#define BOOST_FUNCTION_OUTPUT_ITERATOR_HPP
+
+#include <iterator>
+
+namespace boost {
+namespace iterators {
+
+  // Output iterator adaptor: every value assigned through the dereferenced
+  // iterator is passed to a stored UnaryFunction instead of being written
+  // into a container.
+  template <class UnaryFunction>
+  class function_output_iterator {
+    typedef function_output_iterator self;
+  public:
+    // Iterator traits; every type other than the category is void.
+    typedef std::output_iterator_tag iterator_category;
+    typedef void                value_type;
+    typedef void                difference_type;
+    typedef void                pointer;
+    typedef void                reference;
+
+    // Leaves the stored function default-initialized.
+    explicit function_output_iterator() {}
+
+    // Stores a copy of |f|.
+    explicit function_output_iterator(const UnaryFunction& f)
+      : m_f(f) {}
+
+    // Object returned by operator*. Assigning any value to it calls the
+    // referenced function with that value.
+    struct output_proxy {
+      output_proxy(UnaryFunction& f) : m_f(f) { }
+      template <class T> output_proxy& operator=(const T& value) {
+        m_f(value);
+        return *this;
+      }
+      // Reference to the function stored in the owning iterator.
+      UnaryFunction& m_f;
+    };
+    // Returns a proxy bound to the stored function.
+    output_proxy operator*() { return output_proxy(m_f); }
+    // Incrementing does nothing; both forms return this same iterator.
+    self& operator++() { return *this; }
+    self& operator++(int) { return *this; }
+  private:
+    // The function invoked for every assigned value.
+    UnaryFunction m_f;
+  };
+
+  // Helper that deduces UnaryFunction and returns a function_output_iterator
+  // wrapping a copy of |f|. When no argument is given, a default-constructed
+  // UnaryFunction is used.
+  template <class UnaryFunction>
+  inline function_output_iterator<UnaryFunction>
+  make_function_output_iterator(const UnaryFunction& f = UnaryFunction()) {
+    return function_output_iterator<UnaryFunction>(f);
+  }
+
+} // namespace iterators
+
+using iterators::function_output_iterator;
+using iterators::make_function_output_iterator;
+
+} // namespace boost
+
+#endif // BOOST_FUNCTION_OUTPUT_ITERATOR_HPP
--- a/indexer.cpp
+++ b/indexer.cpp
@ -2,37 +2,47 @@

 #include "serializer.h"

-IndexedFile::IndexedFile() {}
+// Creates an index that uses the given |usr_to_id| resolver and |file_db|.
+// Only the pointers are stored here.
+IndexedFile::IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db)
+  : usr_to_id(usr_to_id), file_db(file_db) {
+
+  // Preallocate any existing resolved ids.
+  // NOTE(review): entries are appended in unordered_map iteration order,
+  // while Resolve() indexes these vectors by id.id. Unless the iteration
+  // order happens to match the id order, an already-resolved id will land in
+  // a different slot than the one Resolve() reads -- confirm, and consider
+  // inserting in id order.
+  for (const auto& entry : usr_to_id->usr_to_type_id)
+    types.push_back(IndexedTypeDef(entry.second, entry.first));
+  for (const auto& entry : usr_to_id->usr_to_func_id)
+    funcs.push_back(IndexedFuncDef(entry.second, entry.first));
+  for (const auto& entry : usr_to_id->usr_to_var_id)
+    vars.push_back(IndexedVarDef(entry.second, entry.first));
+}

 // TODO: Optimize for const char*?
 TypeId IndexedFile::ToTypeId(const std::string& usr) {
-  auto it = usr_to_type_id.find(usr);
-  if (it != usr_to_type_id.end())
+  auto it = usr_to_id->usr_to_type_id.find(usr);
+  if (it != usr_to_id->usr_to_type_id.end())
    return it->second;

-  TypeId id(types.size());
+  TypeId id(usr_to_id->group, types.size());
  types.push_back(IndexedTypeDef(id, usr));
-  usr_to_type_id[usr] = id;
+  usr_to_id->usr_to_type_id[usr] = id;
  return id;
 }
 FuncId IndexedFile::ToFuncId(const std::string& usr) {
-  auto it = usr_to_func_id.find(usr);
-  if (it != usr_to_func_id.end())
+  auto it = usr_to_id->usr_to_func_id.find(usr);
+  if (it != usr_to_id->usr_to_func_id.end())
    return it->second;

-  FuncId id(funcs.size());
+  FuncId id(usr_to_id->group, funcs.size());
  funcs.push_back(IndexedFuncDef(id, usr));
-  usr_to_func_id[usr] = id;
+  usr_to_id->usr_to_func_id[usr] = id;
  return id;
 }
 VarId IndexedFile::ToVarId(const std::string& usr) {
-  auto it = usr_to_var_id.find(usr);
-  if (it != usr_to_var_id.end())
+  auto it = usr_to_id->usr_to_var_id.find(usr);
+  if (it != usr_to_id->usr_to_var_id.end())
    return it->second;

-  VarId id(vars.size());
+  VarId id(usr_to_id->group, vars.size());
  vars.push_back(IndexedVarDef(id, usr));
-  usr_to_var_id[usr] = id;
+  usr_to_id->usr_to_var_id[usr] = id;
  return id;
 }

@ -50,13 +60,13 @@ VarId IndexedFile::ToVarId(const CXCursor& cursor) {


 IndexedTypeDef* IndexedFile::Resolve(TypeId id) {
-  return &types[id.local_id];
+  return &types[id.id];
 }
 IndexedFuncDef* IndexedFile::Resolve(FuncId id) {
-  return &funcs[id.local_id];
+  return &funcs[id.id];
 }
 IndexedVarDef* IndexedFile::Resolve(VarId id) {
-  return &vars[id.local_id];
+  return &vars[id.id];
 }

 std::string IndexedFile::ToString() {
@ -311,7 +321,7 @@ void VisitDeclForTypeUsageVisitorHandler(clang::Cursor cursor, VisitDeclForTypeU

  if (param->is_interesting) {
    IndexedTypeDef* ref_type_def = db->Resolve(ref_type_id);
-    Location loc = db->file_db.Resolve(cursor, true /*interesting*/);
+    Location loc = db->file_db->Resolve(cursor, true /*interesting*/);
    ref_type_def->AddUsage(loc);
  }
 }
@ -434,7 +444,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
    var_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, var_def->def.short_name);
    //}

-    Location decl_loc = db->file_db.Resolve(decl->loc, false /*interesting*/);
+    Location decl_loc = db->file_db->Resolve(decl->loc, false /*interesting*/);
    if (decl->isDefinition)
      var_def->def.definition = decl_loc;
    else
@ -480,7 +490,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
    func_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, func_def->def.short_name);
    //}

-    Location decl_loc = db->file_db.Resolve(decl->loc, false /*interesting*/);
+    Location decl_loc = db->file_db->Resolve(decl->loc, false /*interesting*/);
    if (decl->isDefinition)
      func_def->def.definition = decl_loc;
    else
@ -597,7 +607,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
    type_def->def.short_name = decl->entityInfo->name;
    type_def->def.qualified_name = ns->QualifiedName(decl->semanticContainer, type_def->def.short_name);

-    Location decl_loc = db->file_db.Resolve(decl->loc, true /*interesting*/);
+    Location decl_loc = db->file_db->Resolve(decl->loc, true /*interesting*/);
    type_def->def.definition = decl_loc.WithInteresting(false);
    type_def->AddUsage(decl_loc);
    break;
@ -631,7 +641,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
    // }

    assert(decl->isDefinition);
-    Location decl_loc = db->file_db.Resolve(decl->loc, true /*interesting*/);
+    Location decl_loc = db->file_db->Resolve(decl->loc, true /*interesting*/);
    type_def->def.definition = decl_loc.WithInteresting(false);
    type_def->AddUsage(decl_loc);

@ -660,7 +670,7 @@ void indexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
  }

  default:
-    std::cout << "!! Unhandled indexDeclaration:     " << clang::Cursor(decl->cursor).ToString() << " at " << db->file_db.Resolve(decl->loc, false /*interesting*/).ToString() << std::endl;
+    std::cout << "!! Unhandled indexDeclaration:     " << clang::Cursor(decl->cursor).ToString() << " at " << db->file_db->Resolve(decl->loc, false /*interesting*/).ToString() << std::endl;
    std::cout << "     entityInfo->kind  = " << decl->entityInfo->kind << std::endl;
    std::cout << "     entityInfo->USR   = " << decl->entityInfo->USR << std::endl;
    if (decl->declAsContainer)
@ -696,7 +706,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
  {
    VarId var_id = db->ToVarId(ref->referencedEntity->cursor);
    IndexedVarDef* var_def = db->Resolve(var_id);
-    var_def->uses.push_back(db->file_db.Resolve(ref->loc, false /*interesting*/));
+    var_def->uses.push_back(db->file_db->Resolve(ref->loc, false /*interesting*/));
    break;
  }

@ -718,7 +728,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re

    // Don't report duplicate usages.
    // TODO: search full history?
-    Location loc = db->file_db.Resolve(ref->loc, false /*interesting*/);
+    Location loc = db->file_db->Resolve(ref->loc, false /*interesting*/);
    if (param->last_func_usage_location == loc) break;
    param->last_func_usage_location = loc;

@ -746,8 +756,8 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
    if (ref->referencedEntity->kind == CXIdxEntity_CXXConstructor ||
      ref->referencedEntity->kind == CXIdxEntity_CXXDestructor) {

-      Location parent_loc = db->file_db.Resolve(ref->parentEntity->cursor, true /*interesting*/);
-      Location our_loc = db->file_db.Resolve(ref->loc, true /*is_interesting*/);
+      Location parent_loc = db->file_db->Resolve(ref->parentEntity->cursor, true /*interesting*/);
+      Location our_loc = db->file_db->Resolve(ref->loc, true /*is_interesting*/);
      if (!parent_loc.IsEqualTo(our_loc)) {
        IndexedFuncDef* called_def = db->Resolve(called_id);
        assert(called_def->def.declaring_type.has_value());
@ -783,16 +793,16 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
    //    Foo f;
    //  }
    //
-    referenced_def->AddUsage(db->file_db.Resolve(ref->loc, false /*interesting*/));
+    referenced_def->AddUsage(db->file_db->Resolve(ref->loc, false /*interesting*/));
    break;
  }

  default:
-    std::cout << "!! Unhandled indexEntityReference: " << cursor.ToString() << " at " << db->file_db.Resolve(ref->loc, false /*interesting*/).ToString() << std::endl;
+    std::cout << "!! Unhandled indexEntityReference: " << cursor.ToString() << " at " << db->file_db->Resolve(ref->loc, false /*interesting*/).ToString() << std::endl;
    std::cout << "     ref->referencedEntity->kind = " << ref->referencedEntity->kind << std::endl;
    if (ref->parentEntity)
      std::cout << "     ref->parentEntity->kind = " << ref->parentEntity->kind << std::endl;
-    std::cout << "     ref->loc          = " << db->file_db.Resolve(ref->loc, false /*interesting*/).ToString() << std::endl;
+    std::cout << "     ref->loc          = " << db->file_db->Resolve(ref->loc, false /*interesting*/).ToString() << std::endl;
    std::cout << "     ref->kind         = " << ref->kind << std::endl;
    if (ref->parentEntity)
      std::cout << "     parentEntity      = " << clang::Cursor(ref->parentEntity->cursor).ToString() << std::endl;
@ -807,7 +817,7 @@ void indexEntityReference(CXClientData client_data, const CXIdxEntityRefInfo* re
 static bool DUMP_AST = true;


-IndexedFile Parse(std::string filename, std::vector<std::string> args) {
+IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector<std::string> args) {
  clang::Index index(0 /*excludeDeclarationsFromPCH*/, 0 /*displayDiagnostics*/);
  clang::TranslationUnit tu(index, filename, args);

@ -830,7 +840,7 @@ IndexedFile Parse(std::string filename, std::vector<std::string> args) {
    */
  };

-  IndexedFile db;
+  IndexedFile db(usr_to_id, file_db);
  NamespaceHelper ns;
  IndexParam param(&db, &ns);
  clang_indexTranslationUnit(index_action, &param, callbacks, sizeof(callbacks),
@ -971,7 +981,9 @@ int main(int argc, char** argv) {

    // Run test.
    std::cout << "[START] " << path << std::endl;
-    IndexedFile db = Parse(path, {});
+    UsrToIdResolver usr_to_id(1);
+    FileDb file_db(1);
+    IndexedFile db = Parse(&usr_to_id, &file_db, path, {});
    std::string actual_output = db.ToString();

    //WriteToFile("output.json", actual_output);
--- a/indexer.h
+++ b/indexer.h
@ -23,25 +23,79 @@ struct IndexedTypeDef;
 struct IndexedFuncDef;
 struct IndexedVarDef;

-using FileId = int64_t;
 using namespace std::experimental;

-// TODO: Move off of this weird wrapper, use struct with custom wrappers
-//       directly.
-BEGIN_BITFIELD_TYPE(Location, uint64_t)

-ADD_BITFIELD_MEMBER(interesting, /*start:*/ 0,  /*len:*/ 1);    // 2 values
-ADD_BITFIELD_MEMBER(file_id,     /*start:*/ 1,  /*len:*/ 29);   // 536,870,912 values
-ADD_BITFIELD_MEMBER(line,        /*start:*/ 30, /*len:*/ 20);   // 1,048,576 values
-ADD_BITFIELD_MEMBER(column,      /*start:*/ 50, /*len:*/ 14);   // 16,384 values
+using GroupId = int;

-Location(bool interesting, FileId file_id, uint32_t line, uint32_t column) {
+// Typed identifier. |T| is only a tag, so ids for different kinds of entities
+// are distinct C++ types; |group| records which id space |id| belongs to.
+// Ids from different groups must not be compared (checked by assert).
+template<typename T>
+struct Id {
+  GroupId group;
+  uint64_t id;
+
+  // Needed for containers. Do not use directly. |group| is zeroed too so the
+  // asserts in the comparison operators never read an indeterminate value.
+  Id() : group(0), id(0) {}
+  Id(GroupId group, uint64_t id) : group(group), id(id) {}
+
+  // Equality of the numeric id; both operands must share a group.
+  bool operator==(const Id<T>& other) const {
+    assert(group == other.group && "Cannot compare Ids from different groups");
+    return id == other.id;
+  }
+
+  // Ordering by the numeric id; both operands must share a group.
+  bool operator<(const Id<T>& other) const {
+    assert(group == other.group && "Cannot compare Ids from different groups");
+    return id < other.id;
+  }
+};
+
+namespace std {
+  // Hash support so Id<T> can be used as a key in unordered containers. The
+  // hash of the numeric id is mixed with the shifted hash of the group.
+  template<typename T>
+  struct hash<Id<T>> {
+    size_t operator()(const Id<T>& k) const {
+      const size_t id_hash = hash<uint64_t>()(k.id);
+      const size_t group_hash = hash<int>()(k.group);
+      return (id_hash ^ (group_hash << 1)) >> 1;
+    }
+  };
+}
+
+
+// Free-function equality for Id<T>. Asserts that both ids belong to the same
+// group, then compares the numeric ids.
+template<typename T>
+bool operator==(const Id<T>& a, const Id<T>& b) {
+  assert(a.group == b.group && "Cannot compare Ids from different groups");
+  const bool same_id = (a.id == b.id);
+  return same_id;
+}
+
+// Empty tag type; it exists only so FileId gets its own Id<> instantiation.
+struct _FakeFileType {};
+// Concrete id types, one per kind of entity, so they cannot be mixed up.
+using FileId = Id<_FakeFileType>;
+using TypeId = Id<IndexedTypeDef>;
+using FuncId = Id<IndexedFuncDef>;
+using VarId = Id<IndexedVarDef>;
+
+struct Location {
+  bool interesting;
+  int raw_file_group;
+  int raw_file_id;
+  int line;
+  int column;
+
+  // Creates an uninteresting location whose file group, file id, line and
+  // column are all -1.
+  Location()
+    : interesting(false),
+      raw_file_group(-1),
+      raw_file_id(-1),
+      line(-1),
+      column(-1) {}
+
+  Location(bool interesting, FileId file, uint32_t line, uint32_t column) {
    this->interesting = interesting;
-  this->file_id = file_id;
+    this->raw_file_group = file.group;
+    this->raw_file_id = file.id;
    this->line = line;
    this->column = column;
  }

+  // Rebuilds the FileId from the stored group and id.
+  FileId file_id() {
+    // Id's constructor takes (group, id), in that order; the arguments were
+    // previously passed the other way around.
+    return FileId(raw_file_group, raw_file_id);
+  }
+
  std::string ToString() {
    // Output looks like this:
    //
@ -55,7 +109,82 @@ std::string ToString() {
    std::string result;
    if (interesting)
      result += '*';
-  result += std::to_string(file_id);
+    result += std::to_string(raw_file_id);
+    result += ':';
+    result += std::to_string(line);
+    result += ':';
+    result += std::to_string(column);
+    return result;
+  }
+
+  // Returns true when |o| names the same file (group and id), line and
+  // column as this location. The |interesting| flag is deliberately not part
+  // of the comparison.
+  bool IsEqualTo(const Location& o) const {
+    if (raw_file_group != o.raw_file_group || raw_file_id != o.raw_file_id)
+      return false;
+    return line == o.line && column == o.column;
+  }
+
+  // Equality delegates to IsEqualTo(), so |interesting| is ignored here too.
+  bool operator==(const Location& o) const {
+    const bool same_position = IsEqualTo(o);
+    return same_position;
+  }
+  // Strict weak ordering over (interesting, file group, file id, line,
+  // column), compared lexicographically in that order. Requiring every field
+  // to be smaller at once, as before, is not a valid ordering for sorting or
+  // ordered containers. As before, |interesting| takes part in the ordering
+  // even though operator== ignores it.
+  bool operator<(const Location& o) const {
+    if (interesting != o.interesting)
+      return interesting < o.interesting;
+    if (raw_file_group != o.raw_file_group)
+      return raw_file_group < o.raw_file_group;
+    if (raw_file_id != o.raw_file_id)
+      return raw_file_id < o.raw_file_id;
+    if (line != o.line)
+      return line < o.line;
+    return column < o.column;
+  }
+
+  // Returns a copy of this location whose |interesting| flag is set to the
+  // given value; this object is not modified.
+  Location WithInteresting(bool interesting) {
+    Location copy(*this);
+    copy.interesting = interesting;
+    return copy;
+  }
+};
+
+#if false
+// TODO: Move off of this weird wrapper, use struct with custom wrappers
+//       directly.
+BEGIN_BITFIELD_TYPE(Location, uint64_t)
+
+ADD_BITFIELD_MEMBER(interesting,    /*start:*/ 0,  /*len:*/ 1);    // 2 values
+ADD_BITFIELD_MEMBER(raw_file_group, /*start:*/ 1,  /*len:*/ 4);    // 16 values, ok if they wrap around.
+ADD_BITFIELD_MEMBER(raw_file_id,    /*start:*/ 5,  /*len:*/ 25);   // 33,554,432 values
+ADD_BITFIELD_MEMBER(line,           /*start:*/ 30, /*len:*/ 20);   // 1,048,576 values
+ADD_BITFIELD_MEMBER(column,         /*start:*/ 50, /*len:*/ 14);   // 16,384 values
+
+Location(bool interesting, FileId file, uint32_t line, uint32_t column) {
+  this->interesting = interesting;
+  this->raw_file_group = file.group;
+  this->raw_file_id = file.id;
+  this->line = line;
+  this->column = column;
+}
+
+FileId file_id() {
+  return FileId(raw_file_id, raw_file_group);
+}
+
+std::string ToString() {
+  // Output looks like this:
+  //
+  //  *1:2:3
+  //
+  // * => interesting
+  // 1 => file id
+  // 2 => line
+  // 3 => column
+
+  std::string result;
+  if (interesting)
+    result += '*';
+  result += std::to_string(raw_file_id);
  result += ':';
  result += std::to_string(line);
  result += ':';
@ -78,15 +207,17 @@ Location WithInteresting(bool interesting) {
 }

 END_BITFIELD_TYPE()
+#endif

-struct IndexedFileDb {
+struct FileDb {
+  GroupId group;
  std::unordered_map<std::string, FileId> file_path_to_file_id;
  std::unordered_map<FileId, std::string> file_id_to_file_path;

-  IndexedFileDb() {
+  FileDb(GroupId group) : group(group) {
    // Reserve id 0 for unfound.
-    file_path_to_file_id[""] = 0;
-    file_id_to_file_path[0] = "";
+    file_path_to_file_id[""] = FileId(group, 0);
+    file_id_to_file_path[FileId(group, 0)] = "";
  }

  Location Resolve(const CXSourceLocation& cx_loc, bool interesting) {
@ -103,7 +234,7 @@ struct IndexedFileDb {
        file_id = it->second;
      }
      else {
-        file_id = file_path_to_file_id.size();
+        file_id = FileId(group, file_path_to_file_id.size());
        file_path_to_file_id[path] = file_id;
        file_id_to_file_path[file_id] = path;
      }
@ -127,35 +258,33 @@ struct IndexedFileDb {
 };


-template<typename T>
-struct LocalId {
-  uint64_t local_id;
-
-  LocalId() : local_id(0) {} // Needed for containers. Do not use directly.
-  explicit LocalId(uint64_t local_id) : local_id(local_id) {}
-
-  bool operator==(const LocalId<T>& other) {
-    return local_id == other.local_id;
-  }
-};
-
-template<typename T>
-bool operator==(const LocalId<T>& a, const LocalId<T>& b) {
-  return a.local_id == b.local_id;
-}
-
-using TypeId = LocalId<IndexedTypeDef>;
-using FuncId = LocalId<IndexedFuncDef>;
-using VarId = LocalId<IndexedVarDef>;
-
-
 template<typename T>
 struct Ref {
-  LocalId<T> id;
+  Id<T> id;
  Location loc;

-  Ref(LocalId<T> id, Location loc) : id(id), loc(loc) {}
+  Ref(Id<T> id, Location loc) : id(id), loc(loc) {}
+
+  // Two refs are equal when both their ids and their locations compare equal.
+  bool operator==(const Ref<T>& other) {
+    if (!(id == other.id))
+      return false;
+    return loc == other.loc;
+  }
+  // Negation of operator==.
+  bool operator!=(const Ref<T>& other) {
+    const bool equal = (*this == other);
+    return !equal;
+  }
+  // Orders refs by id first and by location second. Requiring both parts to
+  // be smaller at once, as before, is not a valid ordering for sorting or
+  // ordered containers.
+  bool operator<(const Ref<T>& other) const {
+    if (id < other.id)
+      return true;
+    if (other.id < id)
+      return false;
+    return loc < other.loc;
+  }
 };
+
+// Free-function equality for Ref<T>: ids and locations must both match.
+template<typename T>
+bool operator==(const Ref<T>& a, const Ref<T>& b) {
+  if (!(a.id == b.id))
+    return false;
+  return a.loc == b.loc;
+}
+// Free-function inequality for Ref<T>; the negation of operator==.
+template<typename T>
+bool operator!=(const Ref<T>& a, const Ref<T>& b) {
+  const bool equal = (a == b);
+  return !equal;
+}
+
 using TypeRef = Ref<IndexedTypeDef>;
 using FuncRef = Ref<IndexedFuncDef>;
 using VarRef = Ref<IndexedVarDef>;
@ -197,6 +326,24 @@ struct TypeDefDefinitionData {
  std::vector<VarId> vars;

  TypeDefDefinitionData(TypeId id, const std::string& usr) : id(id), usr(usr) {}
+
+  // Field-by-field equality over the members compared below, checked in the
+  // order written and stopping at the first mismatch.
+  bool operator==(const TypeDefDefinitionData& other) const {
+    const bool same_identity =
+      id == other.id &&
+      usr == other.usr &&
+      short_name == other.short_name &&
+      qualified_name == other.qualified_name;
+    if (!same_identity)
+      return false;
+
+    const bool same_type_info =
+      definition == other.definition &&
+      alias_of == other.alias_of &&
+      parents == other.parents;
+    if (!same_type_info)
+      return false;
+
+    return
+      types == other.types &&
+      funcs == other.funcs &&
+      vars == other.vars;
+  }
+
+  // Negation of operator==.
+  bool operator!=(const TypeDefDefinitionData& other) const {
+    const bool equal = (*this == other);
+    return !equal;
+  }
 };

 struct IndexedTypeDef {
@ -213,8 +360,21 @@ struct IndexedTypeDef {

  IndexedTypeDef(TypeId id, const std::string& usr);
  void AddUsage(Location loc, bool insert_if_not_present = true);
+
+  // Orders type definitions by the id stored in their definition data.
+  bool operator<(const IndexedTypeDef& other) const {
+    const auto& mine = def.id;
+    const auto& theirs = other.def.id;
+    return mine < theirs;
+  }
 };

+namespace std {
+  // Hashes an IndexedTypeDef by the USR string in its definition data.
+  template <>
+  struct hash<IndexedTypeDef> {
+    size_t operator()(const IndexedTypeDef& k) const {
+      hash<string> usr_hasher;
+      return usr_hasher(k.def.usr);
+    }
+  };
+}
+
 struct FuncDefDefinitionData {
  // General metadata.
  FuncId id;
@ -238,6 +398,23 @@ struct FuncDefDefinitionData {
  FuncDefDefinitionData(FuncId id, const std::string& usr) : id(id), usr(usr) {
    assert(usr.size() > 0);
  }
+
+  // Field-by-field equality over the members compared below, checked in the
+  // order written and stopping at the first mismatch.
+  bool operator==(const FuncDefDefinitionData& other) const {
+    const bool same_identity =
+      id == other.id &&
+      usr == other.usr &&
+      short_name == other.short_name &&
+      qualified_name == other.qualified_name;
+    if (!same_identity)
+      return false;
+
+    const bool same_declaration =
+      definition == other.definition &&
+      declaring_type == other.declaring_type &&
+      base == other.base;
+    if (!same_declaration)
+      return false;
+
+    return
+      locals == other.locals &&
+      callees == other.callees;
+  }
+
+  // Negation of operator==.
+  bool operator!=(const FuncDefDefinitionData& other) const {
+    const bool equal = (*this == other);
+    return !equal;
+  }
 };

 struct IndexedFuncDef {
@ -265,8 +442,22 @@ struct IndexedFuncDef {
  IndexedFuncDef(FuncId id, const std::string& usr) : def(id, usr) {
    assert(usr.size() > 0);
  }
+
+  // Orders function definitions by the id stored in their definition data.
+  bool operator<(const IndexedFuncDef& other) const {
+    const auto& mine = def.id;
+    const auto& theirs = other.def.id;
+    return mine < theirs;
+  }
 };

+namespace std {
+  // Hashes an IndexedFuncDef by the USR string in its definition data.
+  template <>
+  struct hash<IndexedFuncDef> {
+    size_t operator()(const IndexedFuncDef& k) const {
+      hash<string> usr_hasher;
+      return usr_hasher(k.def.usr);
+    }
+  };
+}
+
+
 struct VarDefDefinitionData {
  // General metadata.
  VarId id;
@ -285,6 +476,22 @@ struct VarDefDefinitionData {
  optional<TypeId> declaring_type;

  VarDefDefinitionData(VarId id, const std::string& usr) : id(id), usr(usr) {}
+
+  // Field-by-field equality over the members compared below, checked in the
+  // order written and stopping at the first mismatch.
+  bool operator==(const VarDefDefinitionData& other) const {
+    const bool same_identity =
+      id == other.id &&
+      usr == other.usr &&
+      short_name == other.short_name &&
+      qualified_name == other.qualified_name;
+    if (!same_identity)
+      return false;
+
+    const bool same_locations =
+      declaration == other.declaration &&
+      definition == other.definition;
+    if (!same_locations)
+      return false;
+
+    return
+      variable_type == other.variable_type &&
+      declaring_type == other.declaring_type;
+  }
+
+  // Negation of operator==.
+  bool operator!=(const VarDefDefinitionData& other) const {
+    const bool equal = (*this == other);
+    return !equal;
+  }
 };

 struct IndexedVarDef {
@ -298,23 +505,41 @@ struct IndexedVarDef {
  IndexedVarDef(VarId id, const std::string& usr) : def(id, usr) {
    assert(usr.size() > 0);
  }
+
+  // Orders variable definitions by the id stored in their definition data.
+  bool operator<(const IndexedVarDef& other) const {
+    const auto& mine = def.id;
+    const auto& theirs = other.def.id;
+    return mine < theirs;
+  }
 };

+namespace std {
+  // Hashes an IndexedVarDef by the USR string in its definition data.
+  template <>
+  struct hash<IndexedVarDef> {
+    size_t operator()(const IndexedVarDef& k) const {
+      hash<string> usr_hasher;
+      return usr_hasher(k.def.usr);
+    }
+  };
+}

-struct IndexedFile {
+struct UsrToIdResolver {
  // NOTE: Every Id is resolved to a file_id of 0. The correct file_id needs
  //       to get fixed up when inserting into the real db.
+  GroupId group;
  std::unordered_map<std::string, TypeId> usr_to_type_id;
  std::unordered_map<std::string, FuncId> usr_to_func_id;
  std::unordered_map<std::string, VarId> usr_to_var_id;

+  UsrToIdResolver(GroupId group) : group(group) {}
+};
+
+struct IndexedFile {
+  FileDb* file_db;
+  UsrToIdResolver* usr_to_id;
+
  std::vector<IndexedTypeDef> types;
  std::vector<IndexedFuncDef> funcs;
  std::vector<IndexedVarDef> vars;

-  IndexedFileDb file_db;
-
-  IndexedFile();
+  IndexedFile(UsrToIdResolver* usr_to_id, FileDb* file_db);

  TypeId ToTypeId(const std::string& usr);
  FuncId ToFuncId(const std::string& usr);
@ -332,32 +557,4 @@ struct IndexedFile {



-// TODO: Maybe instead of clearing/adding diffs, we should just clear out the
-//       entire previous index and readd the new one? That would be simpler.
-// TODO: ^^^ I don't think we can do this. It will probably stall the main
-//       indexer for far too long since we will have to iterate over tons of
-//       data.
-// TODO: Idea: when indexing and joining to the main db, allow many dbs that
-//             are joined to. So that way even if the main db is busy we can
-//             still be joining. Joining the partially joined db to the main
-//             db should be faster since we will have larger data lanes to use.
-struct IndexedTypeDefDiff {};
-struct IndexedFuncDefDiff {};
-struct IndexedVarDefDiff {};
-
-struct IndexedFileDiff {
-  std::vector<IndexedTypeDefDiff> removed_types;
-  std::vector<IndexedFuncDefDiff> removed_funcs;
-  std::vector<IndexedVarDefDiff> removed_vars;
-
-  std::vector<IndexedTypeDefDiff> added_types;
-  std::vector<IndexedFuncDefDiff> added_funcs;
-  std::vector<IndexedVarDefDiff> added_vars;
-
-  // TODO: Instead of change, maybe we just remove and then add again? not sure.
-  std::vector<IndexedTypeDefDiff> changed_types;
-  std::vector<IndexedFuncDefDiff> changed_funcs;
-  std::vector<IndexedVarDefDiff> changed_vars;
-};
-
-IndexedFile Parse(std::string filename, std::vector<std::string> args);
+IndexedFile Parse(UsrToIdResolver* usr_to_id, FileDb* file_db, std::string filename, std::vector<std::string> args);
--- a/query.cc
+++ b/query.cc
@ -1,19 +1,200 @@
 #include "query.h"

 #include <cstdint>
+#include <functional>
+#include <unordered_set>
 #include <unordered_map>
 #include <string>
 #include <iostream>

+#include "function_output_iterator.hpp"
 #include "compilation_database_loader.h"
 #include "optional.h"
 #include "indexer.h"

-struct FileDatabase {
-  std::unordered_map<std::string, FileId> filename_to_file_id;
-  std::unordered_map<FileId, std::string> file_id_to_filename;
+//#define CATCH_CONFIG_MAIN
+//#include "catch.hpp"
+
+// TODO: Make all copy constructors explicit.
+
+// Translates ids that belong to arbitrary source groups into ids that belong
+// to a single |target_group|. The first time a source id is seen it is
+// assigned the next unused id of its kind (file/type/func/var) in the target
+// group; every later lookup of the same source id returns that same target id.
+struct IdMap {
+  // The vector is indexed by TId::group. Each map is keyed by a source id of
+  // that group and stores the target id it has been remapped to.
+  template<typename TId>
+  using GroupMap = std::vector<std::unordered_map<TId, TId>>;
+
+  GroupId target_group;
+  int64_t next_file_id = 1;
+  int64_t next_type_id = 1;
+  int64_t next_func_id = 1;
+  int64_t next_var_id = 1;
+
+  GroupMap<FileId> remap_file_id;
+  GroupMap<TypeId> remap_type_id;
+  GroupMap<FuncId> remap_func_id;
+  GroupMap<VarId> remap_var_id;
+
+  IdMap(GroupId target_group) : target_group(target_group) {}
+
+  // Returns the remap table for |group_index|, creating groups if needed.
+  //
+  // The result is a reference into |map|; it is invalidated by the next call
+  // that grows |map|.
+  template<typename TId>
+  inline std::unordered_map<TId, TId>& ResolveGroup(GroupMap<TId>* map, size_t group_index) {
+    if (group_index >= map->size())
+      map->resize(group_index + 1);
+    return (*map)[group_index];
+  }
+
+  // Returns the target id recorded for |from| in |group|. If there is none
+  // yet, allocates the next id from |next_id|, records it and returns it.
+  template<typename TId>
+  inline TId LookupOrAssign(std::unordered_map<TId, TId>* group, int64_t* next_id, TId from) {
+    auto it = group->find(from);
+    if (it != group->end())
+      return it->second;
+
+    // Only consume an id when the mapping is really new.
+    TId result(target_group, (*next_id)++);
+    group->emplace(from, result);
+    return result;
+  }
+
+  // Remaps the single id |from| using the tables in |map|, allocating from
+  // |next_id| when |from| has not been seen before.
+  template<typename TId>
+  inline TId GenericRemap(GroupMap<TId>* map, int64_t* next_id, TId from) {
+    // PERF: If this function is a hot-spot we can pull the group computation
+    // out, ie,
+    //
+    //    IdMap id_map;
+    //    GroupIdMap group_map = id_map.ResolveIdGroup(file.group)
+    //    for (...)
+    //      group_map.Remap(id)
+
+    // NOTE: A dense std::vector<TId> per group (indexed by TId::id) was
+    // considered instead of a hash map, but it would waste huge amounts of
+    // memory because the first 16k+ ids can be unused.
+
+    // This must be a reference: the mapping has to be stored in |map|, not in
+    // a temporary copy, or the same id would be remapped differently on every
+    // call.
+    std::unordered_map<TId, TId>& group = ResolveGroup(map, static_cast<size_t>(from.group));
+    return LookupOrAssign(&group, next_id, from);
+  }
+
+  // Remaps every id in |from|, preserving order. Returns an empty vector for
+  // empty input.
+  //
+  // All ids are looked up in the table of the first element's group, ie, this
+  // expects every element of |from| to belong to the same group.
+  template<typename TId>
+  inline std::vector<TId> GenericVectorRemap(GroupMap<TId>* map, int64_t* next_id, const std::vector<TId>& from) {
+    if (from.empty())
+      return {};
+
+    // Reference, not a copy - see GenericRemap.
+    std::unordered_map<TId, TId>& group = ResolveGroup(map, static_cast<size_t>(from[0].group));
+
+    std::vector<TId> result;
+    result.reserve(from.size());
+    for (TId id : from)
+      result.push_back(LookupOrAssign(&group, next_id, id));
+
+    return result;
+  }
+
+  FileId Remap(FileId from) {
+    return GenericRemap(&remap_file_id, &next_file_id, from);
+  }
+  // Only the file part of a location is remapped; the rest of |from| is
+  // returned untouched.
+  Location Remap(Location from) {
+    FileId file = Remap(from.file_id());
+    from.raw_file_group = file.group;
+    from.raw_file_id = file.id;
+    return from;
+  }
+  TypeId Remap(TypeId from) {
+    return GenericRemap(&remap_type_id, &next_type_id, from);
+  }
+  FuncId Remap(FuncId from) {
+    return GenericRemap(&remap_func_id, &next_func_id, from);
+  }
+  VarId Remap(VarId from) {
+    return GenericRemap(&remap_var_id, &next_var_id, from);
+  }
+  FuncRef Remap(FuncRef from) {
+    from.id = Remap(from.id);
+    from.loc = Remap(from.loc);
+    return from;
+  }
+  // The *DefinitionData overloads take |def| by value, remap every id and
+  // location stored in it and return the modified copy. Optional fields are
+  // only remapped when they hold a value.
+  TypeDefDefinitionData Remap(TypeDefDefinitionData def) {
+    def.id = Remap(def.id);
+    if (def.definition)
+      def.definition = Remap(def.definition.value());
+    if (def.alias_of)
+      def.alias_of = Remap(def.alias_of.value());
+    def.parents = Remap(def.parents);
+    def.types = Remap(def.types);
+    def.funcs = Remap(def.funcs);
+    def.vars = Remap(def.vars);
+    return def;
+  }
+  FuncDefDefinitionData Remap(FuncDefDefinitionData def) {
+    def.id = Remap(def.id);
+    if (def.definition)
+      def.definition = Remap(def.definition.value());
+    if (def.declaring_type)
+      def.declaring_type = Remap(def.declaring_type.value());
+    if (def.base)
+      def.base = Remap(def.base.value());
+    def.locals = Remap(def.locals);
+    def.callees = Remap(def.callees);
+    return def;
+  }
+  VarDefDefinitionData Remap(VarDefDefinitionData def) {
+    def.id = Remap(def.id);
+    if (def.declaration)
+      def.declaration = Remap(def.declaration.value());
+    if (def.definition)
+      def.definition = Remap(def.definition.value());
+    if (def.variable_type)
+      def.variable_type = Remap(def.variable_type.value());
+    if (def.declaring_type)
+      def.declaring_type = Remap(def.declaring_type.value());
+    return def;
+  }
+
+
+  //std::vector<FileId> Remap(const std::vector<FileId>& from) {
+  //  return GenericVectorRemap(&remap_file_id, &next_file_id, from);
+  //}
+  // Locations and FuncRefs are remapped element by element, so unlike the id
+  // vectors below they may mix groups.
+  std::vector<Location> Remap(const std::vector<Location>& from) {
+    std::vector<Location> result;
+    result.reserve(from.size());
+    for (Location l : from)
+      result.push_back(Remap(l));
+    return result;
+  }
+  std::vector<TypeId> Remap(const std::vector<TypeId>& from) {
+    return GenericVectorRemap(&remap_type_id, &next_type_id, from);
+  }
+  std::vector<FuncId> Remap(const std::vector<FuncId>& from) {
+    return GenericVectorRemap(&remap_func_id, &next_func_id, from);
+  }
+  std::vector<VarId> Remap(const std::vector<VarId>& from) {
+    return GenericVectorRemap(&remap_var_id, &next_var_id, from);
+  }
+  std::vector<FuncRef> Remap(const std::vector<FuncRef>& from) {
+    std::vector<FuncRef> result;
+    result.reserve(from.size());
+    for (FuncRef r : from)
+      result.push_back(Remap(r));
+    return result;
+  }
 };

+
+
 enum class SymbolKind { Type, Func, Var };
 struct SymbolIdx {
  SymbolKind kind;
@ -45,6 +226,20 @@ struct MergeableUpdate {
  std::vector<TValue> to_remove;
 };

+// Builds the MergeableUpdate for |symbol_id| from the |removed| and |added|
+// values, translating the id and both value lists through |id_map|.
+//
+// Remap can allocate new ids as a side effect, so the three calls are kept in
+// the order id, removed, added.
+template<typename TId, typename TValue>
+MergeableUpdate<TId, TValue> MakeMergeableUpdate(IdMap* id_map, TId symbol_id, const std::vector<TValue>& removed, const std::vector<TValue>& added) {
+  IdMap& ids = *id_map;
+
+  MergeableUpdate<TId, TValue> result;
+  result.id = ids.Remap(symbol_id);
+  result.to_remove = ids.Remap(removed);
+  result.to_add = ids.Remap(added);
+  return result;
+}
+
+// NOTE: When not inside of a |def| object, there can be duplicates of the same
+//       information if that information is contributed from separate sources.
+//       If we need to avoid this duplication in the future, we will have to
+//       add a refcount.
+
 struct QueryableTypeDef {
  TypeDefDefinitionData def;
  std::vector<TypeId> derived;
@ -53,6 +248,12 @@ struct QueryableTypeDef {
  using DefUpdate = TypeDefDefinitionData;
  using DerivedUpdate = MergeableUpdate<TypeId, TypeId>;
  using UsesUpdate = MergeableUpdate<TypeId, Location>;
+
+  QueryableTypeDef(IdMap& id_map, const IndexedTypeDef& indexed)  // Copies |indexed|, translating its ids and locations through |id_map|.
+    : def(id_map.Remap(indexed.def)) {
+    derived = id_map.Remap(indexed.derived);  // Remap may allocate ids in |id_map|, so the call order matters.
+    uses = id_map.Remap(indexed.uses);
+  }
 };

 struct QueryableFuncDef {
@ -67,6 +268,14 @@ struct QueryableFuncDef {
  using DerivedUpdate = MergeableUpdate<FuncId, FuncId>;
  using CallersUpdate = MergeableUpdate<FuncId, FuncRef>;
  using UsesUpdate = MergeableUpdate<FuncId, Location>;
+
+  QueryableFuncDef(IdMap& id_map, const IndexedFuncDef& indexed)  // Copies |indexed|, translating its ids and locations through |id_map|.
+    : def(id_map.Remap(indexed.def)) {
+    declarations = id_map.Remap(indexed.declarations);  // Remap may allocate ids in |id_map|, so the call order matters.
+    derived = id_map.Remap(indexed.derived);
+    callers = id_map.Remap(indexed.callers);
+    uses = id_map.Remap(indexed.uses);
+  }
 };

 struct QueryableVarDef {
@ -75,9 +284,16 @@ struct QueryableVarDef {

  using DefUpdate = VarDefDefinitionData;
  using UsesUpdate = MergeableUpdate<VarId, Location>;
+
+  QueryableVarDef(IdMap& id_map, const IndexedVarDef& indexed)  // Copies |indexed|, translating its ids and locations through |id_map|.
+    : def(id_map.Remap(indexed.def)) {
+    uses = id_map.Remap(indexed.uses);
+  }
 };

 struct QueryableFile {
+  FileId file_id;
+
  // Symbols declared in the file.
  std::vector<SymbolIdx> declared_symbols;
  // Symbols which have definitions in the file.
@ -91,6 +307,8 @@ struct QueryableEntry {
 // The query database is heavily optimized for fast queries. It is stored
 // in-memory.
 struct QueryableDatabase {
+  IdMap id_map;
+
  // Indicies between lookup vectors are related to symbols, ie, index 5 in
  // |qualified_names| matches index 5 in |symbols|.
  std::vector<QueryableEntry> qualified_names;
@ -102,220 +320,324 @@ struct QueryableDatabase {
  std::vector<QueryableVarDef> vars;

  // |files| is indexed by FileId. Retrieve a FileId from a path using
-  // |file_locator|.
-  FileDatabase file_locator;
+  // |file_db|.
+  FileDb file_db;
  std::vector<QueryableFile> files;
+
+  // When importing data into the global db we need to remap ids from an
+  // arbitrary group into the global group.
+  IdMap local_id_group_to_global_id_group;
 };



-struct Query {

-};
+
+
+
+
+


 struct CachedIndexedFile {
  // Path to the file indexed.
  std::string path;
+  // GroupId of the indexed file.
+  GroupId group;

-  // Full in-memory storage for the index. Empty if not loaded into memory.
-  // |path| can be used to fetch the index from disk.
-  optional<rapidjson::Document> index;
+  // TODO: Make sure that |previous_index| and |current_index| use the same id
+  // to USR mapping. This lets us greatly speed up difference computation.
+
+  // The previous index. This is used for index updates, so we only apply
+  // an update diff when changing the global db.
+  optional<IndexedFile> previous_index;
+  IndexedFile current_index;
+
+  CachedIndexedFile(const IndexedFile& indexed)  // Stores a copy of |indexed|; |path| and |previous_index| are not set here.
+    : group(indexed.usr_to_id->group), current_index(indexed) {}
 };

-struct DocumentDiff {

+struct IndexUpdate {  // A batch of type/function/variable changes; ComputeDiff fills one in from a previous and a current index.
+  IdMap* id_map;  // Raw pointer stored by the constructor; ComputeDiff passes every recorded id through this map.
+
+  // Type updates.
+  std::vector<TypeId> types_removed;  // Types found only in the previous index.
+  std::vector<QueryableTypeDef> types_added;  // Types found only in the current index.
+  std::vector<QueryableTypeDef::DefUpdate> types_def_changed;  // Current |def| of types whose |def| differs between the two indexes.
+  std::vector<QueryableTypeDef::DerivedUpdate> types_derived;  // Per-type additions/removals of |derived| entries.
+  std::vector<QueryableTypeDef::UsesUpdate> types_uses;  // Per-type additions/removals of |uses| entries.
+
+  // Function updates.
+  std::vector<FuncId> funcs_removed;  // Functions found only in the previous index.
+  std::vector<QueryableFuncDef> funcs_added;  // Functions found only in the current index.
+  std::vector<QueryableFuncDef::DefUpdate> funcs_def_changed;  // Current |def| of functions whose |def| differs between the two indexes.
+  std::vector<QueryableFuncDef::DeclarationsUpdate> funcs_declarations;  // Per-function additions/removals of |declarations| entries.
+  std::vector<QueryableFuncDef::DerivedUpdate> funcs_derived;  // Per-function additions/removals of |derived| entries.
+  std::vector<QueryableFuncDef::CallersUpdate> funcs_callers;  // Per-function additions/removals of |callers| entries.
+  std::vector<QueryableFuncDef::UsesUpdate> funcs_uses;  // Per-function additions/removals of |uses| entries.
+
+  // Variable updates.
+  std::vector<VarId> vars_removed;  // Variables found only in the previous index.
+  std::vector<QueryableVarDef> vars_added;  // Variables found only in the current index.
+  std::vector<QueryableVarDef::DefUpdate> vars_def_changed;  // Current |def| of variables whose |def| differs between the two indexes.
+  std::vector<QueryableVarDef::UsesUpdate> vars_uses;  // Per-variable additions/removals of |uses| entries.
+
+  IndexUpdate(IdMap* id_map) : id_map(id_map) {}  // All update vectors start out empty.
 };
-// Compute a diff between |original| and |updated|.
-//rapidjson::Document DiffIndex(rapidjson::Document original, rapidjson::Document updated) {
-
-//}


-
-
-bool ParsePreferredSymbolLocation(const std::string& content, PreferredSymbolLocation* obj) {
-#define PARSE_AS(name, string)      \
-  if (content == #string) {         \
-    *obj = name;                    \
-    return true;                    \
+// Looks up the pointer |value| in |set|. The comparison is on the pointer
+// itself, not on what it points to. Returns the stored pointer, or nullptr
+// when |set| does not contain it.
+template<typename TValue>
+TValue* TryFind(std::unordered_set<TValue*>& set, TValue* value) {
+  // TODO: Make |value| a const ref?
+  const auto match = set.find(value);
+  return match != set.end() ? *match : nullptr;
 }

-  PARSE_AS(PreferredSymbolLocation::Declaration, "declaration");
-  PARSE_AS(PreferredSymbolLocation::Definition, "definition");
-
-  return false;
-#undef PARSE_AS
+// Returns a set holding the address of every element of |elements|. The
+// pointers refer into |elements| itself.
+template<typename T>
+std::unordered_set<T*> CreateSet(std::vector<T>& elements) {
+  std::unordered_set<T*> pointers;
+  pointers.reserve(elements.size());
+  for (size_t i = 0; i < elements.size(); ++i)
+    pointers.insert(&elements[i]);
+  return pointers;
 }

-bool ParseCommand(const std::string& content, Command* obj) {
-#define PARSE_AS(name, string)      \
-  if (content == #string) {         \
-    *obj = name;                    \
-    return true;                    \
+
+// Diffs |previous| against |current|. Every element found in |previous| but
+// not in |current| is appended to |removed|, and every element found in
+// |current| but not in |previous| is appended to |added|. Both inputs are
+// sorted in place as a side effect.
+//
+// Returns true iff |removed| or |added| are non-empty.
+template<typename T>
+bool ComputeDifferenceForUpdate(
+  std::vector<T>& previous, std::vector<T>& current,
+  std::vector<T>* removed, std::vector<T>* added) {
+
+  // std::set_difference only works on sorted ranges.
+  std::sort(previous.begin(), previous.end());
+  std::sort(current.begin(), current.end());
+
+  // Appends the elements of |lhs| that are missing from |rhs| to |out|.
+  auto append_missing = [](const std::vector<T>& lhs, const std::vector<T>& rhs, std::vector<T>* out) {
+    std::set_difference(
+      lhs.begin(), lhs.end(),
+      rhs.begin(), rhs.end(),
+      std::back_inserter(*out));
+  };
+  append_missing(previous, current, removed);
+  append_missing(current, previous, added);
+
+  const bool nothing_changed = removed->empty() && added->empty();
+  return !nothing_changed;
 }

-  PARSE_AS(Command::Callees, "callees");
-  PARSE_AS(Command::Callers, "callers");
-  PARSE_AS(Command::FindAllUsages, "find-all-usages");
-  PARSE_AS(Command::FindInterestingUsages, "find-interesting-usages");
-  PARSE_AS(Command::GotoReferenced, "goto-referenced");
-  PARSE_AS(Command::Hierarchy, "hierarchy");
-  PARSE_AS(Command::Outline, "outline");
-  PARSE_AS(Command::Search, "search");
+#if false  // Disabled variant: it matches entries by pointer identity, which cannot work across two vectors (see the TODO inside).
+template<typename T>
+void CompareGroups(
+  std::vector<T>& previous_data, std::vector<T>& current_data,
+  std::function<void(T*)> on_removed, std::function<void(T*)> on_added, std::function<void(T*, T*)> on_found) {
+  // TODO: It could be faster to use set_intersection and set_difference to
+  //       compute these values. We will have to presort the input by ID, though.

-  return false;
-#undef PARSE_AS
+  // Precompute sets so we stay around O(3N) instead of O(N^2). Otherwise
+  // lookups for duplicate elements will be O(N) and we need them to be O(1).
+  std::unordered_set<T*> previous_set = CreateSet(previous_data);
+  std::unordered_set<T*> current_set = CreateSet(current_data);
+
+  // TODO: TryFind is just comparing pointers which obviously fails because they point to different memory...
+
+  for (T* current_entry : current_set) {
+    // Possibly updated.
+    if (T* previous_entry = TryFind(previous_set, current_entry))
+      on_found(previous_entry, current_entry);
+    // Added
+    else
+      on_added(current_entry);
+  }
+  for (T* previous_entry : previous_set) {
+    // Removed
+    if (!TryFind(current_set, previous_entry))
+      on_removed(previous_entry);
+}
+}
+#endif
+
+// Reports how |current_data| differs from |previous_data|, matching entries
+// by |def.id|:
+//   - |on_removed| is called for every entry only present in |previous_data|.
+//   - |on_added| is called for every entry only present in |current_data|.
+//   - |on_found| is called with (previous, current) for entries in both.
+//
+// Both vectors are sorted in place by |def.id| as a side effect. The pointers
+// passed to the callbacks point into the two vectors.
+template<typename T>
+void CompareGroups(
+  std::vector<T>& previous_data, std::vector<T>& current_data,
+  std::function<void(T*)> on_removed, std::function<void(T*)> on_added, std::function<void(T*, T*)> on_found) {
+  // TODO: It could be faster to use set_intersection and set_difference to
+  //       compute these values.
+
+  // Sort on exactly the key the merge below compares on. Relying on
+  // T::operator< would only be correct if it happened to order by |def.id|.
+  auto by_id = [](const T& lhs, const T& rhs) { return lhs.def.id < rhs.def.id; };
+  std::sort(previous_data.begin(), previous_data.end(), by_id);
+  std::sort(current_data.begin(), current_data.end(), by_id);
+
+  auto prev_it = previous_data.begin();
+  auto curr_it = current_data.begin();
+  while (prev_it != previous_data.end() && curr_it != current_data.end()) {
+    // same id
+    if (prev_it->def.id == curr_it->def.id) {
+      on_found(&*prev_it, &*curr_it);
+      ++prev_it;
+      ++curr_it;
    }

+    // prev_id is smaller - prev_it has data curr_it does not have.
+    else if (prev_it->def.id < curr_it->def.id) {
+      on_removed(&*prev_it);
+      ++prev_it;
+    }
+
+    // prev_id is bigger - curr_it has data prev_it does not have.
+    else {
+      on_added(&*curr_it);
+      ++curr_it;
+    }
+  }
+
+  // if prev_it still has data, that means it is not in curr_it and was removed.
+  for (; prev_it != previous_data.end(); ++prev_it)
+    on_removed(&*prev_it);
+
+  // if curr_it still has data, that means it is not in prev_it and was added.
+  for (; curr_it != current_data.end(); ++curr_it)
+    on_added(&*curr_it);
+}
+
+// Computes the IndexUpdate that turns |previous| into |current|. Every id and
+// location stored in the result has been translated through |id_map|.
+//
+// |previous| and |current| must share the same UsrToIdResolver and FileDb
+// (asserted below). Their type/func/var vectors, and the derived/uses/etc
+// vectors of entries present in both, are sorted in place as a side effect.
+//
+// TODO: make this const correct.
+IndexUpdate ComputeDiff(IdMap* id_map, IndexedFile& previous, IndexedFile& current) {
+  // |query_name| is the name of the variable on the query type.
+  // |index_name| is the name of the variable on the index type.
+  // |type| is the type of the variable.
+  //
+  // Expects |previous|, |current|, |update| and |id_map| to be in scope, where
+  // |previous| and |current| are pointers to the two versions of one def.
+  //
+  // NOTE: The member names are substituted directly. Pasting them with ##
+  // (ie, "previous->" ## "derived") does not form a single valid preprocessing
+  // token and is rejected by GCC and Clang.
+#define PROCESS_UPDATE_DIFF(query_name, index_name, type) \
+  { \
+    /* Check for changes. */ \
+    std::vector<type> removed, added; \
+    bool did_change = ComputeDifferenceForUpdate(previous->index_name, current->index_name, &removed, &added); \
+    if (did_change) {\
+      std::cout << "Adding mergeable update on " << current->def.short_name << " (" << current->def.usr << ") for field " << #index_name << std::endl; \
+      update.query_name.push_back(MakeMergeableUpdate(id_map, current->def.id, removed, added)); \
+    } \
+  }
+
+  assert(previous.usr_to_id == current.usr_to_id);
+  assert(previous.file_db == current.file_db);
+  IndexUpdate update(id_map);
+
+  // Types
+  CompareGroups<IndexedTypeDef>(previous.types, current.types,
+    /*onRemoved:*/[&update, &id_map](IndexedTypeDef* def) {
+    update.types_removed.push_back(id_map->Remap(def->def.id));
+  },
+    /*onAdded:*/[&update, &id_map](IndexedTypeDef* def) {
+    update.types_added.push_back(QueryableTypeDef(*id_map, *def));
+  },
+    /*onChanged:*/[&update, &id_map](IndexedTypeDef* previous, IndexedTypeDef* current) {
+    if (previous->def != current->def)
+      update.types_def_changed.push_back(id_map->Remap(current->def));
+
+    PROCESS_UPDATE_DIFF(types_derived, derived, TypeId);
+    PROCESS_UPDATE_DIFF(types_uses, uses, Location);
+  });
+
+  // Functions
+  CompareGroups<IndexedFuncDef>(previous.funcs, current.funcs,
+    /*onRemoved:*/[&update, &id_map](IndexedFuncDef* def) {
+    update.funcs_removed.push_back(id_map->Remap(def->def.id));
+  },
+    /*onAdded:*/[&update, &id_map](IndexedFuncDef* def) {
+    update.funcs_added.push_back(QueryableFuncDef(*id_map, *def));
+  },
+    /*onChanged:*/[&update, &id_map](IndexedFuncDef* previous, IndexedFuncDef* current) {
+    if (previous->def != current->def)
+      update.funcs_def_changed.push_back(id_map->Remap(current->def));
+    PROCESS_UPDATE_DIFF(funcs_declarations, declarations, Location);
+    PROCESS_UPDATE_DIFF(funcs_derived, derived, FuncId);
+    PROCESS_UPDATE_DIFF(funcs_callers, callers, FuncRef);
+    PROCESS_UPDATE_DIFF(funcs_uses, uses, Location);
+  });
+
+  // Variables
+  CompareGroups<IndexedVarDef>(previous.vars, current.vars,
+    /*onRemoved:*/[&update, &id_map](IndexedVarDef* def) {
+    update.vars_removed.push_back(id_map->Remap(def->def.id));
+  },
+    /*onAdded:*/[&update, &id_map](IndexedVarDef* def) {
+    update.vars_added.push_back(QueryableVarDef(*id_map, *def));
+  },
+    /*onChanged:*/[&update, &id_map](IndexedVarDef* previous, IndexedVarDef* current) {
+    if (previous->def != current->def)
+      update.vars_def_changed.push_back(id_map->Remap(current->def));
+    PROCESS_UPDATE_DIFF(vars_uses, uses, Location);
+  });
+
+  return update;
+
+#undef PROCESS_UPDATE_DIFF
+}
+
+// Merge the contents of |source| into |destination|. Not implemented yet: the body is empty, so |destination| is left untouched.
+void Merge(const IndexUpdate& source, IndexUpdate* destination) {
+  // TODO.
+}
+
+// Insert the contents of |update| into |db|. Not implemented yet: the body is empty, so |db| is left untouched.
+void ApplyIndexUpdate(const IndexUpdate& update, QueryableDatabase* db) {
+
+}
+
+
+
+int ma333in(int argc, char** argv) {  // Scratch driver: indexes two versions of a test file, prints both and diffs them. NOTE(review): the name looks like a deliberately mangled |main| - confirm.
+  // TODO: Unify UserToIdResolver and FileDb
+  UsrToIdResolver usr_to_id(1);
+  FileDb file_db(1);
+
+  IndexedFile indexed_file_a = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v0.cc", {});  // Both parses share |usr_to_id| and |file_db|, as ComputeDiff asserts.
+  std::cout << indexed_file_a.ToString() << std::endl;
+
+  std::cout << std::endl;
+  IndexedFile indexed_file_b = Parse(&usr_to_id, &file_db, "full_tests/index_delta/a_v1.cc", {});
+  std::cout << indexed_file_b.ToString() << std::endl;
+
+  // TODO: We don't need to do ID remapping when computing a diff. Well, we need to do it for the IndexUpdate.
+  IdMap dest_ids(2);
+  IndexUpdate update = ComputeDiff(&dest_ids, indexed_file_a, indexed_file_b);  // |update| is computed but not used afterwards.
+
+  return 0;
+}
+
+
+
+
+// TODO: Idea: when indexing and joining to the main db, allow many dbs that
+//             are joined to. So that way even if the main db is busy we can
+//             still be joining. Joining the partially joined db to the main
+//             db should be faster since we will have larger data lanes to use.
 // TODO: I think we can run libclang multiple times in one process. So we might
 //       only need two processes. Still, for perf reasons it would be good if
-//       we could stay in one process.
+//       we could stay in one process. We could probably just use shared
+//       memory. May want to run libclang in separate process to protect from
+//       crashes/issues there.
 // TODO: allow user to store configuration as json? file in home dir; also
 //       allow local overrides (scan up dirs)
 // TODO: add opt to dump config when starting (--dump-config)
-// TODO: allow user to decide some indexer choices, ie, do we define
-// TODO: may want to run indexer in separate process to avoid indexer/compiler crashes?
-
-std::unordered_map<std::string, std::string> ParseOptions(int argc, char** argv) {
-  std::unordered_map<std::string, std::string> output;
-
-  std::string previous_arg;
-
-  for (int i = 1; i < argc; ++i) {
-    std::string arg = argv[i];
-
-    if (arg[0] != '-') {
-      if (previous_arg.size() == 0) {
-        std::cerr << "Invalid arguments; switches must start with -" << std::endl;
-        exit(1);
-      }
-
-      output[previous_arg] = arg;
-      previous_arg = "";
-    }
-    else {
-      output[arg] = "";
-      previous_arg = arg;
-    }
-  }
-
-  return output;
-}
-
-bool HasOption(const std::unordered_map<std::string, std::string>& options, const std::string& option) {
-  return options.find(option) != options.end();
-}
-
-int main2(int argc, char** argv) {
-  std::unordered_map<std::string, std::string> options = ParseOptions(argc, argv);
-
-  if (argc == 1 || options.find("--help") != options.end()) {
-    std::cout << R"help(clang-indexer help:
-
-  General:
-    --help        Print this help information.
-    --help-commands
-                  Print all available query commands.
-    --project     Path to compile_commands.json. Needed for the server, and
-                  optionally by clients if there are multiple servers running.
-    --print-config
-                  Emit all configuration data this executable is using.
-    
-
-  Server:
-    --server      If present, this binary will run in server mode. The binary
-                  will not return until killed or an exit is requested. The
-                  server computes and caches an index of the entire program
-                  which is then queried by short-lived client processes. A
-                  client is created by running this binary with a --command
-                  flag.
-    --cache-dir   Directory to cache the index and other useful information. If
-                  a previous cache is present, the database will try to reuse
-                  it. If this flag is not present, the database will be
-                  in-memory only.
-    --threads     Number of threads to use for indexing and querying tasks.
-                  This value is optional; a good estimate is computed by
-                  default.
-
-                  
-  Client:
-    --command     Execute a query command against the index. See
-                  --command-help for a listing of valid commands and a
-                  description of what they do. Presence of this flag indicates
-                  that the indexer is in client mode; this flag is mutually
-                  exclusive with --server.
-    --location    Location of the query. Some commands require only a file,
-                  other require a line and column as well. Format is
-                  filename[:line:column]. For example, "foobar.cc" and
-                  "foobar.cc:1:10" are valid inputs.
-    --preferred-symbol-location
-                  When looking up symbols, try to return either the
-                  'declaration' or the 'definition'. Defaults to 'definition'.
-)help";
-    exit(0);
-  }
-
-  if (HasOption(options, "--help-commands")) {
-    std::cout << R"(Available commands:
-
-  callees:
-  callers:
-    Emit all functions (with location) that this function calls ("callees") or
-    that call this function ("callers"). Requires a location.
-
-  find-all-usages:
-    Emit every usage of the given symbol. This is intended to support a rename
-    refactoring. This output contains many uninteresting usages of symbols;
-    prefer find-interesting-usges. Requires a location.
-
-  find-interesting-usages:
-    Emit only usages of the given symbol which are semantically interesting.
-    Requires a location.
-
-  goto-referenced:
-    Find an associated reference (either definition or declaration) for the
-    given symbol. Requires a location.
-
-  hierarchy:
-    List the type hierarchy (ie, inherited and derived members) for the given
-    method or type. Requires a location.
-
-  outline:
-    Emit a file outline, listing all of the symbols in the file.
-
-  search:
-    Search for a symbol by name.
-)";
-    exit(0);
-  }
-
-  if (HasOption(options, "--project")) {
-    std::vector<CompilationEntry> entries = LoadCompilationEntriesFromDirectory(options["--project"]);
-
-
-    std::vector<IndexedFile> dbs;
-    for (const CompilationEntry& entry : entries) {
-      std::cout << "Parsing " << entry.filename << std::endl;
-      IndexedFile db = Parse(entry.filename, entry.args);
-
-      dbs.emplace_back(db);
-      std::cout << db.ToString() << std::endl << std::endl;
-    }
-
-    std::cin.get();
-    exit(0);
-  }
-
-  if (HasOption(options, "--command")) {
-    Command command;
-    if (!ParseCommand(options["--command"], &command))
-      Fail("Unknown command \"" + options["--command"] + "\"; see --help-commands");
-
-
-  }
-
-  std::cout << "Invalid arguments. Try --help.";
-  exit(1);
-  return 0;
-}
+// TODO: allow user to decide some indexer choices, ie, do we mark prototype parameters as usages?
--- a/serializer.cc
+++ b/serializer.cc
@ -2,110 +2,6 @@

 #include "indexer.h"

-#if false
-template<typename T>
-void Emit(Reader& a, const char* key, T& v) {
-  static_assert(false); // Must be specialized.
-}
-template<typename T>
-void Emit(Writer& a, const char* key, T& v) {
-  static_assert(false); // Must be specialized.
-}
-
-template<>
-void Emit(Reader& r, const char* key, int& v) {
-  v = r[key].GetInt();
-}
-
-template<>
-void Emit(Writer& w, const char* key, int &v) {
-  w.Key(key);
-  w.Int(v);
-}
-
-void StartObject(Reader& r) {}
-void StartObject(Writer& w) {
-  w.StartObject();
-}
-
-void EndObject(Reader& r) {}
-void EndObject(Writer& w) {
-  w.EndObject();
-}
-
-void StartArray(Reader& r) {}
-void StartArray(Writer& w) {
-  w.StartArray();
-}
-
-void EndArray(Reader& r) {}
-void EndArray(Writer& w) {
-  w.EndArray();
-}
-
-struct Object {
-  //Location l;
-  int a = 0, b = 0, c = 0;
-};
-
-/*
-void EmitKey(Reader& r, const char* key) {
-  w.Key(key);
-}
-void EmitKey(Writer& w, const char* key) {
-  w = w[key];
-}
-*/
-
-template<typename S>
-void Serialize(S& stream, Object& obj) {
-  StartObject(stream);
-  Emit(stream, "a", obj.a);
-  Emit(stream, "b", obj.b);
-  Emit(stream, "b", obj.c);
-  EndObject(stream);
-}
-
-/*
-template <typename C, typename T>
-C& operator&(C& stream, T& t) {
-t.serialize(stream);
-}
-*/
-
-int main(int argc, char** argv) {
-
-  rapidjson::StringBuffer output;
-  rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(output);
-  writer.SetFormatOptions(
-    rapidjson::PrettyFormatOptions::kFormatSingleLineArray);
-  writer.SetIndent(' ', 2);
-
-  Object foo;
-  foo.a = 10;
-  Serialize(writer, foo);
-  std::cout << output.GetString() << std::endl;
-
-  std::cout << "----" << std::endl;
-
-  rapidjson::Document doc;
-  //doc = doc["foo"];
-  doc.Parse(output.GetString());
-  Object foo2;
-  Serialize(doc, foo2);
-
-  std::cin.get();
-  //Reader r;
-  //foo.Serialize(r);
-
-  return 0;
-}
-#endif
-
-
-
-
-



@ -134,26 +30,26 @@ void Serialize(Writer& writer, const char* key, const std::vector<Location>& loc
 }

 template<typename T>
-void Serialize(Writer& writer, const char* key, LocalId<T> id) {
+void Serialize(Writer& writer, const char* key, Id<T> id) {  // Writes |key| (when non-null) followed by |id.id| as a uint64.
  if (key) writer.Key(key);
-  writer.Uint64(id.local_id);
+  writer.Uint64(id.id);
 }

 template<typename T>
-void Serialize(Writer& writer, const char* key, optional<LocalId<T>> id) {
+void Serialize(Writer& writer, const char* key, optional<Id<T>> id) {  // Writes nothing at all (not even |key|) when |id| holds no value.
  if (id) {
    Serialize(writer, key, id.value());
  }
 }

 template<typename T>
-void Serialize(Writer& writer, const char* key, const std::vector<LocalId<T>>& ids) {
+void Serialize(Writer& writer, const char* key, const std::vector<Id<T>>& ids) {  // Writes |ids| as an array under |key|; an empty vector writes nothing.
  if (ids.size() == 0)
    return;

  if (key) writer.Key(key);
  writer.StartArray();
-  for (LocalId<T> id : ids)
+  for (Id<T> id : ids)
    Serialize(writer, nullptr, id);
  writer.EndArray();
 }
@ -161,7 +57,7 @@ void Serialize(Writer& writer, const char* key, const std::vector<LocalId<T>>& i
 template<typename T>
 void Serialize(Writer& writer, const char* key, Ref<T> ref) {
  if (key) writer.Key(key);
-  std::string s = std::to_string(ref.id.local_id) + "@" + ref.loc.ToString();
+  std::string s = std::to_string(ref.id.id) + "@" + ref.loc.ToString();  // Written as the single string "<id>@<location>".
  writer.String(s.c_str());
 }

@ -191,8 +87,8 @@ void Serialize(Writer& writer, const char* key, uint64_t value) {
 }

 void Serialize(Writer& writer, IndexedFile* file) {
-  auto it = file->usr_to_type_id.find("");
-  if (it != file->usr_to_type_id.end()) {
+  auto it = file->usr_to_id->usr_to_type_id.find("");
+  if (it != file->usr_to_id->usr_to_type_id.end()) {
    file->Resolve(it->second)->def.short_name = "<fundamental>";
    assert(file->Resolve(it->second)->uses.size() == 0);
  }