From 14a213d4075eb033af31dfcfaa7cbde63c46e0b3 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Fri, 12 Jan 2018 22:13:08 -0800
Subject: [PATCH] Change std::string USR to uint64_t pseudorandom function
 (SipHash 64-bit)

---
 src/clang_cursor.cc                       |  26 ++++
 src/clang_cursor.h                        |   7 +
 src/import_pipeline.cc                    |   4 +-
 src/indexer.cc                            | 108 +++++++-------
 src/indexer.h                             |  31 ++--
 src/messages/cquery_call_tree.cc          |   8 +-
 src/messages/text_document_code_action.cc |   2 +-
 src/query.cc                              |  42 +++---
 src/query.h                               |  12 +-
 src/serializer.cc                         |   3 +-
 third_party/siphash.c                     | 165 ++++++++++++++++++++++
 wscript                                   |   7 +-
 12 files changed, 306 insertions(+), 109 deletions(-)
 create mode 100644 third_party/siphash.c
diff --git a/src/clang_cursor.cc b/src/clang_cursor.cc
index af70888e..1e82bd30 100644
--- a/src/clang_cursor.cc
+++ b/src/clang_cursor.cc
@@ -2,6 +2,8 @@
 
 #include "clang_utils.h"
 
+#include <string.h>
+
 #include <algorithm>
 #include <cassert>
 
@@ -19,6 +21,19 @@ Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file) {
                Position((int16_t)end_line, (int16_t)end_column) /*end*/);
 }
 
+// 64-bit SipHash of |usr| under a fixed key; a null |usr| hashes like "".
+uint64_t HashUSR(const char* usr) {
+  extern int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
+                     uint8_t *out, const size_t outlen);
+  const uint8_t k[16] = {0xd0, 0xe5, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52,
+                         0x61, 0x79, 0xea, 0x70, 0xca, 0x70, 0xf0, 0x0d};
+  if (!usr) usr = "";  // strlen(nullptr) is undefined behavior
+  uint64_t ret = 0;  // siphash() writes its 8 output bytes straight into it
+  (void)siphash(reinterpret_cast<const uint8_t*>(usr), strlen(usr), k,
+                reinterpret_cast<uint8_t*>(&ret), sizeof(ret));
+  return ret;
+}
+
 // TODO Place this global variable into config
 int g_enable_comments;
 
@@ -45,6 +60,10 @@ std::string ClangType::get_usr() const {
   return ClangCursor(clang_getTypeDeclaration(cx_type)).get_usr();
 }
 
+USR ClangType::get_usr_hash() const {  // USR hash of the type's declaration
+  return ClangCursor(clang_getTypeDeclaration(cx_type)).get_usr_hash();
+}
+
 ClangType ClangType::get_canonical() const {
   return clang_getCanonicalType(cx_type);
 }
@@ -153,6 +172,13 @@ std::string ClangCursor::get_usr() const {
   return ::ToString(clang_getCursorUSR(cx_cursor));
 }
 
+USR ClangCursor::get_usr_hash() const {  // HashUSR() of this cursor's USR
+  CXString usr = clang_getCursorUSR(cx_cursor);
+  USR ret = HashUSR(clang_getCString(usr));  // hash before |usr| is disposed
+  clang_disposeString(usr);
+  return ret;
+}
+
 bool ClangCursor::is_definition() const {
   return clang_isCursorDefinition(cx_cursor);
 }
diff --git a/src/clang_cursor.h b/src/clang_cursor.h
index c21531dd..78f10cd7 100644
--- a/src/clang_cursor.h
+++ b/src/clang_cursor.h
@@ -5,12 +5,17 @@
 #include <clang-c/Index.h>
 #include <optional.h>
 
+#include <array>
 #include <string>
 #include <vector>
 
+using USR = uint64_t;
+
 Range ResolveCXSourceRange(const CXSourceRange& range,
                            CXFile* cx_file = nullptr);
 
+USR HashUSR(const char* usr);
+
 class ClangType {
  public:
   ClangType();
@@ -24,6 +29,7 @@ class ClangType {
   // ClangCursor is not defined so we have to return CXCursor
   CXCursor get_declaration() const;
   std::string get_usr() const;
+  USR get_usr_hash() const;
   std::string get_spelling() const;
   ClangType get_canonical() const;
 
@@ -54,6 +60,7 @@ class ClangCursor {
   Range get_extent() const;
   std::string get_display_name() const;
   std::string get_usr() const;
+  USR get_usr_hash() const;
 
   bool is_definition() const;
 
diff --git a/src/import_pipeline.cc b/src/import_pipeline.cc
index 56013c31..d0d3e6c8 100644
--- a/src/import_pipeline.cc
+++ b/src/import_pipeline.cc
@@ -1,5 +1,6 @@
 #include "import_pipeline.h"
 
+#include "clang_cursor.h"  // HashUSR
 #include "cache_manager.h"
 #include "config.h"
 #include "iindexer.h"
@@ -621,8 +622,7 @@ bool QueryDb_ImportMain(Config* config,
       WorkingFile* working_file =
           working_files->GetFileByFilename(updated_file.path);
       if (working_file) {
-        QueryFileId file_id =
-            db->usr_to_file[LowerPathIfCaseInsensitive(working_file->filename)];
+        QueryFileId file_id = db->usr_to_file[LowerPathIfCaseInsensitive(working_file->filename)];
         QueryFile* file = &db->files[file_id.id];
         EmitSemanticHighlighting(db, semantic_cache, working_file, file);
       }
diff --git a/src/indexer.cc b/src/indexer.cc
index b59d1fd5..caa52737 100644
--- a/src/indexer.cc
+++ b/src/indexer.cc
@@ -111,12 +111,11 @@ ClangSymbolKind GetSymbolKind(CXIdxEntityKind kind) {
 // to export. If we do not capture the parameter type description for the
 // constructor we will not be able to attribute the constructor call correctly.
 struct ConstructorCache {
-  using Usr = std::string;
   struct Constructor {
-    Usr usr;
+    USR usr;
     std::vector<std::string> param_type_desc;
   };
-  std::unordered_map<Usr, std::vector<Constructor>> constructors_;
+  std::unordered_map<USR, std::vector<Constructor>> constructors_;
 
   // This should be called whenever there is a constructor declaration.
   void NotifyConstructor(ClangCursor ctor_cursor) {
@@ -129,22 +128,22 @@ struct ConstructorCache {
       return type_desc;
     };
 
-    Constructor ctor{ctor_cursor.get_usr(), build_type_desc(ctor_cursor)};
+    Constructor ctor{ctor_cursor.get_usr_hash(), build_type_desc(ctor_cursor)};
 
     // Insert into |constructors_|.
-    std::string type_usr = ctor_cursor.get_semantic_parent().get_usr();
-    auto existing_ctors = constructors_.find(type_usr);
+    auto type_usr_hash = ctor_cursor.get_semantic_parent().get_usr_hash();
+    auto existing_ctors = constructors_.find(type_usr_hash);
     if (existing_ctors != constructors_.end()) {
       existing_ctors->second.push_back(ctor);
     } else {
-      constructors_[type_usr] = {ctor};
+      constructors_[type_usr_hash] = {ctor};
     }
   }
 
   // Tries to lookup a constructor in |type_usr| that takes arguments most
   // closely aligned to |param_type_desc|.
-  optional<std::string> TryFindConstructorUsr(
-      const std::string& type_usr,
+  optional<USR> TryFindConstructorUsr(
+      USR type_usr,
       const std::vector<std::string>& param_type_desc) {
     auto count_matching_prefix_length = [](const char* a, const char* b) {
       int matched = 0;
@@ -171,7 +170,7 @@ struct ConstructorCache {
     if (ctors.empty())
       return nullopt;
 
-    std::string best_usr;
+    USR best_usr;
     int best_score = INT_MIN;
 
     // Scan constructors for the best possible match.
@@ -192,7 +191,7 @@ struct ConstructorCache {
       // Do prefix-based match on parameter type description. This works well in
       // practice because clang appends qualifiers to the end of the type, ie,
       // |foo *&&|
-      for (int i = 0;
+      for (size_t i = 0;
            i < std::min(param_type_desc.size(), ctor.param_type_desc.size());
            ++i) {
         score += count_matching_prefix_length(param_type_desc[i].c_str(),
@@ -205,7 +204,6 @@ struct ConstructorCache {
       }
     }
 
-    assert(!best_usr.empty());
     return best_usr;
   }
 };
@@ -397,9 +395,10 @@ optional<IndexTypeId> ResolveToDeclarationType(IndexFile* db,
                                                ClangCursor cursor) {
   ClangCursor declaration = cursor.get_declaration();
   declaration = declaration.template_specialization_to_template_definition();
+  // TODO optimize: USR is fetched for the empty check and again to hash it.
   std::string usr = declaration.get_usr();
-  if (usr != "")
-    return db->ToTypeId(usr);
+  if (usr.size())
+    return db->ToTypeId(declaration.get_usr_hash());
   return nullopt;
 }
 
@@ -472,7 +471,7 @@ void OnIndexReference_Function(IndexFile* db,
 }  // namespace
 
 // static
-int IndexFile::kCurrentVersion = 8;
+int IndexFile::kCurrentVersion = 9;
 
 IndexFile::IndexFile(const std::string& path,
                      const optional<std::string>& contents)
@@ -488,7 +487,7 @@ IndexFile::IndexFile(const std::string& path,
 }
 
 // TODO: Optimize for const char*?
-IndexTypeId IndexFile::ToTypeId(const std::string& usr) {
+IndexTypeId IndexFile::ToTypeId(USR usr) {
   auto it = id_cache.usr_to_type_id.find(usr);
   if (it != id_cache.usr_to_type_id.end())
     return it->second;
@@ -499,7 +498,7 @@ IndexTypeId IndexFile::ToTypeId(const std::string& usr) {
   id_cache.type_id_to_usr[id] = usr;
   return id;
 }
-IndexFuncId IndexFile::ToFuncId(const std::string& usr) {
+IndexFuncId IndexFile::ToFuncId(USR usr) {
   auto it = id_cache.usr_to_func_id.find(usr);
   if (it != id_cache.usr_to_func_id.end())
     return it->second;
@@ -510,7 +509,7 @@ IndexFuncId IndexFile::ToFuncId(const std::string& usr) {
   id_cache.func_id_to_usr[id] = usr;
   return id;
 }
-IndexVarId IndexFile::ToVarId(const std::string& usr) {
+IndexVarId IndexFile::ToVarId(USR usr) {
   auto it = id_cache.usr_to_var_id.find(usr);
   if (it != id_cache.usr_to_var_id.end())
     return it->second;
@@ -523,15 +522,15 @@ IndexVarId IndexFile::ToVarId(const std::string& usr) {
 }
 
 IndexTypeId IndexFile::ToTypeId(const CXCursor& cursor) {
-  return ToTypeId(ClangCursor(cursor).get_usr());
+  return ToTypeId(ClangCursor(cursor).get_usr_hash());
 }
 
 IndexFuncId IndexFile::ToFuncId(const CXCursor& cursor) {
-  return ToFuncId(ClangCursor(cursor).get_usr());
+  return ToFuncId(ClangCursor(cursor).get_usr_hash());
 }
 
 IndexVarId IndexFile::ToVarId(const CXCursor& cursor) {
-  return ToVarId(ClangCursor(cursor).get_usr());
+  return ToVarId(ClangCursor(cursor).get_usr_hash());
 }
 
 IndexType* IndexFile::Resolve(IndexTypeId id) {
@@ -548,10 +547,7 @@ std::string IndexFile::ToString() {
   return Serialize(SerializeFormat::Json, *this);
 }
 
-IndexType::IndexType(IndexTypeId id, const std::string& usr)
-    : usr(usr), id(id) {
-  assert(usr.size() > 0);
-}
+IndexType::IndexType(IndexTypeId id, USR usr) : usr(usr), id(id) {}
 
 void RemoveItem(std::vector<Range>& ranges, Range to_remove) {
   auto it = std::find(ranges.begin(), ranges.end(), to_remove);
@@ -741,14 +737,12 @@ void VisitDeclForTypeUsageVisitorHandler(ClangCursor cursor,
   IndexFile* db = param->db;
 
   std::string referenced_usr =
-      cursor.get_referenced()
-          .template_specialization_to_template_definition()
-          .get_usr();
+      cursor.get_referenced().template_specialization_to_template_definition().get_usr();
   // TODO: things in STL cause this to be empty. Figure out why and document it.
   if (referenced_usr == "")
     return;
 
-  IndexTypeId ref_type_id = db->ToTypeId(referenced_usr);
+  IndexTypeId ref_type_id = db->ToTypeId(HashUSR(referenced_usr.c_str()));
 
   if (!param->initial_type)
     param->initial_type = ref_type_id;
@@ -960,19 +954,18 @@ ClangCursor::VisitResult AddDeclInitializerUsagesVisitor(ClangCursor cursor,
       // different USR.
 
       // ClangCursor ref =
-      // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr();
+      // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr_hash();
       // std::string ref_usr =
-      // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr();
-      std::string ref_usr =
-          cursor.get_referenced()
-              .template_specialization_to_template_definition()
-              .get_usr();
-      // std::string ref_usr = ref.get_usr();
+      // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr_hash();
+      auto ref_usr = cursor.get_referenced()
+                         .template_specialization_to_template_definition()
+                         .get_usr();
+      // std::string ref_usr = ref.get_usr_hash();
       if (ref_usr == "")
         break;
 
       Range loc = cursor.get_spelling_range();
-      IndexVarId ref_id = db->ToVarId(ref_usr);
+      IndexVarId ref_id = db->ToVarId(HashUSR(ref_usr.c_str()));
       IndexVar* ref_def = db->Resolve(ref_id);
       UniqueAdd(ref_def->uses, loc);
       break;
@@ -1018,11 +1011,11 @@ ClangCursor::VisitResult VisitMacroDefinitionAndExpansions(ClangCursor cursor,
       // only real difference will be that we show 'callers' instead of 'refs'
       // (especially since macros cannot have overrides)
 
-      std::string decl_usr;
+      USR decl_usr;
       if (cursor.get_kind() == CXCursor_MacroDefinition)
-        decl_usr = cursor.get_usr();
+        decl_usr = cursor.get_usr_hash();
       else
-        decl_usr = cursor.get_referenced().get_usr();
+        decl_usr = cursor.get_referenced().get_usr_hash();
 
       IndexVarId var_id = db->ToVarId(decl_usr);
       IndexVar* var_def = db->Resolve(var_id);
@@ -1069,7 +1062,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor,
     case CXCursor_DeclRefExpr: {
       ClangCursor ref_cursor = clang_getCursorReferenced(cursor.cx_cursor);
       if (ref_cursor.get_kind() == CXCursor_NonTypeTemplateParameter) {
-        IndexVar* ref_index = db->Resolve(db->ToVarId(ref_cursor.get_usr()));
+        IndexVar* ref_index = db->Resolve(db->ToVarId(ref_cursor.get_usr_hash()));
         if (ref_index->def.short_name.empty()) {
           ref_index->def.definition_spelling = ref_cursor.get_spelling_range();
           ref_index->def.definition_extent = ref_cursor.get_extent();
@@ -1078,9 +1071,10 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor,
           ref_index->uses.push_back(ref_cursor.get_spelling_range());
 
           ClangType ref_type = clang_getCursorType(ref_cursor.cx_cursor);
+          // TODO optimize: get_usr() is called only to test for emptiness.
           if (ref_type.get_usr().size()) {
             IndexType* ref_type_index =
-                db->Resolve(db->ToTypeId(ref_type.get_usr()));
+                db->Resolve(db->ToTypeId(ref_type.get_usr_hash()));
             // The cursor extent includes `type name`, not just `name`. There
             // seems no way to extract the spelling range of `type` and we do
             // not want to do subtraction here.
@@ -1101,8 +1095,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor,
             break;
           case CXCursor_FunctionDecl:
           case CXCursor_FunctionTemplate: {
-            std::string ref_usr = overloaded.get_usr();
-            IndexFuncId called_id = db->ToFuncId(ref_usr);
+            IndexFuncId called_id = db->ToFuncId(overloaded.get_usr_hash());
             IndexFunc* called = db->Resolve(called_id);
             OnIndexReference_Function(db, cursor.get_spelling_range(),
                                       data->container, called_id, called,
@@ -1116,7 +1109,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor,
     case CXCursor_TemplateRef: {
       ClangCursor ref_cursor = clang_getCursorReferenced(cursor.cx_cursor);
       if (ref_cursor.get_kind() == CXCursor_TemplateTemplateParameter) {
-        IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr()));
+        IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr_hash()));
         // TODO It seems difficult to get references to template template
         // parameters.
         // CXCursor_TemplateTemplateParameter can be visited by visiting
@@ -1136,7 +1129,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor,
     case CXCursor_TypeRef: {
       ClangCursor ref_cursor = clang_getCursorReferenced(cursor.cx_cursor);
       if (ref_cursor.get_kind() == CXCursor_TemplateTypeParameter) {
-        IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr()));
+        IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr_hash()));
         // TODO It seems difficult to get a FunctionTemplate's template
         // parameters.
         // CXCursor_TemplateTypeParameter can be visited by visiting
@@ -1271,9 +1264,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
           decl_cursor.template_specialization_to_template_definition())
         break;
 
-      std::string decl_usr = decl_cursor.get_usr();
-
-      IndexVarId var_id = db->ToVarId(decl->entityInfo->USR);
+      IndexVarId var_id = db->ToVarId(HashUSR(decl->entityInfo->USR));
       IndexVar* var = db->Resolve(var_id);
 
       // TODO: Eventually run with this if. Right now I want to iron out bugs
@@ -1471,7 +1462,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
             ClangCursor parent =
                 ClangCursor(overridden[i])
                     .template_specialization_to_template_definition();
-            IndexFuncId parent_id = db->ToFuncId(parent.get_usr());
+            IndexFuncId parent_id = db->ToFuncId(parent.get_usr_hash());
             IndexFunc* parent_def = db->Resolve(parent_id);
             func = db->Resolve(func_id);  // ToFuncId invalidated func_def
 
@@ -1493,7 +1484,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
       optional<IndexTypeId> alias_of = AddDeclTypeUsages(
           db, decl->cursor, decl->semanticContainer, decl->lexicalContainer);
 
-      IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR);
+      IndexTypeId type_id = db->ToTypeId(HashUSR(decl->entityInfo->USR));
       IndexType* type = db->Resolve(type_id);
 
       if (alias_of)
@@ -1545,7 +1536,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
       ClangCursor decl_cursor = decl->cursor;
       Range decl_loc_spelling = decl_cursor.get_spelling_range();
 
-      IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR);
+      IndexTypeId type_id = db->ToTypeId(HashUSR(decl->entityInfo->USR));
       IndexType* type = db->Resolve(type_id);
 
       // TODO: Eventually run with this if. Right now I want to iron out bugs
@@ -1632,7 +1623,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
                   << std::endl;
       if (decl->lexicalContainer)
         std::cerr << "     lexicalContainer  = "
-                  << ClangCursor(decl->lexicalContainer->cursor).get_usr()
+                  << ClangCursor(decl->lexicalContainer->cursor).get_usr_hash()
                   << std::endl;
       break;
   }
@@ -1688,7 +1679,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
       ClangCursor referenced = ref->referencedEntity->cursor;
       referenced = referenced.template_specialization_to_template_definition();
 
-      IndexVarId var_id = db->ToVarId(referenced.get_usr());
+      IndexVarId var_id = db->ToVarId(referenced.get_usr_hash());
       IndexVar* var = db->Resolve(var_id);
       // Lambda paramaters are not processed by OnIndexDeclaration and
       // may not have a short_name yet. Note that we only process the lambda
@@ -1735,7 +1726,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
       ClangCursor ref_cursor(ref->cursor);
       Range loc = ref_cursor.get_spelling_range();
 
-      IndexFuncId called_id = db->ToFuncId(ref->referencedEntity->USR);
+      IndexFuncId called_id = db->ToFuncId(HashUSR(ref->referencedEntity->USR));
       IndexFunc* called = db->Resolve(called_id);
 
       // libclang doesn't provide a nice api to check if the given function
@@ -1786,8 +1777,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
         // the constructor function we add a usage to.
         optional<ClangCursor> opt_found_type = FindType(ref->cursor);
         if (opt_found_type) {
-          std::string ctor_type_usr =
-              opt_found_type->get_referenced().get_usr();
+          USR ctor_type_usr = opt_found_type->get_referenced().get_usr_hash();
           ClangCursor call_cursor = ref->cursor;
 
           // Build a type description from the parameters of the call, so we
@@ -1800,7 +1790,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
           }
 
           // Try to find the constructor and add a reference.
-          optional<std::string> ctor_usr =
+          optional<USR> ctor_usr =
               param->ctors.TryFindConstructorUsr(ctor_type_usr, call_type_desc);
           if (ctor_usr) {
             IndexFunc* ctor = db->Resolve(db->ToFuncId(*ctor_usr));
@@ -1823,7 +1813,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) {
     case CXIdxEntity_CXXClass: {
       ClangCursor ref_cursor = ref->referencedEntity->cursor;
       ref_cursor = ref_cursor.template_specialization_to_template_definition();
-      IndexType* referenced = db->Resolve(db->ToTypeId(ref_cursor.get_usr()));
+      IndexType* referenced = db->Resolve(db->ToTypeId(ref_cursor.get_usr_hash()));
 
       //
       // The following will generate two TypeRefs to Foo, both located at the
diff --git a/src/indexer.h b/src/indexer.h
index 9ecdebee..7eac4052 100644
--- a/src/indexer.h
+++ b/src/indexer.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "clang_cursor.h"
 #include "clang_index.h"
 #include "clang_translation_unit.h"
 #include "clang_utils.h"
@@ -259,7 +260,7 @@ struct IndexType {
   using Def =
       TypeDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, Range>;
 
-  std::string usr;
+  USR usr;
   IndexTypeId id;
 
   Def def;
@@ -275,7 +276,7 @@ struct IndexType {
   std::vector<Range> uses;
 
   IndexType() {}  // For serialization.
-  IndexType(IndexTypeId id, const std::string& usr);
+  IndexType(IndexTypeId id, USR usr);
 
   bool operator<(const IndexType& other) const { return id < other.id; }
 };
@@ -361,7 +362,7 @@ struct IndexFunc {
                                     IndexFuncRef,
                                     Range>;
 
-  std::string usr;
+  USR usr;
   IndexFuncId id;
 
   Def def;
@@ -391,7 +392,7 @@ struct IndexFunc {
   std::vector<IndexFuncRef> callers;
 
   IndexFunc() {}  // For serialization.
-  IndexFunc(IndexFuncId id, const std::string& usr) : usr(usr), id(id) {
+  IndexFunc(IndexFuncId id, USR usr) : usr(usr), id(id) {
     // assert(usr.size() > 0);
   }
 
@@ -468,7 +469,7 @@ void Reflect(TVisitor& visitor,
 struct IndexVar {
   using Def = VarDefDefinitionData<IndexTypeId, IndexFuncId, IndexVarId, Range>;
 
-  std::string usr;
+  USR usr;
   IndexVarId id;
 
   Def def;
@@ -477,7 +478,7 @@ struct IndexVar {
   std::vector<Range> uses;
 
   IndexVar() {}  // For serialization.
-  IndexVar(IndexVarId id, const std::string& usr) : usr(usr), id(id) {
+  IndexVar(IndexVarId id, USR usr) : usr(usr), id(id) {
     // assert(usr.size() > 0);
   }
 
@@ -487,12 +488,12 @@ MAKE_HASHABLE(IndexVar, t.id);
 
 struct IdCache {
   std::string primary_file;
-  std::unordered_map<std::string, IndexTypeId> usr_to_type_id;
-  std::unordered_map<std::string, IndexFuncId> usr_to_func_id;
-  std::unordered_map<std::string, IndexVarId> usr_to_var_id;
-  std::unordered_map<IndexTypeId, std::string> type_id_to_usr;
-  std::unordered_map<IndexFuncId, std::string> func_id_to_usr;
-  std::unordered_map<IndexVarId, std::string> var_id_to_usr;
+  std::unordered_map<USR, IndexTypeId> usr_to_type_id;
+  std::unordered_map<USR, IndexFuncId> usr_to_func_id;
+  std::unordered_map<USR, IndexVarId> usr_to_var_id;
+  std::unordered_map<IndexTypeId, USR> type_id_to_usr;
+  std::unordered_map<IndexFuncId, USR> func_id_to_usr;
+  std::unordered_map<IndexVarId, USR> var_id_to_usr;
 
   IdCache(const std::string& primary_file);
 };
@@ -544,9 +545,9 @@ struct IndexFile {
 
   IndexFile(const std::string& path, const optional<std::string>& contents);
 
-  IndexTypeId ToTypeId(const std::string& usr);
-  IndexFuncId ToFuncId(const std::string& usr);
-  IndexVarId ToVarId(const std::string& usr);
+  IndexTypeId ToTypeId(USR usr);
+  IndexFuncId ToFuncId(USR usr);
+  IndexVarId ToVarId(USR usr);
   IndexTypeId ToTypeId(const CXCursor& usr);
   IndexFuncId ToFuncId(const CXCursor& usr);
   IndexVarId ToVarId(const CXCursor& usr);
diff --git a/src/messages/cquery_call_tree.cc b/src/messages/cquery_call_tree.cc
index 914f9d35..d8e8d694 100644
--- a/src/messages/cquery_call_tree.cc
+++ b/src/messages/cquery_call_tree.cc
@@ -4,6 +4,7 @@
 
 #include <loguru.hpp>
 
+// FIXME Interop with VSCode, change std::string usr to Usr (uint64_t)
 namespace {
 struct Ipc_CqueryCallTreeInitial
     : public IpcMessage<Ipc_CqueryCallTreeInitial> {
@@ -62,7 +63,7 @@ std::vector<Out_CqueryCallTree::CallEntry> BuildInitialCallTree(
 
   Out_CqueryCallTree::CallEntry entry;
   entry.name = root_func.def->short_name;
-  entry.usr = root_func.usr;
+  entry.usr = std::to_string(root_func.usr);
   entry.location = *def_loc;
   entry.hasCallers = HasCallersOnSelfOrBaseOrDerived(db, root_func);
   std::vector<Out_CqueryCallTree::CallEntry> result;
@@ -113,7 +114,7 @@ std::vector<Out_CqueryCallTree::CallEntry> BuildExpandCallTree(
 
       Out_CqueryCallTree::CallEntry call_entry;
       call_entry.name = call_func.def->short_name;
-      call_entry.usr = call_func.usr;
+      call_entry.usr = std::to_string(call_func.usr);
       call_entry.location = *call_location;
       call_entry.hasCallers = HasCallersOnSelfOrBaseOrDerived(db, call_func);
       call_entry.callType = call_type;
@@ -188,7 +189,8 @@ struct CqueryCallTreeExpandHandler
     Out_CqueryCallTree out;
     out.id = request->id;
 
-    auto func_id = db->usr_to_func.find(request->params.usr);
+    // FIXME: usr arrives as a decimal string; std::stoull throws if malformed.
+    auto func_id = db->usr_to_func.find(std::stoull(request->params.usr));
     if (func_id != db->usr_to_func.end())
       out.result = BuildExpandCallTree(db, working_files, func_id->second);
 
diff --git a/src/messages/text_document_code_action.cc b/src/messages/text_document_code_action.cc
index 206020ba..57a386cc 100644
--- a/src/messages/text_document_code_action.cc
+++ b/src/messages/text_document_code_action.cc
@@ -104,7 +104,7 @@ optional<QueryFileId> GetImplementationFile(QueryDatabase* db,
   LOG_S(INFO) << "!! Looking for impl file that starts with " << target_path;
 
   for (auto& entry : db->usr_to_file) {
-    Usr path = entry.first;
+    const std::string& path = entry.first;
 
     // Do not consider header files for implementation files.
     // TODO: make file extensions configurable.
diff --git a/src/query.cc b/src/query.cc
index 320afa18..f54b324a 100644
--- a/src/query.cc
+++ b/src/query.cc
@@ -285,12 +285,13 @@ QueryFileId GetQueryFileIdFromPath(QueryDatabase* query_db,
     return QueryFileId(it->second.id);
 
   size_t idx = query_db->files.size();
-  query_db->usr_to_file[LowerPathIfCaseInsensitive(path)] = QueryFileId(idx);
+  query_db->usr_to_file[LowerPathIfCaseInsensitive(path)] =
+      QueryFileId(idx);
   query_db->files.push_back(QueryFile(path));
   return QueryFileId(idx);
 }
 
-QueryTypeId GetQueryTypeIdFromUsr(QueryDatabase* query_db, const Usr& usr) {
+QueryTypeId GetQueryTypeIdFromUsr(QueryDatabase* query_db, USR usr) {
   auto it = query_db->usr_to_type.find(usr);
   if (it != query_db->usr_to_type.end())
     return QueryTypeId(it->second.id);
@@ -301,7 +302,7 @@ QueryTypeId GetQueryTypeIdFromUsr(QueryDatabase* query_db, const Usr& usr) {
   return QueryTypeId(idx);
 }
 
-QueryFuncId GetQueryFuncIdFromUsr(QueryDatabase* query_db, const Usr& usr) {
+QueryFuncId GetQueryFuncIdFromUsr(QueryDatabase* query_db, USR usr) {
   auto it = query_db->usr_to_func.find(usr);
   if (it != query_db->usr_to_func.end())
     return QueryFuncId(it->second.id);
@@ -713,8 +714,9 @@ void QueryDatabase::RemoveUsrs(SymbolKind usr_kind,
 
   switch (usr_kind) {
     case SymbolKind::File: {
-      for (const Usr& usr : to_remove)
-        files[usr_to_file[LowerPathIfCaseInsensitive(usr)].id].def = nullopt;
+      // FIXME: file removal disabled; usr_to_file is keyed by path, not by Usr.
+      //for (const Usr& usr : to_remove)
+      //  files[usr_to_file[usr].id].def = nullopt;
       break;
     }
     case SymbolKind::Type: {
@@ -896,30 +898,30 @@ TEST_SUITE("query") {
     IndexFile previous("foo.cc", nullopt);
     IndexFile current("foo.cc", nullopt);
 
-    previous.Resolve(previous.ToTypeId("usr1"))->def.definition_spelling =
+    previous.Resolve(previous.ToTypeId(HashUSR("usr1")))->def.definition_spelling =
         Range(Position(1, 0));
-    previous.Resolve(previous.ToFuncId("usr2"))->def.definition_spelling =
+    previous.Resolve(previous.ToFuncId(HashUSR("usr2")))->def.definition_spelling =
         Range(Position(2, 0));
-    previous.Resolve(previous.ToVarId("usr3"))->def.definition_spelling =
+    previous.Resolve(previous.ToVarId(HashUSR("usr3")))->def.definition_spelling =
         Range(Position(3, 0));
 
     IndexUpdate update = GetDelta(previous, current);
 
-    REQUIRE(update.types_removed == std::vector<Usr>{"usr1"});
-    REQUIRE(update.funcs_removed == std::vector<Usr>{"usr2"});
-    REQUIRE(update.vars_removed == std::vector<Usr>{"usr3"});
+    REQUIRE(update.types_removed == std::vector<Usr>{HashUSR("usr1")});
+    REQUIRE(update.funcs_removed == std::vector<Usr>{HashUSR("usr2")});
+    REQUIRE(update.vars_removed == std::vector<Usr>{HashUSR("usr3")});
   }
 
   TEST_CASE("do not remove ref-only defs") {
     IndexFile previous("foo.cc", nullopt);
     IndexFile current("foo.cc", nullopt);
 
-    previous.Resolve(previous.ToTypeId("usr1"))
+    previous.Resolve(previous.ToTypeId(HashUSR("usr1")))
         ->uses.push_back(Range(Position(1, 0)));
-    previous.Resolve(previous.ToFuncId("usr2"))
+    previous.Resolve(previous.ToFuncId(HashUSR("usr2")))
         ->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(2, 0)),
                                          false /*is_implicit*/));
-    previous.Resolve(previous.ToVarId("usr3"))
+    previous.Resolve(previous.ToVarId(HashUSR("usr3")))
         ->uses.push_back(Range(Position(3, 0)));
 
     IndexUpdate update = GetDelta(previous, current);
@@ -933,8 +935,8 @@ TEST_SUITE("query") {
     IndexFile previous("foo.cc", nullopt);
     IndexFile current("foo.cc", nullopt);
 
-    IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr"));
-    IndexFunc* cf = current.Resolve(current.ToFuncId("usr"));
+    IndexFunc* pf = previous.Resolve(previous.ToFuncId(HashUSR("usr")));
+    IndexFunc* cf = current.Resolve(current.ToFuncId(HashUSR("usr")));
 
     pf->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(1, 0)),
                                        false /*is_implicit*/));
@@ -958,8 +960,8 @@ TEST_SUITE("query") {
     IndexFile previous("foo.cc", nullopt);
     IndexFile current("foo.cc", nullopt);
 
-    IndexType* pt = previous.Resolve(previous.ToTypeId("usr"));
-    IndexType* ct = current.Resolve(current.ToTypeId("usr"));
+    IndexType* pt = previous.Resolve(previous.ToTypeId(HashUSR("usr")));
+    IndexType* ct = current.Resolve(current.ToTypeId(HashUSR("usr")));
 
     pt->uses.push_back(Range(Position(1, 0)));
     ct->uses.push_back(Range(Position(2, 0)));
@@ -979,8 +981,8 @@ TEST_SUITE("query") {
     IndexFile previous("foo.cc", nullopt);
     IndexFile current("foo.cc", nullopt);
 
-    IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr"));
-    IndexFunc* cf = current.Resolve(current.ToFuncId("usr"));
+    IndexFunc* pf = previous.Resolve(previous.ToFuncId(HashUSR("usr")));
+    IndexFunc* cf = current.Resolve(current.ToFuncId(HashUSR("usr")));
     pf->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(1, 0)),
                                        false /*is_implicit*/));
     pf->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(2, 0)),
diff --git a/src/query.h b/src/query.h
index 046c5ac4..e372a87d 100644
--- a/src/query.h
+++ b/src/query.h
@@ -7,7 +7,7 @@
 
 #include <functional>
 
-using Usr = std::string;
+using Usr = USR;
 
 struct QueryFile;
 struct QueryType;
@@ -170,10 +170,10 @@ void Reflect(TVisitor& visitor, MergeableUpdate<TId, TValue>& value) {
 
 template <typename T>
 struct WithUsr {
-  Usr usr;
+  USR usr;  // Same type as Usr (this header declares `using Usr = USR`).
   T value;
 
-  WithUsr(const Usr& usr, const T& value) : usr(usr), value(value) {}
+  WithUsr(USR usr, const T& value) : usr(usr), value(value) {}
 };
 template <typename TVisitor, typename T>
 void Reflect(TVisitor& visitor, WithUsr<T>& value) {
@@ -225,7 +225,7 @@ struct QueryType {
   using InstancesUpdate = MergeableUpdate<QueryTypeId, QueryVarId>;
   using UsesUpdate = MergeableUpdate<QueryTypeId, QueryLocation>;
 
-  Usr usr;
+  USR usr;
   optional<Def> def;
   std::vector<QueryTypeId> derived;
   std::vector<QueryVarId> instances;
@@ -246,7 +246,7 @@ struct QueryFunc {
   using DerivedUpdate = MergeableUpdate<QueryFuncId, QueryFuncId>;
   using CallersUpdate = MergeableUpdate<QueryFuncId, QueryFuncRef>;
 
-  Usr usr;
+  USR usr;
   optional<Def> def;
   std::vector<QueryLocation> declarations;
   std::vector<QueryFuncId> derived;
@@ -352,7 +352,7 @@ struct QueryDatabase {
   // Lookup symbol based on a usr.
   // NOTE: For usr_to_file make sure to call LowerPathIfCaseInsensitive on key.
   // TODO: add type wrapper to enforce we call it
-  spp::sparse_hash_map<Usr, QueryFileId> usr_to_file;
+  spp::sparse_hash_map<std::string, QueryFileId> usr_to_file;
   spp::sparse_hash_map<Usr, QueryTypeId> usr_to_type;
   spp::sparse_hash_map<Usr, QueryFuncId> usr_to_func;
   spp::sparse_hash_map<Usr, QueryVarId> usr_to_var;
diff --git a/src/serializer.cc b/src/serializer.cc
index 062adb12..d2c079f9 100644
--- a/src/serializer.cc
+++ b/src/serializer.cc
@@ -171,7 +171,8 @@ void Reflect(TVisitor& visitor, IndexVar& value) {
 
 // IndexFile
 bool ReflectMemberStart(Writer& visitor, IndexFile& value) {
-  auto it = value.id_cache.usr_to_type_id.find("");
+  // FIXME
+  auto it = value.id_cache.usr_to_type_id.find(HashUSR(""));
   if (it != value.id_cache.usr_to_type_id.end()) {
     value.Resolve(it->second)->def.short_name = "<fundamental>";
     assert(value.Resolve(it->second)->uses.size() == 0);
diff --git a/third_party/siphash.c b/third_party/siphash.c
new file mode 100644
index 00000000..d69f4b57
--- /dev/null
+++ b/third_party/siphash.c
@@ -0,0 +1,165 @@
+/*
+   SipHash reference C implementation
+
+   Copyright (c) 2012-2016 Jean-Philippe Aumasson
+   <jeanphilippe.aumasson@gmail.com>
+   Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+
+   To the extent possible under law, the author(s) have dedicated all copyright
+   and related and neighboring rights to this software to the public domain
+   worldwide. This software is distributed without any warranty.
+
+   You should have received a copy of the CC0 Public Domain Dedication along
+   with
+   this software. If not, see
+   <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* default: SipHash-2-4 */
+#define cROUNDS 2 /* SIPROUNDs run after each 8-byte block is XORed into v3 */
+#define dROUNDS 4 /* SIPROUNDs run before each 8-byte output word is computed */
+/* Rotate x left by b bits within 64 bits; b must be 1..63 (0 would shift by 64). */
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+/* Store v into p[0..3], low byte first. Expands to 4 statements: never use unbraced. */
+#define U32TO8_LE(p, v)                                                        \
+    (p)[0] = (uint8_t)((v));                                                   \
+    (p)[1] = (uint8_t)((v) >> 8);                                              \
+    (p)[2] = (uint8_t)((v) >> 16);                                             \
+    (p)[3] = (uint8_t)((v) >> 24);
+/* Store v into p[0..7], low byte first. Also multi-statement: never use unbraced. */
+#define U64TO8_LE(p, v)                                                        \
+    U32TO8_LE((p), (uint32_t)((v)));                                           \
+    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+/* Load the 8 bytes p[0..7] as a uint64_t, p[0] being the least significant byte. */
+#define U8TO64_LE(p)                                                           \
+    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \
+     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \
+     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \
+     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+/* One mixing round; updates the variables v0..v3 of the enclosing scope in place. */
+#define SIPROUND                                                               \
+    do {                                                                       \
+        v0 += v1;                                                              \
+        v1 = ROTL(v1, 13);                                                     \
+        v1 ^= v0;                                                              \
+        v0 = ROTL(v0, 32);                                                     \
+        v2 += v3;                                                              \
+        v3 = ROTL(v3, 16);                                                     \
+        v3 ^= v2;                                                              \
+        v0 += v3;                                                              \
+        v3 = ROTL(v3, 21);                                                     \
+        v3 ^= v0;                                                              \
+        v2 += v1;                                                              \
+        v1 = ROTL(v1, 17);                                                     \
+        v1 ^= v2;                                                              \
+        v2 = ROTL(v2, 32);                                                     \
+    } while (0)
+/* TRACE prints inlen and v0..v3 when DEBUG is defined; otherwise it expands to nothing. */
+#ifdef DEBUG
+#define TRACE                                                                  \
+    do {                                                                       \
+        printf("(%3d) v0 %08x %08x\n", (int)inlen, (uint32_t)(v0 >> 32),       \
+               (uint32_t)v0);                                                  \
+        printf("(%3d) v1 %08x %08x\n", (int)inlen, (uint32_t)(v1 >> 32),       \
+               (uint32_t)v1);                                                  \
+        printf("(%3d) v2 %08x %08x\n", (int)inlen, (uint32_t)(v2 >> 32),       \
+               (uint32_t)v2);                                                  \
+        printf("(%3d) v3 %08x %08x\n", (int)inlen, (uint32_t)(v3 >> 32),       \
+               (uint32_t)v3);                                                  \
+    } while (0)
+#else
+#define TRACE
+#endif
+
+int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k,
+            uint8_t *out, const size_t outlen) {
+    /* Hash in[0..inlen) under the 16-byte key k into out (8 or 16 bytes); always returns 0. */
+    assert((outlen == 8) || (outlen == 16)); /* gone under NDEBUG: any outlen != 8 then writes 16 bytes */
+    uint64_t v0 = 0x736f6d6570736575ULL; /* ASCII "somepseu" */
+    uint64_t v1 = 0x646f72616e646f6dULL; /* ASCII "dorandom" */
+    uint64_t v2 = 0x6c7967656e657261ULL; /* ASCII "lygenera" */
+    uint64_t v3 = 0x7465646279746573ULL; /* ASCII "tedbytes" */
+    uint64_t k0 = U8TO64_LE(k); /* key bytes 0..7 */
+    uint64_t k1 = U8TO64_LE(k + 8); /* key bytes 8..15 */
+    uint64_t m;
+    int i;
+    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); /* end of the whole 8-byte words */
+    const int left = inlen & 7; /* bytes remaining after the last whole word (0..7) */
+    uint64_t b = ((uint64_t)inlen) << 56; /* final block: top byte is inlen mod 256 */
+    v3 ^= k1;
+    v2 ^= k0;
+    v1 ^= k1;
+    v0 ^= k0;
+
+    if (outlen == 16)
+        v1 ^= 0xee; /* applied only for 16-byte output, so its state differs from the 8-byte case */
+    /* Absorb every whole 8-byte word of the input, read little-endian. */
+    for (; in != end; in += 8) {
+        m = U8TO64_LE(in);
+        v3 ^= m;
+
+        TRACE;
+        for (i = 0; i < cROUNDS; ++i)
+            SIPROUND;
+
+        v0 ^= m;
+    }
+    /* OR the leftover bytes into the low bytes of b; every case deliberately falls through. */
+    switch (left) {
+    case 7:
+        b |= ((uint64_t)in[6]) << 48; /* fall through */
+    case 6:
+        b |= ((uint64_t)in[5]) << 40; /* fall through */
+    case 5:
+        b |= ((uint64_t)in[4]) << 32; /* fall through */
+    case 4:
+        b |= ((uint64_t)in[3]) << 24; /* fall through */
+    case 3:
+        b |= ((uint64_t)in[2]) << 16; /* fall through */
+    case 2:
+        b |= ((uint64_t)in[1]) << 8; /* fall through */
+    case 1:
+        b |= ((uint64_t)in[0]);
+        break;
+    case 0:
+        break;
+    }
+    /* Absorb the final block b the same way as a whole word. */
+    v3 ^= b;
+
+    TRACE;
+    for (i = 0; i < cROUNDS; ++i)
+        SIPROUND;
+
+    v0 ^= b;
+    /* Finalization: the constant mixed into v2 depends on the requested output size. */
+    if (outlen == 16)
+        v2 ^= 0xee;
+    else
+        v2 ^= 0xff;
+
+    TRACE;
+    for (i = 0; i < dROUNDS; ++i)
+        SIPROUND;
+
+    b = v0 ^ v1 ^ v2 ^ v3;
+    U64TO8_LE(out, b); /* first output word -> out[0..7], low byte first */
+
+    if (outlen == 8)
+        return 0;
+    /* Second output word (reached for any outlen other than 8). */
+    v1 ^= 0xdd;
+
+    TRACE;
+    for (i = 0; i < dROUNDS; ++i)
+        SIPROUND;
+
+    b = v0 ^ v1 ^ v2 ^ v3;
+    U64TO8_LE(out + 8, b); /* second output word -> out[8..15] */
+
+    return 0;
+}
diff --git a/wscript b/wscript
index 8fd16357..ec151612 100644
--- a/wscript
+++ b/wscript
@@ -152,7 +152,8 @@ def configure(ctx):
     if ctx.env.CXXFLAGS:
       cxxflags = ctx.env.CXXFLAGS
     else:
-      cxxflags = ['-g', '-Wall', '-Wno-sign-compare', '-Werror']
+      # FIXME Figure out how to treat siphash.c as C file so that we can remove -Wno-deprecated
+      cxxflags = ['-g', '-Wall', '-Wno-sign-compare', '-Wno-deprecated', '-Werror']
 
     if all(not x.startswith('-std=') for x in ctx.env.CXXFLAGS):
       cxxflags.append('-std=c++11')
@@ -358,10 +359,12 @@ def build(bld):
     else:
       rpath = bld.env['LIBPATH_clang']
 
+  bld.objects(name='siphash', source='third_party/siphash.c')
+
   # https://waf.io/apidocs/tools/c_aliases.html#waflib.Tools.c_aliases.program
   bld.program(
       source=cc_files,
-      use='clang',
+      use=['clang', 'siphash'],
       includes=[
         'src/',
         'third_party/',