From 14a213d4075eb033af31dfcfaa7cbde63c46e0b3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 12 Jan 2018 22:13:08 -0800 Subject: [PATCH] Change std::string USR to uint64_t pseudorandom function (SipHash 64-bit) --- src/clang_cursor.cc | 26 ++++ src/clang_cursor.h | 7 + src/import_pipeline.cc | 4 +- src/indexer.cc | 108 +++++++------- src/indexer.h | 31 ++-- src/messages/cquery_call_tree.cc | 8 +- src/messages/text_document_code_action.cc | 2 +- src/query.cc | 42 +++--- src/query.h | 12 +- src/serializer.cc | 3 +- third_party/siphash.c | 165 ++++++++++++++++++++++ wscript | 7 +- 12 files changed, 306 insertions(+), 109 deletions(-) create mode 100644 third_party/siphash.c diff --git a/src/clang_cursor.cc b/src/clang_cursor.cc index af70888e..1e82bd30 100644 --- a/src/clang_cursor.cc +++ b/src/clang_cursor.cc @@ -2,6 +2,8 @@ #include "clang_utils.h" +#include + #include #include @@ -19,6 +21,19 @@ Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file) { Position((int16_t)end_line, (int16_t)end_column) /*end*/); } +uint64_t HashUSR(const char* usr) { + extern int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k, + uint8_t *out, const size_t outlen); + union { + uint64_t ret; + uint8_t out[8]; + }; + const uint8_t k[16] = {0xd0, 0xe5, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, + 0x61, 0x79, 0xea, 0x70, 0xca, 0x70, 0xf0, 0x0d}; + (void)siphash(reinterpret_cast(usr), strlen(usr), k, out, 8); + return ret; +} + // TODO Place this global variable into config int g_enable_comments; @@ -45,6 +60,10 @@ std::string ClangType::get_usr() const { return ClangCursor(clang_getTypeDeclaration(cx_type)).get_usr(); } +USR ClangType::get_usr_hash() const { + return ClangCursor(clang_getTypeDeclaration(cx_type)).get_usr_hash(); +} + ClangType ClangType::get_canonical() const { return clang_getCanonicalType(cx_type); } @@ -153,6 +172,13 @@ std::string ClangCursor::get_usr() const { return ::ToString(clang_getCursorUSR(cx_cursor)); } +USR ClangCursor::get_usr_hash() const { + CXString usr = clang_getCursorUSR(cx_cursor); + USR ret = HashUSR(clang_getCString(usr)); + clang_disposeString(usr); + return ret; +} + bool ClangCursor::is_definition() const { return clang_isCursorDefinition(cx_cursor); } diff --git a/src/clang_cursor.h b/src/clang_cursor.h index c21531dd..78f10cd7 100644 --- a/src/clang_cursor.h +++ b/src/clang_cursor.h @@ -5,12 +5,17 @@ #include #include +#include #include #include +using USR = uint64_t; + Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file = nullptr); +USR HashUSR(const char* usr); + class ClangType { public: ClangType(); @@ -24,6 +29,7 @@ class ClangType { // ClangCursor is not defined so we have to return CXCursor CXCursor get_declaration() const; std::string get_usr() const; + USR get_usr_hash() const; std::string get_spelling() const; ClangType get_canonical() const; @@ -54,6 +60,7 @@ class ClangCursor { Range get_extent() const; std::string get_display_name() const; std::string get_usr() const; + USR get_usr_hash() const; bool is_definition() const; diff --git a/src/import_pipeline.cc b/src/import_pipeline.cc index 56013c31..d0d3e6c8 100644 --- a/src/import_pipeline.cc +++ b/src/import_pipeline.cc @@ -1,5 +1,6 @@ #include "import_pipeline.h" +#include "clang_cursor.h" // HashUSR #include "cache_manager.h" #include "config.h" #include "iindexer.h" @@ -621,8 +622,7 @@ bool QueryDb_ImportMain(Config* config, WorkingFile* working_file = working_files->GetFileByFilename(updated_file.path); if (working_file) { - QueryFileId file_id = - db->usr_to_file[LowerPathIfCaseInsensitive(working_file->filename)]; + QueryFileId file_id = db->usr_to_file[LowerPathIfCaseInsensitive(working_file->filename)]; QueryFile* file = &db->files[file_id.id]; EmitSemanticHighlighting(db, semantic_cache, working_file, file); } diff --git a/src/indexer.cc b/src/indexer.cc index b59d1fd5..caa52737 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -111,12 +111,11 @@ ClangSymbolKind GetSymbolKind(CXIdxEntityKind kind) { // to export. If we do not capture the parameter type description for the // constructor we will not be able to attribute the constructor call correctly. struct ConstructorCache { - using Usr = std::string; struct Constructor { - Usr usr; + USR usr; std::vector param_type_desc; }; - std::unordered_map> constructors_; + std::unordered_map> constructors_; // This should be called whenever there is a constructor declaration. void NotifyConstructor(ClangCursor ctor_cursor) { @@ -129,22 +128,22 @@ struct ConstructorCache { return type_desc; }; - Constructor ctor{ctor_cursor.get_usr(), build_type_desc(ctor_cursor)}; + Constructor ctor{ctor_cursor.get_usr_hash(), build_type_desc(ctor_cursor)}; // Insert into |constructors_|. - std::string type_usr = ctor_cursor.get_semantic_parent().get_usr(); - auto existing_ctors = constructors_.find(type_usr); + auto type_usr_hash = ctor_cursor.get_semantic_parent().get_usr_hash(); + auto existing_ctors = constructors_.find(type_usr_hash); if (existing_ctors != constructors_.end()) { existing_ctors->second.push_back(ctor); } else { - constructors_[type_usr] = {ctor}; + constructors_[type_usr_hash] = {ctor}; } } // Tries to lookup a constructor in |type_usr| that takes arguments most // closely aligned to |param_type_desc|. - optional TryFindConstructorUsr( - const std::string& type_usr, + optional TryFindConstructorUsr( + USR type_usr, const std::vector& param_type_desc) { auto count_matching_prefix_length = [](const char* a, const char* b) { int matched = 0; @@ -171,7 +170,7 @@ struct ConstructorCache { if (ctors.empty()) return nullopt; - std::string best_usr; + USR best_usr; int best_score = INT_MIN; // Scan constructors for the best possible match. @@ -192,7 +191,7 @@ struct ConstructorCache { // Do prefix-based match on parameter type description. This works well in // practice because clang appends qualifiers to the end of the type, ie, // |foo *&&| - for (int i = 0; + for (size_t i = 0; i < std::min(param_type_desc.size(), ctor.param_type_desc.size()); ++i) { score += count_matching_prefix_length(param_type_desc[i].c_str(), @@ -205,7 +204,6 @@ struct ConstructorCache { } } - assert(!best_usr.empty()); return best_usr; } }; @@ -397,9 +395,10 @@ optional ResolveToDeclarationType(IndexFile* db, ClangCursor cursor) { ClangCursor declaration = cursor.get_declaration(); declaration = declaration.template_specialization_to_template_definition(); + // TODO optimize std::string usr = declaration.get_usr(); - if (usr != "") - return db->ToTypeId(usr); + if (usr.size()) + return db->ToTypeId(declaration.get_usr_hash()); return nullopt; } @@ -472,7 +471,7 @@ void OnIndexReference_Function(IndexFile* db, } // namespace // static -int IndexFile::kCurrentVersion = 8; +int IndexFile::kCurrentVersion = 9; IndexFile::IndexFile(const std::string& path, const optional& contents) @@ -488,7 +487,7 @@ IndexFile::IndexFile(const std::string& path, } // TODO: Optimize for const char*? -IndexTypeId IndexFile::ToTypeId(const std::string& usr) { +IndexTypeId IndexFile::ToTypeId(USR usr) { auto it = id_cache.usr_to_type_id.find(usr); if (it != id_cache.usr_to_type_id.end()) return it->second; @@ -499,7 +498,7 @@ IndexTypeId IndexFile::ToTypeId(const std::string& usr) { id_cache.type_id_to_usr[id] = usr; return id; } -IndexFuncId IndexFile::ToFuncId(const std::string& usr) { +IndexFuncId IndexFile::ToFuncId(USR usr) { auto it = id_cache.usr_to_func_id.find(usr); if (it != id_cache.usr_to_func_id.end()) return it->second; @@ -510,7 +509,7 @@ IndexFuncId IndexFile::ToFuncId(const std::string& usr) { id_cache.func_id_to_usr[id] = usr; return id; } -IndexVarId IndexFile::ToVarId(const std::string& usr) { +IndexVarId IndexFile::ToVarId(USR usr) { auto it = id_cache.usr_to_var_id.find(usr); if (it != id_cache.usr_to_var_id.end()) return it->second; @@ -523,15 +522,15 @@ IndexVarId IndexFile::ToVarId(const std::string& usr) { } IndexTypeId IndexFile::ToTypeId(const CXCursor& cursor) { - return ToTypeId(ClangCursor(cursor).get_usr()); + return ToTypeId(ClangCursor(cursor).get_usr_hash()); } IndexFuncId IndexFile::ToFuncId(const CXCursor& cursor) { - return ToFuncId(ClangCursor(cursor).get_usr()); + return ToFuncId(ClangCursor(cursor).get_usr_hash()); } IndexVarId IndexFile::ToVarId(const CXCursor& cursor) { - return ToVarId(ClangCursor(cursor).get_usr()); + return ToVarId(ClangCursor(cursor).get_usr_hash()); } IndexType* IndexFile::Resolve(IndexTypeId id) { @@ -548,10 +547,7 @@ std::string IndexFile::ToString() { return Serialize(SerializeFormat::Json, *this); } -IndexType::IndexType(IndexTypeId id, const std::string& usr) - : usr(usr), id(id) { - assert(usr.size() > 0); -} +IndexType::IndexType(IndexTypeId id, USR usr) : usr(usr), id(id) {} void RemoveItem(std::vector& ranges, Range to_remove) { auto it = std::find(ranges.begin(), ranges.end(), to_remove); @@ -741,14 +737,12 @@ void VisitDeclForTypeUsageVisitorHandler(ClangCursor cursor, IndexFile* db = param->db; std::string referenced_usr = - cursor.get_referenced() - .template_specialization_to_template_definition() - .get_usr(); + cursor.get_referenced().template_specialization_to_template_definition().get_usr(); // TODO: things in STL cause this to be empty. Figure out why and document it. if (referenced_usr == "") return; - IndexTypeId ref_type_id = db->ToTypeId(referenced_usr); + IndexTypeId ref_type_id = db->ToTypeId(HashUSR(referenced_usr.c_str())); if (!param->initial_type) param->initial_type = ref_type_id; @@ -960,19 +954,18 @@ ClangCursor::VisitResult AddDeclInitializerUsagesVisitor(ClangCursor cursor, // different USR. // ClangCursor ref = - // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr(); + // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr_hash(); // std::string ref_usr = - // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr(); - std::string ref_usr = - cursor.get_referenced() - .template_specialization_to_template_definition() - .get_usr(); - // std::string ref_usr = ref.get_usr(); + // cursor.get_referenced().template_specialization_to_template_definition().get_type().strip_qualifiers().get_usr_hash(); + auto ref_usr = cursor.get_referenced() + .template_specialization_to_template_definition() + .get_usr(); + // std::string ref_usr = ref.get_usr_hash(); if (ref_usr == "") break; Range loc = cursor.get_spelling_range(); - IndexVarId ref_id = db->ToVarId(ref_usr); + IndexVarId ref_id = db->ToVarId(HashUSR(ref_usr.c_str())); IndexVar* ref_def = db->Resolve(ref_id); UniqueAdd(ref_def->uses, loc); break; @@ -1018,11 +1011,11 @@ ClangCursor::VisitResult VisitMacroDefinitionAndExpansions(ClangCursor cursor, // only real difference will be that we show 'callers' instead of 'refs' // (especially since macros cannot have overrides) - std::string decl_usr; + USR decl_usr; if (cursor.get_kind() == CXCursor_MacroDefinition) - decl_usr = cursor.get_usr(); + decl_usr = cursor.get_usr_hash(); else - decl_usr = cursor.get_referenced().get_usr(); + decl_usr = cursor.get_referenced().get_usr_hash(); IndexVarId var_id = db->ToVarId(decl_usr); IndexVar* var_def = db->Resolve(var_id); @@ -1069,7 +1062,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor, case CXCursor_DeclRefExpr: { ClangCursor ref_cursor = clang_getCursorReferenced(cursor.cx_cursor); if (ref_cursor.get_kind() == CXCursor_NonTypeTemplateParameter) { - IndexVar* ref_index = db->Resolve(db->ToVarId(ref_cursor.get_usr())); + IndexVar* ref_index = db->Resolve(db->ToVarId(ref_cursor.get_usr_hash())); if (ref_index->def.short_name.empty()) { ref_index->def.definition_spelling = ref_cursor.get_spelling_range(); ref_index->def.definition_extent = ref_cursor.get_extent(); @@ -1078,9 +1071,10 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor, ref_index->uses.push_back(ref_cursor.get_spelling_range()); ClangType ref_type = clang_getCursorType(ref_cursor.cx_cursor); + // TODO optimize if (ref_type.get_usr().size()) { IndexType* ref_type_index = - db->Resolve(db->ToTypeId(ref_type.get_usr())); + db->Resolve(db->ToTypeId(ref_type.get_usr_hash())); // The cursor extent includes `type name`, not just `name`. There // seems no way to extract the spelling range of `type` and we do // not want to do subtraction here. @@ -1101,8 +1095,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor, break; case CXCursor_FunctionDecl: case CXCursor_FunctionTemplate: { - std::string ref_usr = overloaded.get_usr(); - IndexFuncId called_id = db->ToFuncId(ref_usr); + IndexFuncId called_id = db->ToFuncId(overloaded.get_usr_hash()); IndexFunc* called = db->Resolve(called_id); OnIndexReference_Function(db, cursor.get_spelling_range(), data->container, called_id, called, @@ -1116,7 +1109,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor, case CXCursor_TemplateRef: { ClangCursor ref_cursor = clang_getCursorReferenced(cursor.cx_cursor); if (ref_cursor.get_kind() == CXCursor_TemplateTemplateParameter) { - IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr())); + IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr_hash())); // TODO It seems difficult to get references to template template // parameters. // CXCursor_TemplateTemplateParameter can be visited by visiting @@ -1136,7 +1129,7 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor, case CXCursor_TypeRef: { ClangCursor ref_cursor = clang_getCursorReferenced(cursor.cx_cursor); if (ref_cursor.get_kind() == CXCursor_TemplateTypeParameter) { - IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr())); + IndexType* ref_index = db->Resolve(db->ToTypeId(ref_cursor.get_usr_hash())); // TODO It seems difficult to get a FunctionTemplate's template // parameters. // CXCursor_TemplateTypeParameter can be visited by visiting @@ -1271,9 +1264,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { decl_cursor.template_specialization_to_template_definition()) break; - std::string decl_usr = decl_cursor.get_usr(); - - IndexVarId var_id = db->ToVarId(decl->entityInfo->USR); + IndexVarId var_id = db->ToVarId(HashUSR(decl->entityInfo->USR)); IndexVar* var = db->Resolve(var_id); // TODO: Eventually run with this if. Right now I want to iron out bugs @@ -1471,7 +1462,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { ClangCursor parent = ClangCursor(overridden[i]) .template_specialization_to_template_definition(); - IndexFuncId parent_id = db->ToFuncId(parent.get_usr()); + IndexFuncId parent_id = db->ToFuncId(parent.get_usr_hash()); IndexFunc* parent_def = db->Resolve(parent_id); func = db->Resolve(func_id); // ToFuncId invalidated func_def @@ -1493,7 +1484,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { optional alias_of = AddDeclTypeUsages( db, decl->cursor, decl->semanticContainer, decl->lexicalContainer); - IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR); + IndexTypeId type_id = db->ToTypeId(HashUSR(decl->entityInfo->USR)); IndexType* type = db->Resolve(type_id); if (alias_of) @@ -1545,7 +1536,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { ClangCursor decl_cursor = decl->cursor; Range decl_loc_spelling = decl_cursor.get_spelling_range(); - IndexTypeId type_id = db->ToTypeId(decl->entityInfo->USR); + IndexTypeId type_id = db->ToTypeId(HashUSR(decl->entityInfo->USR)); IndexType* type = db->Resolve(type_id); // TODO: Eventually run with this if. Right now I want to iron out bugs @@ -1632,7 +1623,7 @@ void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) { << std::endl; if (decl->lexicalContainer) std::cerr << " lexicalContainer = " - << ClangCursor(decl->lexicalContainer->cursor).get_usr() + << ClangCursor(decl->lexicalContainer->cursor).get_usr_hash() << std::endl; break; } @@ -1688,7 +1679,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) { ClangCursor referenced = ref->referencedEntity->cursor; referenced = referenced.template_specialization_to_template_definition(); - IndexVarId var_id = db->ToVarId(referenced.get_usr()); + IndexVarId var_id = db->ToVarId(referenced.get_usr_hash()); IndexVar* var = db->Resolve(var_id); // Lambda paramaters are not processed by OnIndexDeclaration and // may not have a short_name yet. Note that we only process the lambda @@ -1735,7 +1726,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) { ClangCursor ref_cursor(ref->cursor); Range loc = ref_cursor.get_spelling_range(); - IndexFuncId called_id = db->ToFuncId(ref->referencedEntity->USR); + IndexFuncId called_id = db->ToFuncId(HashUSR(ref->referencedEntity->USR)); IndexFunc* called = db->Resolve(called_id); // libclang doesn't provide a nice api to check if the given function @@ -1786,8 +1777,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) { // the constructor function we add a usage to. optional opt_found_type = FindType(ref->cursor); if (opt_found_type) { - std::string ctor_type_usr = - opt_found_type->get_referenced().get_usr(); + USR ctor_type_usr = opt_found_type->get_referenced().get_usr_hash(); ClangCursor call_cursor = ref->cursor; // Build a type description from the parameters of the call, so we @@ -1800,7 +1790,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) { } // Try to find the constructor and add a reference. - optional ctor_usr = + optional ctor_usr = param->ctors.TryFindConstructorUsr(ctor_type_usr, call_type_desc); if (ctor_usr) { IndexFunc* ctor = db->Resolve(db->ToFuncId(*ctor_usr)); @@ -1823,7 +1813,7 @@ void OnIndexReference(CXClientData client_data, const CXIdxEntityRefInfo* ref) { case CXIdxEntity_CXXClass: { ClangCursor ref_cursor = ref->referencedEntity->cursor; ref_cursor = ref_cursor.template_specialization_to_template_definition(); - IndexType* referenced = db->Resolve(db->ToTypeId(ref_cursor.get_usr())); + IndexType* referenced = db->Resolve(db->ToTypeId(ref_cursor.get_usr_hash())); // // The following will generate two TypeRefs to Foo, both located at the diff --git a/src/indexer.h b/src/indexer.h index 9ecdebee..7eac4052 100644 --- a/src/indexer.h +++ b/src/indexer.h @@ -1,5 +1,6 @@ #pragma once +#include "clang_cursor.h" #include "clang_index.h" #include "clang_translation_unit.h" #include "clang_utils.h" @@ -259,7 +260,7 @@ struct IndexType { using Def = TypeDefDefinitionData; - std::string usr; + USR usr; IndexTypeId id; Def def; @@ -275,7 +276,7 @@ struct IndexType { std::vector uses; IndexType() {} // For serialization. - IndexType(IndexTypeId id, const std::string& usr); + IndexType(IndexTypeId id, USR usr); bool operator<(const IndexType& other) const { return id < other.id; } }; @@ -361,7 +362,7 @@ struct IndexFunc { IndexFuncRef, Range>; - std::string usr; + USR usr; IndexFuncId id; Def def; @@ -391,7 +392,7 @@ struct IndexFunc { std::vector callers; IndexFunc() {} // For serialization. - IndexFunc(IndexFuncId id, const std::string& usr) : usr(usr), id(id) { + IndexFunc(IndexFuncId id, USR usr) : usr(usr), id(id) { // assert(usr.size() > 0); } @@ -468,7 +469,7 @@ void Reflect(TVisitor& visitor, struct IndexVar { using Def = VarDefDefinitionData; - std::string usr; + USR usr; IndexVarId id; Def def; @@ -477,7 +478,7 @@ struct IndexVar { std::vector uses; IndexVar() {} // For serialization. - IndexVar(IndexVarId id, const std::string& usr) : usr(usr), id(id) { + IndexVar(IndexVarId id, USR usr) : usr(usr), id(id) { // assert(usr.size() > 0); } @@ -487,12 +488,12 @@ MAKE_HASHABLE(IndexVar, t.id); struct IdCache { std::string primary_file; - std::unordered_map usr_to_type_id; - std::unordered_map usr_to_func_id; - std::unordered_map usr_to_var_id; - std::unordered_map type_id_to_usr; - std::unordered_map func_id_to_usr; - std::unordered_map var_id_to_usr; + std::unordered_map usr_to_type_id; + std::unordered_map usr_to_func_id; + std::unordered_map usr_to_var_id; + std::unordered_map type_id_to_usr; + std::unordered_map func_id_to_usr; + std::unordered_map var_id_to_usr; IdCache(const std::string& primary_file); }; @@ -544,9 +545,9 @@ struct IndexFile { IndexFile(const std::string& path, const optional& contents); - IndexTypeId ToTypeId(const std::string& usr); - IndexFuncId ToFuncId(const std::string& usr); - IndexVarId ToVarId(const std::string& usr); + IndexTypeId ToTypeId(USR usr); + IndexFuncId ToFuncId(USR usr); + IndexVarId ToVarId(USR usr); IndexTypeId ToTypeId(const CXCursor& usr); IndexFuncId ToFuncId(const CXCursor& usr); IndexVarId ToVarId(const CXCursor& usr); diff --git a/src/messages/cquery_call_tree.cc b/src/messages/cquery_call_tree.cc index 914f9d35..d8e8d694 100644 --- a/src/messages/cquery_call_tree.cc +++ b/src/messages/cquery_call_tree.cc @@ -4,6 +4,7 @@ #include +// FIXME Interop with VSCode, change std::string usr to Usr (uint64_t) namespace { struct Ipc_CqueryCallTreeInitial : public IpcMessage { @@ -62,7 +63,7 @@ std::vector BuildInitialCallTree( Out_CqueryCallTree::CallEntry entry; entry.name = root_func.def->short_name; - entry.usr = root_func.usr; + entry.usr = std::to_string(root_func.usr); entry.location = *def_loc; entry.hasCallers = HasCallersOnSelfOrBaseOrDerived(db, root_func); std::vector result; @@ -113,7 +114,7 @@ std::vector BuildExpandCallTree( Out_CqueryCallTree::CallEntry call_entry; call_entry.name = call_func.def->short_name; - call_entry.usr = call_func.usr; + call_entry.usr = std::to_string(call_func.usr); call_entry.location = *call_location; call_entry.hasCallers = HasCallersOnSelfOrBaseOrDerived(db, call_func); call_entry.callType = call_type; @@ -188,7 +189,8 @@ struct CqueryCallTreeExpandHandler Out_CqueryCallTree out; out.id = request->id; - auto func_id = db->usr_to_func.find(request->params.usr); + // FIXME + auto func_id = db->usr_to_func.find(std::stoull(request->params.usr)); if (func_id != db->usr_to_func.end()) out.result = BuildExpandCallTree(db, working_files, func_id->second); diff --git a/src/messages/text_document_code_action.cc b/src/messages/text_document_code_action.cc index 206020ba..57a386cc 100644 --- a/src/messages/text_document_code_action.cc +++ b/src/messages/text_document_code_action.cc @@ -104,7 +104,7 @@ optional GetImplementationFile(QueryDatabase* db, LOG_S(INFO) << "!! Looking for impl file that starts with " << target_path; for (auto& entry : db->usr_to_file) { - Usr path = entry.first; + const std::string& path = entry.first; // Do not consider header files for implementation files. // TODO: make file extensions configurable. diff --git a/src/query.cc b/src/query.cc index 320afa18..f54b324a 100644 --- a/src/query.cc +++ b/src/query.cc @@ -285,12 +285,13 @@ QueryFileId GetQueryFileIdFromPath(QueryDatabase* query_db, return QueryFileId(it->second.id); size_t idx = query_db->files.size(); - query_db->usr_to_file[LowerPathIfCaseInsensitive(path)] = QueryFileId(idx); + query_db->usr_to_file[LowerPathIfCaseInsensitive(path)] = + QueryFileId(idx); query_db->files.push_back(QueryFile(path)); return QueryFileId(idx); } -QueryTypeId GetQueryTypeIdFromUsr(QueryDatabase* query_db, const Usr& usr) { +QueryTypeId GetQueryTypeIdFromUsr(QueryDatabase* query_db, USR usr) { auto it = query_db->usr_to_type.find(usr); if (it != query_db->usr_to_type.end()) return QueryTypeId(it->second.id); @@ -301,7 +302,7 @@ QueryTypeId GetQueryTypeIdFromUsr(QueryDatabase* query_db, const Usr& usr) { return QueryTypeId(idx); } -QueryFuncId GetQueryFuncIdFromUsr(QueryDatabase* query_db, const Usr& usr) { +QueryFuncId GetQueryFuncIdFromUsr(QueryDatabase* query_db, USR usr) { auto it = query_db->usr_to_func.find(usr); if (it != query_db->usr_to_func.end()) return QueryFuncId(it->second.id); @@ -713,8 +714,9 @@ void QueryDatabase::RemoveUsrs(SymbolKind usr_kind, switch (usr_kind) { case SymbolKind::File: { - for (const Usr& usr : to_remove) - files[usr_to_file[LowerPathIfCaseInsensitive(usr)].id].def = nullopt; + // FIXME + //for (const Usr& usr : to_remove) + // files[usr_to_file[usr].id].def = nullopt; break; } case SymbolKind::Type: { @@ -896,30 +898,30 @@ TEST_SUITE("query") { IndexFile previous("foo.cc", nullopt); IndexFile current("foo.cc", nullopt); - previous.Resolve(previous.ToTypeId("usr1"))->def.definition_spelling = + previous.Resolve(previous.ToTypeId(HashUSR("usr1")))->def.definition_spelling = Range(Position(1, 0)); - previous.Resolve(previous.ToFuncId("usr2"))->def.definition_spelling = + previous.Resolve(previous.ToFuncId(HashUSR("usr2")))->def.definition_spelling = Range(Position(2, 0)); - previous.Resolve(previous.ToVarId("usr3"))->def.definition_spelling = + previous.Resolve(previous.ToVarId(HashUSR("usr3")))->def.definition_spelling = Range(Position(3, 0)); IndexUpdate update = GetDelta(previous, current); - REQUIRE(update.types_removed == std::vector{"usr1"}); - REQUIRE(update.funcs_removed == std::vector{"usr2"}); - REQUIRE(update.vars_removed == std::vector{"usr3"}); + REQUIRE(update.types_removed == std::vector{HashUSR("usr1")}); + REQUIRE(update.funcs_removed == std::vector{HashUSR("usr2")}); + REQUIRE(update.vars_removed == std::vector{HashUSR("usr3")}); } TEST_CASE("do not remove ref-only defs") { IndexFile previous("foo.cc", nullopt); IndexFile current("foo.cc", nullopt); - previous.Resolve(previous.ToTypeId("usr1")) + previous.Resolve(previous.ToTypeId(HashUSR("usr1"))) ->uses.push_back(Range(Position(1, 0))); - previous.Resolve(previous.ToFuncId("usr2")) + previous.Resolve(previous.ToFuncId(HashUSR("usr2"))) ->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(2, 0)), false /*is_implicit*/)); - previous.Resolve(previous.ToVarId("usr3")) + previous.Resolve(previous.ToVarId(HashUSR("usr3"))) ->uses.push_back(Range(Position(3, 0))); IndexUpdate update = GetDelta(previous, current); @@ -933,8 +935,8 @@ TEST_SUITE("query") { IndexFile previous("foo.cc", nullopt); IndexFile current("foo.cc", nullopt); - IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr")); - IndexFunc* cf = current.Resolve(current.ToFuncId("usr")); + IndexFunc* pf = previous.Resolve(previous.ToFuncId(HashUSR("usr"))); + IndexFunc* cf = current.Resolve(current.ToFuncId(HashUSR("usr"))); pf->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(1, 0)), false /*is_implicit*/)); @@ -958,8 +960,8 @@ TEST_SUITE("query") { IndexFile previous("foo.cc", nullopt); IndexFile current("foo.cc", nullopt); - IndexType* pt = previous.Resolve(previous.ToTypeId("usr")); - IndexType* ct = current.Resolve(current.ToTypeId("usr")); + IndexType* pt = previous.Resolve(previous.ToTypeId(HashUSR("usr"))); + IndexType* ct = current.Resolve(current.ToTypeId(HashUSR("usr"))); pt->uses.push_back(Range(Position(1, 0))); ct->uses.push_back(Range(Position(2, 0))); @@ -979,8 +981,8 @@ TEST_SUITE("query") { IndexFile previous("foo.cc", nullopt); IndexFile current("foo.cc", nullopt); - IndexFunc* pf = previous.Resolve(previous.ToFuncId("usr")); - IndexFunc* cf = current.Resolve(current.ToFuncId("usr")); + IndexFunc* pf = previous.Resolve(previous.ToFuncId(HashUSR("usr"))); + IndexFunc* cf = current.Resolve(current.ToFuncId(HashUSR("usr"))); pf->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(1, 0)), false /*is_implicit*/)); pf->callers.push_back(IndexFuncRef(IndexFuncId(0), Range(Position(2, 0)), diff --git a/src/query.h b/src/query.h index 046c5ac4..e372a87d 100644 --- a/src/query.h +++ b/src/query.h @@ -7,7 +7,7 @@ #include -using Usr = std::string; +using Usr = USR; struct QueryFile; struct QueryType; @@ -170,10 +170,10 @@ void Reflect(TVisitor& visitor, MergeableUpdate& value) { template struct WithUsr { - Usr usr; + USR usr; T value; - WithUsr(const Usr& usr, const T& value) : usr(usr), value(value) {} + WithUsr(USR usr, const T& value) : usr(usr), value(value) {} }; template void Reflect(TVisitor& visitor, WithUsr& value) { @@ -225,7 +225,7 @@ struct QueryType { using InstancesUpdate = MergeableUpdate; using UsesUpdate = MergeableUpdate; - Usr usr; + USR usr; optional def; std::vector derived; std::vector instances; @@ -246,7 +246,7 @@ struct QueryFunc { using DerivedUpdate = MergeableUpdate; using CallersUpdate = MergeableUpdate; - Usr usr; + USR usr; optional def; std::vector declarations; std::vector derived; @@ -352,7 +352,7 @@ struct QueryDatabase { // Lookup symbol based on a usr. // NOTE: For usr_to_file make sure to call LowerPathIfCaseInsensitive on key. // TODO: add type wrapper to enforce we call it - spp::sparse_hash_map usr_to_file; + spp::sparse_hash_map usr_to_file; spp::sparse_hash_map usr_to_type; spp::sparse_hash_map usr_to_func; spp::sparse_hash_map usr_to_var; diff --git a/src/serializer.cc b/src/serializer.cc index 062adb12..d2c079f9 100644 --- a/src/serializer.cc +++ b/src/serializer.cc @@ -171,7 +171,8 @@ void Reflect(TVisitor& visitor, IndexVar& value) { // IndexFile bool ReflectMemberStart(Writer& visitor, IndexFile& value) { - auto it = value.id_cache.usr_to_type_id.find(""); + // FIXME + auto it = value.id_cache.usr_to_type_id.find(HashUSR("")); if (it != value.id_cache.usr_to_type_id.end()) { value.Resolve(it->second)->def.short_name = ""; assert(value.Resolve(it->second)->uses.size() == 0); diff --git a/third_party/siphash.c b/third_party/siphash.c new file mode 100644 index 00000000..d69f4b57 --- /dev/null +++ b/third_party/siphash.c @@ -0,0 +1,165 @@ +/* + SipHash reference C implementation + + Copyright (c) 2012-2016 Jean-Philippe Aumasson + + Copyright (c) 2012-2014 Daniel J. Bernstein + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along + with + this software. If not, see + . + */ +#include +#include +#include +#include + +/* default: SipHash-2-4 */ +#define cROUNDS 2 +#define dROUNDS 4 + +#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v)); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v))); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = ROTL(v0, 32); \ + v2 += v3; \ + v3 = ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = ROTL(v2, 32); \ + } while (0) + +#ifdef DEBUG +#define TRACE \ + do { \ + printf("(%3d) v0 %08x %08x\n", (int)inlen, (uint32_t)(v0 >> 32), \ + (uint32_t)v0); \ + printf("(%3d) v1 %08x %08x\n", (int)inlen, (uint32_t)(v1 >> 32), \ + (uint32_t)v1); \ + printf("(%3d) v2 %08x %08x\n", (int)inlen, (uint32_t)(v2 >> 32), \ + (uint32_t)v2); \ + printf("(%3d) v3 %08x %08x\n", (int)inlen, (uint32_t)(v3 >> 32), \ + (uint32_t)v3); \ + } while (0) +#else +#define TRACE +#endif + +int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k, + uint8_t *out, const size_t outlen) { + + assert((outlen == 8) || (outlen == 16)); + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t k0 = U8TO64_LE(k); + uint64_t k1 = U8TO64_LE(k + 8); + uint64_t m; + int i; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + if (outlen == 16) + v1 ^= 0xee; + + for (; in != end; in += 8) { + m = U8TO64_LE(in); + v3 ^= m; + + TRACE; + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: + b |= ((uint64_t)in[6]) << 48; + case 6: + b |= ((uint64_t)in[5]) << 40; + case 5: + b |= ((uint64_t)in[4]) << 32; + case 4: + b |= ((uint64_t)in[3]) << 24; + case 3: + b |= ((uint64_t)in[2]) << 16; + case 2: + b |= ((uint64_t)in[1]) << 8; + case 1: + b |= ((uint64_t)in[0]); + break; + case 0: + break; + } + + v3 ^= b; + + TRACE; + for (i = 0; i < cROUNDS; ++i) + SIPROUND; + + v0 ^= b; + + if (outlen == 16) + v2 ^= 0xee; + else + v2 ^= 0xff; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out, b); + + if (outlen == 8) + return 0; + + v1 ^= 0xdd; + + TRACE; + for (i = 0; i < dROUNDS; ++i) + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE(out + 8, b); + + return 0; +} diff --git a/wscript b/wscript index 8fd16357..ec151612 100644 --- a/wscript +++ b/wscript @@ -152,7 +152,8 @@ def configure(ctx): if ctx.env.CXXFLAGS: cxxflags = ctx.env.CXXFLAGS else: - cxxflags = ['-g', '-Wall', '-Wno-sign-compare', '-Werror'] + # FIXME Figure out how to treat siphash.c as C file so that we can remove -Wno-deprecated + cxxflags = ['-g', '-Wall', '-Wno-sign-compare', '-Wno-deprecated', '-Werror'] if all(not x.startswith('-std=') for x in ctx.env.CXXFLAGS): cxxflags.append('-std=c++11') @@ -358,10 +359,12 @@ def build(bld): else: rpath = bld.env['LIBPATH_clang'] + bld.objects(name='siphash', source='third_party/siphash.c') + # https://waf.io/apidocs/tools/c_aliases.html#waflib.Tools.c_aliases.program bld.program( source=cc_files, - use='clang', + use=['clang', 'siphash'], includes=[ 'src/', 'third_party/',