From 5ef801662b762d3017af1afe28b676913ae81dc3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 29 Apr 2018 19:51:25 -0700 Subject: [PATCH] Improve workspace/symbol sorting heuristic --- src/clang_indexer.cc | 2 + src/iindexer.cc | 0 src/lex_utils.cc | 47 +++------- src/lex_utils.h | 5 +- src/messages/text_document_completion.cc | 5 +- src/messages/workspace_symbol.cc | 105 +++++++++-------------- 6 files changed, 64 insertions(+), 100 deletions(-) delete mode 100644 src/iindexer.cc diff --git a/src/clang_indexer.cc b/src/clang_indexer.cc index 8c4d25cd..2c2ce633 100644 --- a/src/clang_indexer.cc +++ b/src/clang_indexer.cc @@ -354,6 +354,8 @@ struct IndexParam { }; IndexFile* ConsumeFile(IndexParam* param, CXFile file) { + if (!file) + return nullptr; bool is_first_ownership = false; IndexFile* db = param->file_consumer->TryConsumeFile( file, &is_first_ownership, ¶m->file_contents); diff --git a/src/iindexer.cc b/src/iindexer.cc deleted file mode 100644 index e69de29b..00000000 diff --git a/src/lex_utils.cc b/src/lex_utils.cc index 6045eb9d..9380f723 100644 --- a/src/lex_utils.cc +++ b/src/lex_utils.cc @@ -49,21 +49,20 @@ std::string_view LexIdentifierAroundPos(lsPosition position, // Find discontinous |search| in |content|. // Return |found| and the count of skipped chars before found. -std::pair CaseFoldingSubsequenceMatch(std::string_view search, - std::string_view content) { - bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper); - int skip = 0; - size_t j = 0; - for (char c : search) { - while (j < content.size() && - (hasUppercaseLetter ? content[j] != c - : tolower(content[j]) != tolower(c))) - ++j, ++skip; - if (j == content.size()) - return {false, skip}; - ++j; - } - return {true, skip}; +int ReverseSubseqMatch(std::string_view pat, + std::string_view text, + int case_sensitivity) { + if (case_sensitivity == 1) + case_sensitivity = std::any_of(pat.begin(), pat.end(), isupper) ? 2 : 0; + int j = pat.size(); + if (!j) + return text.size(); + for (int i = text.size(); i--;) + if ((case_sensitivity ? text[i] == pat[j - 1] + : tolower(text[i]) == tolower(pat[j - 1])) && + !--j) + return i; + return -1; } TEST_SUITE("Offset") { @@ -86,21 +85,3 @@ TEST_SUITE("Offset") { REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1); } } - -TEST_SUITE("Substring") { - TEST_CASE("skip") { - REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0)); - REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1)); - REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0)); - REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1)); - REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") == - std::make_pair(true, 1)); - REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") == - std::make_pair(true, 1)); - REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0)); - REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") == - std::make_pair(false, 1)); - REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include ") == - std::make_pair(true, 7)); - } -} diff --git a/src/lex_utils.h b/src/lex_utils.h index 13d6a043..4206a120 100644 --- a/src/lex_utils.h +++ b/src/lex_utils.h @@ -11,5 +11,6 @@ int GetOffsetForPosition(lsPosition position, std::string_view content); std::string_view LexIdentifierAroundPos(lsPosition position, std::string_view content); -std::pair CaseFoldingSubsequenceMatch(std::string_view search, - std::string_view content); +int ReverseSubseqMatch(std::string_view pat, + std::string_view text, + int case_sensitivity); diff --git a/src/messages/text_document_completion.cc b/src/messages/text_document_completion.cc index e67d6083..a0820549 100644 --- a/src/messages/text_document_completion.cc +++ b/src/messages/text_document_completion.cc @@ -214,10 +214,11 @@ void FilterAndSortCompletionResponse( } // Fuzzy match and remove awful candidates. - FuzzyMatcher fuzzy(complete_text, g_config->completion.caseSensitivity); + bool sensitive = g_config->completion.caseSensitivity; + FuzzyMatcher fuzzy(complete_text, sensitive); for (auto& item : items) { item.score_ = - CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first + ReverseSubseqMatch(complete_text, *item.filterText, sensitive) >= 0 ? fuzzy.Match(*item.filterText) : FuzzyMatcher::kMinScore; } diff --git a/src/messages/workspace_symbol.cc b/src/messages/workspace_symbol.cc index e138d4a7..910cfdab 100644 --- a/src/messages/workspace_symbol.cc +++ b/src/messages/workspace_symbol.cc @@ -15,10 +15,13 @@ namespace { MethodType kMethodType = "workspace/symbol"; // Lookup |symbol| in |db| and insert the value into |result|. -bool InsertSymbolIntoResult(QueryDatabase* db, - WorkingFiles* working_files, - SymbolIdx symbol, - std::vector* result) { +bool AddSymbol( + QueryDatabase* db, + WorkingFiles* working_files, + int i, + bool use_detailed, + std::vector>* result) { + SymbolIdx symbol = db->symbols[i]; std::optional info = GetSymbolInfo(db, working_files, symbol, true); if (!info) @@ -38,7 +41,7 @@ bool InsertSymbolIntoResult(QueryDatabase* db, if (!ls_location) return false; info->location = *ls_location; - result->push_back(*info); + result->emplace_back(*info, use_detailed, i); return true; } @@ -72,82 +75,58 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler { std::string query = request->params.query; - std::unordered_set inserted_results; - // db->detailed_names indices of each lsSymbolInformation in out.result - std::vector result_indices; - std::vector unsorted_results; - inserted_results.reserve(g_config->workspaceSymbol.maxNum); - result_indices.reserve(g_config->workspaceSymbol.maxNum); - - // We use detailed_names without parameters for matching. - - // Find exact substring matches. - for (int i = 0; i < db->symbols.size(); ++i) { - std::string_view detailed_name = db->GetSymbolName(i, true); - if (detailed_name.find(query) != std::string::npos) { - // Do not show the same entry twice. - if (!inserted_results.insert(std::string(detailed_name)).second) - continue; - - if (InsertSymbolIntoResult(db, working_files, db->symbols[i], - &unsorted_results)) { - result_indices.push_back(i); - if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum) - break; - } - } - } + // {symbol info, matching detailed_name or short_name, index} + std::vector> unsorted; + bool sensitive = g_config->workspaceSymbol.caseSensitivity; // Find subsequence matches. - if (unsorted_results.size() < g_config->workspaceSymbol.maxNum) { - std::string query_without_space; - query_without_space.reserve(query.size()); - for (char c : query) - if (!isspace(c)) - query_without_space += c; + std::string query_without_space; + query_without_space.reserve(query.size()); + for (char c : query) + if (!isspace(c)) + query_without_space += c; - for (int i = 0; i < (int)db->symbols.size(); ++i) { - std::string_view detailed_name = db->GetSymbolName(i, true); - if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name) - .first) { - // Do not show the same entry twice. - if (!inserted_results.insert(std::string(detailed_name)).second) - continue; - - if (InsertSymbolIntoResult(db, working_files, db->symbols[i], - &unsorted_results)) { - result_indices.push_back(i); - if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum) - break; - } - } - } + for (int i = 0; i < (int)db->symbols.size(); ++i) { + std::string_view detailed_name = db->GetSymbolName(i, true); + int pos = + ReverseSubseqMatch(query_without_space, detailed_name, sensitive); + if (pos >= 0 && + AddSymbol(db, working_files, i, + detailed_name.find(':', pos) != std::string::npos, + &unsorted) && + unsorted.size() >= g_config->workspaceSymbol.maxNum) + break; } if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) { // Sort results with a fuzzy matching algorithm. int longest = 0; - for (int i : result_indices) - longest = std::max(longest, int(db->GetSymbolName(i, true).size())); + for (int i = 0; i < int(unsorted.size()); i++) { + longest = std::max( + longest, + int(db->GetSymbolName(std::get<2>(unsorted[i]), true).size())); + } FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity); - std::vector> permutation(result_indices.size()); - for (int i = 0; i < int(result_indices.size()); i++) { + std::vector> permutation(unsorted.size()); + for (int i = 0; i < int(unsorted.size()); i++) { permutation[i] = { - fuzzy.Match(db->GetSymbolName(result_indices[i], true)), i}; + fuzzy.Match(db->GetSymbolName(std::get<2>(unsorted[i]), + std::get<1>(unsorted[i]))), + i}; } std::sort(permutation.begin(), permutation.end(), std::greater>()); - out.result.reserve(result_indices.size()); + out.result.reserve(unsorted.size()); // Discard awful candidates. - for (int i = 0; i < int(result_indices.size()) && + for (int i = 0; i < int(unsorted.size()) && permutation[i].first > FuzzyMatcher::kMinScore; i++) out.result.push_back( - std::move(unsorted_results[permutation[i].second])); + std::move(std::get<0>(unsorted[permutation[i].second]))); } else { - out.result.reserve(unsorted_results.size()); - for (const auto& entry : unsorted_results) - out.result.push_back(std::move(entry)); + out.result.reserve(unsorted.size()); + for (auto& entry : unsorted) + out.result.push_back(std::get<0>(entry)); } LOG_S(INFO) << "[querydb] Found " << out.result.size()