From ad88f707f7bd1dd651eb6610e0c46926ad38855e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 18 Mar 2018 13:04:59 -0700 Subject: [PATCH] Simplify and optimize completion. --- src/fuzzy_match.cc | 2 +- src/lex_utils.cc | 70 ++++++------------------ src/lex_utils.h | 7 +-- src/lsp_completion.h | 3 +- src/messages/text_document_completion.cc | 68 ++++++++--------------- src/messages/workspace_symbol.cc | 2 +- 6 files changed, 44 insertions(+), 108 deletions(-) diff --git a/src/fuzzy_match.cc b/src/fuzzy_match.cc index 480b69d2..1f3cbc2a 100644 --- a/src/fuzzy_match.cc +++ b/src/fuzzy_match.cc @@ -156,7 +156,7 @@ TEST_SUITE("fuzzy_match") { Ranks("ma", {"map", "many", "maximum"}); Ranks("print", {"printf", "sprintf"}); // score(PRINT) = kMinScore - Ranks("int", {"int", "INT", "PRINT"}); + Ranks("ast", {"ast", "AST", "INT_FAST16_MAX"}); // score(PRINT) > kMinScore Ranks("Int", {"int", "INT", "PRINT"}); } diff --git a/src/lex_utils.cc b/src/lex_utils.cc index 9ab0ef4b..c7e5700f 100644 --- a/src/lex_utils.cc +++ b/src/lex_utils.cc @@ -180,23 +180,10 @@ std::string_view LexIdentifierAroundPos(lsPosition position, return content.substr(start, end - start); } -bool SubsequenceMatchIgnoreCase(std::string_view search, std::string_view content) { - size_t j = 0; - for (size_t i = 0; i < search.size(); i++) { - char search_char = tolower(search[i]); - while (j < content.size() && tolower(content[j]) != search_char) - j++; - if (j == content.size()) - return false; - j++; - } - return true; -} - // Find discontinous |search| in |content|. // Return |found| and the count of skipped chars before found. -std::tuple SubsequenceCountSkip(std::string_view search, - std::string_view content) { +std::pair CaseFoldingSubsequenceMatch(std::string_view search, + std::string_view content) { bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper); int skip = 0; size_t j = 0; @@ -206,10 +193,10 @@ std::tuple SubsequenceCountSkip(std::string_view search, : tolower(content[j]) != tolower(c))) ++j, ++skip; if (j == content.size()) - return std::make_tuple(false, skip); + return {false, skip}; ++j; } - return std::make_tuple(true, skip); + return {true, skip}; } TEST_SUITE("Offset") { @@ -234,43 +221,20 @@ TEST_SUITE("Offset") { } TEST_SUITE("Substring") { - TEST_CASE("match") { - // Empty string matches anything. - REQUIRE(SubsequenceMatchIgnoreCase("", "")); - REQUIRE(SubsequenceMatchIgnoreCase("", "aa")); - - // Match in start/middle/end. - REQUIRE(SubsequenceMatchIgnoreCase("a", "abbbb")); - REQUIRE(SubsequenceMatchIgnoreCase("a", "bbabb")); - REQUIRE(SubsequenceMatchIgnoreCase("a", "bbbba")); - REQUIRE(SubsequenceMatchIgnoreCase("aa", "aabbb")); - REQUIRE(SubsequenceMatchIgnoreCase("aa", "bbaab")); - REQUIRE(SubsequenceMatchIgnoreCase("aa", "bbbaa")); - - // Capitalization. - REQUIRE(SubsequenceMatchIgnoreCase("aa", "aA")); - REQUIRE(SubsequenceMatchIgnoreCase("aa", "Aa")); - REQUIRE(SubsequenceMatchIgnoreCase("aa", "AA")); - - // Token skipping. - REQUIRE(SubsequenceMatchIgnoreCase("ad", "abcd")); - REQUIRE(SubsequenceMatchIgnoreCase("ad", "ABCD")); - - // Ordering. - REQUIRE(!SubsequenceMatchIgnoreCase("ad", "dcba")); - } - TEST_CASE("skip") { - REQUIRE(SubsequenceCountSkip("a", "a") == std::make_tuple(true, 0)); - REQUIRE(SubsequenceCountSkip("b", "a") == std::make_tuple(false, 1)); - REQUIRE(SubsequenceCountSkip("", "") == std::make_tuple(true, 0)); - REQUIRE(SubsequenceCountSkip("a", "ba") == std::make_tuple(true, 1)); - REQUIRE(SubsequenceCountSkip("aa", "aba") == std::make_tuple(true, 1)); - REQUIRE(SubsequenceCountSkip("aa", "baa") == std::make_tuple(true, 1)); - REQUIRE(SubsequenceCountSkip("aA", "aA") == std::make_tuple(true, 0)); - REQUIRE(SubsequenceCountSkip("aA", "aa") == std::make_tuple(false, 1)); - REQUIRE(SubsequenceCountSkip("incstdioh", "include ") == - std::make_tuple(true, 7)); + REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0)); + REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1)); + REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0)); + REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1)); + REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") == + std::make_pair(true, 1)); + REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") == + std::make_pair(true, 1)); + REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0)); + REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") == + std::make_pair(false, 1)); + REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include ") == + std::make_pair(true, 7)); } } diff --git a/src/lex_utils.h b/src/lex_utils.h index f95035e6..7d860ace 100644 --- a/src/lex_utils.h +++ b/src/lex_utils.h @@ -26,8 +26,5 @@ void LexFunctionDeclaration(const std::string& buffer_content, std::string_view LexIdentifierAroundPos(lsPosition position, std::string_view content); -// Case-insensitive subsequence matching. -bool SubsequenceMatchIgnoreCase(std::string_view search, std::string_view content); - -std::tuple SubsequenceCountSkip(std::string_view search, - std::string_view content); +std::pair CaseFoldingSubsequenceMatch(std::string_view search, + std::string_view content); diff --git a/src/lsp_completion.h b/src/lsp_completion.h index e06b4bd9..04105f2c 100644 --- a/src/lsp_completion.h +++ b/src/lsp_completion.h @@ -72,8 +72,7 @@ struct lsCompletionItem { optional documentation; // Internal information to order candidates. - bool found_; - std::string::size_type skip_; + int score_; unsigned priority_; // Use <> or "" by default as include path. diff --git a/src/messages/text_document_completion.cc b/src/messages/text_document_completion.cc index ba1a305d..17e0c501 100644 --- a/src/messages/text_document_completion.cc +++ b/src/messages/text_document_completion.cc @@ -1,5 +1,6 @@ #include "clang_complete.h" #include "code_complete_cache.h" +#include "fuzzy_match.h" #include "include_complete.h" #include "message_handler.h" #include "queue_manager.h" @@ -70,19 +71,6 @@ struct Out_TextDocumentComplete }; MAKE_REFLECT_STRUCT(Out_TextDocumentComplete, jsonrpc, id, result); -bool CompareLsCompletionItem(const lsCompletionItem& lhs, - const lsCompletionItem& rhs) { - if (lhs.found_ != rhs.found_) - return !lhs.found_ < !rhs.found_; - if (lhs.skip_ != rhs.skip_) - return lhs.skip_ < rhs.skip_; - if (lhs.priority_ != rhs.priority_) - return lhs.priority_ < rhs.priority_; - if (lhs.filterText->length() != rhs.filterText->length()) - return lhs.filterText->length() < rhs.filterText->length(); - return *lhs.filterText < *rhs.filterText; -} - void DecorateIncludePaths(const std::smatch& match, std::vector* items) { std::string spaces_after_include = " "; @@ -200,41 +188,29 @@ void FilterAndSortCompletionResponse( item.filterText = item.label; } - // If the text doesn't start with underscore, remove all candidates that - // start with underscore. - if (complete_text[0] != '_') { - auto filter = [](const lsCompletionItem& item) { - return (*item.filterText)[0] == '_'; - }; - items.erase(std::remove_if(items.begin(), items.end(), filter), - items.end()); - } - - // Fuzzy match. Remove any candidates that do not match. - bool found = false; + // Fuzzy match and remove awful candidates. + FuzzyMatcher fuzzy(complete_text); for (auto& item : items) { - std::tie(item.found_, item.skip_) = - SubsequenceCountSkip(complete_text, *item.filterText); - found = found || item.found_; - } - if (found) { - auto filter = [](const lsCompletionItem& item) { return !item.found_; }; - items.erase(std::remove_if(items.begin(), items.end(), filter), - items.end()); - - // Order all items and set |sortText|. - const size_t kMaxSortSize = 200u; - if (items.size() <= kMaxSortSize) { - std::sort(items.begin(), items.end(), CompareLsCompletionItem); - } else { - // Just place items that found the text before those not. - std::vector items_found, items_notfound; - for (auto& item : items) - (item.found_ ? items_found : items_notfound).push_back(item); - items = items_found; - items.insert(items.end(), items_notfound.begin(), items_notfound.end()); - } + item.score_ = + CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first + ? fuzzy.Match(*item.filterText) + : FuzzyMatcher::kMinScore; } + items.erase(std::remove_if(items.begin(), items.end(), + [](const lsCompletionItem& item) { + return item.score_ <= FuzzyMatcher::kMinScore; + }), + items.end()); + std::sort(items.begin(), items.end(), + [](const lsCompletionItem& lhs, const lsCompletionItem& rhs) { + if (lhs.score_ != rhs.score_) + return lhs.score_ > rhs.score_; + if (lhs.priority_ != rhs.priority_) + return lhs.priority_ < rhs.priority_; + if (lhs.filterText->size() != rhs.filterText->size()) + return lhs.filterText->size() < rhs.filterText->size(); + return *lhs.filterText < *rhs.filterText; + }); // Trim result. finalize(); diff --git a/src/messages/workspace_symbol.cc b/src/messages/workspace_symbol.cc index e02b2212..9595baf8 100644 --- a/src/messages/workspace_symbol.cc +++ b/src/messages/workspace_symbol.cc @@ -107,7 +107,7 @@ struct WorkspaceSymbolHandler : BaseMessageHandler { for (int i = 0; i < (int)db->symbols.size(); ++i) { std::string_view detailed_name = db->GetSymbolDetailedName(i); - if (SubsequenceMatchIgnoreCase(query_without_space, detailed_name)) { + if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name).first) { // Do not show the same entry twice. if (!inserted_results.insert(std::string(detailed_name)).second) continue;