From ad88f707f7bd1dd651eb6610e0c46926ad38855e Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sun, 18 Mar 2018 13:04:59 -0700
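Subject: [PATCH] Simplify and optimize completion.

Rank completion candidates with FuzzyMatcher instead of the
found_/skip_ pair. lsCompletionItem now carries a single score_:
candidates that fail CaseFoldingSubsequenceMatch get
FuzzyMatcher::kMinScore, the rest get fuzzy.Match(filterText).
FilterAndSortCompletionResponse removes every item whose score is not
above kMinScore and always sorts the remainder by score (descending),
then priority_, filterText length and filterText.

Behaviour changes that follow from this:
- The special case that dropped candidates starting with '_' when the
  typed text does not start with '_' is removed.
- The 200-item limit above which results were not fully sorted is
  removed.
- When no candidate matches, the list is now emptied instead of being
  left unfiltered.

Rename SubsequenceCountSkip to CaseFoldingSubsequenceMatch, returning
std::pair<bool, int> instead of std::tuple<bool, int>, and delete
SubsequenceMatchIgnoreCase together with its tests. The workspace
symbol handler now calls CaseFoldingSubsequenceMatch, so a query that
contains an uppercase letter is no longer matched case-insensitively.

Replace the "int" ranking case in the fuzzy_match tests with
Ranks("ast", {"ast", "AST", "INT_FAST16_MAX"}).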
---
 src/fuzzy_match.cc                       |  2 +-
 src/lex_utils.cc                         | 70 ++++++------------------
 src/lex_utils.h                          |  7 +--
 src/lsp_completion.h                     |  3 +-
 src/messages/text_document_completion.cc | 68 ++++++++---------------
 src/messages/workspace_symbol.cc         |  2 +-
 6 files changed, 44 insertions(+), 108 deletions(-)

diff --git a/src/fuzzy_match.cc b/src/fuzzy_match.cc
index 480b69d2..1f3cbc2a 100644
--- a/src/fuzzy_match.cc
+++ b/src/fuzzy_match.cc
@@ -156,7 +156,7 @@ TEST_SUITE("fuzzy_match") {
     Ranks("ma", {"map", "many", "maximum"});
     Ranks("print", {"printf", "sprintf"});
     // score(PRINT) = kMinScore
-    Ranks("int", {"int", "INT", "PRINT"});
+    Ranks("ast", {"ast", "AST", "INT_FAST16_MAX"});
     // score(PRINT) > kMinScore
     Ranks("Int", {"int", "INT", "PRINT"});
   }
diff --git a/src/lex_utils.cc b/src/lex_utils.cc
index 9ab0ef4b..c7e5700f 100644
--- a/src/lex_utils.cc
+++ b/src/lex_utils.cc
@@ -180,23 +180,10 @@ std::string_view LexIdentifierAroundPos(lsPosition position,
   return content.substr(start, end - start);
 }
 
-bool SubsequenceMatchIgnoreCase(std::string_view search, std::string_view content) {
-  size_t j = 0;
-  for (size_t i = 0; i < search.size(); i++) {
-    char search_char = tolower(search[i]);
-    while (j < content.size() && tolower(content[j]) != search_char)
-      j++;
-    if (j == content.size())
-      return false;
-    j++;
-  }
-  return true;
-}
-
 // Find discontinous |search| in |content|.
 // Return |found| and the count of skipped chars before found.
-std::tuple<bool, int> SubsequenceCountSkip(std::string_view search,
-                                           std::string_view content) {
+std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
+                                                 std::string_view content) {
   bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper);
   int skip = 0;
   size_t j = 0;
@@ -206,10 +193,10 @@ std::tuple<bool, int> SubsequenceCountSkip(std::string_view search,
                                : tolower(content[j]) != tolower(c)))
       ++j, ++skip;
     if (j == content.size())
-      return std::make_tuple(false, skip);
+      return {false, skip};
     ++j;
   }
-  return std::make_tuple(true, skip);
+  return {true, skip};
 }
 
 TEST_SUITE("Offset") {
@@ -234,43 +221,20 @@ TEST_SUITE("Offset") {
 }
 
 TEST_SUITE("Substring") {
-  TEST_CASE("match") {
-    // Empty string matches anything.
-    REQUIRE(SubsequenceMatchIgnoreCase("", ""));
-    REQUIRE(SubsequenceMatchIgnoreCase("", "aa"));
-
-    // Match in start/middle/end.
-    REQUIRE(SubsequenceMatchIgnoreCase("a", "abbbb"));
-    REQUIRE(SubsequenceMatchIgnoreCase("a", "bbabb"));
-    REQUIRE(SubsequenceMatchIgnoreCase("a", "bbbba"));
-    REQUIRE(SubsequenceMatchIgnoreCase("aa", "aabbb"));
-    REQUIRE(SubsequenceMatchIgnoreCase("aa", "bbaab"));
-    REQUIRE(SubsequenceMatchIgnoreCase("aa", "bbbaa"));
-
-    // Capitalization.
-    REQUIRE(SubsequenceMatchIgnoreCase("aa", "aA"));
-    REQUIRE(SubsequenceMatchIgnoreCase("aa", "Aa"));
-    REQUIRE(SubsequenceMatchIgnoreCase("aa", "AA"));
-
-    // Token skipping.
-    REQUIRE(SubsequenceMatchIgnoreCase("ad", "abcd"));
-    REQUIRE(SubsequenceMatchIgnoreCase("ad", "ABCD"));
-
-    // Ordering.
-    REQUIRE(!SubsequenceMatchIgnoreCase("ad", "dcba"));
-  }
-
   TEST_CASE("skip") {
-    REQUIRE(SubsequenceCountSkip("a", "a") == std::make_tuple(true, 0));
-    REQUIRE(SubsequenceCountSkip("b", "a") == std::make_tuple(false, 1));
-    REQUIRE(SubsequenceCountSkip("", "") == std::make_tuple(true, 0));
-    REQUIRE(SubsequenceCountSkip("a", "ba") == std::make_tuple(true, 1));
-    REQUIRE(SubsequenceCountSkip("aa", "aba") == std::make_tuple(true, 1));
-    REQUIRE(SubsequenceCountSkip("aa", "baa") == std::make_tuple(true, 1));
-    REQUIRE(SubsequenceCountSkip("aA", "aA") == std::make_tuple(true, 0));
-    REQUIRE(SubsequenceCountSkip("aA", "aa") == std::make_tuple(false, 1));
-    REQUIRE(SubsequenceCountSkip("incstdioh", "include <stdio.h>") ==
-            std::make_tuple(true, 7));
+    REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
+    REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
+    REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
+    REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
+    REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
+            std::make_pair(true, 1));
+    REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
+            std::make_pair(true, 1));
+    REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
+    REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
+            std::make_pair(false, 1));
+    REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
+            std::make_pair(true, 7));
   }
 }
 
diff --git a/src/lex_utils.h b/src/lex_utils.h
index f95035e6..7d860ace 100644
--- a/src/lex_utils.h
+++ b/src/lex_utils.h
@@ -26,8 +26,5 @@ void LexFunctionDeclaration(const std::string& buffer_content,
 std::string_view LexIdentifierAroundPos(lsPosition position,
                                         std::string_view content);
 
-// Case-insensitive subsequence matching.
-bool SubsequenceMatchIgnoreCase(std::string_view search, std::string_view content);
-
-std::tuple<bool, int> SubsequenceCountSkip(std::string_view search,
-                                           std::string_view content);
+std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
+                                                 std::string_view content);
diff --git a/src/lsp_completion.h b/src/lsp_completion.h
index e06b4bd9..04105f2c 100644
--- a/src/lsp_completion.h
+++ b/src/lsp_completion.h
@@ -72,8 +72,7 @@ struct lsCompletionItem {
   optional<std::string> documentation;
 
   // Internal information to order candidates.
-  bool found_;
-  std::string::size_type skip_;
+  int score_;
   unsigned priority_;
 
   // Use <> or "" by default as include path.
diff --git a/src/messages/text_document_completion.cc b/src/messages/text_document_completion.cc
index ba1a305d..17e0c501 100644
--- a/src/messages/text_document_completion.cc
+++ b/src/messages/text_document_completion.cc
@@ -1,5 +1,6 @@
 #include "clang_complete.h"
 #include "code_complete_cache.h"
+#include "fuzzy_match.h"
 #include "include_complete.h"
 #include "message_handler.h"
 #include "queue_manager.h"
@@ -70,19 +71,6 @@ struct Out_TextDocumentComplete
 };
 MAKE_REFLECT_STRUCT(Out_TextDocumentComplete, jsonrpc, id, result);
 
-bool CompareLsCompletionItem(const lsCompletionItem& lhs,
-                             const lsCompletionItem& rhs) {
-  if (lhs.found_ != rhs.found_)
-    return !lhs.found_ < !rhs.found_;
-  if (lhs.skip_ != rhs.skip_)
-    return lhs.skip_ < rhs.skip_;
-  if (lhs.priority_ != rhs.priority_)
-    return lhs.priority_ < rhs.priority_;
-  if (lhs.filterText->length() != rhs.filterText->length())
-    return lhs.filterText->length() < rhs.filterText->length();
-  return *lhs.filterText < *rhs.filterText;
-}
-
 void DecorateIncludePaths(const std::smatch& match,
                           std::vector<lsCompletionItem>* items) {
   std::string spaces_after_include = " ";
@@ -200,41 +188,29 @@ void FilterAndSortCompletionResponse(
       item.filterText = item.label;
   }
 
-  // If the text doesn't start with underscore, remove all candidates that
-  // start with underscore.
-  if (complete_text[0] != '_') {
-    auto filter = [](const lsCompletionItem& item) {
-      return (*item.filterText)[0] == '_';
-    };
-    items.erase(std::remove_if(items.begin(), items.end(), filter),
-                items.end());
-  }
-
-  // Fuzzy match. Remove any candidates that do not match.
-  bool found = false;
+  // Fuzzy match and remove awful candidates.
+  FuzzyMatcher fuzzy(complete_text);
   for (auto& item : items) {
-    std::tie(item.found_, item.skip_) =
-        SubsequenceCountSkip(complete_text, *item.filterText);
-    found = found || item.found_;
-  }
-  if (found) {
-    auto filter = [](const lsCompletionItem& item) { return !item.found_; };
-    items.erase(std::remove_if(items.begin(), items.end(), filter),
-                items.end());
-
-    // Order all items and set |sortText|.
-    const size_t kMaxSortSize = 200u;
-    if (items.size() <= kMaxSortSize) {
-      std::sort(items.begin(), items.end(), CompareLsCompletionItem);
-    } else {
-      // Just place items that found the text before those not.
-      std::vector<lsCompletionItem> items_found, items_notfound;
-      for (auto& item : items)
-        (item.found_ ? items_found : items_notfound).push_back(item);
-      items = items_found;
-      items.insert(items.end(), items_notfound.begin(), items_notfound.end());
-    }
+    item.score_ =
+        CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first
+            ? fuzzy.Match(*item.filterText)
+            : FuzzyMatcher::kMinScore;
   }
+  items.erase(std::remove_if(items.begin(), items.end(),
+      [](const lsCompletionItem& item) {
+        return item.score_ <= FuzzyMatcher::kMinScore;
+      }),
+    items.end());
+  std::sort(items.begin(), items.end(),
+            [](const lsCompletionItem& lhs, const lsCompletionItem& rhs) {
+              if (lhs.score_ != rhs.score_)
+                return lhs.score_ > rhs.score_;
+              if (lhs.priority_ != rhs.priority_)
+                return lhs.priority_ < rhs.priority_;
+              if (lhs.filterText->size() != rhs.filterText->size())
+                return lhs.filterText->size() < rhs.filterText->size();
+              return *lhs.filterText < *rhs.filterText;
+            });
 
   // Trim result.
   finalize();
diff --git a/src/messages/workspace_symbol.cc b/src/messages/workspace_symbol.cc
index e02b2212..9595baf8 100644
--- a/src/messages/workspace_symbol.cc
+++ b/src/messages/workspace_symbol.cc
@@ -107,7 +107,7 @@ struct WorkspaceSymbolHandler : BaseMessageHandler<Ipc_WorkspaceSymbol> {
 
       for (int i = 0; i < (int)db->symbols.size(); ++i) {
         std::string_view detailed_name = db->GetSymbolDetailedName(i);
-        if (SubsequenceMatchIgnoreCase(query_without_space, detailed_name)) {
+        if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name).first) {
           // Do not show the same entry twice.
           if (!inserted_results.insert(std::string(detailed_name)).second)
             continue;