Improve workspace/symbol sorting heuristic

This commit is contained in:
Fangrui Song 2018-04-29 19:51:25 -07:00
parent f73100adf3
commit 5ef801662b
6 changed files with 64 additions and 100 deletions

View File

@ -354,6 +354,8 @@ struct IndexParam {
}; };
IndexFile* ConsumeFile(IndexParam* param, CXFile file) { IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
if (!file)
return nullptr;
bool is_first_ownership = false; bool is_first_ownership = false;
IndexFile* db = param->file_consumer->TryConsumeFile( IndexFile* db = param->file_consumer->TryConsumeFile(
file, &is_first_ownership, &param->file_contents); file, &is_first_ownership, &param->file_contents);

View File

View File

@ -49,21 +49,20 @@ std::string_view LexIdentifierAroundPos(lsPosition position,
// Find discontinuous |search| in |content|. // Find discontinuous |search| in |content|.
// Return |found| and the count of skipped chars before found. // Return |found| and the count of skipped chars before found.
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search, int ReverseSubseqMatch(std::string_view pat,
std::string_view content) { std::string_view text,
bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper); int case_sensitivity) {
int skip = 0; if (case_sensitivity == 1)
size_t j = 0; case_sensitivity = std::any_of(pat.begin(), pat.end(), isupper) ? 2 : 0;
for (char c : search) { int j = pat.size();
while (j < content.size() && if (!j)
(hasUppercaseLetter ? content[j] != c return text.size();
: tolower(content[j]) != tolower(c))) for (int i = text.size(); i--;)
++j, ++skip; if ((case_sensitivity ? text[i] == pat[j - 1]
if (j == content.size()) : tolower(text[i]) == tolower(pat[j - 1])) &&
return {false, skip}; !--j)
++j; return i;
} return -1;
return {true, skip};
} }
TEST_SUITE("Offset") { TEST_SUITE("Offset") {
@ -86,21 +85,3 @@ TEST_SUITE("Offset") {
REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1); REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1);
} }
} }
TEST_SUITE("Substring") {
TEST_CASE("skip") {
REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
std::make_pair(true, 1));
REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
std::make_pair(true, 1));
REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
std::make_pair(false, 1));
REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
std::make_pair(true, 7));
}
}

View File

@ -11,5 +11,6 @@ int GetOffsetForPosition(lsPosition position, std::string_view content);
std::string_view LexIdentifierAroundPos(lsPosition position, std::string_view LexIdentifierAroundPos(lsPosition position,
std::string_view content); std::string_view content);
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search, int ReverseSubseqMatch(std::string_view pat,
std::string_view content); std::string_view text,
int case_sensitivity);

View File

@ -214,10 +214,11 @@ void FilterAndSortCompletionResponse(
} }
// Fuzzy match and remove awful candidates. // Fuzzy match and remove awful candidates.
FuzzyMatcher fuzzy(complete_text, g_config->completion.caseSensitivity); bool sensitive = g_config->completion.caseSensitivity;
FuzzyMatcher fuzzy(complete_text, sensitive);
for (auto& item : items) { for (auto& item : items) {
item.score_ = item.score_ =
CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first ReverseSubseqMatch(complete_text, *item.filterText, sensitive) >= 0
? fuzzy.Match(*item.filterText) ? fuzzy.Match(*item.filterText)
: FuzzyMatcher::kMinScore; : FuzzyMatcher::kMinScore;
} }

View File

@ -15,10 +15,13 @@ namespace {
MethodType kMethodType = "workspace/symbol"; MethodType kMethodType = "workspace/symbol";
// Look up |symbol| in |db| and insert the value into |result|. // Look up |symbol| in |db| and insert the value into |result|.
bool InsertSymbolIntoResult(QueryDatabase* db, bool AddSymbol(
QueryDatabase* db,
WorkingFiles* working_files, WorkingFiles* working_files,
SymbolIdx symbol, int i,
std::vector<lsSymbolInformation>* result) { bool use_detailed,
std::vector<std::tuple<lsSymbolInformation, bool, int>>* result) {
SymbolIdx symbol = db->symbols[i];
std::optional<lsSymbolInformation> info = std::optional<lsSymbolInformation> info =
GetSymbolInfo(db, working_files, symbol, true); GetSymbolInfo(db, working_files, symbol, true);
if (!info) if (!info)
@ -38,7 +41,7 @@ bool InsertSymbolIntoResult(QueryDatabase* db,
if (!ls_location) if (!ls_location)
return false; return false;
info->location = *ls_location; info->location = *ls_location;
result->push_back(*info); result->emplace_back(*info, use_detailed, i);
return true; return true;
} }
@ -72,34 +75,11 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
std::string query = request->params.query; std::string query = request->params.query;
std::unordered_set<std::string> inserted_results; // {symbol info, matching detailed_name or short_name, index}
// db->detailed_names indices of each lsSymbolInformation in out.result std::vector<std::tuple<lsSymbolInformation, bool, int>> unsorted;
std::vector<int> result_indices; bool sensitive = g_config->workspaceSymbol.caseSensitivity;
std::vector<lsSymbolInformation> unsorted_results;
inserted_results.reserve(g_config->workspaceSymbol.maxNum);
result_indices.reserve(g_config->workspaceSymbol.maxNum);
// We use detailed_names without parameters for matching.
// Find exact substring matches.
for (int i = 0; i < db->symbols.size(); ++i) {
std::string_view detailed_name = db->GetSymbolName(i, true);
if (detailed_name.find(query) != std::string::npos) {
// Do not show the same entry twice.
if (!inserted_results.insert(std::string(detailed_name)).second)
continue;
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
&unsorted_results)) {
result_indices.push_back(i);
if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
break;
}
}
}
// Find subsequence matches. // Find subsequence matches.
if (unsorted_results.size() < g_config->workspaceSymbol.maxNum) {
std::string query_without_space; std::string query_without_space;
query_without_space.reserve(query.size()); query_without_space.reserve(query.size());
for (char c : query) for (char c : query)
@ -108,46 +88,45 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
for (int i = 0; i < (int)db->symbols.size(); ++i) { for (int i = 0; i < (int)db->symbols.size(); ++i) {
std::string_view detailed_name = db->GetSymbolName(i, true); std::string_view detailed_name = db->GetSymbolName(i, true);
if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name) int pos =
.first) { ReverseSubseqMatch(query_without_space, detailed_name, sensitive);
// Do not show the same entry twice. if (pos >= 0 &&
if (!inserted_results.insert(std::string(detailed_name)).second) AddSymbol(db, working_files, i,
continue; detailed_name.find(':', pos) != std::string::npos,
&unsorted) &&
if (InsertSymbolIntoResult(db, working_files, db->symbols[i], unsorted.size() >= g_config->workspaceSymbol.maxNum)
&unsorted_results)) {
result_indices.push_back(i);
if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
break; break;
} }
}
}
}
if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) { if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) {
// Sort results with a fuzzy matching algorithm. // Sort results with a fuzzy matching algorithm.
int longest = 0; int longest = 0;
for (int i : result_indices) for (int i = 0; i < int(unsorted.size()); i++) {
longest = std::max(longest, int(db->GetSymbolName(i, true).size())); longest = std::max(
longest,
int(db->GetSymbolName(std::get<2>(unsorted[i]), true).size()));
}
FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity); FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity);
std::vector<std::pair<int, int>> permutation(result_indices.size()); std::vector<std::pair<int, int>> permutation(unsorted.size());
for (int i = 0; i < int(result_indices.size()); i++) { for (int i = 0; i < int(unsorted.size()); i++) {
permutation[i] = { permutation[i] = {
fuzzy.Match(db->GetSymbolName(result_indices[i], true)), i}; fuzzy.Match(db->GetSymbolName(std::get<2>(unsorted[i]),
std::get<1>(unsorted[i]))),
i};
} }
std::sort(permutation.begin(), permutation.end(), std::sort(permutation.begin(), permutation.end(),
std::greater<std::pair<int, int>>()); std::greater<std::pair<int, int>>());
out.result.reserve(result_indices.size()); out.result.reserve(unsorted.size());
// Discard awful candidates. // Discard awful candidates.
for (int i = 0; i < int(result_indices.size()) && for (int i = 0; i < int(unsorted.size()) &&
permutation[i].first > FuzzyMatcher::kMinScore; permutation[i].first > FuzzyMatcher::kMinScore;
i++) i++)
out.result.push_back( out.result.push_back(
std::move(unsorted_results[permutation[i].second])); std::move(std::get<0>(unsorted[permutation[i].second])));
} else { } else {
out.result.reserve(unsorted_results.size()); out.result.reserve(unsorted.size());
for (const auto& entry : unsorted_results) for (auto& entry : unsorted)
out.result.push_back(std::move(entry)); out.result.push_back(std::get<0>(entry));
} }
LOG_S(INFO) << "[querydb] Found " << out.result.size() LOG_S(INFO) << "[querydb] Found " << out.result.size()