Improve workspace/symbol sorting heuristic

This commit is contained in:
Fangrui Song 2018-04-29 19:51:25 -07:00
parent f73100adf3
commit 5ef801662b
6 changed files with 64 additions and 100 deletions

View File

@ -354,6 +354,8 @@ struct IndexParam {
};
IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
if (!file)
return nullptr;
bool is_first_ownership = false;
IndexFile* db = param->file_consumer->TryConsumeFile(
file, &is_first_ownership, &param->file_contents);

View File

View File

@ -49,21 +49,20 @@ std::string_view LexIdentifierAroundPos(lsPosition position,
// Find discontinuous |search| in |content|.
// Return |found| and the count of skipped chars before found.
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
std::string_view content) {
bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper);
int skip = 0;
size_t j = 0;
for (char c : search) {
while (j < content.size() &&
(hasUppercaseLetter ? content[j] != c
: tolower(content[j]) != tolower(c)))
++j, ++skip;
if (j == content.size())
return {false, skip};
++j;
}
return {true, skip};
// Matches |pat| as a (possibly discontinuous) subsequence of |text|, walking
// both strings from their ends towards their beginnings.
//
// |case_sensitivity|:
//   0      - characters are compared after folding both to lower case.
//   1      - "smart case": exact comparison if |pat| contains an uppercase
//            letter, folded comparison otherwise.
//   other  - characters are compared exactly.
//
// Returns the index in |text| at which the first character of |pat| was
// matched, |text.size()| if |pat| is empty, or -1 if |pat| is not a
// subsequence of |text|.
int ReverseSubseqMatch(std::string_view pat,
                       std::string_view text,
                       int case_sensitivity) {
  // The <cctype> classifiers have undefined behavior for negative arguments,
  // which a plain |char| holds for any non-ASCII byte on platforms where char
  // is signed. Route every character through unsigned char first.
  const auto is_upper = [](char c) {
    return isupper(static_cast<unsigned char>(c)) != 0;
  };
  const auto fold = [](char c) {
    return tolower(static_cast<unsigned char>(c));
  };

  // Resolve smart case into one of the two concrete modes.
  if (case_sensitivity == 1)
    case_sensitivity = std::any_of(pat.begin(), pat.end(), is_upper) ? 2 : 0;

  // |j| is the number of pattern characters still to be matched.
  int j = static_cast<int>(pat.size());
  if (!j)
    return static_cast<int>(text.size());

  for (int i = static_cast<int>(text.size()); i--;) {
    const bool same = case_sensitivity
                          ? text[i] == pat[j - 1]
                          : fold(text[i]) == fold(pat[j - 1]);
    // Consume one pattern character per hit; the hit that consumes the last
    // one (the first character of |pat|) marks where the match begins.
    if (same && !--j)
      return i;
  }
  return -1;
}
TEST_SUITE("Offset") {
@ -86,21 +85,3 @@ TEST_SUITE("Offset") {
REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1);
}
}
// Tests for CaseFoldingSubsequenceMatch(search, content). Each expectation is
// the pair {found, skipped}: whether |search| is a subsequence of |content|,
// and how many |content| characters were skipped over while scanning.
TEST_SUITE("Substring") {
TEST_CASE("skip") {
// Exact single-character hit: nothing skipped.
REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
// Miss: the one content character is skipped before the scan runs out.
REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
// An empty pattern matches trivially.
REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
// Skipped characters before or between the matched characters are counted.
REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
std::make_pair(true, 1));
REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
std::make_pair(true, 1));
// A pattern containing an uppercase letter is matched case-sensitively.
REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
std::make_pair(false, 1));
// Discontinuous match across a longer string.
REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
std::make_pair(true, 7));
}
}

View File

@ -11,5 +11,6 @@ int GetOffsetForPosition(lsPosition position, std::string_view content);
std::string_view LexIdentifierAroundPos(lsPosition position,
std::string_view content);
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
std::string_view content);
// Matches |pat| as a subsequence of |text|, scanning from the end of both.
// |case_sensitivity|: 0 compares case-insensitively, 1 compares exactly only
// if |pat| contains an uppercase letter, any other value compares exactly.
// Returns the index in |text| where the first character of |pat| matched,
// |text.size()| if |pat| is empty, or -1 if there is no match.
int ReverseSubseqMatch(std::string_view pat,
std::string_view text,
int case_sensitivity);

View File

@ -214,10 +214,11 @@ void FilterAndSortCompletionResponse(
}
// Fuzzy match and remove awful candidates.
FuzzyMatcher fuzzy(complete_text, g_config->completion.caseSensitivity);
bool sensitive = g_config->completion.caseSensitivity;
FuzzyMatcher fuzzy(complete_text, sensitive);
for (auto& item : items) {
item.score_ =
CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first
ReverseSubseqMatch(complete_text, *item.filterText, sensitive) >= 0
? fuzzy.Match(*item.filterText)
: FuzzyMatcher::kMinScore;
}

View File

@ -15,10 +15,13 @@ namespace {
MethodType kMethodType = "workspace/symbol";
// Lookup |symbol| in |db| and insert the value into |result|.
bool InsertSymbolIntoResult(QueryDatabase* db,
bool AddSymbol(
QueryDatabase* db,
WorkingFiles* working_files,
SymbolIdx symbol,
std::vector<lsSymbolInformation>* result) {
int i,
bool use_detailed,
std::vector<std::tuple<lsSymbolInformation, bool, int>>* result) {
SymbolIdx symbol = db->symbols[i];
std::optional<lsSymbolInformation> info =
GetSymbolInfo(db, working_files, symbol, true);
if (!info)
@ -38,7 +41,7 @@ bool InsertSymbolIntoResult(QueryDatabase* db,
if (!ls_location)
return false;
info->location = *ls_location;
result->push_back(*info);
result->emplace_back(*info, use_detailed, i);
return true;
}
@ -72,34 +75,11 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
std::string query = request->params.query;
std::unordered_set<std::string> inserted_results;
// db->detailed_names indices of each lsSymbolInformation in out.result
std::vector<int> result_indices;
std::vector<lsSymbolInformation> unsorted_results;
inserted_results.reserve(g_config->workspaceSymbol.maxNum);
result_indices.reserve(g_config->workspaceSymbol.maxNum);
// We use detailed_names without parameters for matching.
// Find exact substring matches.
for (int i = 0; i < db->symbols.size(); ++i) {
std::string_view detailed_name = db->GetSymbolName(i, true);
if (detailed_name.find(query) != std::string::npos) {
// Do not show the same entry twice.
if (!inserted_results.insert(std::string(detailed_name)).second)
continue;
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
&unsorted_results)) {
result_indices.push_back(i);
if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
break;
}
}
}
// {symbol info, matching detailed_name or short_name, index}
std::vector<std::tuple<lsSymbolInformation, bool, int>> unsorted;
bool sensitive = g_config->workspaceSymbol.caseSensitivity;
// Find subsequence matches.
if (unsorted_results.size() < g_config->workspaceSymbol.maxNum) {
std::string query_without_space;
query_without_space.reserve(query.size());
for (char c : query)
@ -108,46 +88,45 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
for (int i = 0; i < (int)db->symbols.size(); ++i) {
std::string_view detailed_name = db->GetSymbolName(i, true);
if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name)
.first) {
// Do not show the same entry twice.
if (!inserted_results.insert(std::string(detailed_name)).second)
continue;
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
&unsorted_results)) {
result_indices.push_back(i);
if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
int pos =
ReverseSubseqMatch(query_without_space, detailed_name, sensitive);
if (pos >= 0 &&
AddSymbol(db, working_files, i,
detailed_name.find(':', pos) != std::string::npos,
&unsorted) &&
unsorted.size() >= g_config->workspaceSymbol.maxNum)
break;
}
}
}
}
if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) {
// Sort results with a fuzzy matching algorithm.
int longest = 0;
for (int i : result_indices)
longest = std::max(longest, int(db->GetSymbolName(i, true).size()));
for (int i = 0; i < int(unsorted.size()); i++) {
longest = std::max(
longest,
int(db->GetSymbolName(std::get<2>(unsorted[i]), true).size()));
}
FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity);
std::vector<std::pair<int, int>> permutation(result_indices.size());
for (int i = 0; i < int(result_indices.size()); i++) {
std::vector<std::pair<int, int>> permutation(unsorted.size());
for (int i = 0; i < int(unsorted.size()); i++) {
permutation[i] = {
fuzzy.Match(db->GetSymbolName(result_indices[i], true)), i};
fuzzy.Match(db->GetSymbolName(std::get<2>(unsorted[i]),
std::get<1>(unsorted[i]))),
i};
}
std::sort(permutation.begin(), permutation.end(),
std::greater<std::pair<int, int>>());
out.result.reserve(result_indices.size());
out.result.reserve(unsorted.size());
// Discard awful candidates.
for (int i = 0; i < int(result_indices.size()) &&
for (int i = 0; i < int(unsorted.size()) &&
permutation[i].first > FuzzyMatcher::kMinScore;
i++)
out.result.push_back(
std::move(unsorted_results[permutation[i].second]));
std::move(std::get<0>(unsorted[permutation[i].second])));
} else {
out.result.reserve(unsorted_results.size());
for (const auto& entry : unsorted_results)
out.result.push_back(std::move(entry));
out.result.reserve(unsorted.size());
for (auto& entry : unsorted)
out.result.push_back(std::get<0>(entry));
}
LOG_S(INFO) << "[querydb] Found " << out.result.size()