mirror of
https://github.com/MaskRay/ccls.git
synced 2025-01-19 03:55:49 +00:00
Improve workspace/symbol sorting heuristic
This commit is contained in:
parent
f73100adf3
commit
5ef801662b
@ -354,6 +354,8 @@ struct IndexParam {
|
|||||||
};
|
};
|
||||||
|
|
||||||
IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
|
IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
|
||||||
|
if (!file)
|
||||||
|
return nullptr;
|
||||||
bool is_first_ownership = false;
|
bool is_first_ownership = false;
|
||||||
IndexFile* db = param->file_consumer->TryConsumeFile(
|
IndexFile* db = param->file_consumer->TryConsumeFile(
|
||||||
file, &is_first_ownership, &param->file_contents);
|
file, &is_first_ownership, &param->file_contents);
|
||||||
|
@ -49,21 +49,20 @@ std::string_view LexIdentifierAroundPos(lsPosition position,
|
|||||||
|
|
||||||
// Find discontinuous |search| in |content|.
|
// Find discontinuous |search| in |content|.
|
||||||
// Return |found| and the count of skipped chars before found.
|
// Return |found| and the count of skipped chars before found.
|
||||||
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
|
int ReverseSubseqMatch(std::string_view pat,
|
||||||
std::string_view content) {
|
std::string_view text,
|
||||||
bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper);
|
int case_sensitivity) {
|
||||||
int skip = 0;
|
if (case_sensitivity == 1)
|
||||||
size_t j = 0;
|
case_sensitivity = std::any_of(pat.begin(), pat.end(), isupper) ? 2 : 0;
|
||||||
for (char c : search) {
|
int j = pat.size();
|
||||||
while (j < content.size() &&
|
if (!j)
|
||||||
(hasUppercaseLetter ? content[j] != c
|
return text.size();
|
||||||
: tolower(content[j]) != tolower(c)))
|
for (int i = text.size(); i--;)
|
||||||
++j, ++skip;
|
if ((case_sensitivity ? text[i] == pat[j - 1]
|
||||||
if (j == content.size())
|
: tolower(text[i]) == tolower(pat[j - 1])) &&
|
||||||
return {false, skip};
|
!--j)
|
||||||
++j;
|
return i;
|
||||||
}
|
return -1;
|
||||||
return {true, skip};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_SUITE("Offset") {
|
TEST_SUITE("Offset") {
|
||||||
@ -86,21 +85,3 @@ TEST_SUITE("Offset") {
|
|||||||
REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1);
|
REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_SUITE("Substring") {
|
|
||||||
TEST_CASE("skip") {
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
|
|
||||||
std::make_pair(true, 1));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
|
|
||||||
std::make_pair(true, 1));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
|
|
||||||
std::make_pair(false, 1));
|
|
||||||
REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
|
|
||||||
std::make_pair(true, 7));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -11,5 +11,6 @@ int GetOffsetForPosition(lsPosition position, std::string_view content);
|
|||||||
std::string_view LexIdentifierAroundPos(lsPosition position,
|
std::string_view LexIdentifierAroundPos(lsPosition position,
|
||||||
std::string_view content);
|
std::string_view content);
|
||||||
|
|
||||||
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
|
int ReverseSubseqMatch(std::string_view pat,
|
||||||
std::string_view content);
|
std::string_view text,
|
||||||
|
int case_sensitivity);
|
||||||
|
@ -214,10 +214,11 @@ void FilterAndSortCompletionResponse(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fuzzy match and remove awful candidates.
|
// Fuzzy match and remove awful candidates.
|
||||||
FuzzyMatcher fuzzy(complete_text, g_config->completion.caseSensitivity);
|
bool sensitive = g_config->completion.caseSensitivity;
|
||||||
|
FuzzyMatcher fuzzy(complete_text, sensitive);
|
||||||
for (auto& item : items) {
|
for (auto& item : items) {
|
||||||
item.score_ =
|
item.score_ =
|
||||||
CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first
|
ReverseSubseqMatch(complete_text, *item.filterText, sensitive) >= 0
|
||||||
? fuzzy.Match(*item.filterText)
|
? fuzzy.Match(*item.filterText)
|
||||||
: FuzzyMatcher::kMinScore;
|
: FuzzyMatcher::kMinScore;
|
||||||
}
|
}
|
||||||
|
@ -15,10 +15,13 @@ namespace {
|
|||||||
MethodType kMethodType = "workspace/symbol";
|
MethodType kMethodType = "workspace/symbol";
|
||||||
|
|
||||||
// Lookup |symbol| in |db| and insert the value into |result|.
|
// Lookup |symbol| in |db| and insert the value into |result|.
|
||||||
bool InsertSymbolIntoResult(QueryDatabase* db,
|
bool AddSymbol(
|
||||||
WorkingFiles* working_files,
|
QueryDatabase* db,
|
||||||
SymbolIdx symbol,
|
WorkingFiles* working_files,
|
||||||
std::vector<lsSymbolInformation>* result) {
|
int i,
|
||||||
|
bool use_detailed,
|
||||||
|
std::vector<std::tuple<lsSymbolInformation, bool, int>>* result) {
|
||||||
|
SymbolIdx symbol = db->symbols[i];
|
||||||
std::optional<lsSymbolInformation> info =
|
std::optional<lsSymbolInformation> info =
|
||||||
GetSymbolInfo(db, working_files, symbol, true);
|
GetSymbolInfo(db, working_files, symbol, true);
|
||||||
if (!info)
|
if (!info)
|
||||||
@ -38,7 +41,7 @@ bool InsertSymbolIntoResult(QueryDatabase* db,
|
|||||||
if (!ls_location)
|
if (!ls_location)
|
||||||
return false;
|
return false;
|
||||||
info->location = *ls_location;
|
info->location = *ls_location;
|
||||||
result->push_back(*info);
|
result->emplace_back(*info, use_detailed, i);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,82 +75,58 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
|
|||||||
|
|
||||||
std::string query = request->params.query;
|
std::string query = request->params.query;
|
||||||
|
|
||||||
std::unordered_set<std::string> inserted_results;
|
// {symbol info, matching detailed_name or short_name, index}
|
||||||
// db->detailed_names indices of each lsSymbolInformation in out.result
|
std::vector<std::tuple<lsSymbolInformation, bool, int>> unsorted;
|
||||||
std::vector<int> result_indices;
|
bool sensitive = g_config->workspaceSymbol.caseSensitivity;
|
||||||
std::vector<lsSymbolInformation> unsorted_results;
|
|
||||||
inserted_results.reserve(g_config->workspaceSymbol.maxNum);
|
|
||||||
result_indices.reserve(g_config->workspaceSymbol.maxNum);
|
|
||||||
|
|
||||||
// We use detailed_names without parameters for matching.
|
|
||||||
|
|
||||||
// Find exact substring matches.
|
|
||||||
for (int i = 0; i < db->symbols.size(); ++i) {
|
|
||||||
std::string_view detailed_name = db->GetSymbolName(i, true);
|
|
||||||
if (detailed_name.find(query) != std::string::npos) {
|
|
||||||
// Do not show the same entry twice.
|
|
||||||
if (!inserted_results.insert(std::string(detailed_name)).second)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
|
|
||||||
&unsorted_results)) {
|
|
||||||
result_indices.push_back(i);
|
|
||||||
if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find subsequence matches.
|
// Find subsequence matches.
|
||||||
if (unsorted_results.size() < g_config->workspaceSymbol.maxNum) {
|
std::string query_without_space;
|
||||||
std::string query_without_space;
|
query_without_space.reserve(query.size());
|
||||||
query_without_space.reserve(query.size());
|
for (char c : query)
|
||||||
for (char c : query)
|
if (!isspace(c))
|
||||||
if (!isspace(c))
|
query_without_space += c;
|
||||||
query_without_space += c;
|
|
||||||
|
|
||||||
for (int i = 0; i < (int)db->symbols.size(); ++i) {
|
for (int i = 0; i < (int)db->symbols.size(); ++i) {
|
||||||
std::string_view detailed_name = db->GetSymbolName(i, true);
|
std::string_view detailed_name = db->GetSymbolName(i, true);
|
||||||
if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name)
|
int pos =
|
||||||
.first) {
|
ReverseSubseqMatch(query_without_space, detailed_name, sensitive);
|
||||||
// Do not show the same entry twice.
|
if (pos >= 0 &&
|
||||||
if (!inserted_results.insert(std::string(detailed_name)).second)
|
AddSymbol(db, working_files, i,
|
||||||
continue;
|
detailed_name.find(':', pos) != std::string::npos,
|
||||||
|
&unsorted) &&
|
||||||
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
|
unsorted.size() >= g_config->workspaceSymbol.maxNum)
|
||||||
&unsorted_results)) {
|
break;
|
||||||
result_indices.push_back(i);
|
|
||||||
if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) {
|
if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) {
|
||||||
// Sort results with a fuzzy matching algorithm.
|
// Sort results with a fuzzy matching algorithm.
|
||||||
int longest = 0;
|
int longest = 0;
|
||||||
for (int i : result_indices)
|
for (int i = 0; i < int(unsorted.size()); i++) {
|
||||||
longest = std::max(longest, int(db->GetSymbolName(i, true).size()));
|
longest = std::max(
|
||||||
|
longest,
|
||||||
|
int(db->GetSymbolName(std::get<2>(unsorted[i]), true).size()));
|
||||||
|
}
|
||||||
FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity);
|
FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity);
|
||||||
std::vector<std::pair<int, int>> permutation(result_indices.size());
|
std::vector<std::pair<int, int>> permutation(unsorted.size());
|
||||||
for (int i = 0; i < int(result_indices.size()); i++) {
|
for (int i = 0; i < int(unsorted.size()); i++) {
|
||||||
permutation[i] = {
|
permutation[i] = {
|
||||||
fuzzy.Match(db->GetSymbolName(result_indices[i], true)), i};
|
fuzzy.Match(db->GetSymbolName(std::get<2>(unsorted[i]),
|
||||||
|
std::get<1>(unsorted[i]))),
|
||||||
|
i};
|
||||||
}
|
}
|
||||||
std::sort(permutation.begin(), permutation.end(),
|
std::sort(permutation.begin(), permutation.end(),
|
||||||
std::greater<std::pair<int, int>>());
|
std::greater<std::pair<int, int>>());
|
||||||
out.result.reserve(result_indices.size());
|
out.result.reserve(unsorted.size());
|
||||||
// Discard awful candidates.
|
// Discard awful candidates.
|
||||||
for (int i = 0; i < int(result_indices.size()) &&
|
for (int i = 0; i < int(unsorted.size()) &&
|
||||||
permutation[i].first > FuzzyMatcher::kMinScore;
|
permutation[i].first > FuzzyMatcher::kMinScore;
|
||||||
i++)
|
i++)
|
||||||
out.result.push_back(
|
out.result.push_back(
|
||||||
std::move(unsorted_results[permutation[i].second]));
|
std::move(std::get<0>(unsorted[permutation[i].second])));
|
||||||
} else {
|
} else {
|
||||||
out.result.reserve(unsorted_results.size());
|
out.result.reserve(unsorted.size());
|
||||||
for (const auto& entry : unsorted_results)
|
for (auto& entry : unsorted)
|
||||||
out.result.push_back(std::move(entry));
|
out.result.push_back(std::get<0>(entry));
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_S(INFO) << "[querydb] Found " << out.result.size()
|
LOG_S(INFO) << "[querydb] Found " << out.result.size()
|
||||||
|
Loading…
Reference in New Issue
Block a user