mirror of
https://github.com/MaskRay/ccls.git
synced 2024-11-25 09:05:10 +00:00
Optimize FuzzyMatcher and add tests.
This commit is contained in:
parent
13cccda781
commit
02542b1e69
@ -1,7 +1,11 @@
|
|||||||
#include "fuzzy_match.h"
|
#include "fuzzy_match.h"
|
||||||
|
|
||||||
|
#include <doctest/doctest.h>
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
enum CharClass { Other, Lower, Upper };
|
enum CharClass { Other, Lower, Upper };
|
||||||
enum CharRole { None, Tail, Head };
|
enum CharRole { None, Tail, Head };
|
||||||
@ -49,20 +53,20 @@ int FuzzyMatcher::MissScore(int j, bool last) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int FuzzyMatcher::MatchScore(int i, int j, bool last) {
|
int FuzzyMatcher::MatchScore(int i, int j, bool last) {
|
||||||
int s = 40;
|
int s = 0;
|
||||||
if (pat[i] == text[j]) {
|
if (pat[i] == text[j]) {
|
||||||
s++;
|
s++;
|
||||||
if ((pat_set & 1 << Upper) || i == j)
|
if ((pat_set & 1 << Upper) || i == j)
|
||||||
s += 20;
|
s += 10;
|
||||||
}
|
}
|
||||||
if (pat_role[i] == Head && text_role[j] == Head)
|
if (pat_role[i] == Head && text_role[j] == Head)
|
||||||
s += 50;
|
s += 30;
|
||||||
if (text_role[j] == Tail && i && !last)
|
if (text_role[j] == Tail && i && !last)
|
||||||
s -= 50;
|
|
||||||
if (pat_role[i] == Head && text_role[j] == Tail)
|
|
||||||
s -= 30;
|
s -= 30;
|
||||||
|
if (pat_role[i] == Head && text_role[j] == Tail)
|
||||||
|
s -= 10;
|
||||||
if (i == 0 && text_role[j] == Tail)
|
if (i == 0 && text_role[j] == Tail)
|
||||||
s -= 70;
|
s -= 40;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,31 +91,73 @@ int FuzzyMatcher::Match(std::string_view text) {
|
|||||||
low_text[i] = (char)::tolower(text[i]);
|
low_text[i] = (char)::tolower(text[i]);
|
||||||
CalculateRoles(text, text_role, &text_set);
|
CalculateRoles(text, text_role, &text_set);
|
||||||
dp[0][0][0] = 0;
|
dp[0][0][0] = 0;
|
||||||
dp[0][0][1] = kMinScore;
|
dp[0][0][1] = kMinScore * 2;
|
||||||
for (int j = 0; j < n; j++) {
|
for (int j = 0; j < n; j++) {
|
||||||
dp[0][j + 1][0] = dp[0][j][0] + MissScore(j, false);
|
dp[0][j + 1][0] = dp[0][j][0] + MissScore(j, false);
|
||||||
dp[0][j + 1][1] = kMinScore;
|
dp[0][j + 1][1] = kMinScore * 2;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < int(pat.size()); i++) {
|
for (int i = 0; i < int(pat.size()); i++) {
|
||||||
int(*pre)[2] = dp[i & 1];
|
int(*pre)[2] = dp[i & 1];
|
||||||
int(*cur)[2] = dp[(i + 1) & 1];
|
int(*cur)[2] = dp[(i + 1) & 1];
|
||||||
cur[0][0] = cur[0][1] = kMinScore;
|
cur[i][0] = cur[i][1] = kMinScore;
|
||||||
for (int j = 0; j < n; j++) {
|
for (int j = i; j < n; j++) {
|
||||||
cur[j + 1][0] = std::max(cur[j][0] + MissScore(j, false),
|
cur[j + 1][0] = std::max(cur[j][0] + MissScore(j, false),
|
||||||
cur[j][1] + MissScore(j, true));
|
cur[j][1] + MissScore(j, true));
|
||||||
if (low_pat[i] != low_text[j])
|
// For the first char of pattern, apply extra restriction to filter bad
|
||||||
cur[j + 1][1] = kMinScore;
|
// candidates (e.g. |int| in |PRINT|)
|
||||||
else {
|
if (low_pat[i] == low_text[j] &&
|
||||||
|
(i || text_role[j] != Tail || pat[i] == text[j])) {
|
||||||
cur[j + 1][1] = std::max(pre[j][0] + MatchScore(i, j, false),
|
cur[j + 1][1] = std::max(pre[j][0] + MatchScore(i, j, false),
|
||||||
pre[j][1] + MatchScore(i, j, true));
|
pre[j][1] + MatchScore(i, j, true));
|
||||||
}
|
} else
|
||||||
|
cur[j + 1][1] = kMinScore * 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enumerate the end position of the match in str. Each removed trailing
|
// Enumerate the end position of the match in str. Each removed trailing
|
||||||
// character has a penalty.
|
// character has a penalty.
|
||||||
int ret = kMinScore;
|
int ret = kMinScore;
|
||||||
for (int j = 1; j <= n; j++)
|
for (int j = pat.size(); j <= n; j++)
|
||||||
ret = std::max(ret, dp[pat.size() & 1][j][1] - 3 * (n - j));
|
ret = std::max(ret, dp[pat.size() & 1][j][1] - 3 * (n - j));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_SUITE("fuzzy_match") {
|
||||||
|
bool Ranks(std::string_view pat, std::vector<const char*> texts) {
|
||||||
|
FuzzyMatcher fuzzy(pat);
|
||||||
|
std::vector<int> scores;
|
||||||
|
for (auto text : texts)
|
||||||
|
scores.push_back(fuzzy.Match(text));
|
||||||
|
bool ret = true;
|
||||||
|
for (size_t i = 0; i < texts.size() - 1; i++)
|
||||||
|
if (scores[i] < scores[i + 1]) {
|
||||||
|
ret = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (1 || !ret) {
|
||||||
|
for (size_t i = 0; i < texts.size(); i++)
|
||||||
|
printf("%s %d ", texts[i], scores[i]);
|
||||||
|
puts("");
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE("test") {
|
||||||
|
// case
|
||||||
|
Ranks("monad", {"monad", "Monad", "mONAD"});
|
||||||
|
// initials
|
||||||
|
Ranks("ab", {"ab", "aoo_boo", "acb"});
|
||||||
|
Ranks("CC", {"CamelCase", "camelCase", "camelcase"});
|
||||||
|
Ranks("cC", {"camelCase", "CamelCase", "camelcase"});
|
||||||
|
Ranks("Da.Te", {"Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"});
|
||||||
|
// prefix
|
||||||
|
Ranks("is", {"isIEEE", "inSuf"});
|
||||||
|
// shorter
|
||||||
|
Ranks("ma", {"map", "many", "maximum"});
|
||||||
|
Ranks("print", {"printf", "sprintf"});
|
||||||
|
// score(PRINT) = kMinScore
|
||||||
|
Ranks("int", {"int", "INT", "PRINT"});
|
||||||
|
// score(PRINT) > kMinScore
|
||||||
|
Ranks("Int", {"int", "INT", "PRINT"});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -11,7 +11,7 @@ public:
|
|||||||
constexpr static int kMaxText = 200;
|
constexpr static int kMaxText = 200;
|
||||||
// Negative but far from INT_MIN so that intermediate results are hard to
|
// Negative but far from INT_MIN so that intermediate results are hard to
|
||||||
// overflow.
|
// overflow.
|
||||||
constexpr static int kMinScore = INT_MIN / 2;
|
constexpr static int kMinScore = INT_MIN / 4;
|
||||||
|
|
||||||
FuzzyMatcher(std::string_view pattern);
|
FuzzyMatcher(std::string_view pattern);
|
||||||
int Match(std::string_view text);
|
int Match(std::string_view text);
|
||||||
|
@ -136,7 +136,10 @@ struct WorkspaceSymbolHandler : BaseMessageHandler<Ipc_WorkspaceSymbol> {
|
|||||||
std::sort(permutation.begin(), permutation.end(),
|
std::sort(permutation.begin(), permutation.end(),
|
||||||
std::greater<std::pair<int, int>>());
|
std::greater<std::pair<int, int>>());
|
||||||
out.result.reserve(result_indices.size());
|
out.result.reserve(result_indices.size());
|
||||||
for (int i = 0; i < int(result_indices.size()); i++)
|
// Discard awful candidates.
|
||||||
|
for (int i = 0; i < int(result_indices.size()) &&
|
||||||
|
permutation[i].first > FuzzyMatcher::kMinScore;
|
||||||
|
i++)
|
||||||
out.result.push_back(
|
out.result.push_back(
|
||||||
std::move(unsorted_results[permutation[i].second]));
|
std::move(unsorted_results[permutation[i].second]));
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user