ccls/src/fuzzy_match.cc

198 lines
5.8 KiB
C++
Raw Normal View History

2018-08-21 05:27:52 +00:00
/* Copyright 2017-2018 ccls Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "fuzzy_match.h"
2018-08-09 17:08:14 +00:00
#include <algorithm>
#include <ctype.h>
2018-03-18 19:17:40 +00:00
#include <stdio.h>
#include <vector>
2018-03-16 15:28:37 +00:00
// Coarse classification of a single character. The enumerator values are also
// used as bit positions in the class-set masks produced by CalculateRoles
// (bit `1 << Upper` set means "contains an upper-case letter").
enum CharClass { Other, Lower, Upper };
// Role of a character inside a word segment: None for characters classified
// as Other, Head for the first letter of a segment, Tail for the remaining
// letters of that segment.
enum CharRole { None, Tail, Head };

namespace {
// Classifies `c` as Lower, Upper or Other using the <ctype.h> predicates.
//
// Callers pass plain `char` values, which are negative for non-ASCII bytes on
// platforms where `char` is signed. islower()/isupper() are only defined for
// EOF and values representable as unsigned char, so the argument is narrowed
// to unsigned char before the lookup.
CharClass GetCharClass(int c) {
  const int uc = static_cast<unsigned char>(c);
  if (islower(uc))
    return Lower;
  if (isupper(uc))
    return Upper;
  return Other;
}

// Computes the CharRole of every character of `s`.
//
// s:         the string to analyse.
// roles:     output array; must have room for at least s.size() entries.
//            roles[i] receives None, Head or Tail for s[i].
// class_set: output bitmask; bit (1 << cls) is set for every CharClass `cls`
//            that occurs in `s`. Set to 0 when `s` is empty.
void CalculateRoles(std::string_view s, int roles[], int *class_set) {
  if (s.empty()) {
    *class_set = 0;
    return;
  }
  // Sliding window over the classes of the previous, current and next
  // character. `suc` starts equal to `cur` so it is never indeterminate; for
  // the last character the window leaves suc == cur as well.
  CharClass pre = Other, cur = GetCharClass(s[0]), suc = cur;
  *class_set = 1 << cur;
  auto role_of_current = [&]() {
    if (cur == Other)
      return None;
    // A letter following a non-letter starts a segment. An upper-case letter
    // also starts a segment after a lower-case one, or when it is followed by
    // a non-upper-case character: U(U)L is Head while U(U)U is Tail.
    return pre == Other || (cur == Upper && (pre == Lower || suc != Upper))
               ? Head
               : Tail;
  };
  for (size_t i = 0; i < s.size() - 1; i++) {
    suc = GetCharClass(s[i + 1]);
    *class_set |= 1 << suc;
    roles[i] = role_of_current();
    pre = cur;
    cur = suc;
  }
  roles[s.size() - 1] = role_of_current();
}
} // namespace
2018-03-16 07:19:49 +00:00
int FuzzyMatcher::MissScore(int j, bool last) {
2018-04-02 03:55:10 +00:00
int s = -3;
if (last)
s -= 10;
2018-03-16 07:19:49 +00:00
if (text_role[j] == Head)
s -= 10;
return s;
}
2018-03-16 07:19:49 +00:00
int FuzzyMatcher::MatchScore(int i, int j, bool last) {
2018-03-18 19:17:40 +00:00
int s = 0;
2018-04-02 03:55:10 +00:00
// Case matching.
2018-03-16 15:28:37 +00:00
if (pat[i] == text[j]) {
s++;
2018-04-02 03:55:10 +00:00
// pat contains uppercase letters or prefix matching.
2018-03-16 15:28:37 +00:00
if ((pat_set & 1 << Upper) || i == j)
2018-03-23 21:57:52 +00:00
s++;
}
if (pat_role[i] == Head) {
if (text_role[j] == Head)
s += 30;
else if (text_role[j] == Tail)
s -= 10;
2018-03-16 15:28:37 +00:00
}
2018-04-02 03:55:10 +00:00
// Matching a tail while previous char wasn't matched.
2018-03-16 07:19:49 +00:00
if (text_role[j] == Tail && i && !last)
s -= 30;
2018-04-02 03:55:10 +00:00
// First char of pat matches a tail.
2018-03-16 07:19:49 +00:00
if (i == 0 && text_role[j] == Tail)
2018-03-18 19:17:40 +00:00
s -= 40;
2018-03-16 07:19:49 +00:00
return s;
}
// Prepares a matcher for `pattern`.
//
// pattern:     the user's query. Spaces are dropped from the stored pattern,
//              but roles are computed before stripping, so a letter that
//              follows a space keeps its Head role.
// sensitivity: stored as case_sensitivity. The value 1 is resolved here to 2
//              when the pattern contains an upper-case letter and to 0
//              otherwise; other values are stored unchanged.
//
// NOTE(review): no bound check against the capacity of pat_role/low_pat is
// visible here — confirm that callers limit the pattern length.
FuzzyMatcher::FuzzyMatcher(std::string_view pattern, int sensitivity) {
  CalculateRoles(pattern, pat_role, &pat_set);
  if (sensitivity == 1)
    sensitivity = pat_set & 1 << Upper ? 2 : 0;
  case_sensitivity = sensitivity;
  // Compact the pattern in place: `n` is the write index after removing
  // spaces, `i` the read index into the original pattern.
  size_t n = 0;
  for (size_t i = 0; i < pattern.size(); i++)
    if (pattern[i] != ' ') {
      pat += pattern[i];
      // tolower() is undefined for negative arguments other than EOF, which
      // is what a signed char holding a non-ASCII byte would produce.
      low_pat[n] = ::tolower(static_cast<unsigned char>(pattern[i]));
      pat_role[n] = pat_role[i];
      n++;
    }
}
2018-03-16 07:19:49 +00:00
int FuzzyMatcher::Match(std::string_view text) {
int n = int(text.size());
if (n > kMaxText)
return kMinScore + 1;
this->text = text;
for (int i = 0; i < n; i++)
low_text[i] = ::tolower(text[i]);
CalculateRoles(text, text_role, &text_set);
dp[0][0][0] = dp[0][0][1] = 0;
2018-03-16 07:19:49 +00:00
for (int j = 0; j < n; j++) {
dp[0][j + 1][0] = dp[0][j][0] + MissScore(j, false);
2018-03-18 19:17:40 +00:00
dp[0][j + 1][1] = kMinScore * 2;
}
2018-03-16 07:19:49 +00:00
for (int i = 0; i < int(pat.size()); i++) {
int(*pre)[2] = dp[i & 1];
2018-03-16 15:28:37 +00:00
int(*cur)[2] = dp[(i + 1) & 1];
2018-03-18 19:17:40 +00:00
cur[i][0] = cur[i][1] = kMinScore;
for (int j = i; j < n; j++) {
2018-03-16 07:19:49 +00:00
cur[j + 1][0] = std::max(cur[j][0] + MissScore(j, false),
cur[j][1] + MissScore(j, true));
2018-03-18 19:17:40 +00:00
// For the first char of pattern, apply extra restriction to filter bad
// candidates (e.g. |int| in |PRINT|)
cur[j + 1][1] = (case_sensitivity ? pat[i] == text[j]
: low_pat[i] == low_text[j] &&
(i || text_role[j] != Tail ||
pat[i] == text[j]))
? std::max(pre[j][0] + MatchScore(i, j, false),
pre[j][1] + MatchScore(i, j, true))
2018-05-11 21:23:53 +00:00
: kMinScore * 2;
}
}
// Enumerate the end position of the match in str. Each removed trailing
2018-03-16 07:19:49 +00:00
// character has a penulty.
int ret = kMinScore;
2018-03-18 19:17:40 +00:00
for (int j = pat.size(); j <= n; j++)
2018-04-02 03:55:10 +00:00
ret = std::max(ret, dp[pat.size() & 1][j][1] - 2 * (n - j));
2018-03-16 07:19:49 +00:00
return ret;
}
2018-03-18 19:17:40 +00:00
2018-07-09 00:13:41 +00:00
#if 0
2018-03-18 19:17:40 +00:00
TEST_SUITE("fuzzy_match") {
  // Returns true when the scores of `texts` against `pat` are in
  // non-increasing order, i.e. earlier candidates rank at least as high as
  // later ones. On failure every candidate is printed with its score.
  // An empty or single-element `texts` is trivially ordered.
  bool Ranks(std::string_view pat, std::vector<const char*> texts) {
    FuzzyMatcher fuzzy(pat, 0);
    std::vector<int> scores;
    scores.reserve(texts.size());
    for (const char *text : texts)
      scores.push_back(fuzzy.Match(text));
    // is_sorted with a "greater" comparator checks for descending order
    // without the size() - 1 underflow an index loop has on empty input.
    const bool ret = std::is_sorted(scores.begin(), scores.end(),
                                    [](int a, int b) { return a > b; });
    if (!ret) {
      for (size_t i = 0; i < texts.size(); i++)
        printf("%s %d ", texts[i], scores[i]);
      puts("");
    }
    return ret;
  }

  TEST_CASE("test") {
    FuzzyMatcher fuzzy("", 0);
    CHECK(fuzzy.Match("") == 0);
    CHECK(fuzzy.Match("aaa") < 0);

    // case
    CHECK(Ranks("monad", {"monad", "Monad", "mONAD"}));
    // initials
    CHECK(Ranks("ab", {"ab", "aoo_boo", "acb"}));
    CHECK(Ranks("CC", {"CamelCase", "camelCase", "camelcase"}));
    CHECK(Ranks("cC", {"camelCase", "CamelCase", "camelcase"}));
    CHECK(Ranks("c c", {"camelCase", "camel case", "CamelCase", "camelcase",
                        "camel ace"}));
    CHECK(Ranks("Da.Te",
                {"Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"}));
    CHECK(Ranks("foo bar.h", {"foo/bar.h", "foobar.h"}));
    // prefix
    CHECK(Ranks("is", {"isIEEE", "inSuf"}));
    // shorter
    CHECK(Ranks("ma", {"map", "many", "maximum"}));
    CHECK(Ranks("print", {"printf", "sprintf"}));
    // score(PRINT) = kMinScore
    CHECK(Ranks("ast", {"ast", "AST", "INT_FAST16_MAX"}));
    // score(PRINT) > kMinScore
    CHECK(Ranks("Int", {"int", "INT", "PRINT"}));
  }
}
2018-07-09 00:13:41 +00:00
#endif