/* Copyright 2017-2018 ccls Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "fuzzy_match.h"

// NOTE(review): the header names below were reconstructed from what this file
// uses (islower/isupper/tolower, printf/puts, std::max, std::vector) -- confirm
// against the upstream file.
#include <ctype.h>
#include <stdio.h>
#include <algorithm>
#include <vector>

// Coarse classification of a single character. The enumerator values are used
// as bit positions in the |class_set| masks built by CalculateRoles.
enum CharClass { Other, Lower, Upper };
// Role of a character inside a word: not part of a word (None), the first
// character of a word segment (Head), or a following character (Tail).
enum CharRole { None, Tail, Head };

namespace {
// Classifies |c| as Lower, Upper or Other.
//
// Callers pass plain |char| values, which may be negative for bytes >= 0x80.
// The <ctype.h> predicates require a value representable as unsigned char (or
// EOF), so the argument is normalized first to avoid undefined behavior.
CharClass GetCharClass(int c) {
  const int uc = static_cast<unsigned char>(c);
  if (islower(uc))
    return Lower;
  if (isupper(uc))
    return Upper;
  return Other;
}

// Computes the CharRole of every character in |s|.
//
// @param s          string to analyze.
// @param roles      output array; receives s.size() entries. The caller must
//                   provide at least that much room.
// @param class_set  output bit mask with bit (1 << CharClass) set for every
//                   character class that occurs in |s|; 0 for an empty string.
void CalculateRoles(std::string_view s, int roles[], int *class_set) {
  if (s.empty()) {
    *class_set = 0;
    return;
  }
  // Sliding window over the classes of the previous, current and next chars.
  // |suc| is assigned inside the loop before the lambda is first called; for a
  // one-character string the lambda never reads it (it returns None when |cur|
  // is Other and otherwise short-circuits on pre == Other).
  CharClass pre = Other, cur = GetCharClass(s[0]), suc;
  *class_set = 1 << cur;
  auto fn = [&]() {
    if (cur == Other)
      return None;
    // U(U)L is Head while U(U)U is Tail
    return pre == Other || (cur == Upper && (pre == Lower || suc == Lower))
               ? Head
               : Tail;
  };
  for (size_t i = 0; i < s.size() - 1; i++) {
    suc = GetCharClass(s[i + 1]);
    *class_set |= 1 << suc;
    roles[i] = fn();
    pre = cur;
    cur = suc;
  }
  // Last character: after the loop |suc| equals |cur|, so the "followed by
  // Lower" clause cannot trigger for an Upper |cur|.
  roles[s.size() - 1] = fn();
}
} // namespace

// Penalty (always negative) for leaving text[j] unmatched.
//
// @param j     index into the text.
// @param last  true if the previous text character was matched.
int FuzzyMatcher::MissScore(int j, bool last) {
  int s = -3;
  if (last)
    s -= 10;
  // Skipping the head of a word costs more.
  if (text_role[j] == Head)
    s -= 10;
  return s;
}

// Score for matching pat[i] against text[j].
//
// @param i     index into the (space-stripped) pattern.
// @param j     index into the text.
// @param last  true if the previous text character was matched.
int FuzzyMatcher::MatchScore(int i, int j, bool last) {
  int s = 0;
  // Case matching.
  if (pat[i] == text[j]) {
    s++;
    // pat contains uppercase letters or prefix matching.
    if ((pat_set & 1 << Upper) || i == j)
      s++;
  }
  if (pat_role[i] == Head) {
    if (text_role[j] == Head)
      s += 30;
    else if (text_role[j] == Tail)
      s -= 10;
  }
  // Matching a tail while previous char wasn't matched.
  if (text_role[j] == Tail && i && !last)
    s -= 30;
  // First char of pat matches a tail.
  if (i == 0 && text_role[j] == Tail)
    s -= 40;
  return s;
}

// Prepares a matcher for |pattern|.
//
// @param pattern      the query; spaces are dropped from the stored pattern.
// @param sensitivity  0 selects case-insensitive comparison in Match; 1 is
//                     resolved here to 2 when the pattern contains an
//                     uppercase letter and to 0 otherwise; any other non-zero
//                     value selects case-sensitive comparison.
//
// NOTE(review): the pattern length is not checked here, while pat_role and
// low_pat are indexed by it. Their capacities are declared in the header --
// confirm callers bound the pattern length.
FuzzyMatcher::FuzzyMatcher(std::string_view pattern, int sensitivity) {
  // Roles are computed on the raw pattern (spaces included) so that a space
  // acts as a word separator, then compacted in place below.
  CalculateRoles(pattern, pat_role, &pat_set);
  if (sensitivity == 1)
    sensitivity = pat_set & 1 << Upper ? 2 : 0;
  case_sensitivity = sensitivity;
  size_t n = 0;
  for (size_t i = 0; i < pattern.size(); i++)
    if (pattern[i] != ' ') {
      pat += pattern[i];
      // Go through unsigned char: tolower on a negative char value is UB.
      low_pat[n] = ::tolower(static_cast<unsigned char>(pattern[i]));
      // n <= i, so this never overwrites a role that is still to be read.
      pat_role[n] = pat_role[i];
      n++;
    }
}

// Scores how well the stored pattern matches |text| as a subsequence.
//
// @param text  candidate string.
// @return      the best alignment score; higher is better. Texts longer than
//              kMaxText are not scored and yield kMinScore + 1.
int FuzzyMatcher::Match(std::string_view text) {
  int n = int(text.size());
  if (n > kMaxText)
    return kMinScore + 1;
  this->text = text;
  for (int i = 0; i < n; i++)
    // Go through unsigned char: tolower on a negative char value is UB.
    low_text[i] = ::tolower(static_cast<unsigned char>(text[i]));
  CalculateRoles(text, text_role, &text_set);

  // dp[row][j][k]: best score after consuming j text characters, where |row|
  // is the number of consumed pattern characters modulo 2 (only two rows are
  // kept) and k tells whether text[j - 1] was matched (1) or skipped (0).
  // Row 0 (empty pattern): every text character is a miss.
  dp[0][0][0] = dp[0][0][1] = 0;
  for (int j = 0; j < n; j++) {
    dp[0][j + 1][0] = dp[0][j][0] + MissScore(j, false);
    dp[0][j + 1][1] = kMinScore * 2;
  }
  for (int i = 0; i < int(pat.size()); i++) {
    int(*pre)[2] = dp[i & 1];
    int(*cur)[2] = dp[(i + 1) & 1];
    // i + 1 pattern characters cannot be placed within i text characters.
    cur[i][0] = cur[i][1] = kMinScore;
    for (int j = i; j < n; j++) {
      cur[j + 1][0] = std::max(cur[j][0] + MissScore(j, false),
                               cur[j][1] + MissScore(j, true));
      // For the first char of pattern, apply extra restriction to filter bad
      // candidates (e.g. |int| in |PRINT|)
      const bool matches =
          case_sensitivity
              ? pat[i] == text[j]
              : low_pat[i] == low_text[j] &&
                    (i || text_role[j] != Tail || pat[i] == text[j]);
      cur[j + 1][1] = matches
                          ? std::max(pre[j][0] + MatchScore(i, j, false),
                                     pre[j][1] + MatchScore(i, j, true))
                          : kMinScore * 2;
    }
  }

  // Enumerate the end position of the match in str. Each removed trailing
  // character has a penalty.
  int ret = kMinScore;
  for (int j = int(pat.size()); j <= n; j++)
    ret = std::max(ret, dp[pat.size() & 1][j][1] - 2 * (n - j));
  return ret;
}

#if 0
TEST_SUITE("fuzzy_match") {
  // Returns true if the scores of |texts| against |pat| are non-increasing,
  // i.e. the texts are listed from best to worst match. Prints the scores on
  // failure.
  bool Ranks(std::string_view pat, std::vector<const char *> texts) {
    FuzzyMatcher fuzzy(pat, 0);
    std::vector<int> scores;
    for (auto text : texts)
      scores.push_back(fuzzy.Match(text));
    bool ret = true;
    // i + 1 < size() instead of i < size() - 1: no unsigned wrap when empty.
    for (size_t i = 0; i + 1 < texts.size(); i++)
      if (scores[i] < scores[i + 1]) {
        ret = false;
        break;
      }
    if (!ret) {
      for (size_t i = 0; i < texts.size(); i++)
        printf("%s %d ", texts[i], scores[i]);
      puts("");
    }
    return ret;
  }

  TEST_CASE("test") {
    FuzzyMatcher fuzzy("", 0);
    CHECK(fuzzy.Match("") == 0);
    CHECK(fuzzy.Match("aaa") < 0);

    // case
    CHECK(Ranks("monad", {"monad", "Monad", "mONAD"}));
    // initials
    CHECK(Ranks("ab", {"ab", "aoo_boo", "acb"}));
    CHECK(Ranks("CC", {"CamelCase", "camelCase", "camelcase"}));
    CHECK(Ranks("cC", {"camelCase", "CamelCase", "camelcase"}));
    CHECK(Ranks("c c", {"camelCase", "camel case", "CamelCase", "camelcase",
                        "camel ace"}));
    CHECK(Ranks("Da.Te",
                {"Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"}));
    CHECK(Ranks("foo bar.h", {"foo/bar.h", "foobar.h"}));
    // prefix
    CHECK(Ranks("is", {"isIEEE", "inSuf"}));
    // shorter
    CHECK(Ranks("ma", {"map", "many", "maximum"}));
    CHECK(Ranks("print", {"printf", "sprintf"}));
    // score(PRINT) = kMinScore
    CHECK(Ranks("ast", {"ast", "AST", "INT_FAST16_MAX"}));
    // score(PRINT) > kMinScore
    CHECK(Ranks("Int", {"int", "INT", "PRINT"}));
  }
}
#endif