diff --git a/src/messages/workspace_symbol.cc b/src/messages/workspace_symbol.cc index c3197e11..1122e1a8 100644 --- a/src/messages/workspace_symbol.cc +++ b/src/messages/workspace_symbol.cc @@ -71,8 +71,9 @@ constexpr int kGapScore = -5; // greater than 1 constexpr int kPatternStartMultiplier = 2; -constexpr int kWordStartScore = 100; -constexpr int kNonWordScore = 90; +constexpr int kWordStartScore = 50; +constexpr int kNonWordScore = 40; +constexpr int kCaseMatchScore = 2; // Less than kWordStartScore constexpr int kConsecutiveScore = kWordStartScore + kGapScore; @@ -81,14 +82,14 @@ constexpr int kCamelScore = kWordStartScore + kGapScore - 1; enum class CharClass { Lower, Upper, Digit, NonWord }; -static enum CharClass getCharClass(int c) { +static enum CharClass GetCharClass(int c) { if (islower(c)) return CharClass::Lower; if (isupper(c)) return CharClass::Upper; if (isdigit(c)) return CharClass::Digit; return CharClass::NonWord; } -static int getScoreFor(CharClass prev, CharClass curr) { +static int GetScoreFor(CharClass prev, CharClass curr) { if (prev == CharClass::NonWord && curr != CharClass::NonWord) return kWordStartScore; if ((prev == CharClass::Lower && curr == CharClass::Upper) || @@ -115,7 +116,7 @@ dp[0][j] = leading_gap_penalty(0, j) + score[j] dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] + gap_penalty(k+1, j) + score[j] : k < j)) The first dimension can be suppressed since we do not need a matching scheme, which reduces the space complexity from O(N*M) to O(M) */ -int fuzzyEvaluate(const std::string& pattern, +int FuzzyEvaluate(const std::string& pattern, const std::string& str, std::vector& score, std::vector& dp) { @@ -128,8 +129,8 @@ int fuzzyEvaluate(const std::string& pattern, // Calculate position score for each character in str. CharClass prev = CharClass::NonWord; for (int i = 0; i < int(str.size()); i++) { - CharClass cur = getCharClass(str[i]); - score[i] = getScoreFor(prev, cur); + CharClass cur = GetCharClass(str[i]); + score[i] = GetScoreFor(prev, cur); prev = cur; } std::fill_n(dp.begin(), str.size(), kMinScore); @@ -145,10 +146,12 @@ int fuzzyEvaluate(const std::string& pattern, for (int i = 0; i < int(str.size()); i++) { left = dp[i]; lefts = std::max(lefts + kGapScore, left); + // Use lower() if case-insensitive if (tolower(pc) == tolower(str[i])) { int t = score[i] * (pstart ? kPatternStartMultiplier : 1); - dp[i] = pfirst ? kLeadingGapScore * i + t - : std::max(uleft + kConsecutiveScore, ulefts + t); + dp[i] = (pfirst ? kLeadingGapScore * i + t + : std::max(uleft + kConsecutiveScore, ulefts + t)) + + (pc == str[i] ? kCaseMatchScore : 0); } else dp[i] = kMinScore; uleft = left; @@ -164,7 +167,7 @@ int fuzzyEvaluate(const std::string& pattern, // parameters. We do not want to penalize them. // If we use `short_name` instead of `detailed_name` for fuzzy matching, the // penulty kGapScore can be used. - lefts = std::max(lefts /*+ kGapScore */, dp[i]); + lefts = std::max(lefts + kGapScore, dp[i]); return lefts; } @@ -181,65 +184,70 @@ struct WorkspaceSymbolHandler : BaseMessageHandler { std::unordered_set inserted_results; // db->detailed_names indices of each lsSymbolInformation in out.result std::vector result_indices; + std::vector unsorted_results; inserted_results.reserve(config->maxWorkspaceSearchResults); result_indices.reserve(config->maxWorkspaceSearchResults); - for (int i = 0; i < db->detailed_names.size(); ++i) { - if (db->detailed_names[i].find(query) != std::string::npos) { + // Find exact substring matches. + for (int i = 0; i < db->short_names.size(); ++i) { + if (db->short_names[i].find(query) != std::string::npos) { // Do not show the same entry twice. if (!inserted_results.insert(db->detailed_names[i]).second) continue; - if (InsertSymbolIntoResult(db, working_files, db->symbols[i], &out.result)) { + if (InsertSymbolIntoResult(db, working_files, db->symbols[i], &unsorted_results)) { result_indices.push_back(i); - if (out.result.size() >= config->maxWorkspaceSearchResults) + if (unsorted_results.size() >= config->maxWorkspaceSearchResults) break; } } } - if (out.result.size() < config->maxWorkspaceSearchResults) { + // Find subsequence matches. + if (unsorted_results.size() < config->maxWorkspaceSearchResults) { std::string query_without_space; query_without_space.reserve(query.size()); for (char c: query) if (!isspace(c)) query_without_space += c; - for (int i = 0; i < db->detailed_names.size(); ++i) { - if (SubstringMatch(query_without_space, db->detailed_names[i])) { + for (int i = 0; i < db->short_names.size(); ++i) { + if (SubstringMatch(query_without_space, db->short_names[i])) { // Do not show the same entry twice. if (!inserted_results.insert(db->detailed_names[i]).second) continue; - if (InsertSymbolIntoResult(db, working_files, db->symbols[i], &out.result)) { + if (InsertSymbolIntoResult(db, working_files, db->symbols[i], &unsorted_results)) { result_indices.push_back(i); - if (out.result.size() >= config->maxWorkspaceSearchResults) + if (unsorted_results.size() >= config->maxWorkspaceSearchResults) break; } } } } - if (out.result.size() < config->maxWorkspaceSearchResults) { + // Sort results with a fuzzy matching algorithm. + if (unsorted_results.size() < config->maxWorkspaceSearchResults) { int longest = 0; for (int i: result_indices) - longest = std::max(longest, int(db->detailed_names[i].size())); + longest = std::max(longest, int(db->short_names[i].size())); std::vector score(longest), // score for each position - dp(longest); // dp[i]: maximum value by aligning pattern[0..pi] to str[0..si] + dp(longest); // dp[i]: maximum value by aligning pattern to str[0..i] std::vector> permutation(result_indices.size()); for (int i = 0; i < int(result_indices.size()); i++) { permutation[i] = { - fuzzyEvaluate(query, db->detailed_names[result_indices[i]], score, + FuzzyEvaluate(query, db->short_names[result_indices[i]], score, dp), i}; } std::sort(permutation.begin(), permutation.end(), std::greater>()); + out.result.reserve(result_indices.size()); for (int i = 0; i < int(result_indices.size()); i++) - if (i != permutation[i].second) - std::swap(out.result[i], out.result[permutation[i].second]); - } + out.result.push_back(std::move(unsorted_results[permutation[i].second])); + } else + out.result = std::move(unsorted_results); LOG_S(INFO) << "[querydb] Found " << out.result.size() << " results for query " << query; diff --git a/src/query.cc b/src/query.cc index 12dd723b..f842df14 100644 --- a/src/query.cc +++ b/src/query.cc @@ -754,7 +754,7 @@ void QueryDatabase::ImportOrUpdate( existing.def = def; UpdateDetailedNames(&existing.detailed_name_idx, SymbolKind::File, - it->second.id, def.path); + it->second.id, def.path, def.path); } } @@ -778,7 +778,7 @@ void QueryDatabase::ImportOrUpdate( existing.def = def.value; UpdateDetailedNames(&existing.detailed_name_idx, SymbolKind::Type, - it->second.id, def.value.detailed_name); + it->second.id, def.value.short_name, def.value.detailed_name); } } @@ -802,7 +802,7 @@ void QueryDatabase::ImportOrUpdate( existing.def = def.value; UpdateDetailedNames(&existing.detailed_name_idx, SymbolKind::Func, - it->second.id, def.value.detailed_name); + it->second.id, def.value.short_name, def.value.detailed_name); } } @@ -827,20 +827,23 @@ void QueryDatabase::ImportOrUpdate( existing.def = def.value; if (!def.value.is_local()) UpdateDetailedNames(&existing.detailed_name_idx, SymbolKind::Var, - it->second.id, def.value.detailed_name); + it->second.id, def.value.short_name, def.value.detailed_name); } } void QueryDatabase::UpdateDetailedNames(size_t* qualified_name_index, SymbolKind kind, size_t symbol_index, - const std::string& name) { + const std::string& short_name, + const std::string& detailed_name) { if (*qualified_name_index == -1) { - detailed_names.push_back(name); + short_names.push_back(short_name); + detailed_names.push_back(detailed_name); symbols.push_back(SymbolIdx(kind, symbol_index)); *qualified_name_index = detailed_names.size() - 1; } else { - detailed_names[*qualified_name_index] = name; + short_names[*qualified_name_index] = short_name; + detailed_names[*qualified_name_index] = detailed_name; } } diff --git a/src/query.h b/src/query.h index cd4a9914..046c5ac4 100644 --- a/src/query.h +++ b/src/query.h @@ -340,6 +340,7 @@ struct QueryDatabase { // Indicies between lookup vectors are related to symbols, ie, index 5 in // |detailed_names| matches index 5 in |symbols|. std::vector detailed_names; + std::vector short_names; std::vector symbols; // Raw data storage. Accessible via SymbolIdx instances. @@ -367,7 +368,8 @@ struct QueryDatabase { void UpdateDetailedNames(size_t* qualified_name_index, SymbolKind kind, size_t symbol_index, - const std::string& name); + const std::string& short_name, + const std::string& detailed_name); }; struct IdMap {