mirror of
				https://github.com/MaskRay/ccls.git
				synced 2025-11-04 06:15:20 +00:00 
			
		
		
		
	Improve workspace/symbol sorting heuristic
This commit is contained in:
		
							parent
							
								
									d821ac34d8
								
							
						
					
					
						commit
						01f1064576
					
				@ -354,6 +354,8 @@ struct IndexParam {
 | 
				
			|||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
 | 
					IndexFile* ConsumeFile(IndexParam* param, CXFile file) {
 | 
				
			||||||
 | 
					  if (!file)
 | 
				
			||||||
 | 
					    return nullptr;
 | 
				
			||||||
  bool is_first_ownership = false;
 | 
					  bool is_first_ownership = false;
 | 
				
			||||||
  IndexFile* db = param->file_consumer->TryConsumeFile(
 | 
					  IndexFile* db = param->file_consumer->TryConsumeFile(
 | 
				
			||||||
      file, &is_first_ownership, ¶m->file_contents);
 | 
					      file, &is_first_ownership, ¶m->file_contents);
 | 
				
			||||||
 | 
				
			|||||||
@ -49,21 +49,20 @@ std::string_view LexIdentifierAroundPos(lsPosition position,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// Find discontinuous |search| in |content|.
 | 
					// Find discontinuous |search| in |content|.
 | 
				
			||||||
// Return |found| and the count of skipped chars before found.
 | 
					// Return |found| and the count of skipped chars before found.
 | 
				
			||||||
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
 | 
					int ReverseSubseqMatch(std::string_view pat,
 | 
				
			||||||
                                                 std::string_view content) {
 | 
					                       std::string_view text,
 | 
				
			||||||
  bool hasUppercaseLetter = std::any_of(search.begin(), search.end(), isupper);
 | 
					                       int case_sensitivity) {
 | 
				
			||||||
  int skip = 0;
 | 
					  if (case_sensitivity == 1)
 | 
				
			||||||
  size_t j = 0;
 | 
					    case_sensitivity = std::any_of(pat.begin(), pat.end(), isupper) ? 2 : 0;
 | 
				
			||||||
  for (char c : search) {
 | 
					  int j = pat.size();
 | 
				
			||||||
    while (j < content.size() &&
 | 
					  if (!j)
 | 
				
			||||||
           (hasUppercaseLetter ? content[j] != c
 | 
					    return text.size();
 | 
				
			||||||
                               : tolower(content[j]) != tolower(c)))
 | 
					  for (int i = text.size(); i--;)
 | 
				
			||||||
      ++j, ++skip;
 | 
					    if ((case_sensitivity ? text[i] == pat[j - 1]
 | 
				
			||||||
    if (j == content.size())
 | 
					                          : tolower(text[i]) == tolower(pat[j - 1])) &&
 | 
				
			||||||
      return {false, skip};
 | 
					        !--j)
 | 
				
			||||||
    ++j;
 | 
					      return i;
 | 
				
			||||||
  }
 | 
					  return -1;
 | 
				
			||||||
  return {true, skip};
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TEST_SUITE("Offset") {
 | 
					TEST_SUITE("Offset") {
 | 
				
			||||||
@ -86,21 +85,3 @@ TEST_SUITE("Offset") {
 | 
				
			|||||||
    REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1);
 | 
					    REQUIRE(GetOffsetForPosition(lsPosition{0, 1}, "a") == 1);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
TEST_SUITE("Substring") {
 | 
					 | 
				
			||||||
  TEST_CASE("skip") {
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("a", "a") == std::make_pair(true, 0));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("b", "a") == std::make_pair(false, 1));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("", "") == std::make_pair(true, 0));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("a", "ba") == std::make_pair(true, 1));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("aa", "aba") ==
 | 
					 | 
				
			||||||
            std::make_pair(true, 1));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("aa", "baa") ==
 | 
					 | 
				
			||||||
            std::make_pair(true, 1));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("aA", "aA") == std::make_pair(true, 0));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("aA", "aa") ==
 | 
					 | 
				
			||||||
            std::make_pair(false, 1));
 | 
					 | 
				
			||||||
    REQUIRE(CaseFoldingSubsequenceMatch("incstdioh", "include <stdio.h>") ==
 | 
					 | 
				
			||||||
            std::make_pair(true, 7));
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -11,5 +11,6 @@ int GetOffsetForPosition(lsPosition position, std::string_view content);
 | 
				
			|||||||
std::string_view LexIdentifierAroundPos(lsPosition position,
 | 
					std::string_view LexIdentifierAroundPos(lsPosition position,
 | 
				
			||||||
                                        std::string_view content);
 | 
					                                        std::string_view content);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
std::pair<bool, int> CaseFoldingSubsequenceMatch(std::string_view search,
 | 
					int ReverseSubseqMatch(std::string_view pat,
 | 
				
			||||||
                                                 std::string_view content);
 | 
					                       std::string_view text,
 | 
				
			||||||
 | 
					                       int case_sensitivity);
 | 
				
			||||||
 | 
				
			|||||||
@ -214,10 +214,11 @@ void FilterAndSortCompletionResponse(
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Fuzzy match and remove awful candidates.
 | 
					  // Fuzzy match and remove awful candidates.
 | 
				
			||||||
  FuzzyMatcher fuzzy(complete_text, g_config->completion.caseSensitivity);
 | 
					  bool sensitive = g_config->completion.caseSensitivity;
 | 
				
			||||||
 | 
					  FuzzyMatcher fuzzy(complete_text, sensitive);
 | 
				
			||||||
  for (auto& item : items) {
 | 
					  for (auto& item : items) {
 | 
				
			||||||
    item.score_ =
 | 
					    item.score_ =
 | 
				
			||||||
        CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first
 | 
					        ReverseSubseqMatch(complete_text, *item.filterText, sensitive) >= 0
 | 
				
			||||||
            ? fuzzy.Match(*item.filterText)
 | 
					            ? fuzzy.Match(*item.filterText)
 | 
				
			||||||
            : FuzzyMatcher::kMinScore;
 | 
					            : FuzzyMatcher::kMinScore;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
				
			|||||||
@ -15,10 +15,13 @@ namespace {
 | 
				
			|||||||
MethodType kMethodType = "workspace/symbol";
 | 
					MethodType kMethodType = "workspace/symbol";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Lookup |symbol| in |db| and insert the value into |result|.
 | 
					// Lookup |symbol| in |db| and insert the value into |result|.
 | 
				
			||||||
bool InsertSymbolIntoResult(QueryDatabase* db,
 | 
					bool AddSymbol(
 | 
				
			||||||
 | 
					    QueryDatabase* db,
 | 
				
			||||||
    WorkingFiles* working_files,
 | 
					    WorkingFiles* working_files,
 | 
				
			||||||
                            SymbolIdx symbol,
 | 
					    int i,
 | 
				
			||||||
                            std::vector<lsSymbolInformation>* result) {
 | 
					    bool use_detailed,
 | 
				
			||||||
 | 
					    std::vector<std::tuple<lsSymbolInformation, bool, int>>* result) {
 | 
				
			||||||
 | 
					  SymbolIdx symbol = db->symbols[i];
 | 
				
			||||||
  std::optional<lsSymbolInformation> info =
 | 
					  std::optional<lsSymbolInformation> info =
 | 
				
			||||||
      GetSymbolInfo(db, working_files, symbol, true);
 | 
					      GetSymbolInfo(db, working_files, symbol, true);
 | 
				
			||||||
  if (!info)
 | 
					  if (!info)
 | 
				
			||||||
@ -38,7 +41,7 @@ bool InsertSymbolIntoResult(QueryDatabase* db,
 | 
				
			|||||||
  if (!ls_location)
 | 
					  if (!ls_location)
 | 
				
			||||||
    return false;
 | 
					    return false;
 | 
				
			||||||
  info->location = *ls_location;
 | 
					  info->location = *ls_location;
 | 
				
			||||||
  result->push_back(*info);
 | 
					  result->emplace_back(*info, use_detailed, i);
 | 
				
			||||||
  return true;
 | 
					  return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -72,34 +75,11 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    std::string query = request->params.query;
 | 
					    std::string query = request->params.query;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::unordered_set<std::string> inserted_results;
 | 
					    // {symbol info, matching detailed_name or short_name, index}
 | 
				
			||||||
    // db->detailed_names indices of each lsSymbolInformation in out.result
 | 
					    std::vector<std::tuple<lsSymbolInformation, bool, int>> unsorted;
 | 
				
			||||||
    std::vector<int> result_indices;
 | 
					    bool sensitive = g_config->workspaceSymbol.caseSensitivity;
 | 
				
			||||||
    std::vector<lsSymbolInformation> unsorted_results;
 | 
					 | 
				
			||||||
    inserted_results.reserve(g_config->workspaceSymbol.maxNum);
 | 
					 | 
				
			||||||
    result_indices.reserve(g_config->workspaceSymbol.maxNum);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // We use detailed_names without parameters for matching.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Find exact substring matches.
 | 
					 | 
				
			||||||
    for (int i = 0; i < db->symbols.size(); ++i) {
 | 
					 | 
				
			||||||
      std::string_view detailed_name = db->GetSymbolName(i, true);
 | 
					 | 
				
			||||||
      if (detailed_name.find(query) != std::string::npos) {
 | 
					 | 
				
			||||||
        // Do not show the same entry twice.
 | 
					 | 
				
			||||||
        if (!inserted_results.insert(std::string(detailed_name)).second)
 | 
					 | 
				
			||||||
          continue;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
 | 
					 | 
				
			||||||
                                   &unsorted_results)) {
 | 
					 | 
				
			||||||
          result_indices.push_back(i);
 | 
					 | 
				
			||||||
          if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
 | 
					 | 
				
			||||||
            break;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Find subsequence matches.
 | 
					    // Find subsequence matches.
 | 
				
			||||||
    if (unsorted_results.size() < g_config->workspaceSymbol.maxNum) {
 | 
					 | 
				
			||||||
    std::string query_without_space;
 | 
					    std::string query_without_space;
 | 
				
			||||||
    query_without_space.reserve(query.size());
 | 
					    query_without_space.reserve(query.size());
 | 
				
			||||||
    for (char c : query)
 | 
					    for (char c : query)
 | 
				
			||||||
@ -108,46 +88,45 @@ struct Handler_WorkspaceSymbol : BaseMessageHandler<In_WorkspaceSymbol> {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for (int i = 0; i < (int)db->symbols.size(); ++i) {
 | 
					    for (int i = 0; i < (int)db->symbols.size(); ++i) {
 | 
				
			||||||
      std::string_view detailed_name = db->GetSymbolName(i, true);
 | 
					      std::string_view detailed_name = db->GetSymbolName(i, true);
 | 
				
			||||||
        if (CaseFoldingSubsequenceMatch(query_without_space, detailed_name)
 | 
					      int pos =
 | 
				
			||||||
                .first) {
 | 
					        ReverseSubseqMatch(query_without_space, detailed_name, sensitive);
 | 
				
			||||||
          // Do not show the same entry twice.
 | 
					      if (pos >= 0 &&
 | 
				
			||||||
          if (!inserted_results.insert(std::string(detailed_name)).second)
 | 
					        AddSymbol(db, working_files, i,
 | 
				
			||||||
            continue;
 | 
					          detailed_name.find(':', pos) != std::string::npos,
 | 
				
			||||||
 | 
					          &unsorted) &&
 | 
				
			||||||
          if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
 | 
					        unsorted.size() >= g_config->workspaceSymbol.maxNum)
 | 
				
			||||||
                                     &unsorted_results)) {
 | 
					 | 
				
			||||||
            result_indices.push_back(i);
 | 
					 | 
				
			||||||
            if (unsorted_results.size() >= g_config->workspaceSymbol.maxNum)
 | 
					 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) {
 | 
					    if (g_config->workspaceSymbol.sort && query.size() <= FuzzyMatcher::kMaxPat) {
 | 
				
			||||||
      // Sort results with a fuzzy matching algorithm.
 | 
					      // Sort results with a fuzzy matching algorithm.
 | 
				
			||||||
      int longest = 0;
 | 
					      int longest = 0;
 | 
				
			||||||
      for (int i : result_indices)
 | 
					      for (int i = 0; i < int(unsorted.size()); i++) {
 | 
				
			||||||
        longest = std::max(longest, int(db->GetSymbolName(i, true).size()));
 | 
					        longest = std::max(
 | 
				
			||||||
 | 
					            longest,
 | 
				
			||||||
 | 
					            int(db->GetSymbolName(std::get<2>(unsorted[i]), true).size()));
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
      FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity);
 | 
					      FuzzyMatcher fuzzy(query, g_config->workspaceSymbol.caseSensitivity);
 | 
				
			||||||
      std::vector<std::pair<int, int>> permutation(result_indices.size());
 | 
					      std::vector<std::pair<int, int>> permutation(unsorted.size());
 | 
				
			||||||
      for (int i = 0; i < int(result_indices.size()); i++) {
 | 
					      for (int i = 0; i < int(unsorted.size()); i++) {
 | 
				
			||||||
        permutation[i] = {
 | 
					        permutation[i] = {
 | 
				
			||||||
            fuzzy.Match(db->GetSymbolName(result_indices[i], true)), i};
 | 
					            fuzzy.Match(db->GetSymbolName(std::get<2>(unsorted[i]),
 | 
				
			||||||
 | 
					                                          std::get<1>(unsorted[i]))),
 | 
				
			||||||
 | 
					            i};
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      std::sort(permutation.begin(), permutation.end(),
 | 
					      std::sort(permutation.begin(), permutation.end(),
 | 
				
			||||||
                std::greater<std::pair<int, int>>());
 | 
					                std::greater<std::pair<int, int>>());
 | 
				
			||||||
      out.result.reserve(result_indices.size());
 | 
					      out.result.reserve(unsorted.size());
 | 
				
			||||||
      // Discard awful candidates.
 | 
					      // Discard awful candidates.
 | 
				
			||||||
      for (int i = 0; i < int(result_indices.size()) &&
 | 
					      for (int i = 0; i < int(unsorted.size()) &&
 | 
				
			||||||
                      permutation[i].first > FuzzyMatcher::kMinScore;
 | 
					                      permutation[i].first > FuzzyMatcher::kMinScore;
 | 
				
			||||||
           i++)
 | 
					           i++)
 | 
				
			||||||
        out.result.push_back(
 | 
					        out.result.push_back(
 | 
				
			||||||
            std::move(unsorted_results[permutation[i].second]));
 | 
					            std::move(std::get<0>(unsorted[permutation[i].second])));
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
      out.result.reserve(unsorted_results.size());
 | 
					      out.result.reserve(unsorted.size());
 | 
				
			||||||
      for (const auto& entry : unsorted_results)
 | 
					      for (auto& entry : unsorted)
 | 
				
			||||||
        out.result.push_back(std::move(entry));
 | 
					        out.result.push_back(std::get<0>(entry));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    LOG_S(INFO) << "[querydb] Found " << out.result.size()
 | 
					    LOG_S(INFO) << "[querydb] Found " << out.result.size()
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user