2017-12-06 03:32:33 +00:00
|
|
|
#include "lex_utils.h"
|
|
|
|
#include "message_handler.h"
|
|
|
|
#include "query_utils.h"
|
2017-12-29 16:29:47 +00:00
|
|
|
#include "queue_manager.h"
|
2017-12-06 03:32:33 +00:00
|
|
|
|
|
|
|
#include <loguru.hpp>
|
|
|
|
|
2017-12-29 16:29:47 +00:00
|
|
|
#include <ctype.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <functional>
|
|
|
|
|
2017-12-06 05:03:38 +00:00
|
|
|
namespace {
|
2017-12-19 05:31:19 +00:00
|
|
|
|
|
|
|
// Lookup |symbol| in |db| and insert the value into |result|.
|
2017-12-24 03:23:29 +00:00
|
|
|
bool InsertSymbolIntoResult(QueryDatabase* db,
|
2017-12-19 05:31:19 +00:00
|
|
|
WorkingFiles* working_files,
|
|
|
|
SymbolIdx symbol,
|
|
|
|
std::vector<lsSymbolInformation>* result) {
|
2017-12-23 16:01:43 +00:00
|
|
|
optional<lsSymbolInformation> info =
|
|
|
|
GetSymbolInfo(db, working_files, symbol, false /*use_short_name*/);
|
2017-12-19 05:31:19 +00:00
|
|
|
if (!info)
|
2017-12-24 03:23:29 +00:00
|
|
|
return false;
|
2017-12-19 05:31:19 +00:00
|
|
|
|
|
|
|
optional<QueryLocation> location = GetDefinitionExtentOfSymbol(db, symbol);
|
|
|
|
if (!location) {
|
|
|
|
auto decls = GetDeclarationsOfSymbolForGotoDefinition(db, symbol);
|
|
|
|
if (decls.empty())
|
2017-12-24 03:23:29 +00:00
|
|
|
return false;
|
2017-12-19 05:31:19 +00:00
|
|
|
location = decls[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
optional<lsLocation> ls_location =
|
|
|
|
GetLsLocation(db, working_files, *location);
|
|
|
|
if (!ls_location)
|
2017-12-24 03:23:29 +00:00
|
|
|
return false;
|
2017-12-19 05:31:19 +00:00
|
|
|
info->location = *ls_location;
|
|
|
|
result->push_back(*info);
|
2017-12-24 03:23:29 +00:00
|
|
|
return true;
|
2017-12-19 05:31:19 +00:00
|
|
|
}
|
|
|
|
|
2017-12-06 04:39:44 +00:00
|
|
|
struct lsWorkspaceSymbolParams {
|
|
|
|
std::string query;
|
|
|
|
};
|
|
|
|
MAKE_REFLECT_STRUCT(lsWorkspaceSymbolParams, query);
|
|
|
|
|
|
|
|
struct Ipc_WorkspaceSymbol : public IpcMessage<Ipc_WorkspaceSymbol> {
|
|
|
|
const static IpcId kIpcId = IpcId::WorkspaceSymbol;
|
|
|
|
lsRequestId id;
|
|
|
|
lsWorkspaceSymbolParams params;
|
|
|
|
};
|
|
|
|
MAKE_REFLECT_STRUCT(Ipc_WorkspaceSymbol, id, params);
|
|
|
|
REGISTER_IPC_MESSAGE(Ipc_WorkspaceSymbol);
|
|
|
|
|
|
|
|
struct Out_WorkspaceSymbol : public lsOutMessage<Out_WorkspaceSymbol> {
|
|
|
|
lsRequestId id;
|
2017-12-12 05:20:29 +00:00
|
|
|
std::vector<lsSymbolInformation> result;
|
2017-12-06 04:39:44 +00:00
|
|
|
};
|
|
|
|
MAKE_REFLECT_STRUCT(Out_WorkspaceSymbol, jsonrpc, id, result);
|
|
|
|
|
2017-12-24 03:23:29 +00:00
|
|
|
///// Fuzzy matching

// Negative but far from INT_MIN so that intermediate results are hard to
// overflow
constexpr int kMinScore = INT_MIN / 2;
// Penalty of dropping a leading character in str
constexpr int kLeadingGapScore = -4;
// Penalty of dropping a non-leading character in str
constexpr int kGapScore = -5;
// Bonus of aligning with an initial character of a word in pattern. Must be
// greater than 1
constexpr int kPatternStartMultiplier = 2;

// Position score of a character that starts a word in str
constexpr int kWordStartScore = 50;
// Position score of a non-word character in str
constexpr int kNonWordScore = 40;
// Bonus when the pattern character and the str character are byte-identical
// (i.e. they match without case folding)
constexpr int kCaseMatchScore = 2;

// Less than kWordStartScore
constexpr int kConsecutiveScore = kWordStartScore + kGapScore;
// Slightly less than kConsecutiveScore
constexpr int kCamelScore = kWordStartScore + kGapScore - 1;

enum class CharClass { Lower, Upper, Digit, NonWord };

// Classifies |c| as lower-case letter, upper-case letter, digit or anything
// else (NonWord).
//
// |c| is reduced to `unsigned char` first: the <ctype.h> classifiers have
// undefined behaviour for negative arguments other than EOF, and callers pass
// plain `char` values that may be negative for non-ASCII bytes.
static CharClass GetCharClass(int c) {
  const int uc = static_cast<unsigned char>(c);
  if (islower(uc))
    return CharClass::Lower;
  if (isupper(uc))
    return CharClass::Upper;
  if (isdigit(uc))
    return CharClass::Digit;
  return CharClass::NonWord;
}

// Returns the position score of a character of class |curr| that directly
// follows a character of class |prev|:
// - kWordStartScore for a word character after a non-word character,
// - kCamelScore for a lower->Upper transition or the first digit of a run,
// - kNonWordScore for any other non-word character,
// - 0 otherwise.
static int GetScoreFor(CharClass prev, CharClass curr) {
  if (prev == CharClass::NonWord && curr != CharClass::NonWord)
    return kWordStartScore;
  if ((prev == CharClass::Lower && curr == CharClass::Upper) ||
      (prev != CharClass::Digit && curr == CharClass::Digit))
    return kCamelScore;
  if (curr == CharClass::NonWord)
    return kNonWordScore;
  return 0;
}

/*
FuzzyEvaluate implements a global sequence alignment algorithm to find the
maximum accumulated score by aligning `pattern` to `str`. It applies when
`pattern` is a subsequence of `str`; otherwise the returned value stays close
to kMinScore.

Scoring criteria
- Prefer matches at the start of a word, or the start of subwords in
  CamelCase/camelCase/camel123 words. See kWordStartScore/kCamelScore
- Non-word characters matter. See kNonWordScore
- The first characters of words of `pattern` receive bonus because they usually
  have more significance than the rest. See kPatternStartMultiplier
- Superfluous characters in `str` will reduce the score (gap penalty). See
  kGapScore
- Prefer early occurrence of the first character. See kLeadingGapScore/kGapScore

The recurrence of the dynamic programming:
dp[i][j]: maximum accumulated score by aligning pattern[0..i] to str[0..j]
dp[0][j] = leading_gap_penalty(0, j) + score[j]
dp[i][j] = max(dp[i-1][j-1] + CONSECUTIVE_SCORE, max(dp[i-1][k] +
gap_penalty(k+1, j) + score[j] : k < j))
The first dimension can be suppressed since we do not need a matching scheme,
which reduces the space complexity from O(N*M) to O(M)

Parameters
- pattern: the query; whitespace is not aligned, it only marks the next
  character as the start of a pattern word.
- str: the candidate string.
- score, dp: scratch buffers that callers may reuse across calls. They are
  grown to str.size() when shorter; their previous contents are overwritten.

Returns the best score over all end positions of the match in `str`.
*/
int FuzzyEvaluate(const std::string& pattern,
                  const std::string& str,
                  std::vector<int>& score,
                  std::vector<int>& dp) {
  const int n = int(str.size());
  // Do not rely on the caller having sized the scratch buffers.
  if (score.size() < str.size())
    score.resize(str.size());
  if (dp.size() < str.size())
    dp.resize(str.size());

  bool pfirst = true,  // aligning the first character of pattern
      pstart = true;   // whether we are aligning the start of a word in pattern

  // Calculate position score for each character in str.
  CharClass prev = CharClass::NonWord;
  for (int i = 0; i < n; i++) {
    CharClass cur = GetCharClass(static_cast<unsigned char>(str[i]));
    score[i] = GetScoreFor(prev, cur);
    prev = cur;
  }
  std::fill_n(dp.begin(), str.size(), kMinScore);

  // Align each character of pattern.
  for (unsigned char pc : pattern) {
    if (isspace(pc)) {
      pstart = true;
      continue;
    }
    // Column 0 has no upper-left neighbour, so every row must start from
    // "impossible". Carrying these over from the end of the previous row would
    // let a later pattern character align to str[0] with a bogus score.
    int uleft = kMinScore;   // value of the upper left cell
    int ulefts = kMinScore;  // maximum value of uleft and cells on the left
    int lefts = kMinScore;   // similar to ulefts, but for the next row
    // Enumerate the character in str to be aligned with pc.
    for (int i = 0; i < n; i++) {
      // Compare as unsigned char: tolower() is undefined for negative values
      // and a signed comparison would never equal pc for non-ASCII bytes.
      const unsigned char sc = static_cast<unsigned char>(str[i]);
      const int left = dp[i];
      lefts = std::max(lefts + kGapScore, left);
      // Use lower() if case-insensitive
      if (tolower(pc) == tolower(sc)) {
        int t = score[i] * (pstart ? kPatternStartMultiplier : 1);
        dp[i] = (pfirst ? kLeadingGapScore * i + t
                        : std::max(uleft + kConsecutiveScore, ulefts + t)) +
                (pc == sc ? kCaseMatchScore : 0);
      } else
        dp[i] = kMinScore;
      uleft = left;
      ulefts = lefts;
    }
    pfirst = pstart = false;
  }

  // Enumerate the end position of the match in str. Each removed trailing
  // character has a penalty of kGapScore.
  int best = kMinScore;
  for (int i = 0; i < n; i++)
    best = std::max(best + kGapScore, dp[i]);
  return best;
}
|
|
|
|
|
2017-12-06 03:32:33 +00:00
|
|
|
struct WorkspaceSymbolHandler : BaseMessageHandler<Ipc_WorkspaceSymbol> {
|
|
|
|
void Run(Ipc_WorkspaceSymbol* request) override {
|
|
|
|
Out_WorkspaceSymbol out;
|
|
|
|
out.id = request->id;
|
|
|
|
|
|
|
|
LOG_S(INFO) << "[querydb] Considering " << db->detailed_names.size()
|
|
|
|
<< " candidates for query " << request->params.query;
|
|
|
|
|
|
|
|
std::string query = request->params.query;
|
|
|
|
|
|
|
|
std::unordered_set<std::string> inserted_results;
|
2017-12-24 03:23:29 +00:00
|
|
|
// db->detailed_names indices of each lsSymbolInformation in out.result
|
|
|
|
std::vector<int> result_indices;
|
2017-12-24 05:23:01 +00:00
|
|
|
std::vector<lsSymbolInformation> unsorted_results;
|
2017-12-06 03:32:33 +00:00
|
|
|
inserted_results.reserve(config->maxWorkspaceSearchResults);
|
2017-12-24 03:23:29 +00:00
|
|
|
result_indices.reserve(config->maxWorkspaceSearchResults);
|
2017-12-06 03:32:33 +00:00
|
|
|
|
2018-01-05 18:06:15 +00:00
|
|
|
// We use detailed_names for exact matches and short_names for fuzzy matches
|
|
|
|
// because otherwise the fuzzy match is likely to match on parameter names
|
|
|
|
// and the like.
|
|
|
|
// TODO: make detailed_names not include function parameter information (or
|
|
|
|
// introduce additional metadata) so that we can do fuzzy search with
|
|
|
|
// detailed_names.
|
|
|
|
|
2017-12-24 05:23:01 +00:00
|
|
|
// Find exact substring matches.
|
2018-01-05 18:06:15 +00:00
|
|
|
for (int i = 0; i < db->detailed_names.size(); ++i) {
|
|
|
|
if (db->detailed_names[i].find(query) != std::string::npos) {
|
2017-12-06 03:32:33 +00:00
|
|
|
// Do not show the same entry twice.
|
|
|
|
if (!inserted_results.insert(db->detailed_names[i]).second)
|
|
|
|
continue;
|
|
|
|
|
2017-12-27 15:53:35 +00:00
|
|
|
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
|
|
|
|
&unsorted_results)) {
|
2017-12-24 03:23:29 +00:00
|
|
|
result_indices.push_back(i);
|
2017-12-24 05:23:01 +00:00
|
|
|
if (unsorted_results.size() >= config->maxWorkspaceSearchResults)
|
2017-12-24 03:23:29 +00:00
|
|
|
break;
|
|
|
|
}
|
2017-12-06 03:32:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-12-24 05:23:01 +00:00
|
|
|
// Find subsequence matches.
|
|
|
|
if (unsorted_results.size() < config->maxWorkspaceSearchResults) {
|
2017-12-24 03:23:29 +00:00
|
|
|
std::string query_without_space;
|
|
|
|
query_without_space.reserve(query.size());
|
2017-12-27 15:53:35 +00:00
|
|
|
for (char c : query)
|
2017-12-24 03:23:29 +00:00
|
|
|
if (!isspace(c))
|
|
|
|
query_without_space += c;
|
|
|
|
|
2017-12-24 05:23:01 +00:00
|
|
|
for (int i = 0; i < db->short_names.size(); ++i) {
|
|
|
|
if (SubstringMatch(query_without_space, db->short_names[i])) {
|
2017-12-06 03:32:33 +00:00
|
|
|
// Do not show the same entry twice.
|
|
|
|
if (!inserted_results.insert(db->detailed_names[i]).second)
|
|
|
|
continue;
|
|
|
|
|
2017-12-27 15:53:35 +00:00
|
|
|
if (InsertSymbolIntoResult(db, working_files, db->symbols[i],
|
|
|
|
&unsorted_results)) {
|
2017-12-24 03:23:29 +00:00
|
|
|
result_indices.push_back(i);
|
2017-12-24 05:23:01 +00:00
|
|
|
if (unsorted_results.size() >= config->maxWorkspaceSearchResults)
|
2017-12-24 03:23:29 +00:00
|
|
|
break;
|
|
|
|
}
|
2017-12-06 03:32:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-07 21:08:18 +00:00
|
|
|
if (config->sortWorkspaceSearchResults) {
|
|
|
|
// Sort results with a fuzzy matching algorithm.
|
|
|
|
int longest = 0;
|
|
|
|
for (int i : result_indices)
|
|
|
|
longest = std::max(longest, int(db->short_names[i].size()));
|
|
|
|
|
|
|
|
std::vector<int> score(longest); // score for each position
|
|
|
|
std::vector<int> dp(longest); // dp[i]: maximum value by aligning pattern to str[0..i]
|
|
|
|
std::vector<std::pair<int, int>> permutation(result_indices.size());
|
|
|
|
for (int i = 0; i < int(result_indices.size()); i++) {
|
|
|
|
permutation[i] = {
|
|
|
|
FuzzyEvaluate(query, db->short_names[result_indices[i]], score, dp),
|
|
|
|
i};
|
|
|
|
}
|
|
|
|
std::sort(permutation.begin(), permutation.end(),
|
|
|
|
std::greater<std::pair<int, int>>());
|
|
|
|
out.result.reserve(result_indices.size());
|
|
|
|
for (int i = 0; i < int(result_indices.size()); i++)
|
|
|
|
out.result.push_back(std::move(unsorted_results[permutation[i].second]));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
out.result.reserve(unsorted_results.size());
|
|
|
|
for (const auto& entry : unsorted_results)
|
|
|
|
out.result.push_back(std::move(entry));
|
2017-12-24 06:49:45 +00:00
|
|
|
}
|
2018-01-07 21:08:18 +00:00
|
|
|
|
2017-12-24 03:23:29 +00:00
|
|
|
|
2017-12-06 03:32:33 +00:00
|
|
|
LOG_S(INFO) << "[querydb] Found " << out.result.size()
|
|
|
|
<< " results for query " << query;
|
2017-12-24 00:25:18 +00:00
|
|
|
QueueManager::WriteStdout(IpcId::WorkspaceSymbol, out);
|
2017-12-06 03:32:33 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
REGISTER_MESSAGE_HANDLER(WorkspaceSymbolHandler);
|
2017-12-24 03:23:29 +00:00
|
|
|
} // namespace
|