mirror of
https://github.com/MaskRay/ccls.git
synced 2024-11-29 19:07:08 +00:00
Align column of index line and buffer line
This commit is contained in:
parent
2c4783c904
commit
e04245f185
@ -12,7 +12,12 @@
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr int kMaxDiff = 9;
|
// When finding a best match of buffer line and index line, limit the max edit
|
||||||
|
// distance.
|
||||||
|
constexpr int kMaxDiff = 20;
|
||||||
|
// Don't align index line to buffer line if one of the lengths is larger than
|
||||||
|
// |kMaxColumnAlignSize|.
|
||||||
|
constexpr int kMaxColumnAlignSize = 200;
|
||||||
|
|
||||||
lsPosition GetPositionForOffset(const std::string& content, int offset) {
|
lsPosition GetPositionForOffset(const std::string& content, int offset) {
|
||||||
if (offset >= content.size())
|
if (offset >= content.size())
|
||||||
@ -69,13 +74,13 @@ int MyersDiff(const std::string& a, const std::string& b, int threshold) {
|
|||||||
return MyersDiff(a.data(), a.size(), b.data(), b.size(), threshold);
|
return MyersDiff(a.data(), a.size(), b.data(), b.size(), threshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Computes Levenshtein edit distance with O(N*M) Needleman-Wunsch algorithm
|
// Computes edit distance with O(N*M) Needleman-Wunsch algorithm
|
||||||
// and returns a distance vector where d[i] = cost of aligning a to b[0,i).
|
// and returns a distance vector where d[i] = cost of aligning a to b[0,i).
|
||||||
//
|
//
|
||||||
// Myers' diff algorithm is used to find best matching line while this one is
|
// Myers' diff algorithm is used to find best matching line while this one is
|
||||||
// used to align a single column because Myers' needs some twiddling to return
|
// used to align a single column because Myers' needs some twiddling to return
|
||||||
// distance vector.
|
// distance vector.
|
||||||
std::vector<int> LevenshteinDistance(std::string a, std::string b) {
|
std::vector<int> EditDistanceVector(std::string a, std::string b) {
|
||||||
std::vector<int> d(b.size() + 1);
|
std::vector<int> d(b.size() + 1);
|
||||||
std::iota(d.begin(), d.end(), 0);
|
std::iota(d.begin(), d.end(), 0);
|
||||||
for (int i = 0; i < (int)a.size(); i++) {
|
for (int i = 0; i < (int)a.size(); i++) {
|
||||||
@ -83,13 +88,54 @@ std::vector<int> LevenshteinDistance(std::string a, std::string b) {
|
|||||||
d[0] = i + 1;
|
d[0] = i + 1;
|
||||||
for (int j = 0; j < (int)b.size(); j++) {
|
for (int j = 0; j < (int)b.size(); j++) {
|
||||||
int t = d[j + 1];
|
int t = d[j + 1];
|
||||||
d[j + 1] = a[i] == b[j] ? ul : std::min(ul, std::min(d[j], d[j + 1])) + 1;
|
d[j + 1] = a[i] == b[j] ? ul : std::min(d[j], d[j + 1]) + 1;
|
||||||
ul = t;
|
ul = t;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find matching position of |a[column]| in |b|.
|
||||||
|
// This is actually a single step of Hirschberg's sequence alignment algorithm.
|
||||||
|
int AlignColumn(const std::string& a, int column, std::string b) {
|
||||||
|
int head = 0, tail = 0;
|
||||||
|
while (head < (int)a.size() && head < (int)b.size() && a[head] == b[head])
|
||||||
|
head++;
|
||||||
|
while (tail < (int)a.size() && tail < (int)b.size() &&
|
||||||
|
a[a.size() - 1 - tail] == b[b.size() - 1 - tail])
|
||||||
|
tail++;
|
||||||
|
if (column < head)
|
||||||
|
return column;
|
||||||
|
if ((int)a.size() - tail <= column)
|
||||||
|
return column + b.size() - a.size();
|
||||||
|
if (std::max(a.size(), b.size()) - head - tail >= kMaxColumnAlignSize)
|
||||||
|
return std::min(column, (int)b.size());
|
||||||
|
|
||||||
|
// b[head, tail)
|
||||||
|
b = b.substr(head, b.size() - tail - head);
|
||||||
|
|
||||||
|
// left[i] = cost of aligning a[head, column) to b[head, head + i)
|
||||||
|
std::vector<int> left = EditDistanceVector(a.substr(head, column - head), b);
|
||||||
|
|
||||||
|
// right[i] = cost of aligning a[column, a.size() - tail) to b[head + i,
|
||||||
|
// b.size() - tail)
|
||||||
|
std::string a_rev = a.substr(column, a.size() - tail - column);
|
||||||
|
std::reverse(a_rev.begin(), a_rev.end());
|
||||||
|
std::reverse(b.begin(), b.end());
|
||||||
|
std::vector<int> right = EditDistanceVector(a_rev, b);
|
||||||
|
std::reverse(right.begin(), right.end());
|
||||||
|
|
||||||
|
int best = 0, best_cost = INT_MAX;
|
||||||
|
for (size_t i = 0; i < left.size(); i++) {
|
||||||
|
int cost = left[i] + right[i];
|
||||||
|
if (cost < best_cost) {
|
||||||
|
best_cost = cost;
|
||||||
|
best = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return head + best;
|
||||||
|
}
|
||||||
|
|
||||||
// Find matching buffer line of index_lines[line].
|
// Find matching buffer line of index_lines[line].
|
||||||
// By symmetry, this can also be used to find matching index line of a buffer
|
// By symmetry, this can also be used to find matching index line of a buffer
|
||||||
// line.
|
// line.
|
||||||
@ -99,8 +145,12 @@ optional<int> FindMatchingLine(const std::vector<std::string>& index_lines,
|
|||||||
int* column,
|
int* column,
|
||||||
const std::vector<std::string>& buffer_lines) {
|
const std::vector<std::string>& buffer_lines) {
|
||||||
// If this is a confident mapping, returns.
|
// If this is a confident mapping, returns.
|
||||||
if (index_to_buffer[line] >= 0)
|
if (index_to_buffer[line] >= 0) {
|
||||||
return index_to_buffer[line];
|
int ret = index_to_buffer[line];
|
||||||
|
if (column)
|
||||||
|
*column = AlignColumn(index_lines[line], *column, buffer_lines[ret]);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// Find the nearest two confident lines above and below.
|
// Find the nearest two confident lines above and below.
|
||||||
int up = line, down = line;
|
int up = line, down = line;
|
||||||
@ -123,6 +173,8 @@ optional<int> FindMatchingLine(const std::vector<std::string>& index_lines,
|
|||||||
best = i;
|
best = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (column)
|
||||||
|
*column = AlignColumn(index_lines[line], *column, buffer_lines[best]);
|
||||||
return best;
|
return best;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user