mirror of
https://github.com/MaskRay/ccls.git
synced 2025-02-21 07:59:27 +00:00
Comment Paul Heckel's diff algorithm which is used to align confident lines in index and buffer
This commit is contained in:
parent
e20a6e9790
commit
0bad74eb4f
@ -32,7 +32,11 @@ lsPosition GetPositionForOffset(const std::string& content, int offset) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int MyersDiff(const char *a, int la, const char *b, int lb, int threshold) {
|
// Computes the edit distance of strings [a,a+la) and [b,b+lb) with Eugene W.
|
||||||
|
// Myers' O(ND) diff algorithm.
|
||||||
|
// Costs: insertion=1, deletion=1, no substitution.
|
||||||
|
// If the distance is larger than threshold, returns threshould + 1.
|
||||||
|
int MyersDiff(const char* a, int la, const char* b, int lb, int threshold) {
|
||||||
assert(threshold <= kMaxDiff);
|
assert(threshold <= kMaxDiff);
|
||||||
static int v_static[kMaxDiff + 2];
|
static int v_static[kMaxDiff + 2];
|
||||||
const char *ea = a + la, *eb = b + lb;
|
const char *ea = a + la, *eb = b + lb;
|
||||||
@ -64,13 +68,18 @@ int MyersDiff(const std::string& a, const std::string& b, int threshold) {
|
|||||||
return MyersDiff(a.data(), a.size(), b.data(), b.size(), threshold);
|
return MyersDiff(a.data(), a.size(), b.data(), b.size(), threshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find matching buffer line with index line and the converse.
|
// Find matching buffer line of index_lines[line].
|
||||||
|
// By symmetry, this can also be used to find matching index line of a buffer
|
||||||
|
// line.
|
||||||
optional<int> FindMatchingLine(const std::vector<std::string>& index_lines,
|
optional<int> FindMatchingLine(const std::vector<std::string>& index_lines,
|
||||||
const std::vector<int>& index_to_buffer,
|
const std::vector<int>& index_to_buffer,
|
||||||
int line,
|
int line,
|
||||||
const std::vector<std::string>& buffer_lines) {
|
const std::vector<std::string>& buffer_lines) {
|
||||||
|
// If this is a confident mapping, returns.
|
||||||
if (index_to_buffer[line] >= 0)
|
if (index_to_buffer[line] >= 0)
|
||||||
return index_to_buffer[line];
|
return index_to_buffer[line];
|
||||||
|
|
||||||
|
// Find the nearest two confident lines above and below.
|
||||||
int up = line, down = line;
|
int up = line, down = line;
|
||||||
while (--up >= 0 && index_to_buffer[up] < 0) {}
|
while (--up >= 0 && index_to_buffer[up] < 0) {}
|
||||||
while (++down < int(index_to_buffer.size()) && index_to_buffer[down] < 0) {}
|
while (++down < int(index_to_buffer.size()) && index_to_buffer[down] < 0) {}
|
||||||
@ -79,6 +88,9 @@ optional<int> FindMatchingLine(const std::vector<std::string>& index_lines,
|
|||||||
: index_to_buffer[down];
|
: index_to_buffer[down];
|
||||||
if (up > down)
|
if (up > down)
|
||||||
return nullopt;
|
return nullopt;
|
||||||
|
|
||||||
|
// Search for lines [up,down] and use Myers's diff algorithm to find the best
|
||||||
|
// match (least edit distance).
|
||||||
int best = up, best_dist = kMaxDiff + 1;
|
int best = up, best_dist = kMaxDiff + 1;
|
||||||
const std::string& needle = index_lines[line];
|
const std::string& needle = index_lines[line];
|
||||||
for (int i = up; i <= down; i++) {
|
for (int i = up; i <= down; i++) {
|
||||||
@ -129,16 +141,19 @@ void WorkingFile::OnBufferContentUpdated() {
|
|||||||
buffer_to_index.clear();
|
buffer_to_index.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Variant of Paul Heckel's diff algorithm
|
// Variant of Paul Heckel's diff algorithm to compute |index_to_buffer| and
|
||||||
|
// |buffer_to_index|.
|
||||||
|
// The core idea is that if a line is unique in both index and buffer,
|
||||||
|
// we are confident that the line appeared in index maps to the one appeared in
|
||||||
|
// buffer. And then using them as start points to extend upwards and downwards
|
||||||
|
// to align other identical lines (but not unique).
|
||||||
void WorkingFile::ComputeLineMapping() {
|
void WorkingFile::ComputeLineMapping() {
|
||||||
std::unordered_map<uint64_t, int> hash_to_unique;
|
std::unordered_map<uint64_t, int> hash_to_unique;
|
||||||
std::vector<uint64_t> index_hashes(index_lines.size()),
|
std::vector<uint64_t> index_hashes(index_lines.size()),
|
||||||
buffer_hashes(buffer_lines.size());
|
buffer_hashes(buffer_lines.size());
|
||||||
std::vector<int>& from_index = index_to_buffer;
|
index_to_buffer.resize(index_lines.size());
|
||||||
std::vector<int>& from_buffer = buffer_to_index;
|
buffer_to_index.resize(buffer_lines.size());
|
||||||
from_index.resize(index_lines.size());
|
hash_to_unique.reserve(std::max(index_to_buffer.size(), buffer_to_index.size()));
|
||||||
from_buffer.resize(buffer_lines.size());
|
|
||||||
hash_to_unique.reserve(std::max(from_index.size(), from_buffer.size()));
|
|
||||||
|
|
||||||
// For index line i, set from_index[i] to -1 if line i is duplicated.
|
// For index line i, set from_index[i] to -1 if line i is duplicated.
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@ -148,11 +163,11 @@ void WorkingFile::ComputeLineMapping() {
|
|||||||
auto it = hash_to_unique.find(h);
|
auto it = hash_to_unique.find(h);
|
||||||
if (it == hash_to_unique.end()) {
|
if (it == hash_to_unique.end()) {
|
||||||
hash_to_unique[h] = i;
|
hash_to_unique[h] = i;
|
||||||
from_index[i] = i;
|
index_to_buffer[i] = i;
|
||||||
} else {
|
} else {
|
||||||
if (it->second >= 0)
|
if (it->second >= 0)
|
||||||
from_index[it->second] = -1;
|
index_to_buffer[it->second] = -1;
|
||||||
from_index[i] = it->second = -1;
|
index_to_buffer[i] = it->second = -1;
|
||||||
}
|
}
|
||||||
index_hashes[i++] = h;
|
index_hashes[i++] = h;
|
||||||
}
|
}
|
||||||
@ -166,47 +181,48 @@ void WorkingFile::ComputeLineMapping() {
|
|||||||
auto it = hash_to_unique.find(h);
|
auto it = hash_to_unique.find(h);
|
||||||
if (it == hash_to_unique.end()) {
|
if (it == hash_to_unique.end()) {
|
||||||
hash_to_unique[h] = i;
|
hash_to_unique[h] = i;
|
||||||
from_buffer[i] = i;
|
buffer_to_index[i] = i;
|
||||||
} else {
|
} else {
|
||||||
if (it->second >= 0)
|
if (it->second >= 0)
|
||||||
from_buffer[it->second] = -1;
|
buffer_to_index[it->second] = -1;
|
||||||
from_buffer[i] = it->second = -1;
|
buffer_to_index[i] = it->second = -1;
|
||||||
}
|
}
|
||||||
buffer_hashes[i++] = h;
|
buffer_hashes[i++] = h;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If index line i is the same as buffer line j, and they are both unique,
|
// If index line i is the identical to buffer line j, and they are both unique,
|
||||||
// align them by pointing from_index[i] to j.
|
// align them by pointing from_index[i] to j.
|
||||||
i = 0;
|
i = 0;
|
||||||
for (auto h : index_hashes) {
|
for (auto h : index_hashes) {
|
||||||
if (from_index[i] >= 0) {
|
if (index_to_buffer[i] >= 0) {
|
||||||
auto it = hash_to_unique.find(h);
|
auto it = hash_to_unique.find(h);
|
||||||
if (it != hash_to_unique.end() && it->second >= 0 &&
|
if (it != hash_to_unique.end() && it->second >= 0 &&
|
||||||
from_buffer[it->second] >= 0)
|
buffer_to_index[it->second] >= 0)
|
||||||
from_index[i] = it->second;
|
index_to_buffer[i] = it->second;
|
||||||
else
|
else
|
||||||
from_index[i] = -1;
|
index_to_buffer[i] = -1;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Starting at unique lines, extend upwards and downwards.
|
// Starting at unique lines, extend upwards and downwards.
|
||||||
for (i = 0; i < (int)index_hashes.size() - 1; i++) {
|
for (i = 0; i < (int)index_hashes.size() - 1; i++) {
|
||||||
int j = from_index[i];
|
int j = index_to_buffer[i];
|
||||||
if (0 <= j && j + 1 < buffer_hashes.size() &&
|
if (0 <= j && j + 1 < buffer_hashes.size() &&
|
||||||
index_hashes[i + 1] == buffer_hashes[j + 1])
|
index_hashes[i + 1] == buffer_hashes[j + 1])
|
||||||
from_index[i + 1] = j + 1;
|
index_to_buffer[i + 1] = j + 1;
|
||||||
}
|
}
|
||||||
for (i = (int)index_hashes.size(); --i > 0; ) {
|
for (i = (int)index_hashes.size(); --i > 0; ) {
|
||||||
int j = from_index[i];
|
int j = index_to_buffer[i];
|
||||||
if (0 < j && index_hashes[i - 1] == buffer_hashes[j - 1])
|
if (0 < j && index_hashes[i - 1] == buffer_hashes[j - 1])
|
||||||
from_index[i - 1] = j - 1;
|
index_to_buffer[i - 1] = j - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::fill(from_buffer.begin(), from_buffer.end(), -1);
|
// |buffer_to_index| is a reverse mapping of |index_to_buffer|.
|
||||||
|
std::fill(buffer_to_index.begin(), buffer_to_index.end(), -1);
|
||||||
for (i = 0; i < (int)index_hashes.size(); i++)
|
for (i = 0; i < (int)index_hashes.size(); i++)
|
||||||
if (from_index[i] >= 0)
|
if (index_to_buffer[i] >= 0)
|
||||||
from_buffer[from_index[i]] = i;
|
buffer_to_index[index_to_buffer[i]] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
optional<int> WorkingFile::GetBufferLineFromIndexLine(int line) {
|
optional<int> WorkingFile::GetBufferLineFromIndexLine(int line) {
|
||||||
|
@ -19,7 +19,11 @@ struct WorkingFile {
|
|||||||
// Note: This assumes 0-based lines (1-based lines are normally assumed).
|
// Note: This assumes 0-based lines (1-based lines are normally assumed).
|
||||||
std::vector<std::string> buffer_lines;
|
std::vector<std::string> buffer_lines;
|
||||||
// Mappings between index line number and buffer line number.
|
// Mappings between index line number and buffer line number.
|
||||||
// Empty indicates stale.
|
// Empty indicates either buffer or index has been changed and re-computation
|
||||||
|
// is required.
|
||||||
|
// For index_to_buffer[i] == j, if j >= 0, we are confident that index line
|
||||||
|
// i maps to buffer line j; if j == -1, FindMatchingLine will use the nearest
|
||||||
|
// confident lines to resolve its line number.
|
||||||
std::vector<int> index_to_buffer;
|
std::vector<int> index_to_buffer;
|
||||||
std::vector<int> buffer_to_index;
|
std::vector<int> buffer_to_index;
|
||||||
// A set of diagnostics that have been reported for this file.
|
// A set of diagnostics that have been reported for this file.
|
||||||
|
Loading…
Reference in New Issue
Block a user