mirror of
https://github.com/MaskRay/ccls.git
synced 2024-11-26 09:31:59 +00:00
Use Paul Heckel's diff algorithm to convert between buffer/index line number
This commit is contained in:
parent
b81589f6ed
commit
eec5c77d6a
@ -21,19 +21,6 @@ Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file) {
|
|||||||
Position((int16_t)end_line, (int16_t)end_column) /*end*/);
|
Position((int16_t)end_line, (int16_t)end_column) /*end*/);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hashes the NUL-terminated string |usr| (terminator excluded) by calling
// siphash with a constant 16-byte key and returns the 8 output bytes as a
// uint64_t.
uint64_t HashUSR(const char* usr) {
  extern int siphash(const uint8_t* in, const size_t inlen, const uint8_t* k,
                     uint8_t* out, const size_t outlen);
  const uint8_t k[16] = {0xd0, 0xe5, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52,
                         0x61, 0x79, 0xea, 0x70, 0xca, 0x70, 0xf0, 0x0d};
  uint8_t out[8];
  (void)siphash(reinterpret_cast<const uint8_t*>(usr), strlen(usr), k, out,
                sizeof(out));

  // Copy the output bytes into the integer instead of reading them back
  // through a different union member: reading an inactive union member is
  // undefined behaviour in C++, whereas memcpy is well defined and produces
  // the same value (the bytes are reinterpreted in host byte order).
  uint64_t ret;
  memcpy(&ret, out, sizeof(ret));
  return ret;
}
|
|
||||||
|
|
||||||
// TODO Place this global variable into config
|
// TODO Place this global variable into config
|
||||||
int g_enable_comments;
|
int g_enable_comments;
|
||||||
|
|
||||||
|
@ -14,8 +14,6 @@ using Usr = uint64_t;
|
|||||||
Range ResolveCXSourceRange(const CXSourceRange& range,
|
Range ResolveCXSourceRange(const CXSourceRange& range,
|
||||||
CXFile* cx_file = nullptr);
|
CXFile* cx_file = nullptr);
|
||||||
|
|
||||||
Usr HashUSR(const char* usr);
|
|
||||||
|
|
||||||
class ClangType {
|
class ClangType {
|
||||||
public:
|
public:
|
||||||
ClangType();
|
ClangType();
|
||||||
|
18
src/utils.cc
18
src/utils.cc
@ -9,6 +9,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
#include <cstring>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -51,6 +52,23 @@ std::string Trim(std::string s) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t HashUSR(const char* s) {
|
||||||
|
return HashUSR(s, strlen(s));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hashes the |n| bytes starting at |s| by calling siphash with a constant
// 16-byte key and returns the 8 output bytes as a uint64_t.
uint64_t HashUSR(const char* s, size_t n) {
  extern int siphash(const uint8_t* in, const size_t inlen, const uint8_t* k,
                     uint8_t* out, const size_t outlen);
  const uint8_t k[16] = {0xd0, 0xe5, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52,
                         0x61, 0x79, 0xea, 0x70, 0xca, 0x70, 0xf0, 0x0d};
  uint8_t out[8];
  (void)siphash(reinterpret_cast<const uint8_t*>(s), n, k, out, sizeof(out));

  // Copy the output bytes into the integer instead of reading them back
  // through a different union member: reading an inactive union member is
  // undefined behaviour in C++, whereas memcpy is well defined and produces
  // the same value (the bytes are reinterpreted in host byte order).
  uint64_t ret;
  memcpy(&ret, out, sizeof(ret));
  return ret;
}
|
||||||
|
|
||||||
// See http://stackoverflow.com/a/2072890
|
// See http://stackoverflow.com/a/2072890
|
||||||
bool EndsWith(const std::string& value, const std::string& ending) {
|
bool EndsWith(const std::string& value, const std::string& ending) {
|
||||||
if (ending.size() > value.size())
|
if (ending.size() > value.size())
|
||||||
|
@ -18,6 +18,9 @@ void TrimEndInPlace(std::string& s);
|
|||||||
void TrimInPlace(std::string& s);
|
void TrimInPlace(std::string& s);
|
||||||
std::string Trim(std::string s);
|
std::string Trim(std::string s);
|
||||||
|
|
||||||
|
uint64_t HashUSR(const char* s);
|
||||||
|
uint64_t HashUSR(const char* s, size_t n);
|
||||||
|
|
||||||
// Returns true if |value| starts/ends with |start| or |ending|.
|
// Returns true if |value| starts/ends with |start| or |ending|.
|
||||||
bool StartsWith(const std::string& value, const std::string& start);
|
bool StartsWith(const std::string& value, const std::string& start);
|
||||||
bool EndsWith(const std::string& value, const std::string& ending);
|
bool EndsWith(const std::string& value, const std::string& ending);
|
||||||
|
@ -28,7 +28,6 @@ lsPosition GetPositionForOffset(const std::string& content, int offset) {
|
|||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
std::vector<CXUnsavedFile> WorkingFiles::Snapshot::AsUnsavedFiles() const {
|
std::vector<CXUnsavedFile> WorkingFiles::Snapshot::AsUnsavedFiles() const {
|
||||||
@ -56,6 +55,10 @@ WorkingFile::WorkingFile(const std::string& filename,
|
|||||||
void WorkingFile::SetIndexContent(const std::string& index_content) {
|
void WorkingFile::SetIndexContent(const std::string& index_content) {
|
||||||
index_lines = ToLines(index_content, false /*trim_whitespace*/);
|
index_lines = ToLines(index_content, false /*trim_whitespace*/);
|
||||||
|
|
||||||
|
index_to_buffer.clear();
|
||||||
|
buffer_to_index.clear();
|
||||||
|
|
||||||
|
// TODO Remove
|
||||||
// Build lookup buffer.
|
// Build lookup buffer.
|
||||||
index_lines_lookup.clear();
|
index_lines_lookup.clear();
|
||||||
index_lines_lookup.reserve(index_lines.size());
|
index_lines_lookup.reserve(index_lines.size());
|
||||||
@ -74,6 +77,10 @@ void WorkingFile::OnBufferContentUpdated() {
|
|||||||
all_buffer_lines = ToLines(buffer_content, true /*trim_whitespace*/);
|
all_buffer_lines = ToLines(buffer_content, true /*trim_whitespace*/);
|
||||||
raw_buffer_lines = ToLines(buffer_content, false /*trim_whitespace*/);
|
raw_buffer_lines = ToLines(buffer_content, false /*trim_whitespace*/);
|
||||||
|
|
||||||
|
index_to_buffer.clear();
|
||||||
|
buffer_to_index.clear();
|
||||||
|
|
||||||
|
// TODO Remove
|
||||||
// Build lookup buffer.
|
// Build lookup buffer.
|
||||||
all_buffer_lines_lookup.clear();
|
all_buffer_lines_lookup.clear();
|
||||||
all_buffer_lines_lookup.reserve(all_buffer_lines.size());
|
all_buffer_lines_lookup.reserve(all_buffer_lines.size());
|
||||||
@ -88,7 +95,79 @@ void WorkingFile::OnBufferContentUpdated() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
optional<int> WorkingFile::GetBufferLineFromIndexLine(int index_line) const {
|
// Variant of Paul Heckel's diff algorithm
|
||||||
|
void WorkingFile::ComputeLineMapping() {
|
||||||
|
std::unordered_map<uint64_t, int> hash_to_unique;
|
||||||
|
std::vector<uint64_t> index_hashes(index_lines.size()),
|
||||||
|
buffer_hashes(all_buffer_lines.size());
|
||||||
|
std::vector<int>& from_index = index_to_buffer;
|
||||||
|
std::vector<int>& from_buffer = buffer_to_index;
|
||||||
|
from_index.resize(index_lines.size());
|
||||||
|
from_buffer.resize(all_buffer_lines.size());
|
||||||
|
hash_to_unique.reserve(std::max(from_index.size(), from_buffer.size()));
|
||||||
|
int i = 0;
|
||||||
|
for (auto& line : index_lines) {
|
||||||
|
std::string trimmed = Trim(line);
|
||||||
|
uint64_t h = HashUSR(trimmed.data(), trimmed.size());
|
||||||
|
auto it = hash_to_unique.find(h);
|
||||||
|
if (it == hash_to_unique.end()) {
|
||||||
|
hash_to_unique[h] = i;
|
||||||
|
from_index[i] = i;
|
||||||
|
} else {
|
||||||
|
if (it->second >= 0)
|
||||||
|
from_index[it->second] = -1;
|
||||||
|
from_index[i] = it->second = -1;
|
||||||
|
}
|
||||||
|
index_hashes[i++] = h;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
hash_to_unique.clear();
|
||||||
|
for (auto& line : all_buffer_lines) {
|
||||||
|
uint64_t h = HashUSR(line.data(), line.size());
|
||||||
|
auto it = hash_to_unique.find(h);
|
||||||
|
if (it == hash_to_unique.end()) {
|
||||||
|
hash_to_unique[h] = i;
|
||||||
|
from_buffer[i] = i;
|
||||||
|
} else {
|
||||||
|
if (it->second >= 0)
|
||||||
|
from_buffer[it->second] = -1;
|
||||||
|
from_buffer[i] = it->second = -1;
|
||||||
|
}
|
||||||
|
buffer_hashes[i++] = h;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
for (auto h : index_hashes) {
|
||||||
|
if (from_index[i] >= 0) {
|
||||||
|
auto it = hash_to_unique.find(h);
|
||||||
|
if (it != hash_to_unique.end() && it->second >= 0) {
|
||||||
|
from_index[i] = it->second;
|
||||||
|
from_buffer[it->second] = i;
|
||||||
|
} else
|
||||||
|
from_index[i] = -1;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < (int)index_hashes.size() - 1; i++) {
|
||||||
|
int j = from_index[i];
|
||||||
|
if (0 <= j && j + 1 < buffer_hashes.size() &&
|
||||||
|
index_hashes[i + 1] == buffer_hashes[j + 1]) {
|
||||||
|
from_index[i + 1] = j + 1;
|
||||||
|
from_buffer[j + 1] = i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i = (int)index_hashes.size(); --i > 0; ) {
|
||||||
|
int j = from_index[i];
|
||||||
|
if (0 < j && index_hashes[i - 1] == buffer_hashes[j - 1]) {
|
||||||
|
from_index[i - 1] = j - 1;
|
||||||
|
from_buffer[j - 1] = i - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
optional<int> WorkingFile::GetBufferLineFromIndexLine(int index_line) {
|
||||||
// The implementation is simple but works pretty well for most cases. We
|
// The implementation is simple but works pretty well for most cases. We
|
||||||
// lookup the line contents in the indexed file contents, and try to find the
|
// lookup the line contents in the indexed file contents, and try to find the
|
||||||
// most similar line in the current buffer file.
|
// most similar line in the current buffer file.
|
||||||
@ -109,16 +188,39 @@ optional<int> WorkingFile::GetBufferLineFromIndexLine(int index_line) const {
|
|||||||
return nullopt;
|
return nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO Remove all_buffer_lines_lookup and only use diff
|
||||||
// Find the line in the cached index file. We'll try to find the most similar
|
// Find the line in the cached index file. We'll try to find the most similar
|
||||||
// line in the buffer and return the index for that.
|
// line in the buffer and return the index for that.
|
||||||
std::string index = Trim(index_lines[index_line - 1]);
|
std::string index = Trim(index_lines[index_line - 1]);
|
||||||
auto buffer_it = all_buffer_lines_lookup.find(index);
|
auto buffer_it = all_buffer_lines_lookup.find(index);
|
||||||
if (buffer_it == all_buffer_lines_lookup.end()) {
|
if (buffer_it == all_buffer_lines_lookup.end()) {
|
||||||
// TODO: Use levenshtein distance to find the best match (but only to an
|
if (index_to_buffer.empty())
|
||||||
// extent)
|
ComputeLineMapping();
|
||||||
return nullopt;
|
index_line--;
|
||||||
|
int up = index_line, down = index_line + 1;
|
||||||
|
while (up >= 0 && index_to_buffer[up] < 0)
|
||||||
|
up--;
|
||||||
|
while (down < int(index_to_buffer.size()) && index_to_buffer[down] < 0)
|
||||||
|
down++;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
if (up >= 0) {
|
||||||
|
if (down == int(index_to_buffer.size()) ||
|
||||||
|
index_line - up < down - index_line)
|
||||||
|
ret = index_to_buffer[up] - up + index_line;
|
||||||
|
else
|
||||||
|
ret = index_to_buffer[down] - down + index_line;
|
||||||
|
} else if (down < int(index_to_buffer.size()))
|
||||||
|
ret = index_to_buffer[down] - down + index_line;
|
||||||
|
else
|
||||||
|
return nullopt;
|
||||||
|
ret = std::max(ret, 0);
|
||||||
|
ret = std::min(ret, int(buffer_to_index.size()) - 1);
|
||||||
|
return ret + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Use levenshtein distance to find the best match (but only to an
|
||||||
|
// extent)
|
||||||
// From all the identical lines, return the one which is closest to
|
// From all the identical lines, return the one which is closest to
|
||||||
// |index_line|. There will usually only be one identical line.
|
// |index_line|. There will usually only be one identical line.
|
||||||
assert(!buffer_it->second.empty());
|
assert(!buffer_it->second.empty());
|
||||||
@ -131,11 +233,11 @@ optional<int> WorkingFile::GetBufferLineFromIndexLine(int index_line) const {
|
|||||||
closest_buffer_line = buffer_line;
|
closest_buffer_line = buffer_line;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return closest_buffer_line;
|
return closest_buffer_line;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
optional<int> WorkingFile::GetIndexLineFromBufferLine(int buffer_line) const {
|
optional<int> WorkingFile::GetIndexLineFromBufferLine(int buffer_line) {
|
||||||
// See GetBufferLineFromIndexLine for additional comments.
|
// See GetBufferLineFromIndexLine for additional comments.
|
||||||
|
|
||||||
// Note: |index_line| and |buffer_line| are 1-based.
|
// Note: |index_line| and |buffer_line| are 1-based.
|
||||||
@ -153,9 +255,29 @@ optional<int> WorkingFile::GetIndexLineFromBufferLine(int buffer_line) const {
|
|||||||
std::string buffer = all_buffer_lines[buffer_line - 1];
|
std::string buffer = all_buffer_lines[buffer_line - 1];
|
||||||
auto index_it = index_lines_lookup.find(buffer);
|
auto index_it = index_lines_lookup.find(buffer);
|
||||||
if (index_it == index_lines_lookup.end()) {
|
if (index_it == index_lines_lookup.end()) {
|
||||||
// TODO: Use levenshtein distance to find the best match (but only to an
|
if (buffer_to_index.empty())
|
||||||
// extent)
|
ComputeLineMapping();
|
||||||
return nullopt;
|
buffer_line--;
|
||||||
|
int up = buffer_line, down = buffer_line + 1;
|
||||||
|
while (up >= 0 && buffer_to_index[up] < 0)
|
||||||
|
up--;
|
||||||
|
while (down < int(buffer_to_index.size()) && buffer_to_index[down] < 0)
|
||||||
|
down++;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
if (up >= 0) {
|
||||||
|
if (down == int(buffer_to_index.size()) ||
|
||||||
|
buffer_line - up <= down - buffer_line)
|
||||||
|
ret = buffer_to_index[up] - up + buffer_line;
|
||||||
|
else
|
||||||
|
ret = buffer_to_index[down] - down + buffer_line;
|
||||||
|
} else if (down < int(buffer_to_index.size()))
|
||||||
|
ret = buffer_to_index[down] - down + buffer_line;
|
||||||
|
else
|
||||||
|
return nullopt;
|
||||||
|
ret = std::max(ret, 0);
|
||||||
|
ret = std::min(ret, int(index_to_buffer.size()) - 1);
|
||||||
|
return ret + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// From all the identical lines, return the one which is closest to
|
// From all the identical lines, return the one which is closest to
|
||||||
@ -170,13 +292,12 @@ optional<int> WorkingFile::GetIndexLineFromBufferLine(int buffer_line) const {
|
|||||||
closest_index_line = index_line;
|
closest_index_line = index_line;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return closest_index_line;
|
return closest_index_line;
|
||||||
}
|
}
|
||||||
|
|
||||||
optional<std::string> WorkingFile::GetBufferLineContentFromIndexLine(
|
optional<std::string> WorkingFile::GetBufferLineContentFromIndexLine(
|
||||||
int indexed_line,
|
int indexed_line,
|
||||||
optional<int>* out_buffer_line) const {
|
optional<int>* out_buffer_line) {
|
||||||
optional<int> buffer_line = GetBufferLineFromIndexLine(indexed_line);
|
optional<int> buffer_line = GetBufferLineFromIndexLine(indexed_line);
|
||||||
if (out_buffer_line)
|
if (out_buffer_line)
|
||||||
*out_buffer_line = buffer_line;
|
*out_buffer_line = buffer_line;
|
||||||
|
@ -27,6 +27,10 @@ struct WorkingFile {
|
|||||||
// This map goes from buffer-line -> indices+1 in all_buffer_lines.
|
// This map goes from buffer-line -> indices+1 in all_buffer_lines.
|
||||||
// Note: The items in the value entry are 1-based lines.
|
// Note: The items in the value entry are 1-based lines.
|
||||||
std::unordered_map<std::string, std::vector<int>> all_buffer_lines_lookup;
|
std::unordered_map<std::string, std::vector<int>> all_buffer_lines_lookup;
|
||||||
|
// Mappings between index line number and buffer line number.
|
||||||
|
// Empty indicates stale.
|
||||||
|
std::vector<int> index_to_buffer;
|
||||||
|
std::vector<int> buffer_to_index;
|
||||||
// A set of diagnostics that have been reported for this file.
|
// A set of diagnostics that have been reported for this file.
|
||||||
// NOTE: _ is appended because it must be accessed under the WorkingFiles
|
// NOTE: _ is appended because it must be accessed under the WorkingFiles
|
||||||
// lock!
|
// lock!
|
||||||
@ -41,14 +45,14 @@ struct WorkingFile {
|
|||||||
|
|
||||||
// Find the buffer-line which should be shown for |indexed_line|. This
|
// Find the buffer-line which should be shown for |indexed_line|. This
|
||||||
// accepts and returns 1-based lines.
|
// accepts and returns 1-based lines.
|
||||||
optional<int> GetBufferLineFromIndexLine(int indexed_line) const;
|
optional<int> GetBufferLineFromIndexLine(int indexed_line);
|
||||||
// Find the indexed-line which should be shown for |buffer_line|. This
|
// Find the indexed-line which should be shown for |buffer_line|. This
|
||||||
// accepts and returns 1-based lines.
|
// accepts and returns 1-based lines.
|
||||||
optional<int> GetIndexLineFromBufferLine(int buffer_line) const;
|
optional<int> GetIndexLineFromBufferLine(int buffer_line);
|
||||||
|
|
||||||
optional<std::string> GetBufferLineContentFromIndexLine(
|
optional<std::string> GetBufferLineContentFromIndexLine(
|
||||||
int indexed_line,
|
int indexed_line,
|
||||||
optional<int>* out_buffer_line) const;
|
optional<int>* out_buffer_line);
|
||||||
|
|
||||||
// TODO: Move FindClosestCallNameInBuffer and FindStableCompletionSource into
|
// TODO: Move FindClosestCallNameInBuffer and FindStableCompletionSource into
|
||||||
// lex_utils.h/cc
|
// lex_utils.h/cc
|
||||||
@ -73,6 +77,10 @@ struct WorkingFile {
|
|||||||
lsPosition FindStableCompletionSource(lsPosition position,
|
lsPosition FindStableCompletionSource(lsPosition position,
|
||||||
bool* is_global_completion,
|
bool* is_global_completion,
|
||||||
std::string* existing_completion) const;
|
std::string* existing_completion) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Compute index_to_buffer and buffer_to_index.
|
||||||
|
void ComputeLineMapping();
|
||||||
};
|
};
|
||||||
|
|
||||||
struct WorkingFiles {
|
struct WorkingFiles {
|
||||||
|
Loading…
Reference in New Issue
Block a user