diff --git a/src/clang_cursor.cc b/src/clang_cursor.cc index 45842bae..5c6cea3f 100644 --- a/src/clang_cursor.cc +++ b/src/clang_cursor.cc @@ -21,19 +21,6 @@ Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file) { Position((int16_t)end_line, (int16_t)end_column) /*end*/); } -uint64_t HashUSR(const char* usr) { - extern int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k, - uint8_t *out, const size_t outlen); - union { - uint64_t ret; - uint8_t out[8]; - }; - const uint8_t k[16] = {0xd0, 0xe5, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, - 0x61, 0x79, 0xea, 0x70, 0xca, 0x70, 0xf0, 0x0d}; - (void)siphash(reinterpret_cast(usr), strlen(usr), k, out, 8); - return ret; -} - // TODO Place this global variable into config int g_enable_comments; diff --git a/src/clang_cursor.h b/src/clang_cursor.h index c4a7c580..3f0a33dc 100644 --- a/src/clang_cursor.h +++ b/src/clang_cursor.h @@ -14,8 +14,6 @@ using Usr = uint64_t; Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file = nullptr); -Usr HashUSR(const char* usr); - class ClangType { public: ClangType(); diff --git a/src/utils.cc b/src/utils.cc index 772d7476..182403e6 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,23 @@ std::string Trim(std::string s) { return s; } +uint64_t HashUSR(const char* s) { + return HashUSR(s, strlen(s)); +} + +uint64_t HashUSR(const char* s, size_t n) { + extern int siphash(const uint8_t *in, const size_t inlen, const uint8_t *k, + uint8_t *out, const size_t outlen); + union { + uint64_t ret; + uint8_t out[8]; + }; + const uint8_t k[16] = {0xd0, 0xe5, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, + 0x61, 0x79, 0xea, 0x70, 0xca, 0x70, 0xf0, 0x0d}; + (void)siphash(reinterpret_cast(s), n, k, out, 8); + return ret; +} + // See http://stackoverflow.com/a/2072890 bool EndsWith(const std::string& value, const std::string& ending) { if (ending.size() > value.size()) diff --git a/src/utils.h b/src/utils.h index 87a52ebe..c1f5d6a0 100644 --- a/src/utils.h +++ b/src/utils.h @@ -18,6 +18,9 @@ void TrimEndInPlace(std::string& s); void TrimInPlace(std::string& s); std::string Trim(std::string s); +uint64_t HashUSR(const char* s); +uint64_t HashUSR(const char* s, size_t n); + // Returns true if |value| starts/ends with |start| or |ending|. bool StartsWith(const std::string& value, const std::string& start); bool EndsWith(const std::string& value, const std::string& ending); diff --git a/src/working_files.cc b/src/working_files.cc index b75bcfbe..0b14c06d 100644 --- a/src/working_files.cc +++ b/src/working_files.cc @@ -28,7 +28,6 @@ lsPosition GetPositionForOffset(const std::string& content, int offset) { return result; } - } // namespace std::vector WorkingFiles::Snapshot::AsUnsavedFiles() const { @@ -56,6 +55,10 @@ WorkingFile::WorkingFile(const std::string& filename, void WorkingFile::SetIndexContent(const std::string& index_content) { index_lines = ToLines(index_content, false /*trim_whitespace*/); + index_to_buffer.clear(); + buffer_to_index.clear(); + + // TODO Remove // Build lookup buffer. index_lines_lookup.clear(); index_lines_lookup.reserve(index_lines.size()); @@ -74,6 +77,10 @@ void WorkingFile::OnBufferContentUpdated() { all_buffer_lines = ToLines(buffer_content, true /*trim_whitespace*/); raw_buffer_lines = ToLines(buffer_content, false /*trim_whitespace*/); + index_to_buffer.clear(); + buffer_to_index.clear(); + + // TODO Remove // Build lookup buffer. all_buffer_lines_lookup.clear(); all_buffer_lines_lookup.reserve(all_buffer_lines.size()); @@ -88,7 +95,79 @@ void WorkingFile::OnBufferContentUpdated() { } } -optional WorkingFile::GetBufferLineFromIndexLine(int index_line) const { +// Variant of Paul Heckel's diff algorithm +void WorkingFile::ComputeLineMapping() { + std::unordered_map hash_to_unique; + std::vector index_hashes(index_lines.size()), + buffer_hashes(all_buffer_lines.size()); + std::vector& from_index = index_to_buffer; + std::vector& from_buffer = buffer_to_index; + from_index.resize(index_lines.size()); + from_buffer.resize(all_buffer_lines.size()); + hash_to_unique.reserve(std::max(from_index.size(), from_buffer.size())); + int i = 0; + for (auto& line : index_lines) { + std::string trimmed = Trim(line); + uint64_t h = HashUSR(trimmed.data(), trimmed.size()); + auto it = hash_to_unique.find(h); + if (it == hash_to_unique.end()) { + hash_to_unique[h] = i; + from_index[i] = i; + } else { + if (it->second >= 0) + from_index[it->second] = -1; + from_index[i] = it->second = -1; + } + index_hashes[i++] = h; + } + + i = 0; + hash_to_unique.clear(); + for (auto& line : all_buffer_lines) { + uint64_t h = HashUSR(line.data(), line.size()); + auto it = hash_to_unique.find(h); + if (it == hash_to_unique.end()) { + hash_to_unique[h] = i; + from_buffer[i] = i; + } else { + if (it->second >= 0) + from_buffer[it->second] = -1; + from_buffer[i] = it->second = -1; + } + buffer_hashes[i++] = h; + } + + i = 0; + for (auto h : index_hashes) { + if (from_index[i] >= 0) { + auto it = hash_to_unique.find(h); + if (it != hash_to_unique.end() && it->second >= 0) { + from_index[i] = it->second; + from_buffer[it->second] = i; + } else + from_index[i] = -1; + } + i++; + } + + for (i = 0; i < (int)index_hashes.size() - 1; i++) { + int j = from_index[i]; + if (0 <= j && j + 1 < buffer_hashes.size() && + index_hashes[i + 1] == buffer_hashes[j + 1]) { + from_index[i + 1] = j + 1; + from_buffer[j + 1] = i + 1; + } + } + for (i = (int)index_hashes.size(); --i > 0; ) { + int j = from_index[i]; + if (0 < j && index_hashes[i - 1] == buffer_hashes[j - 1]) { + from_index[i - 1] = j - 1; + from_buffer[j - 1] = i - 1; + } + } +} + +optional WorkingFile::GetBufferLineFromIndexLine(int index_line) { // The implementation is simple but works pretty well for most cases. We // lookup the line contents in the indexed file contents, and try to find the // most similar line in the current buffer file. @@ -109,16 +188,39 @@ optional WorkingFile::GetBufferLineFromIndexLine(int index_line) const { return nullopt; } + // TODO Remove all_buffer_lines_lookup and only use diff // Find the line in the cached index file. We'll try to find the most similar // line in the buffer and return the index for that. std::string index = Trim(index_lines[index_line - 1]); auto buffer_it = all_buffer_lines_lookup.find(index); if (buffer_it == all_buffer_lines_lookup.end()) { - // TODO: Use levenshtein distance to find the best match (but only to an - // extent) - return nullopt; + if (index_to_buffer.empty()) + ComputeLineMapping(); + index_line--; + int up = index_line, down = index_line + 1; + while (up >= 0 && index_to_buffer[up] < 0) + up--; + while (down < int(index_to_buffer.size()) && index_to_buffer[down] < 0) + down++; + + int ret; + if (up >= 0) { + if (down == int(index_to_buffer.size()) || + index_line - up < down - index_line) + ret = index_to_buffer[up] - up + index_line; + else + ret = index_to_buffer[down] - down + index_line; + } else if (down < int(index_to_buffer.size())) + ret = index_to_buffer[down] - down + index_line; + else + return nullopt; + ret = std::max(ret, 0); + ret = std::min(ret, int(buffer_to_index.size()) - 1); + return ret + 1; } + // TODO: Use levenshtein distance to find the best match (but only to an + // extent) // From all the identical lines, return the one which is closest to // |index_line|. There will usually only be one identical line. assert(!buffer_it->second.empty()); @@ -131,11 +233,11 @@ optional WorkingFile::GetBufferLineFromIndexLine(int index_line) const { closest_buffer_line = buffer_line; } } - return closest_buffer_line; + } -optional WorkingFile::GetIndexLineFromBufferLine(int buffer_line) const { +optional WorkingFile::GetIndexLineFromBufferLine(int buffer_line) { // See GetBufferLineFromIndexLine for additional comments. // Note: |index_line| and |buffer_line| are 1-based. @@ -153,9 +255,29 @@ optional WorkingFile::GetIndexLineFromBufferLine(int buffer_line) const { std::string buffer = all_buffer_lines[buffer_line - 1]; auto index_it = index_lines_lookup.find(buffer); if (index_it == index_lines_lookup.end()) { - // TODO: Use levenshtein distance to find the best match (but only to an - // extent) - return nullopt; + if (buffer_to_index.empty()) + ComputeLineMapping(); + buffer_line--; + int up = buffer_line, down = buffer_line + 1; + while (up >= 0 && buffer_to_index[up] < 0) + up--; + while (down < int(buffer_to_index.size()) && buffer_to_index[down] < 0) + down++; + + int ret; + if (up >= 0) { + if (down == int(buffer_to_index.size()) || + buffer_line - up <= down - buffer_line) + ret = buffer_to_index[up] - up + buffer_line; + else + ret = buffer_to_index[down] - down + buffer_line; + } else if (down < int(buffer_to_index.size())) + ret = buffer_to_index[down] - down + buffer_line; + else + return nullopt; + ret = std::max(ret, 0); + ret = std::min(ret, int(index_to_buffer.size()) - 1); + return ret + 1; } // From all the identical lines, return the one which is closest to @@ -170,13 +292,12 @@ optional WorkingFile::GetIndexLineFromBufferLine(int buffer_line) const { closest_index_line = index_line; } } - return closest_index_line; } optional WorkingFile::GetBufferLineContentFromIndexLine( int indexed_line, - optional* out_buffer_line) const { + optional* out_buffer_line) { optional buffer_line = GetBufferLineFromIndexLine(indexed_line); if (out_buffer_line) *out_buffer_line = buffer_line; diff --git a/src/working_files.h b/src/working_files.h index 67339b77..905427f5 100644 --- a/src/working_files.h +++ b/src/working_files.h @@ -27,6 +27,10 @@ struct WorkingFile { // This map goes from buffer-line -> indices+1 in all_buffer_lines. // Note: The items in the value entry are 1-based liness. std::unordered_map> all_buffer_lines_lookup; + // Mappings between index line number and buffer line number. + // Empty indicates stale. + std::vector index_to_buffer; + std::vector buffer_to_index; // A set of diagnostics that have been reported for this file. // NOTE: _ is appended because it must be accessed under the WorkingFiles // lock! @@ -41,14 +45,14 @@ struct WorkingFile { // Find the buffer-line which should be shown for |indexed_line|. This // accepts and returns 1-based lines. - optional GetBufferLineFromIndexLine(int indexed_line) const; + optional GetBufferLineFromIndexLine(int indexed_line); // Find the indexed-line which should be shown for |buffer_line|. This // accepts and returns 1-based lines. - optional GetIndexLineFromBufferLine(int buffer_line) const; + optional GetIndexLineFromBufferLine(int buffer_line); optional GetBufferLineContentFromIndexLine( int indexed_line, - optional* out_buffer_line) const; + optional* out_buffer_line); // TODO: Move FindClosestCallNameInBuffer and FindStableCompletionSource into // lex_utils.h/cc @@ -73,6 +77,10 @@ struct WorkingFile { lsPosition FindStableCompletionSource(lsPosition position, bool* is_global_completion, std::string* existing_completion) const; + + private: + // Compute index_to_buffer and buffer_to_index. + void ComputeLineMapping(); }; struct WorkingFiles {