Use a UTF-8 character iterator in GetOffsetForPosition, which is correct unless UTF-16 surrogate pairs are used (#57)

This commit is contained in:
Fangrui Song 2018-01-13 10:43:37 -08:00
parent dab379ad46
commit 942a0354d3
2 changed files with 18 additions and 17 deletions

View File

@ -4,20 +4,22 @@
#include <algorithm> #include <algorithm>
// VSCode (UTF-16) disagrees with Emacs lsp-mode (UTF-8) on how to represent
// text documents.
// The LSP specification counts `character` in UTF-16 code units; we approximate
// that by stepping over UTF-8 code points instead. The two counts agree for
// every character except those encoded as UTF-16 surrogate pairs.
int GetOffsetForPosition(lsPosition position, const std::string& content) { int GetOffsetForPosition(lsPosition position, const std::string& content) {
if (content.empty()) size_t i = 0;
return 0; for (; position.line > 0 && i < content.size(); i++)
if (content[i] == '\n')
int offset = 0; position.line--;
for (; position.character > 0 && i < content.size(); position.character--)
int remaining_lines = position.line; if (uint8_t(content[i++]) >= 128) {
while (remaining_lines > 0 && offset < static_cast<int>(content.size())) { // Skip 0b10xxxxxx
if (content[offset] == '\n') while (i < content.size() && uint8_t(content[i]) >= 128 && uint8_t(content[i]) < 192)
--remaining_lines; i++;
++offset; }
} return int(i);
return std::min<int>(offset + position.character, content.size());
} }
lsPosition CharPos(const std::string& search, lsPosition CharPos(const std::string& search,

View File

@ -344,10 +344,9 @@ void WorkingFiles::OnChange(const lsTextDocumentDidChangeParams& change) {
} else { } else {
int start_offset = int start_offset =
GetOffsetForPosition(diff.range->start, file->buffer_content); GetOffsetForPosition(diff.range->start, file->buffer_content);
int end_offset = // Ignore TextDocumentContentChangeEvent.rangeLength which causes trouble
diff.rangeLength // when UTF-16 surrogate pairs are used.
? start_offset + *diff.rangeLength int end_offset = GetOffsetForPosition(diff.range->end, file->buffer_content);
: GetOffsetForPosition(diff.range->end, file->buffer_content);
file->buffer_content.replace(file->buffer_content.begin() + start_offset, file->buffer_content.replace(file->buffer_content.begin() + start_offset,
file->buffer_content.begin() + end_offset, file->buffer_content.begin() + end_offset,
diff.text); diff.text);