diff --git a/src/lex_utils.cc b/src/lex_utils.cc index 81f80bb7..59955d31 100644 --- a/src/lex_utils.cc +++ b/src/lex_utils.cc @@ -4,20 +4,22 @@ #include +// VSCode (UTF-16) disagrees with Emacs lsp-mode (UTF-8) on how to represent +// text documents. +// We use a UTF-8 iterator to approximate UTF-16 in the specification (weird). +// This is good enough and fails only for UTF-16 surrogate pairs. int GetOffsetForPosition(lsPosition position, const std::string& content) { - if (content.empty()) - return 0; - - int offset = 0; - - int remaining_lines = position.line; - while (remaining_lines > 0 && offset < static_cast(content.size())) { - if (content[offset] == '\n') - --remaining_lines; - ++offset; - } - - return std::min(offset + position.character, content.size()); + size_t i = 0; + for (; position.line > 0 && i < content.size(); i++) + if (content[i] == '\n') + position.line--; + for (; position.character > 0 && i < content.size(); position.character--) + if (uint8_t(content[i++]) >= 128) { + // Skip 0b10xxxxxx + while (i < content.size() && uint8_t(content[i]) >= 128 && uint8_t(content[i]) < 192) + i++; + } + return int(i); } lsPosition CharPos(const std::string& search, diff --git a/src/working_files.cc b/src/working_files.cc index 62976f07..b75bcfbe 100644 --- a/src/working_files.cc +++ b/src/working_files.cc @@ -344,10 +344,9 @@ void WorkingFiles::OnChange(const lsTextDocumentDidChangeParams& change) { } else { int start_offset = GetOffsetForPosition(diff.range->start, file->buffer_content); - int end_offset = - diff.rangeLength - ? start_offset + *diff.rangeLength - : GetOffsetForPosition(diff.range->end, file->buffer_content); + // Ignore TextDocumentContentChangeEvent.rangeLength which causes trouble + // when UTF-16 surrogate pairs are used. + int end_offset = GetOffsetForPosition(diff.range->end, file->buffer_content); file->buffer_content.replace(file->buffer_content.begin() + start_offset, file->buffer_content.begin() + end_offset, diff.text);