From 4f9e7b219e280f03b1ea255db7519bb7618bf3ca Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 16 Feb 2019 17:23:47 +0800 Subject: [PATCH] indexer: change Pos computation from byte offset to UTF-8 encoded code point offset --- src/clang_tu.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/clang_tu.cc b/src/clang_tu.cc index 449b7912..d10ed746 100644 --- a/src/clang_tu.cc +++ b/src/clang_tu.cc @@ -33,6 +33,16 @@ static Pos Decomposed2LineAndCol(const SourceManager &SM, std::pair I) { int l = (int)SM.getLineNumber(I.first, I.second) - 1, c = (int)SM.getColumnNumber(I.first, I.second) - 1; + bool Invalid = false; + StringRef Buf = SM.getBufferData(I.first, &Invalid); + if (!Invalid) { + StringRef P = Buf.substr(I.second - c, c); + c = 0; + for (size_t i = 0; i < P.size(); ) + if (c++, (uint8_t)P[i++] >= 128) + while (i < P.size() && (uint8_t)P[i] >= 128 && (uint8_t)P[i] < 192) + i++; + } return {(int16_t)std::min(l, INT16_MAX), (int16_t)std::min(c, INT16_MAX)}; }