indexer: change Pos computation from byte offset to UTF-8 encoded code point offset

This commit is contained in:
Fangrui Song 2019-02-16 17:23:47 +08:00
parent 3f6ece0a44
commit ea774dadf5

View File

@@ -45,6 +45,16 @@ static Pos Decomposed2LineAndCol(const SourceManager &SM,
std::pair<FileID, unsigned> I) { std::pair<FileID, unsigned> I) {
int l = SM.getLineNumber(I.first, I.second) - 1, int l = SM.getLineNumber(I.first, I.second) - 1,
c = SM.getColumnNumber(I.first, I.second) - 1; c = SM.getColumnNumber(I.first, I.second) - 1;
bool Invalid = false;
StringRef Buf = SM.getBufferData(I.first, &Invalid);
if (!Invalid) {
StringRef P = Buf.substr(I.second - c, c);
c = 0;
for (size_t i = 0; i < P.size(); )
if (c++, (uint8_t)P[i++] >= 128)
while (i < P.size() && (uint8_t)P[i] >= 128 && (uint8_t)P[i] < 192)
i++;
}
return {(int16_t)std::min<int>(l, INT16_MAX), return {(int16_t)std::min<int>(l, INT16_MAX),
(int16_t)std::min<int>(c, INT16_MAX)}; (int16_t)std::min<int>(c, INT16_MAX)};
} }