mirror of
https://github.com/MaskRay/ccls.git
synced 2024-11-27 18:11:57 +00:00
indexer: replace invalid UTF-8 byte sequences in comments with '?'
This commit is contained in:
parent
ff4ee614b9
commit
71fb3f4fe9
@ -423,22 +423,23 @@ public:
|
|||||||
unsigned start_column = sm.getLineNumber(bInfo.first, bInfo.second);
|
unsigned start_column = sm.getLineNumber(bInfo.first, bInfo.second);
|
||||||
std::string ret;
|
std::string ret;
|
||||||
int pad = -1;
|
int pad = -1;
|
||||||
for (const char *p = raw.data(), *e = raw.end(); p < e;) {
|
for (const uint8_t *p = raw.bytes_begin(), *e = raw.bytes_end(); p < e;) {
|
||||||
// The first line starts with a comment marker, but the rest needs
|
// The first line starts with a comment marker, but the rest needs
|
||||||
// un-indenting.
|
// un-indenting.
|
||||||
unsigned skip = start_column - 1;
|
unsigned skip = start_column - 1;
|
||||||
for (; skip > 0 && p < e && (*p == ' ' || *p == '\t'); p++)
|
for (; skip > 0 && p < e && (*p == ' ' || *p == '\t'); p++)
|
||||||
skip--;
|
skip--;
|
||||||
const char *q = p;
|
bool high = false;
|
||||||
|
const uint8_t *q = p;
|
||||||
while (q < e && *q != '\n')
|
while (q < e && *q != '\n')
|
||||||
q++;
|
high |= *q++ >= 0x80;
|
||||||
if (q < e)
|
if (q < e)
|
||||||
q++;
|
q++;
|
||||||
// A minimalist approach to skip Doxygen comment markers.
|
// A minimalist approach to skip Doxygen comment markers.
|
||||||
// See https://www.stack.nl/~dimitri/doxygen/manual/docblocks.html
|
// See https://www.stack.nl/~dimitri/doxygen/manual/docblocks.html
|
||||||
if (pad < 0) {
|
if (pad < 0) {
|
||||||
// First line, detect the length of comment marker and put into |pad|
|
// First line, detect the length of comment marker and put into |pad|
|
||||||
const char *begin = p;
|
const uint8_t *begin = p;
|
||||||
while (p < e && (*p == '/' || *p == '*' || *p == '-' || *p == '='))
|
while (p < e && (*p == '/' || *p == '*' || *p == '-' || *p == '='))
|
||||||
p++;
|
p++;
|
||||||
if (p < e && (*p == '<' || *p == '!'))
|
if (p < e && (*p == '<' || *p == '!'))
|
||||||
@ -456,7 +457,24 @@ public:
|
|||||||
(*p == ' ' || *p == '/' || *p == '*' || *p == '<' || *p == '!'))
|
(*p == ' ' || *p == '/' || *p == '*' || *p == '<' || *p == '!'))
|
||||||
prefix--, p++;
|
prefix--, p++;
|
||||||
}
|
}
|
||||||
ret.insert(ret.end(), p, q);
|
if (high) {
|
||||||
|
while (p < q) {
|
||||||
|
int i = 0, c = *p < 0x80 ? 0
|
||||||
|
: *p < 0xc0 || *p >= 0xf8
|
||||||
|
? -1
|
||||||
|
: *p >= 0xf0 ? 3 : *p >= 0xe0 ? 2 : 1;
|
||||||
|
const uint8_t *r = p + 1;
|
||||||
|
for (; i < c && r < q && *r >= 0x80; i++, r++)
|
||||||
|
;
|
||||||
|
if (i == c)
|
||||||
|
ret.insert(ret.end(), (const char *)p, (const char *)r);
|
||||||
|
else
|
||||||
|
ret += '?';
|
||||||
|
p = r;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret.insert(ret.end(), (const char *)p, (const char *)q);
|
||||||
|
}
|
||||||
p = q;
|
p = q;
|
||||||
}
|
}
|
||||||
while (ret.size() && isspace(ret.back()))
|
while (ret.size() && isspace(ret.back()))
|
||||||
|
Loading…
Reference in New Issue
Block a user