Clean up clang_tu; retrieve comments with clang::ASTContext::getRawCommentForAnyRedecl

This commit is contained in:
Fangrui Song 2018-07-07 15:25:25 -07:00
parent 103aa711d3
commit 9b54e5391c
4 changed files with 57 additions and 327 deletions

View File

@ -65,204 +65,6 @@ void EmitDiagnostics(std::string path,
} }
} // namespace } // namespace
// Builds a Range from the spelling locations of both endpoints of |range|.
// |cx_file| is forwarded to clang_getSpellingLocation for the start endpoint
// only; the end endpoint does not report a file.
Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file) {
  unsigned int first_line, first_column;
  clang_getSpellingLocation(clang_getRangeStart(range), cx_file, &first_line,
                            &first_column, nullptr);

  unsigned int last_line, last_column;
  clang_getSpellingLocation(clang_getRangeEnd(range), nullptr, &last_line,
                            &last_column, nullptr);

  // Every coordinate is decremented by one and then narrowed to int16_t.
  return Range{{static_cast<int16_t>(first_line - 1),
                static_cast<int16_t>(first_column - 1)},
               {static_cast<int16_t>(last_line - 1),
                static_cast<int16_t>(last_column - 1)}};
}
// Wraps clang_getTypeDeclaration for the underlying CXType.
ClangCursor ClangType::get_declaration() const {
  return ClangCursor(clang_getTypeDeclaration(cx_type));
}
std::string ClangType::get_usr() const {
return ClangCursor{clang_getTypeDeclaration(cx_type)}.get_usr();
}
// Returns a Usr for this type: the hashed USR of its declaration cursor, or,
// when is_builtin() holds, the numeric value of the type kind itself.
Usr ClangType::get_usr_hash() const {
  if (!is_builtin()) {
    const ClangCursor declaration(clang_getTypeDeclaration(cx_type));
    return declaration.get_usr_hash();
  }
  return static_cast<Usr>(cx_type.kind);
}
// Wraps clang_getCanonicalType for the underlying CXType.
ClangType ClangType::get_canonical() const {
  return ClangType(clang_getCanonicalType(cx_type));
}
// Repeatedly unwraps array kinds (to their element type) and pointer-like or
// reference kinds (to their pointee type) until a kind outside those two
// groups is reached, then returns that type.
ClangType ClangType::strip_qualifiers() const {
  CXType current = cx_type;
  for (;;) {
    switch (current.kind) {
      case CXType_ConstantArray:
      case CXType_DependentSizedArray:
      case CXType_IncompleteArray:
      case CXType_VariableArray:
        current = clang_getElementType(current);
        break;  // leaves the switch only; the loop re-examines |current|

      case CXType_BlockPointer:
      case CXType_LValueReference:
      case CXType_MemberPointer:
      case CXType_ObjCObjectPointer:
      case CXType_Pointer:
      case CXType_RValueReference:
        current = clang_getPointeeType(current);
        break;  // leaves the switch only; the loop re-examines |current|

      default:
        // Nothing left to unwrap.
        return current;
    }
  }
}
// Returns the result of clang_getTypeSpelling converted through ToString.
std::string ClangType::get_spell_name() const {
  std::string spelling = ToString(clang_getTypeSpelling(cx_type));
  return spelling;
}
// Wraps clang_getResultType for the underlying CXType.
ClangType ClangType::get_return_type() const {
  return ClangType(clang_getResultType(cx_type));
}
std::vector<ClangType> ClangType::get_arguments() const {
int size = clang_getNumArgTypes(cx_type);
if (size < 0)
return {};
std::vector<ClangType> types(size);
for (int i = 0; i < size; ++i)
types.emplace_back(clang_getArgType(cx_type, i));
return types;
}
std::vector<ClangType> ClangType::get_template_arguments() const {
int size = clang_Type_getNumTemplateArguments(cx_type);
assert(size >= 0);
if (size < 0)
return std::vector<ClangType>();
std::vector<ClangType> types(size);
for (int i = 0; i < size; ++i)
types.emplace_back(clang_Type_getTemplateArgumentAsType(cx_type, i));
return types;
}
// Wraps clang_getCursorType for the underlying CXCursor.
ClangType ClangCursor::get_type() const {
  return ClangType(clang_getCursorType(cx_cursor));
}
// Returns the result of clang_getCursorSpelling converted through the global
// ToString (qualified with :: because this class has its own ToString member).
std::string ClangCursor::get_spell_name() const {
  std::string spelling = ::ToString(clang_getCursorSpelling(cx_cursor));
  return spelling;
}
// Resolves the range returned by clang_Cursor_getSpellingNameRange (piece
// index 0, options 0) into a Range; |cx_file| is passed through to
// ResolveCXSourceRange.
Range ClangCursor::get_spell(CXFile* cx_file) const {
  // TODO for Objective-C methods and Objective-C message expressions, there are
  // multiple pieces for each selector identifier.
  return ResolveCXSourceRange(
      clang_Cursor_getSpellingNameRange(cx_cursor, 0, 0), cx_file);
}
// Resolves the range returned by clang_getCursorExtent into a Range, without
// requesting the file.
Range ClangCursor::get_extent() const {
  return ResolveCXSourceRange(clang_getCursorExtent(cx_cursor), nullptr);
}
// Returns the result of clang_getCursorDisplayName converted through the
// global ToString.
std::string ClangCursor::get_display_name() const {
  std::string display_name = ::ToString(clang_getCursorDisplayName(cx_cursor));
  return display_name;
}
// Returns the result of clang_getCursorUSR converted through the global
// ToString.
std::string ClangCursor::get_usr() const {
  std::string usr = ::ToString(clang_getCursorUSR(cx_cursor));
  return usr;
}
// Hashes this cursor's USR with HashUsr and returns the result.
Usr ClangCursor::get_usr_hash() const {
  CXString cx_usr = clang_getCursorUSR(cx_cursor);
  const Usr hash = HashUsr(clang_getCString(cx_usr));
  // Release the CXString only after its C string has been hashed.
  clang_disposeString(cx_usr);
  return hash;
}
std::optional<Usr> ClangCursor::get_opt_usr_hash() const {
CXString usr = clang_getCursorUSR(cx_cursor);
const char* str = clang_getCString(usr);
if (!str || str[0] == '\0') {
clang_disposeString(usr);
return {};
}
Usr ret = HashUsr(str);
clang_disposeString(usr);
return ret;
}
// True when clang_isCursorDefinition returns a non-zero value for this cursor.
bool ClangCursor::is_definition() const {
  return clang_isCursorDefinition(cx_cursor) != 0;
}
// Returns the cursor from clang_getSpecializedCursorTemplate, or this cursor
// itself when that call yields a cursor of kind CXCursor_FirstInvalid.
ClangCursor ClangCursor::template_specialization_to_template_definition()
    const {
  const CXCursor specialized = clang_getSpecializedCursorTemplate(cx_cursor);
  return specialized.kind == CXCursor_FirstInvalid ? ClangCursor(cx_cursor)
                                                   : ClangCursor(specialized);
}
// Wraps clang_getCursorReferenced for the underlying CXCursor.
ClangCursor ClangCursor::get_referenced() const {
  return ClangCursor(clang_getCursorReferenced(cx_cursor));
}
// Wraps clang_getCanonicalCursor for the underlying CXCursor.
ClangCursor ClangCursor::get_canonical() const {
  return ClangCursor(clang_getCanonicalCursor(cx_cursor));
}
// Wraps clang_getCursorDefinition for the underlying CXCursor.
ClangCursor ClangCursor::get_definition() const {
  return ClangCursor(clang_getCursorDefinition(cx_cursor));
}
// Wraps clang_getCursorLexicalParent for the underlying CXCursor.
ClangCursor ClangCursor::get_lexical_parent() const {
  return ClangCursor(clang_getCursorLexicalParent(cx_cursor));
}
// Wraps clang_getCursorSemanticParent for the underlying CXCursor.
ClangCursor ClangCursor::get_semantic_parent() const {
  return ClangCursor(clang_getCursorSemanticParent(cx_cursor));
}
std::vector<ClangCursor> ClangCursor::get_arguments() const {
int size = clang_Cursor_getNumArguments(cx_cursor);
if (size < 0)
return std::vector<ClangCursor>();
std::vector<ClangCursor> cursors(size);
for (int i = 0; i < size; ++i)
cursors.emplace_back(clang_Cursor_getArgument(cx_cursor, i));
return cursors;
}
bool ClangCursor::is_valid_kind() const {
CXCursor referenced = clang_getCursorReferenced(cx_cursor);
if (clang_Cursor_isNull(referenced))
return false;
CXCursorKind kind = get_kind();
return kind > CXCursor_UnexposedDecl &&
(kind < CXCursor_FirstInvalid || kind > CXCursor_LastInvalid);
}
// Returns the spelling of this cursor's type (clang_getCursorType followed by
// clang_getTypeSpelling), converted through the global ToString.
std::string ClangCursor::get_type_description() const {
  std::string description =
      ::ToString(clang_getTypeSpelling(clang_getCursorType(cx_cursor)));
  return description;
}
// Formats the cursor as "<kind> <spelling>", separated by a single space.
std::string ClangCursor::ToString() const {
  std::string text = ::ToString(get_kind());
  text += " ";
  text += get_spell_name();
  return text;
}
ClangIndex::ClangIndex() : ClangIndex(1, 0) {} ClangIndex::ClangIndex() : ClangIndex(1, 0) {}
ClangIndex::ClangIndex(int exclude_declarations_from_pch, ClangIndex::ClangIndex(int exclude_declarations_from_pch,

View File

@ -7,104 +7,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
// 64-bit integer used for symbol USR hashes (the result type of the
// get_usr_hash() members below).
using Usr = uint64_t;
// Converts a libclang source range into a Range. |cx_file| is optional and
// defaults to nullptr.
Range ResolveCXSourceRange(const CXSourceRange& range,
CXFile* cx_file = nullptr);
// Forward declaration: ClangType::get_declaration() returns a ClangCursor.
class ClangCursor;
// Value wrapper around a libclang CXType that exposes type queries as member
// functions.
class ClangType {
 public:
  ClangType() = default;
  // Intentionally implicit so raw CXType values convert to ClangType.
  ClangType(const CXType& cx) : cx_type(cx) {}

  // Returns true if this is a fundamental type like int, i.e. the kind lies
  // within [CXType_FirstBuiltin, CXType_LastBuiltin].
  bool is_builtin() const {
    // NOTE: This will return false for pointed types. Should we call
    // strip_qualifiers for the user?
    const bool below_first = cx_type.kind < CXType_FirstBuiltin;
    const bool above_last = cx_type.kind > CXType_LastBuiltin;
    return !below_first && !above_last;
  }

  ClangCursor get_declaration() const;
  std::string get_usr() const;
  Usr get_usr_hash() const;
  std::string get_spell_name() const;
  ClangType get_canonical() const;

  // Try to resolve this type and remove qualifiers, i.e. Foo* will become Foo.
  ClangType strip_qualifiers() const;

  ClangType get_return_type() const;
  std::vector<ClangType> get_arguments() const;
  std::vector<ClangType> get_template_arguments() const;

  // The wrapped libclang type handle.
  CXType cx_type;
};
// Value wrapper around a libclang CXCursor that exposes cursor queries as
// member functions.
class ClangCursor {
 public:
  ClangCursor() = default;
  // Intentionally implicit so raw CXCursor values convert to ClangCursor.
  ClangCursor(CXCursor cx) : cx_cursor(cx) {}

  // Equality is delegated to clang_equalCursors.
  bool operator==(const ClangCursor& o) const {
    return clang_equalCursors(cx_cursor, o.cx_cursor) != 0;
  }
  bool operator!=(const ClangCursor& o) const { return !operator==(o); }

  CXCursorKind get_kind() const { return cx_cursor.kind; }

  ClangType get_type() const;
  std::string get_spell_name() const;
  Range get_spell(CXFile* cx_file = nullptr) const;
  Range get_extent() const;
  std::string get_display_name() const;
  std::string get_usr() const;
  Usr get_usr_hash() const;
  std::optional<Usr> get_opt_usr_hash() const;

  bool is_definition() const;

  // For a cursor that points to a template specialization, returns the cursor
  // of the template definition. For any other cursor, returns the cursor
  // unchanged.
  //
  // This means it is always safe to call this method.
  ClangCursor template_specialization_to_template_definition() const;

  ClangCursor get_referenced() const;
  ClangCursor get_canonical() const;
  ClangCursor get_definition() const;
  ClangCursor get_lexical_parent() const;
  ClangCursor get_semantic_parent() const;
  std::vector<ClangCursor> get_arguments() const;
  bool is_valid_kind() const;

  std::string get_type_description() const;

  std::string ToString() const;

  enum class VisitResult { Break, Continue, Recurse };

  // Callback type accepted by VisitChildren; |client_data| is the pointer
  // handed to VisitChildren.
  template <typename TClientData>
  using Visitor = VisitResult (*)(ClangCursor cursor,
                                  ClangCursor parent,
                                  TClientData* client_data);

  // Passes |visitor|, cast to CXCursorVisitor, and |client_data| to
  // clang_visitChildren for this cursor.
  template <typename TClientData>
  void VisitChildren(Visitor<TClientData> visitor,
                     TClientData* client_data) const {
    auto raw_visitor = reinterpret_cast<CXCursorVisitor>(visitor);
    clang_visitChildren(cx_cursor, raw_visitor, client_data);
  }

  // The wrapped libclang cursor handle.
  CXCursor cx_cursor;
};
// Simple RAII wrapper about CXIndex. // Simple RAII wrapper about CXIndex.
// Note: building a ClangIndex instance acquires a global lock, since libclang // Note: building a ClangIndex instance acquires a global lock, since libclang
// API does not appear to be thread-safe here. // API does not appear to be thread-safe here.

View File

@ -270,6 +270,58 @@ class IndexDataConsumer : public index::IndexDataConsumer {
IndexParam& param; IndexParam& param;
llvm::DenseMap<const Decl*, Usr> Decl2usr; llvm::DenseMap<const Decl*, Usr> Decl2usr;
// Returns the raw comment attached to |D| or any of its redeclarations (via
// ASTContext::getRawCommentForAnyRedecl), with leading comment markers,
// continuation-line indentation, a trailing block-comment terminator and
// trailing whitespace removed. Returns "" when no comment is found.
std::string GetComment(const Decl* D) {
  SourceManager &SM = Ctx->getSourceManager();
  const RawComment *RC = Ctx->getRawCommentForAnyRedecl(D);
  if (!RC)
    return "";
  StringRef Raw = RC->getRawText(SM);
  SourceRange R = RC->getSourceRange();
  std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(R.getBegin());
  // Use the column (not the line) at which the comment begins: it bounds how
  // much leading indentation is stripped from each continuation line.
  unsigned start_column = SM.getColumnNumber(BInfo.first, BInfo.second);
  std::string ret;
  int pad = -1;
  for (const char *p = Raw.data(), *E = Raw.end(); p < E;) {
    // The first line starts with a comment marker, but the rest needs
    // un-indenting.
    unsigned skip = start_column - 1;
    for (; skip > 0 && p < E && (*p == ' ' || *p == '\t'); p++)
      skip--;
    // |q| ends up one past this line's newline, or at E for the last line.
    const char *q = p;
    while (q < E && *q != '\n')
      q++;
    if (q < E)
      q++;
    // A minimalist approach to skip Doxygen comment markers.
    // See https://www.stack.nl/~dimitri/doxygen/manual/docblocks.html
    if (pad < 0) {
      // First line, detect the length of comment marker and put into |pad|
      const char *begin = p;
      while (p < E && (*p == '/' || *p == '*'))
        p++;
      if (p < E && (*p == '<' || *p == '!'))
        p++;
      if (p < E && *p == ' ')
        p++;
      pad = int(p - begin);
    } else {
      // Other lines, skip |pad| bytes
      int prefix = pad;
      while (prefix > 0 && p < E &&
             (*p == ' ' || *p == '/' || *p == '*' || *p == '<' || *p == '!'))
        prefix--, p++;
    }
    ret.insert(ret.end(), p, q);
    p = q;
  }

  // Cast to unsigned char before isspace(): passing a negative char value
  // (e.g. a byte of a multi-byte UTF-8 sequence) is undefined behavior.
  auto TrimTrailingSpace = [&ret] {
    while (ret.size() && isspace(static_cast<unsigned char>(ret.back())))
      ret.pop_back();
  };

  TrimTrailingSpace();
  // Drop a trailing "*/", or a "/" alone on the last line (presumably what is
  // left of a closing "*/" after marker stripping).
  if (StringRef(ret).endswith("*/") || StringRef(ret).endswith("\n/"))
    ret.resize(ret.size() - 2);
  TrimTrailingSpace();
  return ret;
}
Usr GetUsr(const Decl* D) { Usr GetUsr(const Decl* D) {
D = D->getCanonicalDecl(); D = D->getCanonicalDecl();
auto R = Decl2usr.try_emplace(D); auto R = Decl2usr.try_emplace(D);
@ -429,7 +481,7 @@ public:
if (!func->def.detailed_name[0]) { if (!func->def.detailed_name[0]) {
SetName(D, short_name, qualified, func->def); SetName(D, short_name, qualified, func->def);
if (g_config->index.comments) if (g_config->index.comments)
func->def.comments = Intern(""); func->def.comments = Intern(GetComment(D));
} }
if (is_def || (is_decl && !func->def.spell)) { if (is_def || (is_decl && !func->def.spell)) {
if (func->def.spell) if (func->def.spell)
@ -450,7 +502,7 @@ public:
if (!type->def.detailed_name[0]) { if (!type->def.detailed_name[0]) {
SetName(D, short_name, qualified, type->def); SetName(D, short_name, qualified, type->def);
if (g_config->index.comments) if (g_config->index.comments)
type->def.comments = Intern(""); type->def.comments = Intern(GetComment(D));
} }
if (is_def || (is_decl && !type->def.spell)) { if (is_def || (is_decl && !type->def.spell)) {
if (type->def.spell) if (type->def.spell)
@ -465,7 +517,7 @@ public:
if (!var->def.detailed_name[0]) { if (!var->def.detailed_name[0]) {
SetName(D, short_name, qualified, var->def); SetName(D, short_name, qualified, var->def);
if (g_config->index.comments) if (g_config->index.comments)
var->def.comments = Intern(""); var->def.comments = Intern(GetComment(D));
} }
if (is_def || (is_decl && !var->def.spell)) { if (is_def || (is_decl && !var->def.spell)) {
if (var->def.spell) if (var->def.spell)
@ -633,27 +685,6 @@ void Uniquify(std::vector<Use>& uses) {
uses.resize(n); uses.resize(n);
} }
// Currently a no-op: the whole body is commented out, so none of the
// parameters are used and |uses| is left unmodified. The disabled code
// appended a Use to |uses| whose symbol kind and usr were derived from
// |parent| (Func, Type, or File as the fallback).
void AddUse(IndexFile* db,
std::vector<Use>& uses,
Range range,
ClangCursor parent,
Role role = Role::Reference) {
// switch (GetSymbolKind(parent.get_kind())) {
// case SymbolKind::Func:
// uses.push_back(Use{
// {range, db->ToFunc(parent.cx_cursor).usr, SymbolKind::Func, role}});
// break;
// case SymbolKind::Type:
// uses.push_back(Use{
// {range, db->ToType(parent.cx_cursor).usr, SymbolKind::Type, role}});
// break;
// default:
// uses.push_back(Use{{range, 0, SymbolKind::File, role}});
// break;
// }
}
std::vector<std::unique_ptr<IndexFile>> ClangIndexer::Index( std::vector<std::unique_ptr<IndexFile>> ClangIndexer::Index(
VFS* vfs, VFS* vfs,
std::string file, std::string file,

View File

@ -1,6 +1,5 @@
#pragma once #pragma once
#include "clang_tu.h"
#include "clang_utils.h" #include "clang_utils.h"
#include "file_consumer.h" #include "file_consumer.h"
#include "language.h" #include "language.h"
@ -21,6 +20,8 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
using Usr = uint64_t;
struct SymbolIdx { struct SymbolIdx {
Usr usr; Usr usr;
SymbolKind kind; SymbolKind kind;
@ -279,9 +280,6 @@ struct IndexFile {
IndexFunc& ToFunc(Usr usr); IndexFunc& ToFunc(Usr usr);
IndexType& ToType(Usr usr); IndexType& ToType(Usr usr);
IndexVar& ToVar(Usr usr); IndexVar& ToVar(Usr usr);
// Convenience overload: looks up the function entry by the cursor's USR hash.
IndexFunc& ToFunc(const ClangCursor& c) {
  const Usr usr = c.get_usr_hash();
  return ToFunc(usr);
}
// Convenience overload: looks up the type entry by the cursor's USR hash.
IndexType& ToType(const ClangCursor& c) {
  const Usr usr = c.get_usr_hash();
  return ToType(usr);
}
// Convenience overload: looks up the variable entry by the cursor's USR hash.
IndexVar& ToVar(const ClangCursor& c) {
  const Usr usr = c.get_usr_hash();
  return ToVar(usr);
}
std::string ToString(); std::string ToString();
}; };
@ -304,7 +302,4 @@ struct ClangIndexer {
std::string file, std::string file,
const std::vector<std::string>& args, const std::vector<std::string>& args,
const std::vector<FileContents>& file_contents); const std::vector<FileContents>& file_contents);
// Note: constructing this acquires a global lock
ClangIndex index;
}; };