From 9b54e5391c4063cabec78932a9dcddf994c10694 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 7 Jul 2018 15:25:25 -0700 Subject: [PATCH] Clean up clang_tu; retrieve comments with clang::ASTContext::getRawCommentForAnyRedecl --- src/clang_tu.cc | 198 ------------------------------------------------ src/clang_tu.h | 98 ------------------------ src/indexer.cc | 79 +++++++++++++------ src/indexer.h | 9 +-- 4 files changed, 57 insertions(+), 327 deletions(-) diff --git a/src/clang_tu.cc b/src/clang_tu.cc index 95e44c90..e98d0453 100644 --- a/src/clang_tu.cc +++ b/src/clang_tu.cc @@ -65,204 +65,6 @@ void EmitDiagnostics(std::string path, } } // namespace -Range ResolveCXSourceRange(const CXSourceRange& range, CXFile* cx_file) { - CXSourceLocation start = clang_getRangeStart(range); - CXSourceLocation end = clang_getRangeEnd(range); - - unsigned int start_line, start_column; - clang_getSpellingLocation(start, cx_file, &start_line, &start_column, - nullptr); - unsigned int end_line, end_column; - clang_getSpellingLocation(end, nullptr, &end_line, &end_column, nullptr); - - return Range{{int16_t(start_line - 1), (int16_t)(start_column - 1)}, - {int16_t(end_line - 1), int16_t(end_column - 1)}}; -} - -ClangCursor ClangType::get_declaration() const { - return clang_getTypeDeclaration(cx_type); -} - -std::string ClangType::get_usr() const { - return ClangCursor{clang_getTypeDeclaration(cx_type)}.get_usr(); -} - -Usr ClangType::get_usr_hash() const { - if (is_builtin()) - return static_cast(cx_type.kind); - return ClangCursor{clang_getTypeDeclaration(cx_type)}.get_usr_hash(); -} - -ClangType ClangType::get_canonical() const { - return clang_getCanonicalType(cx_type); -} - -ClangType ClangType::strip_qualifiers() const { - CXType cx = cx_type; - while (1) { - switch (cx.kind) { - default: - break; - case CXType_ConstantArray: - case CXType_DependentSizedArray: - case CXType_IncompleteArray: - case CXType_VariableArray: - cx = clang_getElementType(cx); - continue; - case 
CXType_BlockPointer: - case CXType_LValueReference: - case CXType_MemberPointer: - case CXType_ObjCObjectPointer: - case CXType_Pointer: - case CXType_RValueReference: - cx = clang_getPointeeType(cx); - continue; - } - break; - } - - return cx; -} - -std::string ClangType::get_spell_name() const { - return ToString(clang_getTypeSpelling(cx_type)); -} - -ClangType ClangType::get_return_type() const { - return clang_getResultType(cx_type); -} - -std::vector ClangType::get_arguments() const { - int size = clang_getNumArgTypes(cx_type); - if (size < 0) - return {}; - std::vector types(size); - for (int i = 0; i < size; ++i) - types.emplace_back(clang_getArgType(cx_type, i)); - return types; -} - -std::vector ClangType::get_template_arguments() const { - int size = clang_Type_getNumTemplateArguments(cx_type); - assert(size >= 0); - if (size < 0) - return std::vector(); - - std::vector types(size); - for (int i = 0; i < size; ++i) - types.emplace_back(clang_Type_getTemplateArgumentAsType(cx_type, i)); - return types; -} - -ClangType ClangCursor::get_type() const { - return {clang_getCursorType(cx_cursor)}; -} - -std::string ClangCursor::get_spell_name() const { - return ::ToString(clang_getCursorSpelling(cx_cursor)); -} - -Range ClangCursor::get_spell(CXFile* cx_file) const { - // TODO for Objective-C methods and Objective-C message expressions, there are - // multiple pieces for each selector identifier. 
- CXSourceRange range = clang_Cursor_getSpellingNameRange(cx_cursor, 0, 0); - return ResolveCXSourceRange(range, cx_file); -} - -Range ClangCursor::get_extent() const { - CXSourceRange range = clang_getCursorExtent(cx_cursor); - return ResolveCXSourceRange(range, nullptr); -} - -std::string ClangCursor::get_display_name() const { - return ::ToString(clang_getCursorDisplayName(cx_cursor)); -} - -std::string ClangCursor::get_usr() const { - return ::ToString(clang_getCursorUSR(cx_cursor)); -} - -Usr ClangCursor::get_usr_hash() const { - CXString usr = clang_getCursorUSR(cx_cursor); - Usr ret = HashUsr(clang_getCString(usr)); - clang_disposeString(usr); - return ret; -} - -std::optional ClangCursor::get_opt_usr_hash() const { - CXString usr = clang_getCursorUSR(cx_cursor); - const char* str = clang_getCString(usr); - if (!str || str[0] == '\0') { - clang_disposeString(usr); - return {}; - } - Usr ret = HashUsr(str); - clang_disposeString(usr); - return ret; -} - -bool ClangCursor::is_definition() const { - return clang_isCursorDefinition(cx_cursor); -} - -ClangCursor ClangCursor::template_specialization_to_template_definition() - const { - CXCursor definition = clang_getSpecializedCursorTemplate(cx_cursor); - if (definition.kind == CXCursor_FirstInvalid) - return cx_cursor; - return definition; -} - -ClangCursor ClangCursor::get_referenced() const { - return {clang_getCursorReferenced(cx_cursor)}; -} - -ClangCursor ClangCursor::get_canonical() const { - return {clang_getCanonicalCursor(cx_cursor)}; -} - -ClangCursor ClangCursor::get_definition() const { - return {clang_getCursorDefinition(cx_cursor)}; -} - -ClangCursor ClangCursor::get_lexical_parent() const { - return {clang_getCursorLexicalParent(cx_cursor)}; -} - -ClangCursor ClangCursor::get_semantic_parent() const { - return {clang_getCursorSemanticParent(cx_cursor)}; -} - -std::vector ClangCursor::get_arguments() const { - int size = clang_Cursor_getNumArguments(cx_cursor); - if (size < 0) - return 
std::vector(); - - std::vector cursors(size); - for (int i = 0; i < size; ++i) - cursors.emplace_back(clang_Cursor_getArgument(cx_cursor, i)); - return cursors; -} - -bool ClangCursor::is_valid_kind() const { - CXCursor referenced = clang_getCursorReferenced(cx_cursor); - if (clang_Cursor_isNull(referenced)) - return false; - - CXCursorKind kind = get_kind(); - return kind > CXCursor_UnexposedDecl && - (kind < CXCursor_FirstInvalid || kind > CXCursor_LastInvalid); -} - -std::string ClangCursor::get_type_description() const { - auto type = clang_getCursorType(cx_cursor); - return ::ToString(clang_getTypeSpelling(type)); -} - -std::string ClangCursor::ToString() const { - return ::ToString(get_kind()) + " " + get_spell_name(); -} - ClangIndex::ClangIndex() : ClangIndex(1, 0) {} ClangIndex::ClangIndex(int exclude_declarations_from_pch, diff --git a/src/clang_tu.h b/src/clang_tu.h index 03fbf993..b363c68b 100644 --- a/src/clang_tu.h +++ b/src/clang_tu.h @@ -7,104 +7,6 @@ #include #include -using Usr = uint64_t; - -Range ResolveCXSourceRange(const CXSourceRange& range, - CXFile* cx_file = nullptr); - -class ClangCursor; - -class ClangType { - public: - ClangType() = default; - ClangType(const CXType& cx) : cx_type(cx) {} - - // Returns true if this is a fundamental type like int. - bool is_builtin() const { - // NOTE: This will return false for pointed types. Should we call - // strip_qualifiers for the user? 
- return cx_type.kind >= CXType_FirstBuiltin && - cx_type.kind <= CXType_LastBuiltin; - } - - ClangCursor get_declaration() const; - std::string get_usr() const; - Usr get_usr_hash() const; - std::string get_spell_name() const; - ClangType get_canonical() const; - - // Try to resolve this type and remove qualifies, ie, Foo* will become Foo - ClangType strip_qualifiers() const; - - ClangType get_return_type() const; - std::vector get_arguments() const; - std::vector get_template_arguments() const; - - CXType cx_type; -}; - -class ClangCursor { - public: - ClangCursor() = default; - ClangCursor(CXCursor cx) : cx_cursor(cx) {} - bool operator==(const ClangCursor& o) const { - return clang_equalCursors(cx_cursor, o.cx_cursor); - } - bool operator!=(const ClangCursor& o) const { - return !(*this == o); - } - - CXCursorKind get_kind() const { - return cx_cursor.kind; - } - ClangType get_type() const; - std::string get_spell_name() const; - Range get_spell(CXFile* cx_file = nullptr) const; - Range get_extent() const; - std::string get_display_name() const; - std::string get_usr() const; - Usr get_usr_hash() const; - std::optional get_opt_usr_hash() const; - - bool is_definition() const; - - // If the given cursor points to a template specialization, this - // will return the cursor pointing to the template definition. - // If the given cursor is not a template specialization, this will - // just return the same cursor. - // - // This means it is always safe to call this method. 
- ClangCursor template_specialization_to_template_definition() const; - - ClangCursor get_referenced() const; - ClangCursor get_canonical() const; - ClangCursor get_definition() const; - ClangCursor get_lexical_parent() const; - ClangCursor get_semantic_parent() const; - std::vector get_arguments() const; - bool is_valid_kind() const; - - std::string get_type_description() const; - - std::string ToString() const; - - enum class VisitResult { Break, Continue, Recurse }; - - template - using Visitor = VisitResult (*)(ClangCursor cursor, - ClangCursor parent, - TClientData* client_data); - - template - void VisitChildren(Visitor visitor, - TClientData* client_data) const { - clang_visitChildren(cx_cursor, reinterpret_cast(visitor), - client_data); - } - - CXCursor cx_cursor; -}; - // Simple RAII wrapper about CXIndex. // Note: building a ClangIndex instance acquires a global lock, since libclang // API does not appear to be thread-safe here. diff --git a/src/indexer.cc b/src/indexer.cc index 9d6252c3..9e2a2034 100644 --- a/src/indexer.cc +++ b/src/indexer.cc @@ -270,6 +270,58 @@ class IndexDataConsumer : public index::IndexDataConsumer { IndexParam& param; llvm::DenseMap Decl2usr; + std::string GetComment(const Decl* D) { + SourceManager &SM = Ctx->getSourceManager(); + const RawComment *RC = Ctx->getRawCommentForAnyRedecl(D); + if (!RC) return ""; + StringRef Raw = RC->getRawText(SM); + SourceRange R = RC->getSourceRange(); + std::pair BInfo = SM.getDecomposedLoc(R.getBegin()); + unsigned start_column = SM.getColumnNumber(BInfo.first, BInfo.second); + std::string ret; + int pad = -1; + for (const char *p = Raw.data(), *E = Raw.end(); p < E;) { + // The raw text begins at the comment marker (1-based |start_column|); later + // lines keep their source indentation, so drop up to start_column - 1 of it.
+ unsigned skip = start_column - 1; + for (; skip > 0 && p < E && (*p == ' ' || *p == '\t'); p++) + skip--; + const char *q = p; + while (q < E && *q != '\n') + q++; + if (q < E) + q++; + // A minimalist approach to skip Doxygen comment markers. + // See https://www.stack.nl/~dimitri/doxygen/manual/docblocks.html + if (pad < 0) { + // First line, detect the length of comment marker and put into |pad| + const char *begin = p; + while (p < E && (*p == '/' || *p == '*')) + p++; + if (p < E && (*p == '<' || *p == '!')) + p++; + if (p < E && *p == ' ') + p++; + pad = int(p - begin); + } else { + // Other lines, skip |pad| bytes + int prefix = pad; + while (prefix > 0 && p < E && + (*p == ' ' || *p == '/' || *p == '*' || *p == '<' || *p == '!')) + prefix--, p++; + } + ret.insert(ret.end(), p, q); + p = q; + } + while (ret.size() && isspace(static_cast<unsigned char>(ret.back()))) + ret.pop_back(); + if (StringRef(ret).endswith("*/") || StringRef(ret).endswith("\n/")) + ret.resize(ret.size() - 2); + while (ret.size() && isspace(static_cast<unsigned char>(ret.back()))) + ret.pop_back(); + return ret; + } + Usr GetUsr(const Decl* D) { D = D->getCanonicalDecl(); auto R = Decl2usr.try_emplace(D); @@ -429,7 +481,7 @@ public: if (!func->def.detailed_name[0]) { SetName(D, short_name, qualified, func->def); if (g_config->index.comments) - func->def.comments = Intern(""); + func->def.comments = Intern(GetComment(D)); } if (is_def || (is_decl && !func->def.spell)) { if (func->def.spell) @@ -450,7 +502,7 @@ public: if (!type->def.detailed_name[0]) { SetName(D, short_name, qualified, type->def); if (g_config->index.comments) - type->def.comments = Intern(""); + type->def.comments = Intern(GetComment(D)); } if (is_def || (is_decl && !type->def.spell)) { if (type->def.spell) @@ -465,7 +517,7 @@ public: if (!var->def.detailed_name[0]) { SetName(D, short_name, qualified, var->def); if (g_config->index.comments) - var->def.comments = Intern(""); + var->def.comments = Intern(GetComment(D)); } if (is_def || (is_decl && !var->def.spell)) { if 
(var->def.spell) @@ -633,27 +685,6 @@ void Uniquify(std::vector& uses) { uses.resize(n); } -void AddUse(IndexFile* db, - std::vector& uses, - Range range, - ClangCursor parent, - Role role = Role::Reference) { - // switch (GetSymbolKind(parent.get_kind())) { - // case SymbolKind::Func: - // uses.push_back(Use{ - // {range, db->ToFunc(parent.cx_cursor).usr, SymbolKind::Func, role}}); - // break; - // case SymbolKind::Type: - // uses.push_back(Use{ - // {range, db->ToType(parent.cx_cursor).usr, SymbolKind::Type, role}}); - // break; - // default: - // uses.push_back(Use{{range, 0, SymbolKind::File, role}}); - // break; - // } -} - - std::vector> ClangIndexer::Index( VFS* vfs, std::string file, diff --git a/src/indexer.h b/src/indexer.h index ec180cd1..441dc44c 100644 --- a/src/indexer.h +++ b/src/indexer.h @@ -1,6 +1,5 @@ #pragma once -#include "clang_tu.h" #include "clang_utils.h" #include "file_consumer.h" #include "language.h" @@ -21,6 +20,8 @@ #include #include +using Usr = uint64_t; + struct SymbolIdx { Usr usr; SymbolKind kind; @@ -279,9 +280,6 @@ struct IndexFile { IndexFunc& ToFunc(Usr usr); IndexType& ToType(Usr usr); IndexVar& ToVar(Usr usr); - IndexFunc& ToFunc(const ClangCursor& c) { return ToFunc(c.get_usr_hash()); } - IndexType& ToType(const ClangCursor& c) { return ToType(c.get_usr_hash()); } - IndexVar& ToVar(const ClangCursor& c) { return ToVar(c.get_usr_hash()); } std::string ToString(); }; @@ -304,7 +302,4 @@ struct ClangIndexer { std::string file, const std::vector& args, const std::vector& file_contents); - - // Note: constructing this acquires a global lock - ClangIndex index; };