Add --use-clang-cxx to waf and move type printer to src/type_printer.cc

src/type_printer.cc demonstrates how to leverage clang C++ API.
It includes clang/**/*.h headers and CXTranslationUnit.h, which resides
in clang/tools/libclang/ but is not exposed in clang/include/clang-c/.

These libclang/ header files are stable and provide some struct definitions used by the clang-c/ header files.
A copy of them from the clang source tree is checked into libclang/.

If you want to be an early adopter, configure with:
CXXFLAGS='-std=c++11 -fno-rtti -D_GLIBCXX_USE_CXX11_ABI=0 -g' ./waf configure --variant=cxx-debug --bundled-clang=5.0.1 --use-clang-cxx

Some clang/**/*.h headers are unstable and should be used with caution.
In addition, the Windows pre-built binaries on https://releases.llvm.org/download.html do not include C++ header files.
Thus `--use-clang-cxx` defaults to False.
This commit is contained in:
Fangrui Song 2017-12-29 01:19:02 -08:00
parent 59e79b3c42
commit d4ce12bb9a
5 changed files with 240 additions and 162 deletions

View File

@ -5,6 +5,7 @@
#include "platform.h"
#include "serializer.h"
#include "timer.h"
#include "type_printer.h"
#include <loguru.hpp>
@ -70,66 +71,6 @@ bool IsScopeSemanticContainer(CXCursorKind kind) {
}
}
struct NamespaceHelper {
std::unordered_map<ClangCursor, std::string>
container_cursor_to_qualified_name;
void RegisterQualifiedName(std::string usr,
const CXIdxContainerInfo* container,
std::string qualified_name) {}
std::string QualifiedName(const CXIdxContainerInfo* container,
std::string unqualified_name) {
if (!container)
return unqualified_name;
// Anonymous namespaces are not processed by indexDeclaration. We trace
// nested namespaces bottom-up through clang_getCursorSemanticParent until
// one that we know its qualified name. Then do another trace top-down and
// put their names into a map of USR -> qualified_name.
ClangCursor cursor = container->cursor;
std::vector<ClangCursor> namespaces;
std::string qualifier;
while (cursor.get_kind() != CXCursor_TranslationUnit &&
!IsScopeSemanticContainer(cursor.get_kind())) {
auto it = container_cursor_to_qualified_name.find(cursor);
if (it != container_cursor_to_qualified_name.end()) {
qualifier = it->second;
break;
}
namespaces.push_back(cursor);
cursor = clang_getCursorSemanticParent(cursor.cx_cursor);
}
for (size_t i = namespaces.size(); i > 0;) {
i--;
std::string name = namespaces[i].get_spelling();
// Empty name indicates unnamed namespace, anonymous struct, anonymous
// union, ...
if (name.size())
qualifier += name;
else
switch (namespaces[i].get_kind()) {
case CXCursor_ClassDecl:
qualifier += "(anon class)";
break;
case CXCursor_EnumDecl:
qualifier += "(anon enum)";
break;
case CXCursor_StructDecl:
qualifier += "(anon struct)";
break;
case CXCursor_UnionDecl:
qualifier += "(anon union)";
break;
default:
qualifier += "(anon)";
break;
}
qualifier += "::";
container_cursor_to_qualified_name[namespaces[i]] = qualifier;
}
return qualifier + unqualified_name;
}
};
// Caches all instances of constructors, regardless if they are indexed or not.
// The constructor may have a make_unique call associated with it that we need
@ -1113,111 +1054,60 @@ ClangCursor::VisitResult TemplateVisitor(ClangCursor cursor,
return ClangCursor::VisitResult::Continue;
}
// Build a detailed function signature, including argument names.
std::string GetFunctionSignature(IndexFile* db,
NamespaceHelper* ns,
const CXIdxDeclInfo* decl) {
// Build the function name, with scope and parameters
std::string type_desc = ClangCursor(decl->cursor).get_type_description();
int num_args = clang_Cursor_getNumArguments(decl->cursor);
std::string function_name =
ns->QualifiedName(decl->semanticContainer, decl->entityInfo->name);
} // namespace
std::vector<std::pair<int, std::string>> args;
for (int i = 0; i < num_args; i++) {
args.emplace_back(-1, ::ToString(clang_getCursorDisplayName(
clang_Cursor_getArgument(decl->cursor, i))));
}
if (clang_Cursor_isVariadic(decl->cursor)) {
args.emplace_back(-1, "");
num_args++;
}
int function_name_offset = -1;
// Scan the function type backwards.
// First pass: find the position of the closing bracket in the type.
for (int balance = 0, i = int(type_desc.size()); i--;) {
if (type_desc[i] == ')')
balance++;
// Balanced paren pair that may appear before the paren enclosing
// function parameters, see clang/lib/AST/TypePrinter.cpp
else if (type_desc[i] == '(' && --balance == 0 &&
!((i >= 5 && !type_desc.compare(i - 5, 5, "throw")) ||
(i >= 6 && !type_desc.compare(i - 6, 6, "typeof")) ||
(i >= 7 && !type_desc.compare(i - 7, 7, "_Atomic")) ||
(i >= 7 && !type_desc.compare(i - 7, 7, "typeof ")) ||
(i >= 8 && !type_desc.compare(i - 8, 8, "decltype")) ||
(i >= 8 && !type_desc.compare(i - 8, 8, "noexcept")) ||
(i >= 13 && !type_desc.compare(i - 13, 13, "__attribute__")))) {
// Do not bother with function types which return function pointers.
if (type_desc.find("(*") >= std::string::size_type(i))
function_name_offset = i;
std::string NamespaceHelper::QualifiedName(const CXIdxContainerInfo* container,
std::string unqualified_name) {
if (!container)
return unqualified_name;
// Anonymous namespaces are not processed by indexDeclaration. We trace
// nested namespaces bottom-up through clang_getCursorSemanticParent until
// one that we know its qualified name. Then do another trace top-down and
// put their names into a map of USR -> qualified_name.
ClangCursor cursor = container->cursor;
std::vector<ClangCursor> namespaces;
std::string qualifier;
while (cursor.get_kind() != CXCursor_TranslationUnit &&
!IsScopeSemanticContainer(cursor.get_kind())) {
auto it = container_cursor_to_qualified_name.find(cursor);
if (it != container_cursor_to_qualified_name.end()) {
qualifier = it->second;
break;
}
namespaces.push_back(cursor);
cursor = clang_getCursorSemanticParent(cursor.cx_cursor);
}
if (function_name_offset >= 0) {
if (num_args > 0) {
// Find positions to insert argument names.
// Last argument name is before ')'
num_args = 0;
// Other argument names come before ','
for (int balance = 0, i = function_name_offset;
i < int(type_desc.size()) && num_args < int(args.size()); i++) {
if (type_desc[i] == '(' || type_desc[i] == '<' || type_desc[i] == '[')
balance++;
else if (type_desc[i] == ')' || type_desc[i] == '>' ||
type_desc[i] == ']') {
if (--balance <= 0) {
args[num_args].first = i;
break;
}
} else if (type_desc[i] == ',' && balance == 1)
args[num_args++].first = i;
}
// Second pass: Insert argument names before each comma.
int i = 0;
std::string type_desc_with_names;
for (auto& arg : args) {
if (arg.first < 0) {
LOG_S(ERROR)
<< "When adding argument names to '" << type_desc
<< "', failed to detect positions to insert argument names";
for (size_t i = namespaces.size(); i > 0;) {
i--;
std::string name = namespaces[i].get_spelling();
// Empty name indicates unnamed namespace, anonymous struct, anonymous
// union, ...
if (name.size())
qualifier += name;
else
switch (namespaces[i].get_kind()) {
case CXCursor_ClassDecl:
qualifier += "(anon class)";
break;
case CXCursor_EnumDecl:
qualifier += "(anon enum)";
break;
case CXCursor_StructDecl:
qualifier += "(anon struct)";
break;
case CXCursor_UnionDecl:
qualifier += "(anon union)";
break;
default:
qualifier += "(anon)";
break;
}
if (arg.second.empty())
continue;
type_desc_with_names.insert(type_desc_with_names.end(), &type_desc[i],
&type_desc[arg.first]);
i = arg.first;
if (type_desc_with_names.size() &&
(type_desc_with_names.back() != ' ' &&
type_desc_with_names.back() != '*' &&
type_desc_with_names.back() != '&'))
type_desc_with_names.push_back(' ');
type_desc_with_names.append(arg.second);
}
type_desc_with_names.insert(type_desc_with_names.end(),
type_desc.begin() + i, type_desc.end());
type_desc = std::move(type_desc_with_names);
}
// TODO auto f() -> int(*)() ; int(*f())()
type_desc.insert(function_name_offset, function_name);
} else {
// type_desc is either a typedef, or some complicated type we cannot handle.
// Append the function_name in this case.
type_desc.push_back(' ');
type_desc.append(function_name);
qualifier += "::";
container_cursor_to_qualified_name[namespaces[i]] = qualifier;
}
return type_desc;
return qualifier + unqualified_name;
}
} // namespace
void OnIndexDeclaration(CXClientData client_data, const CXIdxDeclInfo* decl) {
if (!kIndexStdDeclarations &&
clang_Location_isInSystemHeader(

View File

@ -544,6 +544,18 @@ struct FileContentsWithOffsets {
optional<std::string> ContentsInRange(Range range) const;
};
// Builds qualified names for declarations from their semantic container and
// caches the qualifier computed for each container cursor.
struct NamespaceHelper {
// Container cursor -> qualifier of that container. QualifiedName() stores the
// qualifier with its trailing "::" here.
std::unordered_map<ClangCursor, std::string>
container_cursor_to_qualified_name;
// Currently a no-op: the inline body is empty.
void RegisterQualifiedName(std::string usr,
const CXIdxContainerInfo* container,
std::string qualified_name) {}
// Returns |unqualified_name| prefixed with the qualifier derived from
// |container|; a null |container| yields |unqualified_name| unchanged.
// Defined out of line.
std::string QualifiedName(const CXIdxContainerInfo* container,
std::string unqualified_name);
};
// |import_file| is the cc file which is what gets passed to clang.
// |desired_index_file| is the (h or cc) file which has actually changed.
// |dependencies| are the existing dependencies of |import_file| if this is a

167
src/type_printer.cc Normal file
View File

@ -0,0 +1,167 @@
#include "type_printer.h"
#include <string>
#include "loguru.hpp"
#if USE_CLANG_CXX
# include "CXTranslationUnit.h"
# include "clang/AST/Type.h"
# include "clang/AST/PrettyPrinter.h"
# include "clang/Frontend/ASTUnit.h"
# include "llvm/ADT/SmallString.h"
# include "llvm/Support/raw_ostream.h"
using namespace clang;
// Extracted from clang/tools/libclang/CXType.cpp
static inline QualType GetQualType(CXType CT) {
  // The first data slot of a CXType is decoded as the QualType's opaque
  // pointer.
  void* opaque_type = CT.data[0];
  return QualType::getFromOpaquePtr(opaque_type);
}
static inline CXTranslationUnit GetTU(CXType CT) {
  // The second data slot of a CXType is reinterpreted as the owning
  // translation unit handle.
  void* opaque_tu = CT.data[1];
  return static_cast<CXTranslationUnit>(opaque_tu);
}
#endif
namespace {
// TODO int(*sig(int(*)(int)))(int); is incorrect
// int(A::*(*y())(int))() is correct
// Returns the index in |type_desc| of the '(' that opens the function's
// parameter list, i.e. the position where the function name should be
// inserted, or -1 if no suitable position is found.
int GetNameInsertingPosition(const std::string& type_desc) {
  // True when the |len| characters right before |pos| spell |keyword|.
  auto preceded_by = [&type_desc](int pos, const char* keyword, int len) {
    return pos >= len && type_desc.compare(pos - len, len, keyword) == 0;
  };

  // Walk the type from the end, tracking paren depth, and stop at the first
  // '(' that closes the outermost pair and is not a keyword's paren.
  int depth = 0;
  for (int pos = int(type_desc.size()) - 1; pos >= 0; --pos) {
    const char ch = type_desc[pos];
    if (ch == ')') {
      ++depth;
      continue;
    }
    if (ch != '(')
      continue;
    if (--depth != 0)
      continue;

    // Balanced paren pairs that may follow the parameter list, see
    // clang/lib/AST/TypePrinter.cpp
    const bool keyword_paren = preceded_by(pos, "throw", 5) ||
                               preceded_by(pos, "typeof", 6) ||
                               preceded_by(pos, "_Atomic", 7) ||
                               preceded_by(pos, "typeof ", 7) ||
                               preceded_by(pos, "decltype", 8) ||
                               preceded_by(pos, "noexcept", 8) ||
                               preceded_by(pos, "__attribute__", 13);
    if (keyword_paren)
      continue;

    // Do not bother with function types which return function pointers or
    // references: any "(*" / "(&" before this paren disqualifies it.
    const std::string::size_type here = std::string::size_type(pos);
    const bool no_ptr_before = type_desc.find("(*") >= here;
    const bool no_ref_before = type_desc.find("(&") >= here;
    return (no_ptr_before && no_ref_before) ? pos : -1;
  }
  return -1;
}
}
// Build a detailed function signature, including argument names.
//
// |db| is not used by this function.
// |ns| supplies the qualified function name via NamespaceHelper::QualifiedName.
// |decl| is the declaration whose cursor provides the type description, the
// argument names and the variadic flag.
//
// Returns the printed function type with the qualified name and the argument
// names spliced in. When no insertion position for the name can be determined,
// the qualified name is appended after the type, separated by a space.
std::string GetFunctionSignature(IndexFile* db,
                                 NamespaceHelper* ns,
                                 const CXIdxDeclInfo* decl) {
  int num_args = clang_Cursor_getNumArguments(decl->cursor);
  std::string function_name =
      ns->QualifiedName(decl->semanticContainer, decl->entityInfo->name);

  // Each entry is (insertion offset in type_desc, argument name); the offset
  // stays -1 until a position is found.
  std::vector<std::pair<int, std::string>> args;
  for (int i = 0; i < num_args; i++) {
    args.emplace_back(-1, ::ToString(clang_getCursorDisplayName(
                              clang_Cursor_getArgument(decl->cursor, i))));
  }
  // A variadic function gets one extra, nameless slot for the "...".
  if (clang_Cursor_isVariadic(decl->cursor)) {
    args.emplace_back(-1, "");
    num_args++;
  }

  std::string type_desc;
  // -1 means "no insertion position known". It must be initialized here: with
  // USE_CLANG_CXX and a null QualType nothing below assigns it.
  int function_name_offset = -1;
#if USE_CLANG_CXX
  {
    CXType CT = clang_getCursorType(decl->cursor);
    QualType T = GetQualType(CT);
    if (!T.isNull()) {
      CXTranslationUnit TU = GetTU(CT);
      SmallString<64> Str;
      llvm::raw_svector_ostream OS(Str);
      PrintingPolicy PP(cxtu::getASTUnit(TU)->getASTContext().getLangOpts());
      // Print the type with a marker as the placeholder; the marker's
      // position is then used as the place to insert the function name.
      T.print(OS, PP, "=^_^=");
      type_desc = OS.str();
      const std::string::size_type marker = type_desc.find("=^_^=");
      // If the marker is missing, leave the offset at -1 so the name is
      // appended below instead of calling replace() with an invalid position.
      if (marker != std::string::npos) {
        function_name_offset = int(marker);
        if (type_desc[marker + 5] != ')') {
          type_desc.replace(marker, 5, "");
        } else {
          // The marker is wrapped as "(...=^_^=)": drop the marker together
          // with the ')' and then the nearest preceding '('.
          type_desc.replace(marker, 6, "");
          for (int i = function_name_offset; i-- > 0;)
            if (type_desc[i] == '(') {
              type_desc.erase(type_desc.begin() + i);
              // Everything after the erased '(' moved left by one.
              function_name_offset--;
              break;
            }
        }
      }
    }
  }
#else
  type_desc = ClangCursor(decl->cursor).get_type_description();
  function_name_offset = GetNameInsertingPosition(type_desc);
#endif

  if (function_name_offset >= 0) {
    if (num_args > 0) {
      // Find positions to insert argument names.
      // Last argument name is before ')'
      num_args = 0;
      // Other argument names come before ','
      for (int balance = 0, i = function_name_offset;
           i < int(type_desc.size()) && num_args < int(args.size()); i++) {
        if (type_desc[i] == '(' || type_desc[i] == '[')
          balance++;
        else if (type_desc[i] == ')' || type_desc[i] == ']') {
          if (--balance <= 0) {
            args[num_args].first = i;
            break;
          }
        } else if (type_desc[i] == ',' && balance == 1)
          args[num_args++].first = i;
      }

      // Second pass: Insert argument names before each comma.
      int i = 0;
      std::string type_desc_with_names;
      for (auto& arg : args) {
        if (arg.first < 0) {
          LOG_S(ERROR)
              << "When adding argument names to '" << type_desc
              << "', failed to detect positions to insert argument names";
          break;
        }
        // Unnamed parameters (and the variadic slot) get no name inserted.
        if (arg.second.empty())
          continue;
        // TODO Use inside-out syntax. Note, clang/lib/AST/TypePrinter.cpp does
        // not print arg names.
        type_desc_with_names.insert(type_desc_with_names.end(), &type_desc[i],
                                    &type_desc[arg.first]);
        i = arg.first;
        // Separate the name from the type unless the type already ends with a
        // space, '*' or '&'.
        if (type_desc_with_names.size() &&
            (type_desc_with_names.back() != ' ' &&
             type_desc_with_names.back() != '*' &&
             type_desc_with_names.back() != '&'))
          type_desc_with_names.push_back(' ');
        type_desc_with_names.append(arg.second);
      }
      // Copy whatever follows the last inserted name.
      type_desc_with_names.insert(type_desc_with_names.end(),
                                  type_desc.begin() + i, type_desc.end());
      type_desc = std::move(type_desc_with_names);
    }
    // The offset precedes every inserted argument name, so it is still valid.
    // TODO auto f() -> int(*)() ; int(*f())()
    type_desc.insert(function_name_offset, function_name);
  } else {
    // type_desc is either a typedef, or some complicated type we cannot handle.
    // Append the function_name in this case.
    type_desc.push_back(' ');
    type_desc.append(function_name);
  }
  return type_desc;
}

7
src/type_printer.h Normal file
View File

@ -0,0 +1,7 @@
#pragma once
#include "indexer.h"
// Builds a detailed function signature for |decl|, including argument names.
// |ns| is used to compute the qualified function name.
std::string GetFunctionSignature(IndexFile* db,
NamespaceHelper* ns,
const CXIdxDeclInfo* decl);

14
wscript
View File

@ -226,7 +226,7 @@ def configure(ctx):
def build(bld):
cc_files = bld.path.ant_glob(['src/*.cc', 'src/messages/*.cc'])
if bld.env['use_clang_cxx']:
cc_files += bld.path.ant_glob(['src/cxx/*.cc'])
cc_files += bld.path.ant_glob(['src/clang_cxx/*.cc'])
lib = []
if sys.platform.startswith('linux'):
@ -326,17 +326,19 @@ def build(bld):
source=cc_files,
use='clang',
includes=[
#'libclang/',
'src/',
'third_party/',
'third_party/doctest/',
'third_party/loguru/',
'third_party/rapidjson/include/',
'third_party/sparsepp/'],
defines=[#'_GLIBCXX_USE_CXX11_ABI=0', 'clang+llvm-$version-x86_64-linux-gnu-ubuntu-14.04' is pre CXX11_ABI
'third_party/sparsepp/'] +
(['libclang'] if bld.env['use_clang_cxx'] else []),
defines=[
#'_GLIBCXX_USE_CXX11_ABI=0', 'clang+llvm-$version-x86_64-linux-gnu-ubuntu-14.04' is pre CXX11_ABI
#'LOGURU_STACKTRACES=0',
'LOGURU_WITH_STREAMS=1',
'DEFAULT_RESOURCE_DIRECTORY="' + default_resource_directory + '"'],
'LOGURU_WITH_STREAMS=1',
'DEFAULT_RESOURCE_DIRECTORY="' + default_resource_directory + '"'] +
(['USE_CLANG_CXX=1'] if bld.env['use_clang_cxx'] else []),
lib=lib,
rpath=rpath,
target='bin/cquery')