clang  9.0.0
BuildTree.cpp
Go to the documentation of this file.
1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
10 #include "clang/AST/Stmt.h"
11 #include "clang/Basic/LLVM.h"
14 #include "clang/Basic/TokenKinds.h"
15 #include "clang/Lex/Lexer.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Support/Allocator.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <map>
27 
28 using namespace clang;
29 
30 /// A helper class for constructing the syntax tree while traversing a clang
31 /// AST.
32 ///
33 /// At each point of the traversal we maintain a list of pending nodes.
34 /// Initially all tokens are added as pending nodes. When processing a clang AST
35 /// node, the clients need to:
36 /// - create a corresponding syntax node,
37 /// - assign roles to all pending child nodes with 'markChild' and
38 /// 'markChildToken',
39 /// - replace the child nodes with the new syntax node in the pending list
40 /// with 'foldNode'.
41 ///
42 /// Note that all children are expected to be processed when building a node.
43 ///
44 /// Call finalize() to finish building the tree and consume the root node.
46 public:
47  TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {}
48 
49  llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
50 
51  /// Populate children for \p New node, assuming it covers tokens from \p
52  /// Range.
53  void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
54 
55  /// Set role for a token starting at \p Loc.
56  void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
57 
58  /// Finish building the tree and consume the root node.
60  auto Tokens = Arena.tokenBuffer().expandedTokens();
61  // Build the root of the tree, consuming all the children.
62  Pending.foldChildren(Tokens,
63  new (Arena.allocator()) syntax::TranslationUnit);
64 
65  return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
66  }
67 
68  /// getRange() finds the syntax tokens corresponding to the passed source
69  /// locations.
70  /// \p First is the start position of the first token and \p Last is the start
71  /// position of the last token.
73  SourceLocation Last) const {
74  assert(First.isValid());
75  assert(Last.isValid());
76  assert(First == Last ||
77  Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
78  return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
79  }
81  return getRange(D->getBeginLoc(), D->getEndLoc());
82  }
84  return getRange(S->getBeginLoc(), S->getEndLoc());
85  }
86 
87 private:
88  /// Finds a token starting at \p L. The token must exist.
89  const syntax::Token *findToken(SourceLocation L) const;
90 
91  /// A collection of trees covering the input tokens.
92  /// When created, each tree corresponds to a single token in the file.
93  /// Clients call 'foldChildren' to attach one or more subtrees to a parent
94  /// node and update the list of trees accordingly.
95  ///
96  /// Ensures that added nodes properly nest and cover the whole token stream.
97  struct Forest {
98  Forest(syntax::Arena &A) {
99  // FIXME: do not add 'eof' to the tree.
100 
101  // Create all leaf nodes.
102  for (auto &T : A.tokenBuffer().expandedTokens())
103  Trees.insert(Trees.end(),
104  {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
105  }
106 
107  void assignRole(llvm::ArrayRef<syntax::Token> Range,
108  syntax::NodeRole Role) {
109  assert(!Range.empty());
110  auto It = Trees.lower_bound(Range.begin());
111  assert(It != Trees.end() && "no node found");
112  assert(It->first == Range.begin() && "no child with the specified range");
113  assert((std::next(It) == Trees.end() ||
114  std::next(It)->first == Range.end()) &&
115  "no child with the specified range");
116  It->second.Role = Role;
117  }
118 
119  /// Add \p Node to the forest and fill its children nodes based on the \p
120  /// NodeRange.
121  void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
122  syntax::Tree *Node) {
123  assert(!NodeTokens.empty());
124  assert(Node->firstChild() == nullptr && "node already has children");
125 
126  auto *FirstToken = NodeTokens.begin();
127  auto BeginChildren = Trees.lower_bound(FirstToken);
128  assert(BeginChildren != Trees.end() &&
129  BeginChildren->first == FirstToken &&
130  "fold crosses boundaries of existing subtrees");
131  auto EndChildren = Trees.lower_bound(NodeTokens.end());
132  assert((EndChildren == Trees.end() ||
133  EndChildren->first == NodeTokens.end()) &&
134  "fold crosses boundaries of existing subtrees");
135 
136  // (!) we need to go in reverse order, because we can only prepend.
137  for (auto It = EndChildren; It != BeginChildren; --It)
138  Node->prependChildLowLevel(std::prev(It)->second.Node,
139  std::prev(It)->second.Role);
140 
141  Trees.erase(BeginChildren, EndChildren);
142  Trees.insert({FirstToken, NodeAndRole(Node)});
143  }
144 
145  // EXPECTS: all tokens were consumed and are owned by a single root node.
146  syntax::Node *finalize() && {
147  assert(Trees.size() == 1);
148  auto *Root = Trees.begin()->second.Node;
149  Trees = {};
150  return Root;
151  }
152 
153  std::string str(const syntax::Arena &A) const {
154  std::string R;
155  for (auto It = Trees.begin(); It != Trees.end(); ++It) {
156  unsigned CoveredTokens =
157  It != Trees.end()
158  ? (std::next(It)->first - It->first)
159  : A.tokenBuffer().expandedTokens().end() - It->first;
160 
161  R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
162  It->second.Node->kind(),
163  It->first->text(A.sourceManager()), CoveredTokens);
164  R += It->second.Node->dump(A);
165  }
166  return R;
167  }
168 
169  private:
170  /// A node with a role that should be assigned to it when adding to a parent.
171  struct NodeAndRole {
172  explicit NodeAndRole(syntax::Node *Node)
173  : Node(Node), Role(NodeRole::Unknown) {}
174 
176  NodeRole Role;
177  };
178 
179  /// Maps from the start token to a subtree starting at that token.
180  /// FIXME: storing the end tokens is redundant.
181  /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
182  std::map<const syntax::Token *, NodeAndRole> Trees;
183  };
184 
185  /// For debugging purposes.
186  std::string str() { return Pending.str(Arena); }
187 
188  syntax::Arena &Arena;
189  Forest Pending;
190 };
191 
192 namespace {
193 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
194 public:
195  explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
196  : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
197 
198  bool shouldTraversePostOrder() const { return true; }
199 
200  bool TraverseDecl(Decl *D) {
201  if (!D || isa<TranslationUnitDecl>(D))
203  if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
204  return true; // Only build top-level decls for now, do not recurse.
206  }
207 
208  bool VisitDecl(Decl *D) {
209  assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
210  "expected a top-level decl");
211  assert(!D->isImplicit());
212  Builder.foldNode(Builder.getRange(D),
213  new (allocator()) syntax::TopLevelDeclaration());
214  return true;
215  }
216 
217  bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
218  // (!) we do not want to call VisitDecl(), the declaration for translation
219  // unit is built by finalize().
220  return true;
221  }
222 
223  bool WalkUpFromCompoundStmt(CompoundStmt *S) {
224  using NodeRole = syntax::NodeRole;
225 
226  Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
228  Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
230 
231  Builder.foldNode(Builder.getRange(S),
232  new (allocator()) syntax::CompoundStatement);
233  return true;
234  }
235 
236 private:
237  /// A small helper to save some typing.
238  llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
239 
240  syntax::TreeBuilder &Builder;
241  const LangOptions &LangOpts;
242 };
243 } // namespace
244 
246  syntax::Tree *New) {
247  Pending.foldChildren(Range, New);
248 }
249 
251  tok::TokenKind Kind, NodeRole Role) {
252  if (Loc.isInvalid())
253  return;
254  Pending.assignRole(*findToken(Loc), Role);
255 }
256 
257 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
258  auto Tokens = Arena.tokenBuffer().expandedTokens();
259  auto &SM = Arena.sourceManager();
260  auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) {
261  return SM.isBeforeInTranslationUnit(T.location(), L);
262  });
263  assert(It != Tokens.end());
264  assert(It->location() == L);
265  return &*It;
266 }
267 
270  TreeBuilder Builder(A);
271  BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
272  return std::move(Builder).finalize();
273 }
SourceLocation getRBracLoc() const
Definition: Stmt.h:1418
Stmt - This represents one statement.
Definition: Stmt.h:66
Defines the SourceManager interface.
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:88
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:421
bool isBeforeInTranslationUnit(SourceLocation LHS, SourceLocation RHS) const
Determines the order of 2 source locations in the translation unit.
syntax::TranslationUnit * buildSyntaxTree(Arena &A, const clang::TranslationUnitDecl &TU)
Build a syntax tree for the main file.
Definition: BuildTree.cpp:269
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:425
void finalize(TemplateInstantiationCallbackPtrs &Callbacks, const Sema &TheSema)
void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R)
Set role for a token starting at Loc.
Definition: BuildTree.cpp:250
void foldNode(llvm::ArrayRef< syntax::Token > Range, syntax::Tree *New)
Populate children for New node, assuming it covers tokens from Range.
Definition: BuildTree.cpp:245
bool TraverseDecl(Decl *D)
Recursively visit a declaration, by dispatching to Traverse*Decl() based on the argument's dynamic ty...
llvm::BumpPtrAllocator & allocator()
Definition: BuildTree.cpp:49
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:154
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:263
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:49
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getLBracLoc() const
Definition: Stmt.h:1417
ASTContext & getASTContext() const
Definition: Decl.h:119
llvm::ArrayRef< syntax::Token > getRange(const Stmt *S) const
Definition: BuildTree.cpp:83
A class that does preorder or postorder depth-first traversal on the entire Clang AST and visits each...
CompoundStmt - This represents a group of statements like { stmt stmt }.
Definition: Stmt.h:1310
A memory arena for syntax trees.
Definition: Tree.h:39
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:558
DeclContext * getDeclContext()
Definition: DeclBase.h:438
const SourceManager & SM
Definition: Format.cpp:1572
SourceLocation getEndLoc() const LLVM_READONLY
Definition: Stmt.cpp:276
Kind
Encodes a location in the source.
A helper class for constructing the syntax tree while traversing a clang AST.
Definition: BuildTree.cpp:45
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:24
syntax::TranslationUnit * finalize() &&
Finish building the tree and consume the root node.
Definition: BuildTree.cpp:59
ast_type_traits::DynTypedNode Node
NodeRole
A relation between a parent and child node. Used for implementing accessors.
Definition: Nodes.h:35
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
SyntaxTree::Impl & Tree
Definition: ASTDiff.cpp:192
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
llvm::ArrayRef< syntax::Token > getRange(const Decl *D) const
Definition: BuildTree.cpp:80
The top declaration context.
Definition: Decl.h:107
TreeBuilder(syntax::Arena &Arena)
Definition: BuildTree.cpp:47
const LangOptions & getLangOpts() const
Definition: ASTContext.h:710
llvm::ArrayRef< syntax::Token > getRange(SourceLocation First, SourceLocation Last) const
getRange() finds the syntax tokens corresponding to the passed source locations.
Definition: BuildTree.cpp:72