LLVM  13.0.0git
OMPIRBuilder.cpp
Go to the documentation of this file.
1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/IR/CFG.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/MDBuilder.h"
24 #include "llvm/Support/Error.h"
27 
28 #include <sstream>
29 
30 #define DEBUG_TYPE "openmp-ir-builder"
31 
32 using namespace llvm;
33 using namespace omp;
34 
// Hidden boolean command-line option "openmp-ir-builder-optimistic-attributes",
// off by default; per its description it selects optimistic attributes that
// describe 'as-if' properties of runtime calls.
// NOTE(review): no use of this flag is visible in this listing — presumably it
// is consumed by the attribute macros in OMPKinds.def; confirm.
35 static cl::opt<bool>
36  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
37  cl::desc("Use optimistic attributes describing "
38  "'as-if' properties of runtime calls."),
39  cl::init(false));
40 
// NOTE(review): the signature line of this definition is missing from this
// listing. The body reads `FnID` and `Fn`, and a call `addAttributes(FnID, *Fn)`
// appears later in the file, so this is presumably the body of addAttributes.
//
// Merges the attribute sets that OMPKinds.def associates with the runtime
// function `FnID` into the attributes `Fn` already carries. Functions without
// an OMP_RTL_ATTRS entry fall through to the default case and stay unchanged.
42  LLVMContext &Ctx = Fn.getContext();
43 
44  // Get the function's current attributes.
45  auto Attrs = Fn.getAttributes();
46  auto FnAttrs = Attrs.getFnAttributes();
47  auto RetAttrs = Attrs.getRetAttributes();
// NOTE(review): `ArgAttrs` is declared on a line missing from this listing.
// It collects one parameter attribute set per argument of Fn.
49  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
50  ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo));
51 
// Every OMP_ATTRS_SET entry in OMPKinds.def becomes a local AttributeSet
// variable that the OMP_RTL_ATTRS cases below can refer to by name.
52 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
53 #include "llvm/Frontend/OpenMP/OMPKinds.def"
54 
55  // Add attributes to the function declaration.
// Each OMP_RTL_ATTRS entry expands to one case that extends the function,
// return and per-argument sets and then installs the combined list on Fn.
56  switch (FnID) {
57 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
58  case Enum: \
59  FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
60  RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
61  for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
62  ArgAttrs[ArgNo] = \
63  ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
64  Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
65  break;
66 #include "llvm/Frontend/OpenMP/OMPKinds.def"
67  default:
68  // Attributes are optional.
69  break;
70  }
71 }
72 
// NOTE(review): the signature lines of this definition are missing from this
// listing. The body reads `M` and `FnID` and returns a {type, callee} pair, and
// `getOrCreateRuntimeFunction(M, FnID)` is called later in the file, so this is
// presumably the body of getOrCreateRuntimeFunction.
//
// Looks up the OpenMP runtime function identified by `FnID` in module `M` and
// declares it with external linkage if it is not there yet. A freshly created
// declaration additionally receives the attributes from addAttributes and, for
// __kmpc_fork_call / __kmpc_fork_teams, !callback metadata.
75  FunctionType *FnTy = nullptr;
76  Function *Fn = nullptr;
77 
78  // Try to find the declaration in the module first.
// Each OMP_RTL entry expands to a case that builds the expected function type
// and looks the function up by name.
79  switch (FnID) {
80 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
81  case Enum: \
82  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
83  IsVarArg); \
84  Fn = M.getFunction(Str); \
85  break;
86 #include "llvm/Frontend/OpenMP/OMPKinds.def"
87  }
88 
89  if (!Fn) {
90  // Create a new declaration if we need one.
91  switch (FnID) {
92 #define OMP_RTL(Enum, Str, ...) \
93  case Enum: \
94  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
95  break;
96 #include "llvm/Frontend/OpenMP/OMPKinds.def"
97  }
98 
99  // Add information if the runtime function takes a callback function
100  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
101  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
102  LLVMContext &Ctx = Fn->getContext();
103  MDBuilder MDB(Ctx);
104  // Annotate the callback behavior of the runtime function:
105  // - The callback callee is argument number 2 (microtask).
106  // - The first two arguments of the callback callee are unknown (-1).
107  // - All variadic arguments to the runtime function are passed to the
108  // callback callee.
// NOTE(review): the line that opens the metadata node expression between
// the next two lines is missing from this listing.
109  Fn->addMetadata(
110  LLVMContext::MD_callback,
112  2, {-1, -1}, /* VarArgsArePassed */ true)}));
113  }
114  }
115 
116  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
117  << " with type " << *Fn->getFunctionType() << "\n");
118  addAttributes(FnID, *Fn);
119 
120  } else {
121  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
122  << " with type " << *Fn->getFunctionType() << "\n");
123  }
124 
125  assert(Fn && "Failed to create OpenMP runtime function");
126 
127  // Cast the function to the expected type if necessary
// NOTE(review): `C` is defined on a line missing from this listing; going by
// the comment above it is presumably Fn cast to a pointer to FnTy — confirm.
129  return {FnTy, C};
130 }
131 
// NOTE(review): the signature line is missing from this listing; presumably
// this is getOrCreateRuntimeFunctionPtr, which other code in this file calls
// with a single runtime-function ID.
//
// Fetches the runtime function for `FnID` in `M` and returns its callee as a
// plain Function pointer; asserts if the callee is not a Function.
133  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
134  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
135  assert(Fn && "Failed to create OpenMP runtime function pointer");
136  return Fn;
137 }
138 
139 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
140 
// Outlines every pending OutlineInfo whose parent function is `Fn`, or all of
// them when `Fn` is null. Entries that belong to other functions are kept in
// OutlineInfos for a later call.
//
// For each processed entry the region blocks are extracted into a new function
// (suffix ".omp_par"), that function is placed right after its parent in the
// module, the extractor's artificial entry block is removed, and the entry's
// PostOutlineCB (if any) is invoked on the outlined function.
//
// When `AllowExtractorSinking` is true, non-terminator instructions found in
// the artificial entry block are moved into the region's entry block before
// the artificial block is erased.
141 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
142  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
// NOTE(review): `Blocks`, used below, is declared on a line missing from this
// listing.
144  SmallVector<OutlineInfo, 16> DeferredOutlines;
145  for (OutlineInfo &OI : OutlineInfos) {
146  // Skip functions that have not finalized yet; may happen with nested
147  // function generation.
148  if (Fn && OI.getFunction() != Fn) {
149  DeferredOutlines.push_back(OI);
150  continue;
151  }
152 
153  ParallelRegionBlockSet.clear();
154  Blocks.clear();
155  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
156 
157  Function *OuterFn = OI.getFunction();
158  CodeExtractorAnalysisCache CEAC(*OuterFn);
159  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
160  /* AggregateArgs */ false,
161  /* BlockFrequencyInfo */ nullptr,
162  /* BranchProbabilityInfo */ nullptr,
163  /* AssumptionCache */ nullptr,
164  /* AllowVarArgs */ true,
165  /* AllowAlloca */ true,
166  /* Suffix */ ".omp_par");
167 
168  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
169  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
170  << " Exit: " << OI.ExitBB->getName() << "\n");
171  assert(Extractor.isEligible() &&
172  "Expected OpenMP outlining to be possible!");
173 
174  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
175 
176  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
177  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
178  assert(OutlinedFn->getReturnType()->isVoidTy() &&
179  "OpenMP outlined functions should not return a value!");
180 
181  // For compatibility with the clang CG we move the outlined function after the
182  // one with the parallel region.
183  OutlinedFn->removeFromParent();
184  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
185 
186  // Remove the artificial entry introduced by the extractor right away, we
187  // made our own entry block after all.
188  {
189  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
190  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
191  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
192  if (AllowExtractorSinking) {
193  // Move instructions from the to-be-deleted ArtificialEntry to the entry
194  // basic block of the parallel region. CodeExtractor may have sunk
195  // allocas/bitcasts for values that are solely used in the outlined
196  // region and do not escape.
197  assert(!ArtificialEntry.empty() &&
198  "Expected instructions to sink in the outlined region");
// The iterator is advanced before the instruction is moved so that moving
// it out of this block does not disturb the traversal.
199  for (BasicBlock::iterator It = ArtificialEntry.begin(),
200  End = ArtificialEntry.end();
201  It != End;) {
202  Instruction &I = *It;
203  It++;
204 
205  if (I.isTerminator())
206  continue;
207 
208  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
209  }
210  }
211  OI.EntryBB->moveBefore(&ArtificialEntry);
212  ArtificialEntry.eraseFromParent();
213  }
214  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
// NOTE(review): the null check in this assert comes after OutlinedFn has
// already been dereferenced several times above, so it cannot catch a null
// result from extractCodeRegion.
215  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
216 
217  // Run a user callback, e.g. to add attributes.
218  if (OI.PostOutlineCB)
219  OI.PostOutlineCB(*OutlinedFn);
220  }
221 
222  // Remove work items that have been completed.
223  OutlineInfos = std::move(DeferredOutlines);
224 }
225 
// NOTE(review): the signature line is missing from this listing — presumably
// the class destructor; confirm.
// Checks that no outlining work is still queued in OutlineInfos.
227  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
228 }
229 
// NOTE(review): the first signature line is missing from this listing. The
// body also reads `SrcLocStr`, and `getOrCreateIdent(SrcLocStr, ...)` is called
// elsewhere in this file, so this is presumably getOrCreateIdent.
//
// Returns a pointer, cast to IdentPtr, to a constant global that encodes the
// source location string together with `LocFlags` and `Reserve2Flags`. Results
// are cached in IdentMap; an existing module global with an identical
// initializer is reused instead of creating a new one.
231  IdentFlag LocFlags,
232  unsigned Reserve2Flags) {
233  // Enable "C-mode".
234  LocFlags |= OMP_IDENT_FLAG_KMPC;
235 
// NOTE(review): LocFlags is shifted by 31 rather than 32, so its lowest bit
// shares bit 31 of the cache key with Reserve2Flags — confirm this is intended.
236  Value *&Ident =
237  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
238  if (!Ident) {
239  Constant *I32Null = ConstantInt::getNullValue(Int32);
// Field order of the initializer: 0, flags, reserved-2 flags, 0, location
// string.
240  Constant *IdentData[] = {
241  I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
242  ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
243  Constant *Initializer = ConstantStruct::get(
244  cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
245 
246  // Look for existing encoding of the location + flags, not needed but
247  // minimizes the difference to the existing solution while we transition.
// Note that this early return hands back the global itself, without the
// pointer cast applied on the regular return path below.
248  for (GlobalVariable &GV : M.getGlobalList())
249  if (GV.getType() == IdentPtr && GV.hasInitializer())
250  if (GV.getInitializer() == Initializer)
251  return Ident = &GV;
252 
253  auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
254  /* isConstant = */ true,
255  GlobalValue::PrivateLinkage, Initializer);
256  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
257  GV->setAlignment(Align(8));
258  Ident = GV;
259  }
260  return Builder.CreatePointerCast(Ident, IdentPtr);
261 }
262 
// NOTE(review): the signature line is missing from this listing; the function
// name cannot be confirmed from the visible code.
// Returns the 64-bit integer type when the module's target triple is AMDGCN
// and the 32-bit integer type for every other target.
264  LLVMContext &Ctx = M.getContext();
265  Triple triple(M.getTargetTriple());
266 
267  // This test is adequate until deviceRTL has finer grained lane widths
268  return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
269 }
270 
// NOTE(review): the signature line is missing from this listing; the body
// reads `LocStr`, so this is presumably the getOrCreateSrcLocStr overload that
// takes the already formatted location string.
//
// Returns a constant for the location string `LocStr`, cached in SrcLocStrMap.
// A constant module global whose initializer equals the string is reused;
// otherwise a new global string is created through the builder.
272  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
273  if (!SrcLocStr) {
274  Constant *Initializer =
275  ConstantDataArray::getString(M.getContext(), LocStr);
276 
277  // Look for existing encoding of the location, not needed but minimizes the
278  // difference to the existing solution while we transition.
279  for (GlobalVariable &GV : M.getGlobalList())
280  if (GV.isConstant() && GV.hasInitializer() &&
281  GV.getInitializer() == Initializer)
282  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
283 
284  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
285  /* AddressSpace */ 0, &M);
286  }
287  return SrcLocStr;
288 }
289 
// NOTE(review): the first signature line is missing from this listing; the
// body also reads `FunctionName`, which is presumably the first parameter.
//
// Formats the location as ";<FileName>;<FunctionName>;<Line>;<Column>;;" and
// returns the constant the string-based getOrCreateSrcLocStr yields for it.
291  StringRef FileName,
292  unsigned Line,
293  unsigned Column) {
294  SmallString<128> Buffer;
295  Buffer.push_back(';');
296  Buffer.append(FileName);
297  Buffer.push_back(';');
298  Buffer.append(FunctionName);
299  Buffer.push_back(';');
300  Buffer.append(std::to_string(Line));
301  Buffer.push_back(';');
302  Buffer.append(std::to_string(Column));
303  Buffer.push_back(';');
304  Buffer.push_back(';');
305  return getOrCreateSrcLocStr(Buffer.str());
306 }
307 
// NOTE(review): the signature line is missing from this listing; presumably
// getOrCreateDefaultSrcLocStr, which is called later in this file when no
// debug location is available.
// Returns the constant for the placeholder location ";unknown;unknown;0;0;;".
309  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
310 }
311 
// NOTE(review): the line carrying the function name and parameter list is
// missing from this listing; the body reads `Loc`, so this is presumably the
// getOrCreateSrcLocStr overload that takes a location description.
//
// Builds the location string from the debug location attached to `Loc`. With
// no debug location the default placeholder string is returned. The file name
// defaults to the module name, and the function name falls back to the name of
// the function that contains the insertion block when the subprogram name is
// empty.
312 Constant *
314  DILocation *DIL = Loc.DL.get();
315  if (!DIL)
316  return getOrCreateDefaultSrcLocStr();
317  StringRef FileName = M.getName();
// NOTE(review): DIFile::getSource() normally yields embedded source text
// rather than a path — confirm this is the intended value for FileName.
318  if (DIFile *DIF = DIL->getFile())
319  if (Optional<StringRef> Source = DIF->getSource())
320  FileName = *Source;
// NOTE(review): the subprogram of the scope is dereferenced without a null
// check.
321  StringRef Function = DIL->getScope()->getSubprogram()->getName();
322  Function =
323  !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
324  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
325  DIL->getColumn());
326 }
327 
// NOTE(review): the signature line is missing from this listing; presumably
// getOrCreateThreadID, which other code in this file calls with an ident value.
// Emits a call to __kmpc_global_thread_num with `Ident` as its argument and
// returns the call, named "omp_global_thread_num".
329  return Builder.CreateCall(
330  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
331  "omp_global_thread_num");
332 }
333 
// NOTE(review): the leading signature lines are missing from this listing; the
// body also reads `Loc` and `DK`. Presumably createBarrier, which is called
// with matching arguments later in this file.
//
// Moves the builder to `Loc` and emits a barrier there via emitBarrierImpl.
// When updateToLocation(Loc) returns false nothing is emitted and Loc.IP is
// returned.
336  bool ForceSimpleCall, bool CheckCancelFlag) {
337  if (!updateToLocation(Loc))
338  return Loc.IP;
339  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
340 }
341 
// NOTE(review): the leading signature lines are missing from this listing; the
// body also reads `Loc` and `Kind`. Presumably emitBarrierImpl, which the
// barrier entry point in this file calls.
//
// Emits the barrier runtime call at the current insertion point and returns
// the insertion point after it. The directive kind selects the ident flags.
// Unless `ForceSimpleCall` is set, a cancellable enclosing parallel region
// makes this a __kmpc_cancel_barrier call, and with `CheckCancelFlag` the
// result is additionally routed through the cancellation check.
344  bool ForceSimpleCall, bool CheckCancelFlag) {
345  // Build call __kmpc_cancel_barrier(loc, thread_id) or
346  // __kmpc_barrier(loc, thread_id);
347 
348  IdentFlag BarrierLocFlags;
349  switch (Kind) {
350  case OMPD_for:
351  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
352  break;
353  case OMPD_sections:
354  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
355  break;
356  case OMPD_single:
357  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
358  break;
359  case OMPD_barrier:
360  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
361  break;
362  default:
363  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
364  break;
365  }
366 
// The barrier's own ident carries the barrier flags; the thread ID lookup
// uses an ident created from the same location string without those flags.
367  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
368  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
369  getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
370 
371  // If we are in a cancellable parallel region, barriers are cancellation
372  // points.
373  // TODO: Check why we would force simple calls or to ignore the cancel flag.
374  bool UseCancelBarrier =
375  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
376 
377  Value *Result =
378  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
379  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
380  : OMPRTL___kmpc_barrier),
381  Args);
382 
383  if (UseCancelBarrier && CheckCancelFlag)
384  emitCancelationCheckImpl(Result, OMPD_parallel);
385 
386  return Builder.saveIP();
387 }
388 
// NOTE(review): the leading signature lines are missing from this listing; the
// body also reads `Loc`. Presumably createCancel — confirm.
//
// Emits a __kmpc_cancel call for `CanceledDirective` at `Loc`, guarded by
// `IfCondition` when one is given, followed by the shared cancellation check.
// Returns the insertion point after the construct, or Loc.IP when
// updateToLocation(Loc) returns false.
391  Value *IfCondition,
392  omp::Directive CanceledDirective) {
393  if (!updateToLocation(Loc))
394  return Loc.IP;
395 
396  // LLVM utilities like blocks with terminators.
397  auto *UI = Builder.CreateUnreachable();
398 
// Without an if-condition the cancel code is emitted directly in front of
// the temporary terminator.
399  Instruction *ThenTI = UI, *ElseTI = nullptr;
400  if (IfCondition)
401  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
402  Builder.SetInsertPoint(ThenTI);
403 
// Map the cancelled directive to the integer kind given in OMPKinds.def.
404  Value *CancelKind = nullptr;
405  switch (CanceledDirective) {
406 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
407  case DirectiveEnum: \
408  CancelKind = Builder.getInt32(Value); \
409  break;
410 #include "llvm/Frontend/OpenMP/OMPKinds.def"
411  default:
412  llvm_unreachable("Unknown cancel kind!");
413  }
414 
415  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
416  Value *Ident = getOrCreateIdent(SrcLocStr);
417  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
418  Value *Result = Builder.CreateCall(
419  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
// When a parallel region is cancelled, a barrier is emitted on the
// cancellation path before finalization runs.
// NOTE(review): one line inside this lambda is missing from this listing.
420  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
421  if (CanceledDirective == OMPD_parallel) {
423  Builder.restoreIP(IP);
424  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
425  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
426  /* CheckCancelFlag */ false);
427  }
428  };
429 
430  // The actual cancel logic is shared with others, e.g., cancel_barriers.
431  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
432 
433  // Update the insertion point and remove the terminator we introduced.
434  Builder.SetInsertPoint(UI->getParent());
435  UI->eraseFromParent();
436 
437  return Builder.saveIP();
438 }
439 
// NOTE(review): the first signature line is missing from this listing; the
// body also reads `CancelFlag`. Presumably emitCancelationCheckImpl, which is
// called elsewhere in this file.
//
// Branches on `CancelFlag`: a null value continues in a continuation block,
// anything else jumps to a new cancellation block. In the cancellation block
// `ExitCB` (if set) runs first, then the finalization callback on top of
// FinalizationStack. On return the builder is positioned at the start of the
// continuation block.
441  omp::Directive CanceledDirective,
442  FinalizeCallbackTy ExitCB) {
443  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
444  "Unexpected cancellation!");
445 
446  // For a cancel barrier we create two new blocks.
447  BasicBlock *BB = Builder.GetInsertBlock();
448  BasicBlock *NonCancellationBlock;
449  if (Builder.GetInsertPoint() == BB->end()) {
450  // TODO: This branch will not be needed once we moved to the
451  // OpenMPIRBuilder codegen completely.
452  NonCancellationBlock = BasicBlock::Create(
453  BB->getContext(), BB->getName() + ".cont", BB->getParent());
454  } else {
// Split at the insertion point and drop the terminator left in BB by the
// split so the conditional branch below can take its place.
455  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
456  BB->getTerminator()->eraseFromParent();
457  Builder.SetInsertPoint(BB);
458  }
459  BasicBlock *CancellationBlock = BasicBlock::Create(
460  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
461 
462  // Jump to them based on the return value.
463  Value *Cmp = Builder.CreateIsNull(CancelFlag);
464  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
465  /* TODO weight */ nullptr, nullptr);
466 
467  // From the cancellation block we finalize all variables and go to the
468  // post finalization block that is known to the FiniCB callback.
469  Builder.SetInsertPoint(CancellationBlock);
470  if (ExitCB)
471  ExitCB(Builder.saveIP());
472  auto &FI = FinalizationStack.back();
473  FI.FiniCB(Builder.saveIP());
474 
475  // The continuation block is where code generation continues.
476  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
477 }
478 
// NOTE(review): the first signature line is missing from this listing —
// presumably createParallel; confirm.
//
// Emits the IR skeleton of an OpenMP parallel region at `Loc`:
//  - optional __kmpc_push_num_threads / __kmpc_push_proc_bind calls,
//  - the entry, body, pre-finalize and exit blocks of the region,
//  - the region body through `BodyGenCB` and the finalization through `FiniCB`,
//  - privatization of every value flowing into the region through `PrivCB`,
//  - an OutlineInfo whose PostOutlineCB later replaces the call to the outlined
//    function with __kmpc_fork_call, and with a serialized path in the else
//    branch when `IfCondition` is given.
// The outlining itself happens later, in finalize(). Returns the insertion
// point after the region, or Loc.IP when updateToLocation(Loc) returns false.
480  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
481  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
482  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
483  omp::ProcBindKind ProcBind, bool IsCancellable) {
484  if (!updateToLocation(Loc))
485  return Loc.IP;
486 
487  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
488  Value *Ident = getOrCreateIdent(SrcLocStr);
489  Value *ThreadID = getOrCreateThreadID(Ident);
490 
491  if (NumThreads) {
492  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
493  Value *Args[] = {
494  Ident, ThreadID,
495  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
496  Builder.CreateCall(
497  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
498  }
499 
500  if (ProcBind != OMP_PROC_BIND_default) {
501  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
502  Value *Args[] = {
503  Ident, ThreadID,
504  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
505  Builder.CreateCall(
506  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
507  }
508 
509  BasicBlock *InsertBB = Builder.GetInsertBlock();
510  Function *OuterFn = InsertBB->getParent();
511 
512  // Save the outer alloca block because the insertion iterator may get
513  // invalidated and we still need this later.
514  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
515 
516  // Vector to remember instructions we used only during the modeling but which
517  // we want to delete at the end.
518  SmallVector<Instruction *, 4> ToBeDeleted;
519 
520  // Change the location to the outer alloca insertion point to create and
521  // initialize the allocas we pass into the parallel region.
522  Builder.restoreIP(OuterAllocaIP);
523  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
524  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
525 
526  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
527  // program, otherwise we only need them for modeling purposes to get the
528  // associated arguments in the outlined function. In the former case,
529  // initialize the allocas properly, in the latter case, delete them later.
530  if (IfCondition) {
531  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
532  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
533  } else {
534  ToBeDeleted.push_back(TIDAddr);
535  ToBeDeleted.push_back(ZeroAddr);
536  }
537 
538  // Create an artificial insertion point that will also ensure the blocks we
539  // are about to split are not degenerated.
540  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
541 
542  Instruction *ThenTI = UI, *ElseTI = nullptr;
543  if (IfCondition)
544  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
545 
// Each split happens at ThenTI, so ThenTI ends up in the last block created
// (the exit block) and every earlier block holds only the branch to its
// successor.
546  BasicBlock *ThenBB = ThenTI->getParent();
547  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
548  BasicBlock *PRegBodyBB =
549  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
550  BasicBlock *PRegPreFiniBB =
551  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
552  BasicBlock *PRegExitBB =
553  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
554 
555  auto FiniCBWrapper = [&](InsertPointTy IP) {
556  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
557  // target to the region exit block.
// NOTE(review): one line inside this branch is missing from this listing.
558  if (IP.getBlock()->end() == IP.getPoint()) {
560  Builder.restoreIP(IP);
561  Instruction *I = Builder.CreateBr(PRegExitBB);
562  IP = InsertPointTy(I->getParent(), I->getIterator());
563  }
564  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
565  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
566  "Unexpected insertion point for finalization call!");
567  return FiniCB(IP);
568  };
569 
570  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
571 
572  // Generate the privatization allocas in the block that will become the entry
573  // of the outlined function.
574  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
575  InsertPointTy InnerAllocaIP = Builder.saveIP();
576 
577  AllocaInst *PrivTIDAddr =
578  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
579  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
580 
581  // Add some fake uses for OpenMP provided arguments.
582  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
583  Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
584  "zero.addr.use");
585  ToBeDeleted.push_back(ZeroAddrUse);
586 
587  // ThenBB
588  // |
589  // V
590  // PRegionEntryBB <- Privatization allocas are placed here.
591  // |
592  // V
593  // PRegionBodyBB <- BodyGen is invoked here.
594  // |
595  // V
596  // PRegPreFiniBB <- The block we will start finalization from.
597  // |
598  // V
599  // PRegionExitBB <- A common exit to simplify block collection.
600  //
601 
602  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
603 
604  // Let the caller create the body.
605  assert(BodyGenCB && "Expected body generation callback!");
606  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
607  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
608 
609  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
610 
// NOTE(review): freshly created __kmpc_fork_call declarations already get
// this !callback annotation in the `FnID == OMPRTL___kmpc_fork_call` branch
// earlier in this file, so this only matters when the metadata is still
// absent. One line of the metadata expression below is missing from this
// listing.
611  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
612  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
613  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
614  llvm::LLVMContext &Ctx = F->getContext();
615  MDBuilder MDB(Ctx);
616  // Annotate the callback behavior of the __kmpc_fork_call:
617  // - The callback callee is argument number 2 (microtask).
618  // - The first two arguments of the callback callee are unknown (-1).
619  // - All variadic arguments to the __kmpc_fork_call are passed to the
620  // callback callee.
621  F->addMetadata(
622  llvm::LLVMContext::MD_callback,
624  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
625  /* VarArgsArePassed */ true)}));
626  }
627  }
628 
// The callback is stored in the OutlineInfo and invoked from finalize()
// after this function has returned, hence the by-copy capture.
629  OutlineInfo OI;
630  OI.PostOutlineCB = [=](Function &OutlinedFn) {
631  // Add some known attributes.
632  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
633  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
634  OutlinedFn.addFnAttr(Attribute::NoUnwind);
635  OutlinedFn.addFnAttr(Attribute::NoRecurse);
636 
637  assert(OutlinedFn.arg_size() >= 2 &&
638  "Expected at least tid and bounded tid as arguments");
639  unsigned NumCapturedVars =
640  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
641 
642  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
643  CI->getParent()->setName("omp_parallel");
644  Builder.SetInsertPoint(CI);
645 
646  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
647  Value *ForkCallArgs[] = {
648  Ident, Builder.getInt32(NumCapturedVars),
649  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
650 
651  SmallVector<Value *, 16> RealArgs;
652  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
653  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
654 
655  Builder.CreateCall(RTLFn, RealArgs);
656 
657  LLVM_DEBUG(dbgs() << "With fork_call placed: "
658  << *Builder.GetInsertBlock()->getParent() << "\n");
659 
660  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
661 
662  // Initialize the local TID stack location with the argument value.
663  Builder.SetInsertPoint(PrivTID);
664  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
665  Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
666 
667  // If no "if" clause was present we do not need the call created during
668  // outlining, otherwise we reuse it in the serialized parallel region.
669  if (!ElseTI) {
670  CI->eraseFromParent();
671  } else {
672 
673  // If an "if" clause was present we are now generating the serialized
674  // version into the "else" branch.
675  Builder.SetInsertPoint(ElseTI);
676 
677  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
678  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
679  Builder.CreateCall(
680  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
681  SerializedParallelCallArgs);
682 
683  // OutlinedFn(&GTid, &zero, CapturedStruct);
684  CI->removeFromParent();
685  Builder.Insert(CI);
686 
687  // __kmpc_end_serialized_parallel(&Ident, GTid);
688  Value *EndArgs[] = {Ident, ThreadID};
689  Builder.CreateCall(
690  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
691  EndArgs);
692 
693  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
694  << *Builder.GetInsertBlock()->getParent() << "\n");
695  }
696 
697  for (Instruction *I : ToBeDeleted)
698  I->eraseFromParent();
699  };
700 
701  // Adjust the finalization stack, verify the adjustment, and call the
702  // finalize function a last time to finalize values between the pre-fini
703  // block and the exit block if we left the parallel "the normal way".
704  auto FiniInfo = FinalizationStack.pop_back_val();
705  (void)FiniInfo;
706  assert(FiniInfo.DK == OMPD_parallel &&
707  "Unexpected finalization stack state!");
708 
709  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
710 
711  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
712  FiniCB(PreFiniIP);
713 
714  OI.EntryBB = PRegEntryBB;
715  OI.ExitBB = PRegExitBB;
716 
// NOTE(review): `Blocks` is declared on a line missing from this listing.
717  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
719  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
720 
721  // Ensure a single exit node for the outlined region by creating one.
722  // We might have multiple incoming edges to the exit now due to finalizations,
723  // e.g., cancel calls that cause the control flow to leave the region.
724  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
725  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
726  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
727  Blocks.push_back(PRegOutlinedExitBB);
728 
// This extractor is only queried for the region's inputs and outputs; the
// extraction itself is performed in finalize().
729  CodeExtractorAnalysisCache CEAC(*OuterFn);
730  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
731  /* AggregateArgs */ false,
732  /* BlockFrequencyInfo */ nullptr,
733  /* BranchProbabilityInfo */ nullptr,
734  /* AssumptionCache */ nullptr,
735  /* AllowVarArgs */ true,
736  /* AllowAlloca */ true,
737  /* Suffix */ ".omp_par");
738 
739  // Find inputs to, outputs from the code region.
740  BasicBlock *CommonExit = nullptr;
741  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
742  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
743  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
744 
745  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
746 
747  FunctionCallee TIDRTLFn =
748  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
749 
// Privatizes one region input: collects its uses inside the region, lets
// PrivCB (or the private thread ID) supply a replacement, and rewrites
// those uses. The two modeling allocas are skipped.
750  auto PrivHelper = [&](Value &V) {
751  if (&V == TIDAddr || &V == ZeroAddr)
752  return;
753 
// NOTE(review): `Uses` is declared on a line missing from this listing.
755  for (Use &U : V.uses())
756  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
757  if (ParallelRegionBlockSet.count(UserI->getParent()))
758  Uses.insert(&U);
759 
760  // __kmpc_fork_call expects extra arguments as pointers. If the input
761  // already has a pointer type, everything is fine. Otherwise, store the
762  // value onto stack and load it back inside the to-be-outlined region. This
763  // will ensure only the pointer will be passed to the function.
764  // FIXME: if there are more than 15 trailing arguments, they must be
765  // additionally packed in a struct.
766  Value *Inner = &V;
767  if (!V.getType()->isPointerTy()) {
769  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
770 
771  Builder.restoreIP(OuterAllocaIP);
772  Value *Ptr =
773  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
774 
775  // Store to stack at end of the block that currently branches to the entry
776  // block of the to-be-outlined region.
777  Builder.SetInsertPoint(InsertBB,
778  InsertBB->getTerminator()->getIterator());
779  Builder.CreateStore(&V, Ptr);
780 
781  // Load back next to allocations in the to-be-outlined region.
782  Builder.restoreIP(InnerAllocaIP);
783  Inner = Builder.CreateLoad(V.getType(), Ptr);
784  }
785 
// A call to __kmpc_global_thread_num made outside the region is replaced
// by the thread ID loaded inside the region instead of being privatized.
786  Value *ReplacementValue = nullptr;
787  CallInst *CI = dyn_cast<CallInst>(&V);
788  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
789  ReplacementValue = PrivTID;
790  } else {
791  Builder.restoreIP(
792  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
793  assert(ReplacementValue &&
794  "Expected copy/create callback to set replacement value!");
795  if (ReplacementValue == &V)
796  return;
797  }
798 
799  for (Use *UPtr : Uses)
800  UPtr->set(ReplacementValue);
801  };
802 
803  // Reset the inner alloca insertion as it will be used for loading the values
804  // wrapped into pointers before passing them into the to-be-outlined region.
805  // Configure it to insert immediately after the fake use of zero address so
806  // that they are available in the generated body and so that the
807  // OpenMP-related values (thread ID and zero address pointers) remain leading
808  // in the argument list.
809  InnerAllocaIP = IRBuilder<>::InsertPoint(
810  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
811 
812  // Reset the outer alloca insertion point to the entry of the relevant block
813  // in case it was invalidated.
814  OuterAllocaIP = IRBuilder<>::InsertPoint(
815  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
816 
817  for (Value *Input : Inputs) {
818  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
819  PrivHelper(*Input);
820  }
// NOTE(review): the inner LLVM_DEBUG is nested inside an outer LLVM_DEBUG
// block, which looks redundant.
821  LLVM_DEBUG({
822  for (Value *Output : Outputs)
823  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
824  });
825  assert(Outputs.empty() &&
826  "OpenMP outlining should not produce live-out values!");
827 
828  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
829  LLVM_DEBUG({
830  for (auto *BB : Blocks)
831  dbgs() << " PBR: " << BB->getName() << "\n";
832  });
833 
834  // Register the outlined info.
835  addOutlineInfo(std::move(OI));
836 
// Code generation continues at the end of the block that held the
// temporary unreachable, which is removed here.
837  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
838  UI->eraseFromParent();
839 
840  return AfterIP;
841 }
842 
// NOTE(review): the signature line is missing from this listing; presumably
// emitFlush, which the flush entry point in this file calls with `Loc`.
// Emits a __kmpc_flush call whose only argument is the ident for `Loc`.
844  // Build call void __kmpc_flush(ident_t *loc)
845  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
846  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
847 
848  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
849 }
850 
// NOTE(review): the signature line is missing from this listing — presumably
// the public flush entry point (createFlush); confirm.
// Moves the builder to `Loc` and emits the flush there; does nothing when
// updateToLocation(Loc) returns false.
852  if (!updateToLocation(Loc))
853  return;
854  emitFlush(Loc);
855 }
856 
// NOTE(review): the signature line is missing from this listing; presumably
// emitTaskwaitImpl, which the taskwait entry point in this file calls with
// `Loc`.
// Emits a __kmpc_omp_taskwait call with the ident for `Loc` and the thread ID.
858  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
859  // global_tid);
860  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
861  Value *Ident = getOrCreateIdent(SrcLocStr);
862  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
863 
864  // Ignore return result until untied tasks are supported.
865  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
866  Args);
867 }
868 
// NOTE(review): the signature line is missing from this listing — presumably
// the public taskwait entry point (createTaskwait); confirm.
// Moves the builder to `Loc` and emits the taskwait there; does nothing when
// updateToLocation(Loc) returns false.
870  if (!updateToLocation(Loc))
871  return;
872  emitTaskwaitImpl(Loc);
873 }
874 
// NOTE(review): the signature line is missing from this listing; presumably
// emitTaskyieldImpl, which the taskyield entry point in this file calls with
// `Loc`.
// Emits a __kmpc_omp_taskyield call with the ident for `Loc`, the thread ID
// and a constant zero as the third argument.
876  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
877  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
878  Value *Ident = getOrCreateIdent(SrcLocStr);
879  Constant *I32Null = ConstantInt::getNullValue(Int32);
880  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
881 
882  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
883  Args);
884 }
885 
// NOTE(review): the signature line is missing from this listing — presumably
// the public taskyield entry point (createTaskyield); confirm.
// Moves the builder to `Loc` and emits the taskyield there; does nothing when
// updateToLocation(Loc) returns false.
887  if (!updateToLocation(Loc))
888  return;
889  emitTaskyieldImpl(Loc);
890 }
891 
893  const LocationDescription &Loc, InsertPointTy AllocaIP,
895  FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
     // Lowers a sections construct to a canonical loop over the section index
     // whose body is a switch with one case per callback in SectionCBs; the
     // loop is then converted into a static workshare loop and the finalization
     // callback is run in the loop's "after" block.
     // NOTE(review): IsNowait is not consulted anywhere in this body; the
     // workshare loop below is always created with NeedsBarrier=true -- confirm
     // that this is intended.
896  if (!updateToLocation(Loc))
897  return Loc.IP;
898 
     // Wrapper around FiniCB that first repairs the CFG when it is invoked at
     // the end of a block (i.e. a block without a terminator).
899  auto FiniCBWrapper = [&](InsertPointTy IP) {
     // IP is in the middle of a block: finalize right there.
900  if (IP.getBlock()->end() != IP.getPoint())
901  return FiniCB(IP);
902  // This must be done otherwise any nested constructs using FinalizeOMPRegion
903  // will fail because that function requires the Finalization Basic Block to
904  // have a terminator, which is already removed by EmitOMPRegionBody.
905  // IP is currently at cancelation block.
906  // We need to backtrack to the condition block to fetch
907  // the exit block and create a branch from cancelation
908  // to exit block.
910  Builder.restoreIP(IP);
     // Walk up three single-predecessor edges from the cancellation block; the
     // second successor of the block found there is used as the exit block.
911  auto *CaseBB = IP.getBlock()->getSinglePredecessor();
912  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
913  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
914  Instruction *I = Builder.CreateBr(ExitBB);
     // Finalize in front of the freshly created branch.
915  IP = InsertPointTy(I->getParent(), I->getIterator());
916  return FiniCB(IP);
917  };
918 
919  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
920 
921  // Each section is emitted as a switch case
922  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
923  // -> OMP.createSection() which generates the IR for each section
924  // Iterate through all sections and emit a switch construct:
925  // switch (IV) {
926  // case 0:
927  // <SectionStmt[0]>;
928  // break;
929  // ...
930  // case <NumSection> - 1:
931  // <SectionStmt[<NumSection> - 1]>;
932  // break;
933  // }
934  // ...
935  // section_loop.after:
936  // <FiniCB>;
937  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
938  auto *CurFn = CodeGenIP.getBlock()->getParent();
     // The body block's single successor is the switch default destination;
     // the exit block is the second successor of the body's single predecessor.
939  auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
940  auto *ForExitBB = CodeGenIP.getBlock()
941  ->getSinglePredecessor()
942  ->getTerminator()
943  ->getSuccessor(1);
944  SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
945  Builder.restoreIP(CodeGenIP);
946  unsigned CaseNumber = 0;
     // One fresh case block per section callback, numbered from zero.
947  for (auto SectionCB : SectionCBs) {
948  auto *CaseBB = BasicBlock::Create(M.getContext(),
949  "omp_section_loop.body.case", CurFn);
950  SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
951  Builder.SetInsertPoint(CaseBB);
952  SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
953  CaseNumber++;
954  }
955  // remove the existing terminator from body BB since there can be no
956  // terminators after switch/case
957  CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
958  };
959  // Loop body ends here
960  // LowerBound, UpperBound, and Stride for createCanonicalLoop
961  Type *I32Ty = Type::getInt32Ty(M.getContext());
962  Value *LB = ConstantInt::get(I32Ty, 0);
963  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
964  Value *ST = ConstantInt::get(I32Ty, 1);
     // The loop runs over [0, number of sections) with step 1: the two boolean
     // arguments are IsSigned=true and InclusiveStop=false.
965  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
966  Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
967  LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
968  BasicBlock *LoopAfterBB = LoopInfo->getAfter();
969  Instruction *SplitPos = LoopAfterBB->getTerminator();
     // Without a branch terminator, append a temporary unreachable so the
     // block can be split; it is erased again right after the split.
970  if (!isa_and_nonnull<BranchInst>(SplitPos))
971  SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
972  // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
973  // which requires a BB with branch
974  BasicBlock *ExitBB =
975  LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
976  SplitPos->eraseFromParent();
977 
978  // Apply the finalization callback in LoopAfterBB
979  auto FiniInfo = FinalizationStack.pop_back_val();
980  assert(FiniInfo.DK == OMPD_sections &&
981  "Unexpected finalization stack state!");
982  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
983  FiniInfo.FiniCB(Builder.saveIP());
     // Code following the construct continues in the split-off exit block.
984  Builder.SetInsertPoint(ExitBB);
985 
986  return Builder.saveIP();
987 }
988 
991  BodyGenCallbackTy BodyGenCB,
992  FinalizeCallbackTy FiniCB) {
     // Emits a single section as an unconditional inlined region without
     // entry/exit runtime calls; FiniCB is wrapped so that it also works when
     // invoked at the end of a block that has no terminator.
993  if (!updateToLocation(Loc))
994  return Loc.IP;
995 
996  auto FiniCBWrapper = [&](InsertPointTy IP) {
     // IP is in the middle of a block: finalize right there.
997  if (IP.getBlock()->end() != IP.getPoint())
998  return FiniCB(IP);
999  // This must be done otherwise any nested constructs using FinalizeOMPRegion
1000  // will fail because that function requires the Finalization Basic Block to
1001  // have a terminator, which is already removed by EmitOMPRegionBody.
1002  // IP is currently at cancelation block.
1003  // We need to backtrack to the condition block to fetch
1004  // the exit block and create a branch from cancelation
1005  // to exit block.
1007  Builder.restoreIP(IP);
     // NOTE(review): the case block is taken from Loc.IP here rather than from
     // IP -- presumably Loc.IP still designates the section's case block;
     // verify against the caller.
1008  auto *CaseBB = Loc.IP.getBlock();
1009  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1010  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1011  Instruction *I = Builder.CreateBr(ExitBB);
     // Finalize in front of the freshly created branch.
1012  IP = InsertPointTy(I->getParent(), I->getIterator());
1013  return FiniCB(IP);
1014  };
1015 
1016  Directive OMPD = Directive::OMPD_sections;
1017  // Since we are using Finalization Callback here, HasFinalize
1018  // and IsCancellable have to be true
1019  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1020  /*Conditional*/ false, /*hasFinalize*/ true,
1021  /*IsCancellable*/ true);
1022 }
1023 
1026  BodyGenCallbackTy BodyGenCB,
1027  FinalizeCallbackTy FiniCB) {
1028 
     // Emits a master region: __kmpc_master / __kmpc_end_master calls around a
     // conditional inlined region whose body is produced by BodyGenCB.
1029  if (!updateToLocation(Loc))
1030  return Loc.IP;
1031 
1032  Directive OMPD = Directive::OMPD_master;
1033  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1034  Value *Ident = getOrCreateIdent(SrcLocStr);
1035  Value *ThreadId = getOrCreateThreadID(Ident);
     // Entry and exit calls take the same (ident, thread id) argument list.
1036  Value *Args[] = {Ident, ThreadId};
1037 
1038  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1039  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1040 
     // The exit call is created right after the entry call and handed to
     // EmitOMPInlinedRegion, which presumably moves it behind the region body
     // -- see EmitOMPInlinedRegion.
1041  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1042  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1043 
1044  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1045  /*Conditional*/ true, /*hasFinalize*/ true);
1046 }
1047 
1050  BodyGenCallbackTy BodyGenCB,
1051  FinalizeCallbackTy FiniCB, Value *Filter) {
     // Emits a masked region: __kmpc_masked / __kmpc_end_masked calls around a
     // conditional inlined region whose body is produced by BodyGenCB.
1052  if (!updateToLocation(Loc))
1053  return Loc.IP;
1054 
1055  Directive OMPD = Directive::OMPD_masked;
1056  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1057  Value *Ident = getOrCreateIdent(SrcLocStr);
1058  Value *ThreadId = getOrCreateThreadID(Ident);
     // Only the entry call receives Filter; the exit call takes just
     // (ident, thread id).
1059  Value *Args[] = {Ident, ThreadId, Filter};
1060  Value *ArgsEnd[] = {Ident, ThreadId};
1061 
1062  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1063  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1064 
     // The exit call is created right after the entry call and handed to
     // EmitOMPInlinedRegion, which presumably moves it behind the region body
     // -- see EmitOMPInlinedRegion.
1065  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1066  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1067 
1068  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1069  /*Conditional*/ true, /*hasFinalize*/ true);
1070 }
1071 
1073  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1074  BasicBlock *PostInsertBefore, const Twine &Name) {
     // Builds the empty control-flow skeleton of a canonical loop in F and
     // records it in LoopInfos. Resulting edges:
     //   preheader -> header -> cond -> body -> inc (latch) -> header
     //                          cond -> exit -> after
     // The induction variable has the type of TripCount, starts at 0 and is
     // incremented by 1 in the latch. No terminator is created for the "after"
     // block here.
1075  Module *M = F->getParent();
1076  LLVMContext &Ctx = M->getContext();
1077  Type *IndVarTy = TripCount->getType();
1078 
1079  // Create the basic block structure.
     // Preheader, header, cond and body are inserted before PreInsertBefore;
     // latch, exit and after are inserted before PostInsertBefore.
1080  BasicBlock *Preheader =
1081  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1082  BasicBlock *Header =
1083  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1084  BasicBlock *Cond =
1085  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1086  BasicBlock *Body =
1087  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1088  BasicBlock *Latch =
1089  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1090  BasicBlock *Exit =
1091  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1092  BasicBlock *After =
1093  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1094 
1095  // Use specified DebugLoc for new instructions.
1096  Builder.SetCurrentDebugLocation(DL);
1097 
1098  Builder.SetInsertPoint(Preheader);
1099  Builder.CreateBr(Header);
1100 
1101  Builder.SetInsertPoint(Header);
     // Two incoming values: 0 from the preheader and the incremented value
     // from the latch (added further down).
1102  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1103  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1104  Builder.CreateBr(Cond);
1105 
1106  Builder.SetInsertPoint(Cond);
     // Unsigned comparison: continue while IV < TripCount.
1107  Value *Cmp =
1108  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1109  Builder.CreateCondBr(Cmp, Body, Exit);
1110 
1111  Builder.SetInsertPoint(Body);
1112  Builder.CreateBr(Latch);
1113 
1114  Builder.SetInsertPoint(Latch);
1115  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1116  "omp_" + Name + ".next", /*HasNUW=*/true);
1117  Builder.CreateBr(Header);
1118  IndVarPHI->addIncoming(Next, Latch);
1119 
1120  Builder.SetInsertPoint(Exit);
1121  Builder.CreateBr(After);
1122 
1123  // Remember and return the canonical control flow.
1124  LoopInfos.emplace_front();
1125  CanonicalLoopInfo *CL = &LoopInfos.front();
1126 
1127  CL->Preheader = Preheader;
1128  CL->Header = Header;
1129  CL->Cond = Cond;
1130  CL->Body = Body;
1131  CL->Latch = Latch;
1132  CL->Exit = Exit;
1133  CL->After = After;
1134 
1135  CL->IsValid = true;
1136 
1137 #ifndef NDEBUG
1138  CL->assertOK();
1139 #endif
1140  return CL;
1141 }
1142 
1145  LoopBodyGenCallbackTy BodyGenCB,
1146  Value *TripCount, const Twine &Name) {
     // Creates a canonical loop that runs TripCount iterations, splices it into
     // the CFG at Loc (when Loc is set) and lets BodyGenCB fill in the body.
1147  BasicBlock *BB = Loc.IP.getBlock();
1148  BasicBlock *NextBB = BB->getNextNode();
1149 
     // All skeleton blocks are placed in front of the block that follows BB.
1150  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1151  NextBB, NextBB, Name);
1152  BasicBlock *After = CL->getAfter();
1153 
1154  // If location is not set, don't connect the loop.
1155  if (updateToLocation(Loc)) {
1156  // Split the loop at the insertion point: Branch to the preheader and move
1157  // every following instruction to after the loop (the After BB). Also, the
1158  // new successor is the loop's after block.
1159  Builder.CreateBr(CL->Preheader);
1160  After->getInstList().splice(After->begin(), BB->getInstList(),
1161  Builder.GetInsertPoint(), BB->end());
     // PHI nodes in the successors that named BB as incoming block must now
     // name After, which holds the moved terminator.
1162  After->replaceSuccessorsPhiUsesWith(BB, After);
1163  }
1164 
1165  // Emit the body content. We do it after connecting the loop to the CFG to
1166  // avoid that the callback encounters degenerate BBs.
1167  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1168 
1169 #ifndef NDEBUG
1170  CL->assertOK();
1171 #endif
1172  return CL;
1173 }
1174 
1176  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1177  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1178  InsertPointTy ComputeIP, const Twine &Name) {
1179 
1180  // Consider the following difficulties (assuming 8-bit signed integers):
1181  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1182  // DO I = 1, 100, 50
1183  /// * A \p Step of INT_MIN cannot not be normalized to a positive direction:
1184  // DO I = 100, 0, -128
1185 
1186  // Start, Stop and Step must be of the same integer type.
1187  auto *IndVarTy = cast<IntegerType>(Start->getType());
1188  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1189  assert(IndVarTy == Step->getType() && "Step type mismatch");
1190 
1191  LocationDescription ComputeLoc =
1192  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1193  updateToLocation(ComputeLoc);
1194 
1195  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1196  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1197 
1198  // Like Step, but always positive.
1199  Value *Incr = Step;
1200 
1201  // Distance between Start and Stop; always positive.
1202  Value *Span;
1203 
1204  // Condition whether there are no iterations are executed at all, e.g. because
1205  // UB < LB.
1206  Value *ZeroCmp;
1207 
1208  if (IsSigned) {
1209  // Ensure that increment is positive. If not, negate and invert LB and UB.
1210  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1211  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1212  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1213  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1214  Span = Builder.CreateSub(UB, LB, "", false, true);
1215  ZeroCmp = Builder.CreateICmp(
1216  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1217  } else {
1218  Span = Builder.CreateSub(Stop, Start, "", true);
1219  ZeroCmp = Builder.CreateICmp(
1220  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1221  }
1222 
1223  Value *CountIfLooping;
1224  if (InclusiveStop) {
1225  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1226  } else {
1227  // Avoid incrementing past stop since it could overflow.
1228  Value *CountIfTwo = Builder.CreateAdd(
1229  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1230  Value *OneCmp = Builder.CreateICmp(
1231  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1232  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1233  }
1234  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1235  "omp_" + Name + ".tripcount");
1236 
1237  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1238  Builder.restoreIP(CodeGenIP);
1239  Value *Span = Builder.CreateMul(IV, Step);
1240  Value *IndVar = Builder.CreateAdd(Span, Start);
1241  BodyGenCB(Builder.saveIP(), IndVar);
1242  };
1243  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1244  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1245 }
1246 
1247 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1248 // static scheduling, selected by the integer bit width of `Ty`. Only i32 and
1249 // i64 are supported by the runtime. Always interpret integers as unsigned
1250 // similarly to CanonicalLoopInfo.
1252  OpenMPIRBuilder &OMPBuilder) {
1253  unsigned Bitwidth = Ty->getIntegerBitWidth();
1254  if (Bitwidth == 32)
1255  return OMPBuilder.getOrCreateRuntimeFunction(
1256  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1257  if (Bitwidth == 64)
1258  return OMPBuilder.getOrCreateRuntimeFunction(
1259  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
     // Any other width is a caller error.
1260  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1261 }
1262 
1263 // Sets the number of loop iterations to the given value. This value must be
1264 // valid in the condition block (i.e., defined in the preheader) and is
1265 // interpreted as an unsigned integer.
     // Relies on the compare being the first instruction of the condition block;
     // only its second operand (the bound) is replaced.
1267  Instruction *CmpI = &CLI->getCond()->front();
1268  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1269  CmpI->setOperand(1, TripCount);
1270  CLI->assertOK();
1271 }
1272 
1274  const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1275  InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
     // Turns the canonical loop CLI into a statically scheduled workshare loop:
     // the runtime "init" call in the preheader yields this thread's bounds, the
     // loop is rewritten to iterate over them, and the "fini" call (plus an
     // optional barrier) is placed in the exit block. Returns nullptr when Loc
     // is not set, otherwise CLI.
1276  // Set up the source location value for OpenMP runtime.
1277  if (!updateToLocation(Loc))
1278  return nullptr;
1279 
1280  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1281  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1282 
1283  // Declare useful OpenMP runtime functions.
1284  Value *IV = CLI->getIndVar();
1285  Type *IVTy = IV->getType();
1286  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1287  FunctionCallee StaticFini =
1288  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1289 
1290  // Allocate space for computed loop bounds as expected by the "init" function.
1291  Builder.restoreIP(AllocaIP);
1292  Type *I32Type = Type::getInt32Ty(M.getContext());
1293  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1294  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1295  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1296  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1297 
1298  // At the end of the preheader, prepare for calling the "init" function by
1299  // storing the current loop bounds into the allocated space. A canonical loop
1300  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1301  // and produces an inclusive upper bound.
1302  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1303  Constant *Zero = ConstantInt::get(IVTy, 0);
1304  Constant *One = ConstantInt::get(IVTy, 1);
1305  Builder.CreateStore(Zero, PLowerBound);
1306  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1307  Builder.CreateStore(UpperBound, PUpperBound);
1308  Builder.CreateStore(One, PStride);
1309 
     // Without an explicit chunk size, the constant 1 of the IV type is passed.
1310  if (!Chunk)
1311  Chunk = One;
1312 
1313  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1314 
1315  Constant *SchedulingType =
1316  ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
1317 
1318  // Call the "init" function and update the trip count of the loop with the
1319  // value it produced.
1320  Builder.CreateCall(StaticInit,
1321  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1322  PUpperBound, PStride, One, Chunk});
     // Both bounds are inclusive, hence trip count = (upper - lower) + 1.
1323  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1324  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1325  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1326  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1327  setCanonicalLoopTripCount(CLI, TripCount);
1328 
1329  // Update all uses of the induction variable except the one in the condition
1330  // block that compares it with the actual upper bound, and the increment in
1331  // the latch block.
1332  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1333  // CanonicalLoopInfoUpdater interface.
1334  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1335  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
     // The predicate also excludes UpdatedIV itself, which must keep reading
     // the original IV.
1336  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1337  auto *Instr = dyn_cast<Instruction>(U.getUser());
1338  return !Instr ||
1339  (Instr->getParent() != CLI->getCond() &&
1340  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1341  });
1342 
1343  // In the "exit" block, call the "fini" function.
1344  Builder.SetInsertPoint(CLI->getExit(),
1345  CLI->getExit()->getTerminator()->getIterator());
1346  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1347 
1348  // Add the barrier if requested.
1349  if (NeedsBarrier)
1350  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
1351  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1352  /* CheckCancelFlag */ false);
1353 
1354  CLI->assertOK();
1355  return CLI;
1356 }
1357 
1359  const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1360  InsertPointTy AllocaIP, bool NeedsBarrier) {
1361  // Currently only supports static schedules.
     // Forwards all arguments unchanged; no Chunk argument is passed.
1362  return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
1363 }
1364 
1365 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
1366 /// dynamic scheduling, selected by the integer bit width of `Ty`. Only i32 and
1367 /// i64 are supported by the runtime. Always interpret integers as unsigned
1368 /// similarly to CanonicalLoopInfo.
1369 static FunctionCallee
1371  unsigned Bitwidth = Ty->getIntegerBitWidth();
1372  if (Bitwidth == 32)
1373  return OMPBuilder.getOrCreateRuntimeFunction(
1374  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1375  if (Bitwidth == 64)
1376  return OMPBuilder.getOrCreateRuntimeFunction(
1377  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
     // Any other width is a caller error.
1378  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1379 }
1380 
1381 /// Returns an LLVM function to call for fetching the next chunk of a loop
1382 /// using OpenMP dynamic scheduling, selected by the integer bit width of `Ty`.
1383 /// Only i32 and i64 are supported by the runtime. Always interpret integers
1384 /// as unsigned similarly to CanonicalLoopInfo.
1385 static FunctionCallee
1387  unsigned Bitwidth = Ty->getIntegerBitWidth();
1388  if (Bitwidth == 32)
1389  return OMPBuilder.getOrCreateRuntimeFunction(
1390  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1391  if (Bitwidth == 64)
1392  return OMPBuilder.getOrCreateRuntimeFunction(
1393  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
     // Any other width is a caller error.
1394  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1395 }
1396 
1398  const LocationDescription &Loc, CanonicalLoopInfo *CLI,
1399  InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
1400  Value *Chunk) {
     // Turns the canonical loop CLI into a dynamically scheduled workshare
     // loop: a new outer condition block repeatedly asks the runtime for the
     // next chunk and re-enters the original loop for each chunk. Returns the
     // insertion point after the loop; CLI is no longer a valid canonical loop
     // afterwards.
     // NOTE(review): this body only sets the debug location from Loc; it does
     // not check Loc via updateToLocation.
1401  // Set up the source location value for OpenMP runtime.
1402  Builder.SetCurrentDebugLocation(Loc.DL);
1403 
1404  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1405  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1406 
1407  // Declare useful OpenMP runtime functions.
1408  Value *IV = CLI->getIndVar();
1409  Type *IVTy = IV->getType();
1410  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
1411  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
1412 
1413  // Allocate space for computed loop bounds as expected by the "init" function.
1414  Builder.restoreIP(AllocaIP);
1415  Type *I32Type = Type::getInt32Ty(M.getContext());
1416  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1417  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1418  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1419  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1420 
1421  // At the end of the preheader, prepare for calling the "init" function by
1422  // storing the current loop bounds into the allocated space. A canonical loop
1423  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1424  // and produces an inclusive upper bound.
     // The bounds handed to the runtime here are 1-based: [1, TripCount].
1425  BasicBlock *PreHeader = CLI->getPreheader();
1426  Builder.SetInsertPoint(PreHeader->getTerminator());
1427  Constant *One = ConstantInt::get(IVTy, 1);
1428  Builder.CreateStore(One, PLowerBound);
1429  Value *UpperBound = CLI->getTripCount();
1430  Builder.CreateStore(UpperBound, PUpperBound);
1431  Builder.CreateStore(One, PStride);
1432 
     // Fetch everything needed from CLI before its structure is modified.
1433  BasicBlock *Header = CLI->getHeader();
1434  BasicBlock *Exit = CLI->getExit();
1435  BasicBlock *Cond = CLI->getCond();
1436  InsertPointTy AfterIP = CLI->getAfterIP();
1437 
1438  // The CLI will be "broken" in the code below, as the loop is no longer
1439  // a valid canonical loop.
1440 
     // Without an explicit chunk size, the constant 1 of the IV type is passed.
1441  if (!Chunk)
1442  Chunk = One;
1443 
1444  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1445 
1446  Constant *SchedulingType =
1447  ConstantInt::get(I32Type, static_cast<int>(SchedType));
1448 
1449  // Call the "init" function.
1450  Builder.CreateCall(DynamicInit,
1451  {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1452  UpperBound, /* step */ One, Chunk});
1453 
1454  // An outer loop around the existing one.
1455  BasicBlock *OuterCond = BasicBlock::Create(
1456  PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
1457  PreHeader->getParent());
1458  // This needs to be 32-bit always, so can't use the IVTy Zero above.
1459  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
1460  Value *Res =
1461  Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1462  PLowerBound, PUpperBound, PStride});
1463  Constant *Zero32 = ConstantInt::get(I32Type, 0);
     // A non-zero result of the "next" call means another chunk is available.
1464  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
     // Convert the 1-based lower bound back to the 0-based induction variable.
1465  Value *LowerBound =
1466  Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
1467  Builder.CreateCondBr(MoreWork, Header, Exit);
1468 
1469  // Change PHI-node in loop header to use outer cond rather than preheader,
1470  // and set IV to the LowerBound.
1471  Instruction *Phi = &Header->front();
1472  auto *PI = cast<PHINode>(Phi);
1473  PI->setIncomingBlock(0, OuterCond);
1474  PI->setIncomingValue(0, LowerBound);
1475 
1476  // Then set the pre-header to jump to the OuterCond
1477  Instruction *Term = PreHeader->getTerminator();
1478  auto *Br = cast<BranchInst>(Term);
1479  Br->setSuccessor(0, OuterCond);
1480 
1481  // Modify the inner condition:
1482  // * Use the UpperBound returned from the DynamicNext call.
1483  // * jump to the outer loop when done with one of the inner loops.
1484  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
1485  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
     // The load was inserted in front of the insertion point, so the insertion
     // point still designates the block's original first instruction, which is
     // expected to be the compare.
1486  Instruction *Comp = &*Builder.GetInsertPoint();
1487  auto *CI = cast<CmpInst>(Comp);
1488  CI->setOperand(1, UpperBound);
1489  // Redirect the inner exit to branch to outer condition.
1490  Instruction *Branch = &Cond->back();
1491  auto *BI = cast<BranchInst>(Branch);
1492  assert(BI->getSuccessor(1) == Exit);
1493  BI->setSuccessor(1, OuterCond);
1494 
1495  // Add the barrier if requested.
1496  if (NeedsBarrier) {
     // Place the barrier in front of the exit block's last instruction.
1497  Builder.SetInsertPoint(&Exit->back());
1498  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
1499  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1500  /* CheckCancelFlag */ false);
1501  }
1502 
1503  return AfterIP;
1504 }
1505 
1506 /// Make \p Source branch to \p Target.
1507 ///
1508 /// Handles two situations:
1509 /// * \p Source already has an unconditional branch.
1510 /// * \p Source is a degenerate block (no terminator because the BB is
1511 /// the current head of the IR construction).
     // Case 1: retarget the existing unconditional branch.
1513  if (Instruction *Term = Source->getTerminator()) {
1514  auto *Br = cast<BranchInst>(Term);
1515  assert(!Br->isConditional() &&
1516  "BB's terminator must be an unconditional branch (or degenerate)");
1517  BasicBlock *Succ = Br->getSuccessor(0);
     // Tell the old successor that Source is no longer a predecessor so its
     // PHI nodes are updated; single-input PHIs are kept.
1518  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1519  Br->setSuccessor(0, Target);
1520  return;
1521  }
1522 
     // Case 2: no terminator yet; append a new branch carrying DL.
1523  auto *NewBr = BranchInst::Create(Target, Source);
1524  NewBr->setDebugLoc(DL);
1525 }
1526 
1527 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1528 /// after this \p OldTarget will be orphaned.
1529 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1530  BasicBlock *NewTarget, DebugLoc DL) {
     // redirectTo rewrites the branch in Pred and thereby changes OldTarget's
     // predecessor list; presumably that is why an early-increment range is
     // used for the iteration.
1531  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1532  redirectTo(Pred, NewTarget, DL);
1533 }
1534 
1535 /// Determine which blocks in \p BBs are reachable from outside and remove the
1536 /// ones that are not reachable from the function.
     // Start by assuming every block in BBs can be erased.
1538  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
     // A block has remaining uses if some instruction outside the erase set
     // refers to it; users that are not instructions are ignored.
1539  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1540  for (Use &U : BB->uses()) {
1541  auto *UseInst = dyn_cast<Instruction>(U.getUser());
1542  if (!UseInst)
1543  continue;
1544  if (BBsToErase.count(UseInst->getParent()))
1545  continue;
1546  return true;
1547  }
1548  return false;
1549  };
1550 
     // Iterate to a fixed point: keeping one block can turn its references to
     // other candidate blocks into uses from outside the erase set.
1551  while (true) {
1552  bool Changed = false;
1553  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1554  if (HasRemainingUses(BB)) {
1555  BBsToErase.erase(BB);
1556  Changed = true;
1557  }
1558  }
1559  if (!Changed)
1560  break;
1561  }
1562 
     // Whatever is still in the set is only referenced from within the set.
1563  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1564  DeleteDeadBlocks(BBVec);
1565 }
1566 
1569  InsertPointTy ComputeIP) {
1570  assert(Loops.size() >= 1 && "At least one loop required");
1571  size_t NumLoops = Loops.size();
1572 
1573  // Nothing to do if there is already just one loop.
1574  if (NumLoops == 1)
1575  return Loops.front();
1576 
1577  CanonicalLoopInfo *Outermost = Loops.front();
1578  CanonicalLoopInfo *Innermost = Loops.back();
1579  BasicBlock *OrigPreheader = Outermost->getPreheader();
1580  BasicBlock *OrigAfter = Outermost->getAfter();
1581  Function *F = OrigPreheader->getParent();
1582 
1583  // Setup the IRBuilder for inserting the trip count computation.
1584  Builder.SetCurrentDebugLocation(DL);
1585  if (ComputeIP.isSet())
1586  Builder.restoreIP(ComputeIP);
1587  else
1588  Builder.restoreIP(Outermost->getPreheaderIP());
1589 
1590  // Derive the collapsed' loop trip count.
1591  // TODO: Find common/largest indvar type.
1592  Value *CollapsedTripCount = nullptr;
1593  for (CanonicalLoopInfo *L : Loops) {
1594  Value *OrigTripCount = L->getTripCount();
1595  if (!CollapsedTripCount) {
1596  CollapsedTripCount = OrigTripCount;
1597  continue;
1598  }
1599 
1600  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1601  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1602  {}, /*HasNUW=*/true);
1603  }
1604 
1605  // Create the collapsed loop control flow.
1606  CanonicalLoopInfo *Result =
1607  createLoopSkeleton(DL, CollapsedTripCount, F,
1608  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1609 
1610  // Build the collapsed loop body code.
1611  // Start with deriving the input loop induction variables from the collapsed
1612  // one, using a divmod scheme. To preserve the original loops' order, the
1613  // innermost loop use the least significant bits.
1614  Builder.restoreIP(Result->getBodyIP());
1615 
1616  Value *Leftover = Result->getIndVar();
1617  SmallVector<Value *> NewIndVars;
1618  NewIndVars.set_size(NumLoops);
1619  for (int i = NumLoops - 1; i >= 1; --i) {
1620  Value *OrigTripCount = Loops[i]->getTripCount();
1621 
1622  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1623  NewIndVars[i] = NewIndVar;
1624 
1625  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1626  }
1627  // Outermost loop gets all the remaining bits.
1628  NewIndVars[0] = Leftover;
1629 
1630  // Construct the loop body control flow.
1631  // We progressively construct the branch structure following in direction of
1632  // the control flow, from the leading in-between code, the loop nest body, the
1633  // trailing in-between code, and rejoining the collapsed loop's latch.
1634  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
1635  // the ContinueBlock is set, continue with that block. If ContinuePred, use
1636  // its predecessors as sources.
1637  BasicBlock *ContinueBlock = Result->getBody();
1638  BasicBlock *ContinuePred = nullptr;
1639  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1640  BasicBlock *NextSrc) {
1641  if (ContinueBlock)
1642  redirectTo(ContinueBlock, Dest, DL);
1643  else
1644  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1645 
1646  ContinueBlock = nullptr;
1647  ContinuePred = NextSrc;
1648  };
1649 
1650  // The code before the nested loop of each level.
1651  // Because we are sinking it into the nest, it will be executed more often
1652  // that the original loop. More sophisticated schemes could keep track of what
1653  // the in-between code is and instantiate it only once per thread.
1654  for (size_t i = 0; i < NumLoops - 1; ++i)
1655  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1656 
1657  // Connect the loop nest body.
1658  ContinueWith(Innermost->getBody(), Innermost->getLatch());
1659 
1660  // The code after the nested loop at each level.
1661  for (size_t i = NumLoops - 1; i > 0; --i)
1662  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1663 
1664  // Connect the finished loop to the collapsed loop latch.
1665  ContinueWith(Result->getLatch(), nullptr);
1666 
1667  // Replace the input loops with the new collapsed loop.
1668  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1669  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1670 
1671  // Replace the input loop indvars with the derived ones.
1672  for (size_t i = 0; i < NumLoops; ++i)
1673  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1674 
1675  // Remove unused parts of the input loops.
1676  SmallVector<BasicBlock *, 12> OldControlBBs;
1677  OldControlBBs.reserve(6 * Loops.size());
1678  for (CanonicalLoopInfo *Loop : Loops)
1679  Loop->collectControlBlocks(OldControlBBs);
1680  removeUnusedBlocksFromParent(OldControlBBs);
1681 
1682 #ifndef NDEBUG
1683  Result->assertOK();
1684 #endif
1685  return Result;
1686 }
1687 
1688 std::vector<CanonicalLoopInfo *>
1690  ArrayRef<Value *> TileSizes) {
1691  assert(TileSizes.size() == Loops.size() &&
1692  "Must pass as many tile sizes as there are loops");
1693  int NumLoops = Loops.size();
1694  assert(NumLoops >= 1 && "At least one loop to tile required");
1695 
1696  CanonicalLoopInfo *OutermostLoop = Loops.front();
1697  CanonicalLoopInfo *InnermostLoop = Loops.back();
1698  Function *F = OutermostLoop->getBody()->getParent();
1699  BasicBlock *InnerEnter = InnermostLoop->getBody();
1700  BasicBlock *InnerLatch = InnermostLoop->getLatch();
1701 
1702  // Collect original trip counts and induction variable to be accessible by
1703  // index. Also, the structure of the original loops is not preserved during
1704  // the construction of the tiled loops, so do it before we scavenge the BBs of
1705  // any original CanonicalLoopInfo.
1706  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1707  for (CanonicalLoopInfo *L : Loops) {
1708  OrigTripCounts.push_back(L->getTripCount());
1709  OrigIndVars.push_back(L->getIndVar());
1710  }
1711 
1712  // Collect the code between loop headers. These may contain SSA definitions
1713  // that are used in the loop nest body. To be usable with in the innermost
1714  // body, these BasicBlocks will be sunk into the loop nest body. That is,
1715  // these instructions may be executed more often than before the tiling.
1716  // TODO: It would be sufficient to only sink them into body of the
1717  // corresponding tile loop.
1719  for (int i = 0; i < NumLoops - 1; ++i) {
1720  CanonicalLoopInfo *Surrounding = Loops[i];
1721  CanonicalLoopInfo *Nested = Loops[i + 1];
1722 
1723  BasicBlock *EnterBB = Surrounding->getBody();
1724  BasicBlock *ExitBB = Nested->getHeader();
1725  InbetweenCode.emplace_back(EnterBB, ExitBB);
1726  }
1727 
1728  // Compute the trip counts of the floor loops.
1729  Builder.SetCurrentDebugLocation(DL);
1730  Builder.restoreIP(OutermostLoop->getPreheaderIP());
1731  SmallVector<Value *, 4> FloorCount, FloorRems;
1732  for (int i = 0; i < NumLoops; ++i) {
1733  Value *TileSize = TileSizes[i];
1734  Value *OrigTripCount = OrigTripCounts[i];
1735  Type *IVType = OrigTripCount->getType();
1736 
1737  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
1738  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
1739 
1740  // 0 if tripcount divides the tilesize, 1 otherwise.
1741  // 1 means we need an additional iteration for a partial tile.
1742  //
1743  // Unfortunately we cannot just use the roundup-formula
1744  // (tripcount + tilesize - 1)/tilesize
1745  // because the summation might overflow. We do not want introduce undefined
1746  // behavior when the untiled loop nest did not.
1747  Value *FloorTripOverflow =
1748  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
1749 
1750  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
1751  FloorTripCount =
1752  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
1753  "omp_floor" + Twine(i) + ".tripcount", true);
1754 
1755  // Remember some values for later use.
1756  FloorCount.push_back(FloorTripCount);
1757  FloorRems.push_back(FloorTripRem);
1758  }
1759 
1760  // Generate the new loop nest, from the outermost to the innermost.
1761  std::vector<CanonicalLoopInfo *> Result;
1762  Result.reserve(NumLoops * 2);
1763 
1764  // The basic block of the surrounding loop that enters the nest generated
1765  // loop.
1766  BasicBlock *Enter = OutermostLoop->getPreheader();
1767 
1768  // The basic block of the surrounding loop where the inner code should
1769  // continue.
1770  BasicBlock *Continue = OutermostLoop->getAfter();
1771 
1772  // Where the next loop basic block should be inserted.
1773  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
1774 
1775  auto EmbeddNewLoop =
1776  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
1777  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
1778  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
1779  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
1780  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
1781  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
1782 
1783  // Setup the position where the next embedded loop connects to this loop.
1784  Enter = EmbeddedLoop->getBody();
1785  Continue = EmbeddedLoop->getLatch();
1786  OutroInsertBefore = EmbeddedLoop->getLatch();
1787  return EmbeddedLoop;
1788  };
1789 
1790  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
1791  const Twine &NameBase) {
1792  for (auto P : enumerate(TripCounts)) {
1793  CanonicalLoopInfo *EmbeddedLoop =
1794  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
1795  Result.push_back(EmbeddedLoop);
1796  }
1797  };
1798 
1799  EmbeddNewLoops(FloorCount, "floor");
1800 
1801  // Within the innermost floor loop, emit the code that computes the tile
1802  // sizes.
1803  Builder.SetInsertPoint(Enter->getTerminator());
1804  SmallVector<Value *, 4> TileCounts;
1805  for (int i = 0; i < NumLoops; ++i) {
1806  CanonicalLoopInfo *FloorLoop = Result[i];
1807  Value *TileSize = TileSizes[i];
1808 
1809  Value *FloorIsEpilogue =
1810  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
1811  Value *TileTripCount =
1812  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
1813 
1814  TileCounts.push_back(TileTripCount);
1815  }
1816 
1817  // Create the tile loops.
1818  EmbeddNewLoops(TileCounts, "tile");
1819 
1820  // Insert the inbetween code into the body.
1821  BasicBlock *BodyEnter = Enter;
1822  BasicBlock *BodyEntered = nullptr;
1823  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
1824  BasicBlock *EnterBB = P.first;
1825  BasicBlock *ExitBB = P.second;
1826 
1827  if (BodyEnter)
1828  redirectTo(BodyEnter, EnterBB, DL);
1829  else
1830  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
1831 
1832  BodyEnter = nullptr;
1833  BodyEntered = ExitBB;
1834  }
1835 
1836  // Append the original loop nest body into the generated loop nest body.
1837  if (BodyEnter)
1838  redirectTo(BodyEnter, InnerEnter, DL);
1839  else
1840  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
1841  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
1842 
1843  // Replace the original induction variable with an induction variable computed
1844  // from the tile and floor induction variables.
1845  Builder.restoreIP(Result.back()->getBodyIP());
1846  for (int i = 0; i < NumLoops; ++i) {
1847  CanonicalLoopInfo *FloorLoop = Result[i];
1848  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
1849  Value *OrigIndVar = OrigIndVars[i];
1850  Value *Size = TileSizes[i];
1851 
1852  Value *Scale =
1853  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
1854  Value *Shift =
1855  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
1856  OrigIndVar->replaceAllUsesWith(Shift);
1857  }
1858 
1859  // Remove unused parts of the original loops.
1860  SmallVector<BasicBlock *, 12> OldControlBBs;
1861  OldControlBBs.reserve(6 * Loops.size());
1862  for (CanonicalLoopInfo *Loop : Loops)
1863  Loop->collectControlBlocks(OldControlBBs);
1864  removeUnusedBlocksFromParent(OldControlBBs);
1865 
1866 #ifndef NDEBUG
1867  for (CanonicalLoopInfo *GenL : Result)
1868  GenL->assertOK();
1869 #endif
1870  return Result;
1871 }
1872 
1875  llvm::Value *BufSize, llvm::Value *CpyBuf,
1876  llvm::Value *CpyFn, llvm::Value *DidIt) {
  // Emits a call to the `__kmpc_copyprivate` runtime function at the location
  // described by `Loc` and returns the insertion point following that call.
  // NOTE(review): the first line(s) of this signature (return type, function
  // name and the `Loc` parameter used below) are missing from this listing.

  // Nothing is emitted when updateToLocation() reports failure; the caller's
  // insertion point is handed back unchanged.
1877  if (!updateToLocation(Loc))
1878  return Loc.IP;
1879 
1880  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1881  Value *Ident = getOrCreateIdent(SrcLocStr);
1882  Value *ThreadId = getOrCreateThreadID(Ident);
1883 
  // `DidIt` is an address; the runtime call receives the i32 currently stored
  // there, not the pointer itself.
1884  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
1885 
1886  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
1887 
1888  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
1889  Builder.CreateCall(Fn, Args);
1890 
1891  return Builder.saveIP();
1892 }
1893 
1896  BodyGenCallbackTy BodyGenCB,
1897  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
  // Emits an OpenMP `single` region: a conditional inlined region guarded by
  // `__kmpc_single` and closed by `__kmpc_end_single`, whose body is produced
  // by `BodyGenCB` and finalized through `FiniCB`.
  // NOTE(review): the first line(s) of this signature (return type, function
  // name and the `Loc` parameter used below) are missing from this listing.
1898 
1899  if (!updateToLocation(Loc))
1900  return Loc.IP;
1901 
1902  // If needed (i.e. not null), initialize `DidIt` with 0
1903  if (DidIt) {
1904  Builder.CreateStore(Builder.getInt32(0), DidIt);
1905  }
1906 
1907  Directive OMPD = Directive::OMPD_single;
1908  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1909  Value *Ident = getOrCreateIdent(SrcLocStr);
1910  Value *ThreadId = getOrCreateThreadID(Ident);
1911  Value *Args[] = {Ident, ThreadId};
1912 
  // Both runtime calls are created back to back at the current position;
  // EmitOMPInlinedRegion() is handed both and is responsible for placing (or
  // erasing) the exit call.
1913  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
1914  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1915 
1916  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
1917  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1918 
1919  // generates the following:
1920  // if (__kmpc_single()) {
1921  // .... single region ...
1922  // __kmpc_end_single
1923  // }
1924 
1925  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1926  /*Conditional*/ true, /*hasFinalize*/ true);
1927 }
1928 
1930  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
1931  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
  // Emits an OpenMP `critical` region: an unconditional inlined region
  // bracketed by `__kmpc_critical` (or `__kmpc_critical_with_hint` when
  // `HintInst` is non-null) and `__kmpc_end_critical`, using the lock variable
  // associated with `CriticalName`.
  // NOTE(review): the first line of this signature (return type and function
  // name) is missing from this listing.
1932 
1933  if (!updateToLocation(Loc))
1934  return Loc.IP;
1935 
1936  Directive OMPD = Directive::OMPD_critical;
1937  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1938  Value *Ident = getOrCreateIdent(SrcLocStr);
1939  Value *ThreadId = getOrCreateThreadID(Ident);
1940  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
1941  Value *Args[] = {Ident, ThreadId, LockVar};
1942 
  // NOTE(review): `EnterArgs` is used below but its declaration is missing
  // from this listing; presumably it starts out as a copy of `Args` so that
  // the hint can be appended for the entry call only - confirm.
1944  Function *RTFn = nullptr;
1945  if (HintInst) {
1946  // Add Hint to entry Args and create call
1947  EnterArgs.push_back(HintInst);
1948  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
1949  } else {
1950  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
1951  }
1952  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
1953 
  // The exit call always takes the plain {Ident, ThreadId, LockVar} arguments,
  // with or without a hint.
1954  Function *ExitRTLFn =
1955  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
1956  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1957 
1958  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1959  /*Conditional*/ false, /*hasFinalize*/ true);
1960 }
1961 
1962 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
1963  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
1964  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
1965  bool HasFinalize, bool IsCancellable) {
1966 
1967  if (HasFinalize)
1968  FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
1969 
1970  // Create inlined region's entry and body blocks, in preparation
1971  // for conditional creation
1972  BasicBlock *EntryBB = Builder.GetInsertBlock();
1973  Instruction *SplitPos = EntryBB->getTerminator();
1974  if (!isa_and_nonnull<BranchInst>(SplitPos))
1975  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
1976  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
1977  BasicBlock *FiniBB =
1978  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
1979 
1980  Builder.SetInsertPoint(EntryBB->getTerminator());
1981  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
1982 
1983  // generate body
1984  BodyGenCB(/* AllocaIP */ InsertPointTy(),
1985  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
1986 
1987  // If we didn't emit a branch to FiniBB during body generation, it means
1988  // FiniBB is unreachable (e.g. while(1);). stop generating all the
1989  // unreachable blocks, and remove anything we are not going to use.
1990  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
1991  if (SkipEmittingRegion) {
1992  FiniBB->eraseFromParent();
1993  ExitCall->eraseFromParent();
1994  // Discard finalization if we have it.
1995  if (HasFinalize) {
1996  assert(!FinalizationStack.empty() &&
1997  "Unexpected finalization stack state!");
1998  FinalizationStack.pop_back();
1999  }
2000  } else {
2001  // emit exit call and do any needed finalization.
2002  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
2003  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
2004  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
2005  "Unexpected control flow graph state!!");
2006  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
2007  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
2008  "Unexpected Control Flow State!");
2009  MergeBlockIntoPredecessor(FiniBB);
2010  }
2011 
2012  // If we are skipping the region of a non conditional, remove the exit
2013  // block, and clear the builder's insertion point.
2014  assert(SplitPos->getParent() == ExitBB &&
2015  "Unexpected Insertion point location!");
2016  if (!Conditional && SkipEmittingRegion) {
2017  ExitBB->eraseFromParent();
2018  Builder.ClearInsertionPoint();
2019  } else {
2020  auto merged = MergeBlockIntoPredecessor(ExitBB);
2021  BasicBlock *ExitPredBB = SplitPos->getParent();
2022  auto InsertBB = merged ? ExitPredBB : ExitBB;
2023  if (!isa_and_nonnull<BranchInst>(SplitPos))
2024  SplitPos->eraseFromParent();
2025  Builder.SetInsertPoint(InsertBB);
2026  }
2027 
2028  return Builder.saveIP();
2029 }
2030 
2031 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
2032  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
2033  // if nothing to do, Return current insertion point.
2034  if (!Conditional || !EntryCall)
2035  return Builder.saveIP();
2036 
2037  BasicBlock *EntryBB = Builder.GetInsertBlock();
2038  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
2039  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
2040  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
2041 
2042  // Emit thenBB and set the Builder's insertion point there for
2043  // body generation next. Place the block after the current block.
2044  Function *CurFn = EntryBB->getParent();
2045  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
2046 
2047  // Move Entry branch to end of ThenBB, and replace with conditional
2048  // branch (If-stmt)
2049  Instruction *EntryBBTI = EntryBB->getTerminator();
2050  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
2051  EntryBBTI->removeFromParent();
2052  Builder.SetInsertPoint(UI);
2053  Builder.Insert(EntryBBTI);
2054  UI->eraseFromParent();
2055  Builder.SetInsertPoint(ThenBB->getTerminator());
2056 
2057  // return an insertion point to ExitBB.
2058  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
2059 }
2060 
2061 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
2062  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
2063  bool HasFinalize) {
2064 
2065  Builder.restoreIP(FinIP);
2066 
2067  // If there is finalization to do, emit it before the exit call
2068  if (HasFinalize) {
2069  assert(!FinalizationStack.empty() &&
2070  "Unexpected finalization stack state!");
2071 
2072  FinalizationInfo Fi = FinalizationStack.pop_back_val();
2073  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
2074 
2075  Fi.FiniCB(FinIP);
2076 
2077  BasicBlock *FiniBB = FinIP.getBlock();
2078  Instruction *FiniBBTI = FiniBB->getTerminator();
2079 
2080  // set Builder IP for call creation
2081  Builder.SetInsertPoint(FiniBBTI);
2082  }
2083 
2084  if (!ExitCall)
2085  return Builder.saveIP();
2086 
2087  // place the Exitcall as last instruction before Finalization block terminator
2088  ExitCall->removeFromParent();
2089  Builder.Insert(ExitCall);
2090 
2091  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
2092  ExitCall->getIterator());
2093 }
2094 
2096  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
2097  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  // Emits the control flow for a `copyin` clause: a comparison of MasterAddr
  // and PrivateAddr (as integers of type IntPtrTy) that branches to a
  // "copyin.not.master" block when they differ and to "copyin.not.master.end"
  // otherwise. Returns an insertion point inside "copyin.not.master".
  // NOTE(review): the first line of this signature (return type and function
  // name) and one statement line after the early return are missing from this
  // listing.

  // An unset insertion point is returned unchanged; nothing is emitted.
2098  if (!IP.isSet())
2099  return IP;
2100 
2102 
2103  // creates the following CFG structure
2104  // OMP_Entry : (MasterAddr != PrivateAddr)?
2105  // F T
2106  // | \
2107  // | copyin.not.master
2108  // | /
2109  // v /
2110  // copyin.not.master.end
2111  // |
2112  // v
2113  // OMP.Entry.Next
2114 
2115  BasicBlock *OMP_Entry = IP.getBlock();
2116  Function *CurFn = OMP_Entry->getParent();
2117  BasicBlock *CopyBegin =
2118  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
2119  BasicBlock *CopyEnd = nullptr;
2120 
2121  // If entry block is terminated, split to preserve the branch to following
2122  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
2123  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
2124  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
2125  "copyin.not.master.end");
  // Remove the terminator left in OMP_Entry after the split; the conditional
  // branch created below becomes the block's terminator instead.
2126  OMP_Entry->getTerminator()->eraseFromParent();
2127  } else {
2128  CopyEnd =
2129  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
2130  }
2131 
  // Compare the two addresses as integers and branch on the result.
2132  Builder.SetInsertPoint(OMP_Entry);
2133  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
2134  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
2135  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
2136  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
2137 
  // With BranchtoEnd the copy block is terminated by a branch to CopyEnd and
  // the returned insertion point sits in front of that branch; otherwise the
  // copy block is left unterminated and the insertion point is at its end.
2138  Builder.SetInsertPoint(CopyBegin);
2139  if (BranchtoEnd)
2140  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
2141 
2142  return Builder.saveIP();
2143 }
2144 
2147  std::string Name) {
  // Emits a call to the `__kmpc_alloc` runtime function with the arguments
  // {ThreadId, Size, Allocator} at `Loc.IP` and returns that call, named
  // `Name`.
  // NOTE(review): the first lines of this signature (return type, function
  // name and the `Loc`, `Size` and `Allocator` parameters used below) and one
  // statement line before restoreIP() are missing from this listing.
2149  Builder.restoreIP(Loc.IP);
2150 
  // The ident is only needed to obtain the thread id; it is not passed to the
  // runtime call itself.
2151  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2152  Value *Ident = getOrCreateIdent(SrcLocStr);
2153  Value *ThreadId = getOrCreateThreadID(Ident);
2154  Value *Args[] = {ThreadId, Size, Allocator};
2155 
2156  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
2157 
2158  return Builder.CreateCall(Fn, Args, Name);
2159 }
2160 
2163  std::string Name) {
  // Emits a call to the `__kmpc_free` runtime function with the arguments
  // {ThreadId, Addr, Allocator} at `Loc.IP` and returns that call, named
  // `Name`.
  // NOTE(review): the first lines of this signature (return type, function
  // name and the `Loc`, `Addr` and `Allocator` parameters used below) and one
  // statement line before restoreIP() are missing from this listing.
2165  Builder.restoreIP(Loc.IP);
2166 
  // The ident is only needed to obtain the thread id; it is not passed to the
  // runtime call itself.
2167  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2168  Value *Ident = getOrCreateIdent(SrcLocStr);
2169  Value *ThreadId = getOrCreateThreadID(Ident);
2170  Value *Args[] = {ThreadId, Addr, Allocator};
2171  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
2172  return Builder.CreateCall(Fn, Args, Name);
2173 }
2174 
2176  const LocationDescription &Loc, llvm::Value *Pointer,
  // Emits a call to the `__kmpc_threadprivate_cached` runtime function with
  // the arguments {Ident, ThreadId, Pointer, Size, ThreadPrivateCache} at
  // `Loc.IP` and returns that call.
  // NOTE(review): the first line of this signature (return type and function
  // name), the remaining parameter lines (the body uses `Size` and `Name`) and
  // one statement line before restoreIP() are missing from this listing.
2179  Builder.restoreIP(Loc.IP);
2180 
2181  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2182  Value *Ident = getOrCreateIdent(SrcLocStr);
2183  Value *ThreadId = getOrCreateThreadID(Ident);
  // The cache is an OpenMP-internal variable of type Int8PtrPtr looked up (or
  // created on first use) under `Name`.
2184  Constant *ThreadPrivateCache =
2185  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
2186  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
2187 
2188  Function *Fn =
2189  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
2190 
2191  return Builder.CreateCall(Fn, Args);
2192 }
2193 
2194 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
2195  StringRef FirstSeparator,
2196  StringRef Separator) {
2197  SmallString<128> Buffer;
2198  llvm::raw_svector_ostream OS(Buffer);
2199  StringRef Sep = FirstSeparator;
2200  for (StringRef Part : Parts) {
2201  OS << Sep << Part;
2202  Sep = Separator;
2203  }
2204  return OS.str().str();
2205 }
2206 
2207 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
2208  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2209  // TODO: Replace the twine arg with stringref to get rid of the conversion
2210  // logic. However This is taken from current implementation in clang as is.
2211  // Since this method is used in many places exclusively for OMP internal use
2212  // we will keep it as is for temporarily until we move all users to the
2213  // builder and then, if possible, fix it everywhere in one go.
2214  SmallString<256> Buffer;
2215  llvm::raw_svector_ostream Out(Buffer);
2216  Out << Name;
2217  StringRef RuntimeName = Out.str();
2218  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2219  if (Elem.second) {
2220  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2221  "OMP internal variable has different type than requested");
2222  } else {
2223  // TODO: investigate the appropriate linkage type used for the global
2224  // variable for possibly changing that to internal or private, or maybe
2225  // create different versions of the function for different OMP internal
2226  // variables.
2227  Elem.second = new llvm::GlobalVariable(
2228  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
2229  llvm::Constant::getNullValue(Ty), Elem.first(),
2230  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
2231  AddressSpace);
2232  }
2233 
2234  return Elem.second;
2235 }
2236 
2237 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
2238  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2239  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
2240  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
2241 }
2242 
2245  std::string VarName) {
  // Creates a private, constant global named `VarName` that is initialized
  // with a constant data array built from `Mappings`, marks its address as
  // not significant, and returns it.
  // NOTE(review): the first lines of this signature (return type, function
  // name and the `Mappings` parameter used below) are missing from this
  // listing.
2246  llvm::Constant *MaptypesArrayInit =
2247  llvm::ConstantDataArray::get(M.getContext(), Mappings);
2248  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
2249  M, MaptypesArrayInit->getType(),
2250  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
2251  VarName);
2252  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2253  return MaptypesArrayGlobal;
2254 }
2255 
// Decides, from the atomic kind `AK` and the ordering `AO`, whether a flush
// has to follow an atomic operation and, if so, emits one via emitFlush(Loc).
// Returns true exactly when a flush was emitted.
// NOTE(review): several lines of this function are missing from this listing:
// the start of the leading assert, the declaration of `FlushAO`, the
// conditions guarding the Read and Write/Update cases, and the case labels of
// the inner switch. The comments below only describe what is still visible.
2256 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
2257  const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
2260  "Unexpected Atomic Ordering.");
2261 
2262  bool Flush = false;
2264 
2265  switch (AK) {
  // Read: when the (missing) condition holds, flush with Acquire ordering.
2266  case Read:
2269  FlushAO = AtomicOrdering::Acquire;
2270  Flush = true;
2271  }
2272  break;
  // Write and Update share one rule: when the (missing) condition holds,
  // flush with Release ordering.
2273  case Write:
2274  case Update:
2277  FlushAO = AtomicOrdering::Release;
2278  Flush = true;
2279  }
2280  break;
  // Capture: the flush ordering is chosen per `AO`; orderings that are not
  // listed leave `Flush` false.
2281  case Capture:
2282  switch (AO) {
2284  FlushAO = AtomicOrdering::Acquire;
2285  Flush = true;
2286  break;
2288  FlushAO = AtomicOrdering::Release;
2289  Flush = true;
2290  break;
2294  Flush = true;
2295  break;
2296  default:
2297  // do nothing - leave silently.
2298  break;
2299  }
2300  }
2301 
2302  if (Flush) {
2303  // Currently Flush RT call still doesn't take memory_ordering, so for when
2304  // that happens, this tries to do the resolution of which atomic ordering
2305  // to use with but issue the flush call
2306  // TODO: pass `FlushAO` after memory ordering support is added
2307  (void)FlushAO;
2308  emitFlush(Loc);
2309  }
2310 
2311  // for AO == AtomicOrdering::Monotonic and all other case combinations
2312  // do nothing
2313  return Flush;
2314 }
2315 
2319  AtomicOrdering AO) {
  // Emits an OpenMP atomic read: loads the value at `X.Var` atomically with
  // ordering `AO`, emits a flush if checkAndEmitFlushAfterAtomic() requires
  // one, stores the loaded value to `V.Var`, and returns the insertion point
  // after the store.
  // NOTE(review): the first lines of this signature (return type, function
  // name and the `Loc`, `X` and `V` parameters used below) are missing from
  // this listing.
2320  if (!updateToLocation(Loc))
2321  return Loc.IP;
2322 
2323  Type *XTy = X.Var->getType();
2324  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
2325  Type *XElemTy = XTy->getPointerElementType();
2326  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
2327  XElemTy->isPointerTy()) &&
2328  "OMP atomic read expected a scalar type");
2329 
2330  Value *XRead = nullptr;
2331 
  // Integers are loaded directly; floating-point and pointer values are loaded
  // through an integer-typed view of the same address and converted back.
2332  if (XElemTy->isIntegerTy()) {
2333  LoadInst *XLD =
2334  Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
2335  XLD->setAtomic(AO);
2336  XRead = cast<Value>(XLD);
2337  } else {
2338  // We need to bitcast and perform atomic op as integer
  // NOTE(review): confirm that getScalarSizeInBits() yields a usable width
  // when XElemTy is a pointer type.
2339  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
2340  IntegerType *IntCastTy =
2341  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2342  Value *XBCast = Builder.CreateBitCast(
2343  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
2344  LoadInst *XLoad =
2345  Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
2346  XLoad->setAtomic(AO);
2347  if (XElemTy->isFloatingPointTy()) {
2348  XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
2349  } else {
2350  XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
2351  }
2352  }
  // The flush (if any) is emitted before the plain store to V.
2353  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
2354  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
2355  return Builder.saveIP();
2356 }
2357 
2360  AtomicOpValue &X, Value *Expr,
2361  AtomicOrdering AO) {
  // Emits an OpenMP atomic write: stores `Expr` to `X.Var` atomically with
  // ordering `AO`, emits a flush if checkAndEmitFlushAfterAtomic() requires
  // one, and returns the insertion point after it.
  // NOTE(review): the first lines of this signature (return type, function
  // name and the `Loc` parameter used below) are missing from this listing.
2362  if (!updateToLocation(Loc))
2363  return Loc.IP;
2364 
2365  Type *XTy = X.Var->getType();
2366  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
2367  Type *XElemTy = XTy->getPointerElementType();
2368  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
2369  XElemTy->isPointerTy()) &&
2370  "OMP atomic write expected a scalar type");
2371 
  // Integers are stored directly; other scalar types are stored through an
  // integer-typed view of the destination address.
2372  if (XElemTy->isIntegerTy()) {
2373  StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
2374  XSt->setAtomic(AO);
2375  } else {
2376  // We need to bitcast and perform atomic op as integers
  // NOTE(review): when XElemTy is a pointer type, `Expr` is converted with
  // CreateBitCast rather than a pointer-to-integer cast, and the width comes
  // from getScalarSizeInBits() - confirm both are valid for pointers.
2377  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
2378  IntegerType *IntCastTy =
2379  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2380  Value *XBCast = Builder.CreateBitCast(
2381  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
2382  Value *ExprCast =
2383  Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
2384  StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
2385  XSt->setAtomic(AO);
2386  }
2387 
2388  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
2389  return Builder.saveIP();
2390 }
2391 
2393  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
2394  Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
2395  AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
  // Emits an OpenMP atomic update of `X.Var` by delegating to
  // emitAtomicUpdate(), follows it with a flush if
  // checkAndEmitFlushAfterAtomic() requires one, and returns the insertion
  // point after it.
  // NOTE(review): the first line of this signature (return type and function
  // name) is missing from this listing.
2396  if (!updateToLocation(Loc))
2397  return Loc.IP;
2398 
  // Sanity checks on the operand type and the requested operation.
  // NOTE(review): these asserts are wrapped in LLVM_DEBUG, so they presumably
  // only run when debug output for this pass is enabled - confirm that this
  // is intended.
2399  LLVM_DEBUG({
2400  Type *XTy = X.Var->getType();
2401  assert(XTy->isPointerTy() &&
2402  "OMP Atomic expects a pointer to target memory");
2403  Type *XElemTy = XTy->getPointerElementType();
2404  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
2405  XElemTy->isPointerTy()) &&
2406  "OMP atomic update expected a scalar type");
2407  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
2408  (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
2409  "OpenMP atomic does not support LT or GT operations");
2410  });
2411 
  // The pair returned by emitAtomicUpdate() is not needed for a plain update.
2412  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
2413  IsXLHSInRHSPart);
2414  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
2415  return Builder.saveIP();
2416 }
2417 
2418 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2419  AtomicRMWInst::BinOp RMWOp) {
2420  switch (RMWOp) {
2421  case AtomicRMWInst::Add:
2422  return Builder.CreateAdd(Src1, Src2);
2423  case AtomicRMWInst::Sub:
2424  return Builder.CreateSub(Src1, Src2);
2425  case AtomicRMWInst::And:
2426  return Builder.CreateAnd(Src1, Src2);
2427  case AtomicRMWInst::Nand:
2428  return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
2429  case AtomicRMWInst::Or:
2430  return Builder.CreateOr(Src1, Src2);
2431  case AtomicRMWInst::Xor:
2432  return Builder.CreateXor(Src1, Src2);
2433  case AtomicRMWInst::Xchg:
2434  case AtomicRMWInst::FAdd:
2435  case AtomicRMWInst::FSub:
2437  case AtomicRMWInst::Max:
2438  case AtomicRMWInst::Min:
2439  case AtomicRMWInst::UMax:
2440  case AtomicRMWInst::UMin:
2441  llvm_unreachable("Unsupported atomic update operation");
2442  }
2443  llvm_unreachable("Unsupported atomic update operation");
2444 }
2445 
2446 std::pair<Value *, Value *>
2447 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
2449  AtomicUpdateCallbackTy &UpdateOp,
2450  bool VolatileX, bool IsXLHSInRHSPart) {
2451  Type *XElemTy = X->getType()->getPointerElementType();
2452 
2453  bool DoCmpExch =
2454  ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
2455  (RMWOp == AtomicRMWInst::FSub) ||
2456  (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
2457 
2458  std::pair<Value *, Value *> Res;
2459  if (XElemTy->isIntegerTy() && !DoCmpExch) {
2460  Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
2461  // not needed except in case of postfix captures. Generate anyway for
2462  // consistency with the else part. Will be removed with any DCE pass.
2463  Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
2464  } else {
2465  unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
2466  IntegerType *IntCastTy =
2467  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2468  Value *XBCast =
2469  Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
2470  LoadInst *OldVal =
2471  Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
2472  OldVal->setAtomic(AO);
2473  // CurBB
2474  // | /---\
2475  // ContBB |
2476  // | \---/
2477  // ExitBB
2478  BasicBlock *CurBB = Builder.GetInsertBlock();
2479  Instruction *CurBBTI = CurBB->getTerminator();
2480  CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
2481  BasicBlock *ExitBB =
2482  CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
2483  BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
2484  X->getName() + ".atomic.cont");
2485  ContBB->getTerminator()->eraseFromParent();
2486  Builder.SetInsertPoint(ContBB);
2487  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
2488  PHI->addIncoming(OldVal, CurBB);
2489  AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
2490  NewAtomicAddr->setName(X->getName() + "x.new.val");
2491  NewAtomicAddr->moveBefore(AllocIP);
2492  IntegerType *NewAtomicCastTy =
2493  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
2494  bool IsIntTy = XElemTy->isIntegerTy();
2495  Value *NewAtomicIntAddr =
2496  (IsIntTy)
2497  ? NewAtomicAddr
2498  : Builder.CreateBitCast(NewAtomicAddr,
2499  NewAtomicCastTy->getPointerTo(Addrspace));
2500  Value *OldExprVal = PHI;
2501  if (!IsIntTy) {
2502  if (XElemTy->isFloatingPointTy()) {
2503  OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
2504  X->getName() + ".atomic.fltCast");
2505  } else {
2506  OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
2507  X->getName() + ".atomic.ptrCast");
2508  }
2509  }
2510 
2511  Value *Upd = UpdateOp(OldExprVal, Builder);
2512  Builder.CreateStore(Upd, NewAtomicAddr);
2513  LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
2514  Value *XAddr =
2515  (IsIntTy)
2516  ? X
2517  : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
2520  AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
2521  XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
2522  Result->setVolatile(VolatileX);
2523  Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
2524  Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
2525  PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
2526  Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
2527 
2528  Res.first = OldExprVal;
2529  Res.second = Upd;
2530 
2531  // set Insertion point in exit block
2532  if (UnreachableInst *ExitTI =
2533  dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
2534  CurBBTI->eraseFromParent();
2535  Builder.SetInsertPoint(ExitBB);
2536  } else {
2537  Builder.SetInsertPoint(ExitTI);
2538  }
2539  }
2540 
2541  return Res;
2542 }
2543 
2545  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
2546  AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
2548  bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
  // Emits an OpenMP atomic capture: atomically updates `X.Var` through
  // emitAtomicUpdate(), stores either the old or the new value of X to
  // `V.Var`, emits a flush if checkAndEmitFlushAfterAtomic() requires one,
  // and returns the insertion point after it.
  // NOTE(review): the first line of this signature (return type and function
  // name) and one parameter line (declaring the `RMWOp` and `UpdateOp` used
  // below) are missing from this listing.
2549  if (!updateToLocation(Loc))
2550  return Loc.IP;
2551 
  // Sanity checks on the operand type and the requested operation.
  // NOTE(review): these asserts are wrapped in LLVM_DEBUG, so they presumably
  // only run when debug output for this pass is enabled; unlike the atomic
  // update variant, UMax/UMin are not rejected here - confirm both are
  // intended.
2552  LLVM_DEBUG({
2553  Type *XTy = X.Var->getType();
2554  assert(XTy->isPointerTy() &&
2555  "OMP Atomic expects a pointer to target memory");
2556  Type *XElemTy = XTy->getPointerElementType();
2557  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
2558  XElemTy->isPointerTy()) &&
2559  "OMP atomic capture expected a scalar type");
2560  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
2561  "OpenMP atomic does not support LT or GT operations");
2562  });
2563 
2564  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
2565  // 'x' is simply atomically rewritten with 'expr'.
2566  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
2567  std::pair<Value *, Value *> Result =
2568  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
2569  X.IsVolatile, IsXLHSInRHSPart);
2570 
  // Result.first is the value of X before the update, Result.second the value
  // after it; a postfix update captures the former.
2571  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
2572  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
2573 
2574  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
2575  return Builder.saveIP();
2576 }
2577 
// Builds a constant array from Names and wraps it in a new private, constant
// global variable called VarName inside module M; returns that global.
// NOTE(review): source lines 2578-2579 (function name and first parameter)
// and 2582 (presumably the ArrayType::get call that opens the array type)
// are missing from this listing -- confirm against the original file.
2580  std::string VarName) {
2581  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
// Element type is a pointer to i8; the element count is Names.size().
2583  llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
2584  Names);
// The global uses the initializer's type and the initializer itself.
2585  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
2586  M, MapNamesArrayInit->getType(),
2587  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
2588  VarName);
2589  return MapNamesArrayGlobal;
2590 }
2591 
2592 // Create all simple and struct types exposed by the runtime and remember
2593 // the llvm::PointerTypes of them for easy access later.
//
// The macros defined below are consumed by the #include of OMPKinds.def at the
// end of the function. NOTE(review): presumably that file invokes each macro
// once per runtime type -- confirm against OMPKinds.def.
2594 void OpenMPIRBuilder::initializeTypes(Module &M) {
2595  LLVMContext &Ctx = M.getContext();
// Scratch variable used by the OMP_STRUCT_TYPE expansion.
2596  StructType *T;
// Simple type: assign the given initializer to the named variable.
2597 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
// Array type: set <VarName>Ty and an unqualified pointer to it, <VarName>PtrTy.
2598 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2599  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
2600  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
// Function type: set <VarName> and an unqualified pointer to it, <VarName>Ptr.
2601 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2602  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
2603  VarName##Ptr = PointerType::getUnqual(VarName);
// Struct type: reuse a struct already registered under StructName in Ctx and
// create it only if the lookup fails; then set <VarName> and <VarName>Ptr.
2604 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
2605  T = StructType::getTypeByName(Ctx, StructName); \
2606  if (!T) \
2607  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
2608  VarName = T; \
2609  VarName##Ptr = PointerType::getUnqual(T);
2610 #include "llvm/Frontend/OpenMP/OMPKinds.def"
2611 }
2612 
// Collects into BlockVector every block reached from EntryBB by following
// successor edges, using BlockSet to remember which blocks were already seen.
// NOTE(review): source lines 2613-2614 (function name and first parameter) and
// 2616 (presumably the declaration of Worklist) are missing from this listing.
2615  SmallVectorImpl<BasicBlock *> &BlockVector) {
2617  BlockSet.insert(EntryBB);
// ExitBB is marked as seen up front, so it is never pushed onto the worklist:
// it is not appended to BlockVector and its successors are not followed.
2618  BlockSet.insert(ExitBB);
2619 
2620  Worklist.push_back(EntryBB);
2621  while (!Worklist.empty()) {
// Blocks are taken from the back of the worklist.
2622  BasicBlock *BB = Worklist.pop_back_val();
2623  BlockVector.push_back(BB);
2624  for (BasicBlock *SuccBB : successors(BB))
// Only queue successors that were newly inserted into BlockSet.
2625  if (BlockSet.insert(SuccBB).second)
2626  Worklist.push_back(SuccBB);
2627  }
2628 }
2629 
// Appends the loop's six control blocks (Preheader, Header, Cond, Latch, Exit,
// After) to BBs; blocks already in BBs are kept.
// NOTE(review): source line 2631 (the parameter list declaring BBs) is missing
// from this listing.
2630 void CanonicalLoopInfo::collectControlBlocks(
2632  // We only count those BBs as control blocks for which we do not need to
2633  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
2634  // flow. For consistency, this also means we do not add the Body block, which
2635  // is just the entry to the body code.
// Reserve room for the six blocks appended below.
2636  BBs.reserve(BBs.size() + 6);
2637  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
2638 }
2639 
2641 #ifndef NDEBUG
2642  if (!IsValid)
2643  return;
2644 
2645  // Verify standard control-flow we use for OpenMP loops.
2646  assert(Preheader);
2647  assert(isa<BranchInst>(Preheader->getTerminator()) &&
2648  "Preheader must terminate with unconditional branch");
2649  assert(Preheader->getSingleSuccessor() == Header &&
2650  "Preheader must jump to header");
2651 
2652  assert(Header);
2653  assert(isa<BranchInst>(Header->getTerminator()) &&
2654  "Header must terminate with unconditional branch");
2655  assert(Header->getSingleSuccessor() == Cond &&
2656  "Header must jump to exiting block");
2657 
2658  assert(Cond);
2659  assert(Cond->getSinglePredecessor() == Header &&
2660  "Exiting block only reachable from header");
2661 
2662  assert(isa<BranchInst>(Cond->getTerminator()) &&
2663  "Exiting block must terminate with conditional branch");
2664  assert(size(successors(Cond)) == 2 &&
2665  "Exiting block must have two successors");
2666  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
2667  "Exiting block's first successor jump to the body");
2668  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
2669  "Exiting block's second successor must exit the loop");
2670 
2671  assert(Body);
2672  assert(Body->getSinglePredecessor() == Cond &&
2673  "Body only reachable from exiting block");
2674  assert(!isa<PHINode>(Body->front()));
2675 
2676  assert(Latch);
2677  assert(isa<BranchInst>(Latch->getTerminator()) &&
2678  "Latch must terminate with unconditional branch");
2679  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
2680  // TODO: To support simple redirecting of the end of the body code that has
2681  // multiple; introduce another auxiliary basic block like preheader and after.
2682  assert(Latch->getSinglePredecessor() != nullptr);
2683  assert(!isa<PHINode>(Latch->front()));
2684 
2685  assert(Exit);
2686  assert(isa<BranchInst>(Exit->getTerminator()) &&
2687  "Exit block must terminate with unconditional branch");
2688  assert(Exit->getSingleSuccessor() == After &&
2689  "Exit block must jump to after block");
2690 
2691  assert(After);
2692  assert(After->getSinglePredecessor() == Exit &&
2693  "After block only reachable from exit block");
2694  assert(After->empty() || !isa<PHINode>(After->front()));
2695 
2696  Instruction *IndVar = getIndVar();
2697  assert(IndVar && "Canonical induction variable not found?");
2698  assert(isa<IntegerType>(IndVar->getType()) &&
2699  "Induction variable must be an integer");
2700  assert(cast<PHINode>(IndVar)->getParent() == Header &&
2701  "Induction variable must be a PHI in the loop header");
2702  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
2703  assert(
2704  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
2705  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
2706 
2707  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
2708  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
2709  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
2710  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
2711  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
2712  ->isOne());
2713 
2714  Value *TripCount = getTripCount();
2715  assert(TripCount && "Loop trip count not found?");
2716  assert(IndVar->getType() == TripCount->getType() &&
2717  "Trip count and induction variable must have the same type");
2718 
2719  auto *CmpI = cast<CmpInst>(&Cond->front());
2720  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
2721  "Exit condition must be a signed less-than comparison");
2722  assert(CmpI->getOperand(0) == IndVar &&
2723  "Exit condition must compare the induction variable");
2724  assert(CmpI->getOperand(1) == TripCount &&
2725  "Exit condition must compare with the trip count");
2726 #endif
2727 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:1180
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:2175
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:150
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:602
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createSection
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
Definition: OMPIRBuilder.cpp:990
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:1929
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:1157
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
llvm::OpenMPIRBuilder::createLoopSkeleton
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
Definition: OMPIRBuilder.cpp:1072
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
llvm::Function::empty
bool empty() const
Definition: Function.h:784
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:755
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:494
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1325
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:754
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1570
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:125
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:740
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:307
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:1689
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1905
llvm::OpenMPIRBuilder::createWorkshareLoop
CanonicalLoopInfo * createWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier)
Modifies the canonical loop to be a workshare loop.
Definition: OMPIRBuilder.cpp:1358
llvm::OpenMPIRBuilder::getLanemaskType
Type * getLanemaskType()
Definition: OMPIRBuilder.cpp:263
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:762
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:53
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:457
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2604
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:328
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:744
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2220
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:94
Error.h
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1562
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:327
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:826
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
llvm::OpenMPIRBuilder::AtomicOpValue
a struct to pack relevant information while generating atomic Ops
Definition: OMPIRBuilder.h:952
llvm::CanonicalLoopInfo::getAfterIP
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Definition: OMPIRBuilder.h:1190
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:381
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::Optional
Definition: APInt.h:33
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
CodeExtractor.h
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:63
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:752
llvm::OpenMPIRBuilder::AtomicOpValue::Var
Value * Var
Definition: OMPIRBuilder.h:953
llvm::BasicBlock::hasNPredecessors
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:286
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1306
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:634
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::OpenMPIRBuilder::createAtomicCapture
InsertPointTy createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
Definition: OMPIRBuilder.cpp:2544
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:195
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:875
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::StoreInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:384
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1198
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:1568
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2032
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:1170
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr, bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:141
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:172
CommandLine.h
llvm::OpenMPIRBuilder::createDynamicWorkshareLoop
InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, omp::OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a dynamically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1397
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:740
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr()
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:308
llvm::OpenMPIRBuilder::getOrCreateIdent
Value * getOrCreateIdent(Constant *SrcLocStr, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:230
OMPIRBuilder.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:647
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:2640
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:1137
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:2161
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:748
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:1529
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:335
llvm::OpenMPIRBuilder::createAtomicRead
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Definition: OMPIRBuilder.cpp:2317
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
IP
Definition: NVPTXLowerArgs.cpp:166
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:226
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:646
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:147
MDBuilder.h
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:750
setCanonicalLoopTripCount
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
Definition: OMPIRBuilder.cpp:1266
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1262
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:361
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:898
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:1185
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:752
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:1537
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:74
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:74
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:742
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:857
llvm::OpenMPIRBuilder::createAtomicUpdate
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
Definition: OMPIRBuilder.cpp:2392
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
Definition: OMPIRBuilder.cpp:2145
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:41
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:744
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::SmallString< 128 >
CFG.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:602
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:249
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:54
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:886
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:762
llvm::cl::opt< bool >
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.h:1129
llvm::ClrHandlerType::Filter
@ Filter
llvm::OpenMPIRBuilder::createOffloadMaptypes
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
Definition: OMPIRBuilder.cpp:2244
llvm::OpenMPIRBuilder::AtomicOpValue::IsVolatile
bool IsVolatile
Definition: OMPIRBuilder.h:955
llvm::OpenMPIRBuilder::createSections
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
Definition: OMPIRBuilder.cpp:892
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:746
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2750
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3091
DebugInfo.h
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::DebugLoc::get
DILocation * get() const
Get the underlying DILocation.
Definition: DebugLoc.cpp:21
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:440
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:1144
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:758
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:572
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:752
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:479
llvm::Triple::isAMDGCN
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Definition: Triple.h:707
llvm::omp::OMPScheduleType::Static
@ Static
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:158
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:598
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:132
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:1025
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition: BasicBlockUtils.cpp:1439
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:851
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1312
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:135
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:599
llvm::CanonicalLoopInfo
Class to represent the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:1103
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Triple.h
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:751
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1080
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:173
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:212
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:747
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:240
llvm::OpenMPIRBuilder::createOffloadMapnames
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Definition: OMPIRBuilder.cpp:2579
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:690
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:517
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:767
uint32_t
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantDataArray::getString
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3053
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:585
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
getKmpcForDynamicNextForType
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
Definition: OMPIRBuilder.cpp:1386
llvm::OpenMPIRBuilder::OutlineInfo::PostOutlineCB
PostOutlineCBTy PostOutlineCB
Definition: OMPIRBuilder.h:601
llvm::LoadInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:252
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
llvm::OpenMPIRBuilder::createMasked
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Definition: OMPIRBuilder.cpp:1049
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::GlobalValue::CommonLinkage
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:58
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:183
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::CanonicalLoopInfo::getExit
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:1150
llvm::AtomicOrdering::Release
@ Release
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::OpenMPIRBuilder::createStaticWorkshareLoop
CanonicalLoopInfo * createStaticWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1273
llvm::OpenMPIRBuilder::createTaskwait
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
Definition: OMPIRBuilder.cpp:869
redirectTo
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Definition: OMPIRBuilder.cpp:1512
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:198
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:346
llvm::AtomicRMWInst::FSub
@ FSub
*p = old - v
Definition: Instructions.h:768
llvm::OpenMPIRBuilder::createAtomicWrite
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
Definition: OMPIRBuilder.cpp:2359
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:748
llvm::MCID::Branch
@ Branch
Definition: MCInstrDesc.h:156
llvm::OpenMPIRBuilder::createCancel
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
Definition: OMPIRBuilder.cpp:390
OptimisticAttributes
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:728
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:175
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1261
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:584
llvm::CanonicalLoopInfo::getHeader
BasicBlock * getHeader() const
The header is the entry for each iteration.
Definition: OMPIRBuilder.h:1133
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:271
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
getKmpcForDynamicInitForType
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
Definition: OMPIRBuilder.cpp:1370
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::OpenMPIRBuilder::initialize
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
Definition: OMPIRBuilder.cpp:139
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::CanonicalLoopInfo::getLatch
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:1147
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:378
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::remarks::Type::Failure
@ Failure
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::AtomicRMWInst::FAdd
@ FAdd
*p = old + v
Definition: Instructions.h:765
llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:647
llvm::OpenMPIRBuilder::emitBarrierImpl
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
Definition: OMPIRBuilder.cpp:343
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::PHINode
Definition: Instructions.h:2600
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
llvm::Function::removeFromParent
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:367
llvm::SmallVectorImpl< uint64_t >
llvm::AtomicRMWInst::BAD_BINOP
@ BAD_BINOP
Definition: Instructions.h:772
llvm::CodeExtractor::isEligible
bool isEligible() const
Test whether this code extractor is eligible.
Definition: CodeExtractor.cpp:619
llvm::CanonicalLoopInfo::getTripCount
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:1162
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:269
llvm::OpenMPIRBuilder::emitFlush
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
Definition: OMPIRBuilder.cpp:843
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
llvm::OpenMPIRBuilder::OutlineInfo::collectBlocks
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Definition: OMPIRBuilder.cpp:2613
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4680
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:525
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3179
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:61
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4054
llvm::cl::desc
Definition: CommandLine.h:414
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:1895
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:760
llvm::OpenMPIRBuilder::createCopyinClauseBlocks
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
Definition: OMPIRBuilder.cpp:2095
llvm::OpenMPIRBuilder::createCopyPrivate
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
Definition: OMPIRBuilder.cpp:1874
llvm::SetVector< Value * >
llvm::omp::OMPScheduleType
OMPScheduleType
Definition: OMPConstants.h:113
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:623
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
Definition: OMPIRBuilder.cpp:74
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
llvm::CanonicalLoopInfo::getBody
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:1142
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:676
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:522
getKmpcForStaticInitForType
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Definition: OMPIRBuilder.cpp:1251
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:102
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:907
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::OpenMPIRBuilder::LocationDescription::IP
InsertPointTy IP
Definition: OMPIRBuilder.h:157
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:756