1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Triple.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/DebugInfo.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/MDBuilder.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/MC/TargetRegistry.h"
31 #include "llvm/Support/Error.h"
39 
40 #include <sstream>
41 
42 #define DEBUG_TYPE "openmp-ir-builder"
43 
44 using namespace llvm;
45 using namespace omp;
46 
47 static cl::opt<bool>
48  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
49  cl::desc("Use optimistic attributes describing "
50  "'as-if' properties of runtime calls."),
51  cl::init(false));
52 
53 static cl::opt<double> UnrollThresholdFactor(
54     "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
55  cl::desc("Factor for the unroll threshold to account for code "
56  "simplifications still taking place"),
57  cl::init(1.5));
58 
59 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
60   LLVMContext &Ctx = Fn.getContext();
61 
62  // Get the function's current attributes.
63  auto Attrs = Fn.getAttributes();
64  auto FnAttrs = Attrs.getFnAttrs();
65  auto RetAttrs = Attrs.getRetAttrs();
66   SmallVector<AttributeSet, 4> ArgAttrs;
67   for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
68  ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
69 
70 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
71 #include "llvm/Frontend/OpenMP/OMPKinds.def"
72 
73  // Add attributes to the function declaration.
74  switch (FnID) {
75 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
76  case Enum: \
77  FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
78  RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
79  for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
80  ArgAttrs[ArgNo] = \
81  ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
82  Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
83  break;
84 #include "llvm/Frontend/OpenMP/OMPKinds.def"
85  default:
86  // Attributes are optional.
87  break;
88  }
89 }
90 
91 FunctionCallee OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M,
92                                                            RuntimeFunction FnID) {
93   FunctionType *FnTy = nullptr;
94  Function *Fn = nullptr;
95 
96   // Try to find the declaration in the module first.
97  switch (FnID) {
98 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
99  case Enum: \
100  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
101  IsVarArg); \
102  Fn = M.getFunction(Str); \
103  break;
104 #include "llvm/Frontend/OpenMP/OMPKinds.def"
105  }
106 
107  if (!Fn) {
108  // Create a new declaration if we need one.
109  switch (FnID) {
110 #define OMP_RTL(Enum, Str, ...) \
111  case Enum: \
112  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
113  break;
114 #include "llvm/Frontend/OpenMP/OMPKinds.def"
115  }
116 
117  // Add information if the runtime function takes a callback function
118  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
119  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
120  LLVMContext &Ctx = Fn->getContext();
121  MDBuilder MDB(Ctx);
122  // Annotate the callback behavior of the runtime function:
123  // - The callback callee is argument number 2 (microtask).
124  // - The first two arguments of the callback callee are unknown (-1).
125  // - All variadic arguments to the runtime function are passed to the
126  // callback callee.
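        // As a rough illustration only (metadata IDs are arbitrary), the
        // annotation emitted below shows up in the IR roughly as:
        //   declare !callback !0 void @__kmpc_fork_call(%struct.ident_t*, i32,
        //                                               void (i32*, i32*, ...)*, ...)
        //   !0 = !{!1}
        //   !1 = !{i64 2, i64 -1, i64 -1, i1 true}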
127       Fn->addMetadata(
128           LLVMContext::MD_callback,
129           *MDNode::get(Ctx, {MDB.createCallbackEncoding(
130                                  2, {-1, -1}, /* VarArgsArePassed */ true)}));
131  }
132  }
133 
134  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
135  << " with type " << *Fn->getFunctionType() << "\n");
136  addAttributes(FnID, *Fn);
137 
138  } else {
139  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
140  << " with type " << *Fn->getFunctionType() << "\n");
141  }
142 
143  assert(Fn && "Failed to create OpenMP runtime function");
144 
145  // Cast the function to the expected type if necessary
146   Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
147   return {FnTy, C};
148 }
149 
150 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
151   FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
152  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
153  assert(Fn && "Failed to create OpenMP runtime function pointer");
154  return Fn;
155 }
156 
157 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
158 
159 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
160  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
161   SmallVector<BasicBlock *, 32> Blocks;
162   SmallVector<OutlineInfo, 16> DeferredOutlines;
163  for (OutlineInfo &OI : OutlineInfos) {
164     // Skip functions that have not been finalized yet; may happen with nested
165     // function generation.
166  if (Fn && OI.getFunction() != Fn) {
167  DeferredOutlines.push_back(OI);
168  continue;
169  }
170 
171  ParallelRegionBlockSet.clear();
172  Blocks.clear();
173  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
174 
175  Function *OuterFn = OI.getFunction();
176  CodeExtractorAnalysisCache CEAC(*OuterFn);
177  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
178  /* AggregateArgs */ false,
179  /* BlockFrequencyInfo */ nullptr,
180  /* BranchProbabilityInfo */ nullptr,
181  /* AssumptionCache */ nullptr,
182  /* AllowVarArgs */ true,
183  /* AllowAlloca */ true,
184  /* Suffix */ ".omp_par");
185 
186  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
187  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
188  << " Exit: " << OI.ExitBB->getName() << "\n");
189  assert(Extractor.isEligible() &&
190  "Expected OpenMP outlining to be possible!");
191 
192  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
193 
194  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
195  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
196  assert(OutlinedFn->getReturnType()->isVoidTy() &&
197  "OpenMP outlined functions should not return a value!");
198 
199     // For compatibility with the clang CG we move the outlined function after
200     // the one with the parallel region.
201  OutlinedFn->removeFromParent();
202  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
203 
204     // Remove the artificial entry introduced by the extractor right away; we
205     // made our own entry block after all.
206  {
207  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
208  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
209  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
210  if (AllowExtractorSinking) {
211  // Move instructions from the to-be-deleted ArtificialEntry to the entry
212  // basic block of the parallel region. CodeExtractor may have sunk
213  // allocas/bitcasts for values that are solely used in the outlined
214  // region and do not escape.
215  assert(!ArtificialEntry.empty() &&
216  "Expected instructions to sink in the outlined region");
217  for (BasicBlock::iterator It = ArtificialEntry.begin(),
218  End = ArtificialEntry.end();
219  It != End;) {
220  Instruction &I = *It;
221  It++;
222 
223  if (I.isTerminator())
224  continue;
225 
226  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
227  }
228  }
229  OI.EntryBB->moveBefore(&ArtificialEntry);
230  ArtificialEntry.eraseFromParent();
231  }
232  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
233  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
234 
235  // Run a user callback, e.g. to add attributes.
236  if (OI.PostOutlineCB)
237  OI.PostOutlineCB(*OutlinedFn);
238  }
239 
240  // Remove work items that have been completed.
241  OutlineInfos = std::move(DeferredOutlines);
242 }
243 
244 OpenMPIRBuilder::~OpenMPIRBuilder() {
245   assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
246 }
247 
248 GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
249   IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
250  auto *GV =
251  new GlobalVariable(M, I32Ty,
252  /* isConstant = */ true, GlobalValue::WeakODRLinkage,
253  ConstantInt::get(I32Ty, Value), Name);
254 
255  return GV;
256 }
257 
258 Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
259                                          IdentFlag LocFlags,
260  unsigned Reserve2Flags) {
261  // Enable "C-mode".
262  LocFlags |= OMP_IDENT_FLAG_KMPC;
263 
264  Value *&Ident =
265  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
266  if (!Ident) {
267  Constant *I32Null = ConstantInt::getNullValue(Int32);
268  Constant *IdentData[] = {
269  I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
270  ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
271  Constant *Initializer =
272  ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
273 
274  // Look for existing encoding of the location + flags, not needed but
275  // minimizes the difference to the existing solution while we transition.
276  for (GlobalVariable &GV : M.getGlobalList())
277  if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
278  if (GV.getInitializer() == Initializer)
279  Ident = &GV;
280 
281  if (!Ident) {
282  auto *GV = new GlobalVariable(
283  M, OpenMPIRBuilder::Ident,
284  /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
285           nullptr, GlobalValue::NotThreadLocal,
286           M.getDataLayout().getDefaultGlobalsAddressSpace());
287  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
288  GV->setAlignment(Align(8));
289  Ident = GV;
290  }
291  }
292 
293  return Builder.CreatePointerCast(Ident, IdentPtr);
294 }
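// Illustrative only: for a KMPC-mode ident with no extra flags, the emitted
// global looks roughly like
//   @0 = private unnamed_addr constant %struct.ident_t
//          { i32 0, i32 2, i32 0, i32 0, i8* <srclocstr> }, align 8
// where the second field carries the ident flags (OMP_IDENT_FLAG_KMPC == 2).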
295 
296 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
297   Constant *&SrcLocStr = SrcLocStrMap[LocStr];
298  if (!SrcLocStr) {
299  Constant *Initializer =
300  ConstantDataArray::getString(M.getContext(), LocStr);
301 
302  // Look for existing encoding of the location, not needed but minimizes the
303  // difference to the existing solution while we transition.
304  for (GlobalVariable &GV : M.getGlobalList())
305  if (GV.isConstant() && GV.hasInitializer() &&
306  GV.getInitializer() == Initializer)
307  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
308 
309  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
310  /* AddressSpace */ 0, &M);
311  }
312  return SrcLocStr;
313 }
314 
315 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
316                                                 StringRef FileName,
317  unsigned Line,
318  unsigned Column) {
319  SmallString<128> Buffer;
320  Buffer.push_back(';');
321  Buffer.append(FileName);
322  Buffer.push_back(';');
323  Buffer.append(FunctionName);
324  Buffer.push_back(';');
325  Buffer.append(std::to_string(Line));
326  Buffer.push_back(';');
327  Buffer.append(std::to_string(Column));
328  Buffer.push_back(';');
329  Buffer.push_back(';');
330  return getOrCreateSrcLocStr(Buffer.str());
331 }
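// For example (illustrative values), a call with FunctionName "foo",
// FileName "demo.c", Line 3 and Column 7 produces the string
// ";demo.c;foo;3;7;;", matching the unknown-location default
// ";unknown;unknown;0;0;;" used below.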
332 
333 Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
334   return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
335 }
336 
337 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
338   DILocation *DIL = DL.get();
339  if (!DIL)
340  return getOrCreateDefaultSrcLocStr();
341  StringRef FileName = M.getName();
342  if (DIFile *DIF = DIL->getFile())
343  if (Optional<StringRef> Source = DIF->getSource())
344  FileName = *Source;
345  StringRef Function = DIL->getScope()->getSubprogram()->getName();
346  if (Function.empty() && F)
347  Function = F->getName();
348  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
349  DIL->getColumn());
350 }
351 
352 Constant *
353 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
354   return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
355 }
356 
357 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
358   return Builder.CreateCall(
359  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
360  "omp_global_thread_num");
361 }
362 
363 OpenMPIRBuilder::InsertPointTy
364 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
365                                bool ForceSimpleCall, bool CheckCancelFlag) {
366  if (!updateToLocation(Loc))
367  return Loc.IP;
368  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
369 }
370 
371 OpenMPIRBuilder::InsertPointTy
372 OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
373                                  bool ForceSimpleCall, bool CheckCancelFlag) {
374  // Build call __kmpc_cancel_barrier(loc, thread_id) or
375  // __kmpc_barrier(loc, thread_id);
376 
377  IdentFlag BarrierLocFlags;
378  switch (Kind) {
379  case OMPD_for:
380  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
381  break;
382  case OMPD_sections:
383  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
384  break;
385  case OMPD_single:
386  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
387  break;
388  case OMPD_barrier:
389  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
390  break;
391  default:
392  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
393  break;
394  }
395 
396  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
397  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
398  getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
399 
400  // If we are in a cancellable parallel region, barriers are cancellation
401  // points.
402   // TODO: Check why we would force simple calls or ignore the cancel flag.
403  bool UseCancelBarrier =
404  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
405 
406  Value *Result =
407  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
408  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
409  : OMPRTL___kmpc_barrier),
410  Args);
411 
412  if (UseCancelBarrier && CheckCancelFlag)
413  emitCancelationCheckImpl(Result, OMPD_parallel);
414 
415  return Builder.saveIP();
416 }
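// Sketch of the emitted IR (names illustrative): a plain barrier becomes
//   call void @__kmpc_barrier(%struct.ident_t* @loc, i32 %tid)
// while inside a cancellable parallel region it becomes
//   %cancelled = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @loc, i32 %tid)
// followed by the cancellation check emitted above.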
417 
418 OpenMPIRBuilder::InsertPointTy
419 OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
420                               Value *IfCondition,
421  omp::Directive CanceledDirective) {
422  if (!updateToLocation(Loc))
423  return Loc.IP;
424 
425   // LLVM utilities prefer blocks with terminators.
426  auto *UI = Builder.CreateUnreachable();
427 
428  Instruction *ThenTI = UI, *ElseTI = nullptr;
429  if (IfCondition)
430  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
431  Builder.SetInsertPoint(ThenTI);
432 
433  Value *CancelKind = nullptr;
434  switch (CanceledDirective) {
435 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
436  case DirectiveEnum: \
437  CancelKind = Builder.getInt32(Value); \
438  break;
439 #include "llvm/Frontend/OpenMP/OMPKinds.def"
440  default:
441  llvm_unreachable("Unknown cancel kind!");
442  }
443 
444  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
445  Value *Ident = getOrCreateIdent(SrcLocStr);
446  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
447  Value *Result = Builder.CreateCall(
448  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
449  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
450  if (CanceledDirective == OMPD_parallel) {
452  Builder.restoreIP(IP);
453  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
454  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
455  /* CheckCancelFlag */ false);
456  }
457  };
458 
459  // The actual cancel logic is shared with others, e.g., cancel_barriers.
460  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
461 
462  // Update the insertion point and remove the terminator we introduced.
463  Builder.SetInsertPoint(UI->getParent());
464  UI->eraseFromParent();
465 
466  return Builder.saveIP();
467 }
468 
469 void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
470                                                omp::Directive CanceledDirective,
471  FinalizeCallbackTy ExitCB) {
472  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
473  "Unexpected cancellation!");
474 
475  // For a cancel barrier we create two new blocks.
476  BasicBlock *BB = Builder.GetInsertBlock();
477  BasicBlock *NonCancellationBlock;
478  if (Builder.GetInsertPoint() == BB->end()) {
479     // TODO: This branch will not be needed once we have moved to the
480     // OpenMPIRBuilder codegen completely.
481  NonCancellationBlock = BasicBlock::Create(
482  BB->getContext(), BB->getName() + ".cont", BB->getParent());
483  } else {
484  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
485  BB->getTerminator()->eraseFromParent();
486  Builder.SetInsertPoint(BB);
487  }
488  BasicBlock *CancellationBlock = BasicBlock::Create(
489  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
490 
491  // Jump to them based on the return value.
492  Value *Cmp = Builder.CreateIsNull(CancelFlag);
493  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
494  /* TODO weight */ nullptr, nullptr);
495 
496  // From the cancellation block we finalize all variables and go to the
497  // post finalization block that is known to the FiniCB callback.
498  Builder.SetInsertPoint(CancellationBlock);
499  if (ExitCB)
500  ExitCB(Builder.saveIP());
501  auto &FI = FinalizationStack.back();
502  FI.FiniCB(Builder.saveIP());
503 
504  // The continuation block is where code generation continues.
505  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
506 }
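// The resulting control flow is, roughly:
//   %cmp = icmp eq i32 %cancel_flag, 0
//   br i1 %cmp, label %<bb>.cont, label %<bb>.cncl
// where <bb>.cncl runs the finalization callback(s) and then jumps to the
// post-finalization block, and <bb>.cont continues normal code generation.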
507 
508 IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
509     const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
510  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
511  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
512  omp::ProcBindKind ProcBind, bool IsCancellable) {
513  if (!updateToLocation(Loc))
514  return Loc.IP;
515 
516  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
517  Value *Ident = getOrCreateIdent(SrcLocStr);
518  Value *ThreadID = getOrCreateThreadID(Ident);
519 
520  if (NumThreads) {
521  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
522  Value *Args[] = {
523  Ident, ThreadID,
524  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
525  Builder.CreateCall(
526  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
527  }
528 
529  if (ProcBind != OMP_PROC_BIND_default) {
530  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
531  Value *Args[] = {
532  Ident, ThreadID,
533  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
534  Builder.CreateCall(
535  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
536  }
537 
538  BasicBlock *InsertBB = Builder.GetInsertBlock();
539  Function *OuterFn = InsertBB->getParent();
540 
541  // Save the outer alloca block because the insertion iterator may get
542  // invalidated and we still need this later.
543  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
544 
545  // Vector to remember instructions we used only during the modeling but which
546  // we want to delete at the end.
547  SmallVector<Instruction *, 4> ToBeDeleted;
548 
549  // Change the location to the outer alloca insertion point to create and
550  // initialize the allocas we pass into the parallel region.
551  Builder.restoreIP(OuterAllocaIP);
552  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
553  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
554 
555  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
556  // program, otherwise we only need them for modeling purposes to get the
557  // associated arguments in the outlined function. In the former case,
558   // initialize the allocas properly; in the latter case, delete them later.
559  if (IfCondition) {
560  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
561  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
562  } else {
563  ToBeDeleted.push_back(TIDAddr);
564  ToBeDeleted.push_back(ZeroAddr);
565  }
566 
567  // Create an artificial insertion point that will also ensure the blocks we
568  // are about to split are not degenerated.
569  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
570 
571  Instruction *ThenTI = UI, *ElseTI = nullptr;
572  if (IfCondition)
573  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
574 
575  BasicBlock *ThenBB = ThenTI->getParent();
576  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
577  BasicBlock *PRegBodyBB =
578  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
579  BasicBlock *PRegPreFiniBB =
580  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
581  BasicBlock *PRegExitBB =
582  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
583 
584  auto FiniCBWrapper = [&](InsertPointTy IP) {
585  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
586  // target to the region exit block.
587  if (IP.getBlock()->end() == IP.getPoint()) {
589  Builder.restoreIP(IP);
590  Instruction *I = Builder.CreateBr(PRegExitBB);
591  IP = InsertPointTy(I->getParent(), I->getIterator());
592  }
593  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
594  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
595  "Unexpected insertion point for finalization call!");
596  return FiniCB(IP);
597  };
598 
599  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
600 
601  // Generate the privatization allocas in the block that will become the entry
602  // of the outlined function.
603  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
604  InsertPointTy InnerAllocaIP = Builder.saveIP();
605 
606  AllocaInst *PrivTIDAddr =
607  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
608  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
609 
610  // Add some fake uses for OpenMP provided arguments.
611  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
612  Instruction *ZeroAddrUse =
613  Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
614  ToBeDeleted.push_back(ZeroAddrUse);
615 
616  // ThenBB
617  // |
618  // V
619  // PRegionEntryBB <- Privatization allocas are placed here.
620  // |
621  // V
622   //      PRegionBodyBB     <- BodyGen is invoked here.
623  // |
624  // V
625  // PRegPreFiniBB <- The block we will start finalization from.
626  // |
627  // V
628  // PRegionExitBB <- A common exit to simplify block collection.
629  //
630 
631  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
632 
633  // Let the caller create the body.
634  assert(BodyGenCB && "Expected body generation callback!");
635  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
636  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
637 
638  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
639 
640  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
641  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
642  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
643  llvm::LLVMContext &Ctx = F->getContext();
644  MDBuilder MDB(Ctx);
645  // Annotate the callback behavior of the __kmpc_fork_call:
646  // - The callback callee is argument number 2 (microtask).
647  // - The first two arguments of the callback callee are unknown (-1).
648  // - All variadic arguments to the __kmpc_fork_call are passed to the
649  // callback callee.
650  F->addMetadata(
651  llvm::LLVMContext::MD_callback,
652           *llvm::MDNode::get(
653               Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
654  /* VarArgsArePassed */ true)}));
655  }
656  }
657 
658  OutlineInfo OI;
659  OI.PostOutlineCB = [=](Function &OutlinedFn) {
660  // Add some known attributes.
661  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
662  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
663  OutlinedFn.addFnAttr(Attribute::NoUnwind);
664  OutlinedFn.addFnAttr(Attribute::NoRecurse);
665 
666  assert(OutlinedFn.arg_size() >= 2 &&
667  "Expected at least tid and bounded tid as arguments");
668  unsigned NumCapturedVars =
669  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
670 
671  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
672  CI->getParent()->setName("omp_parallel");
673  Builder.SetInsertPoint(CI);
674 
675  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
676  Value *ForkCallArgs[] = {
677  Ident, Builder.getInt32(NumCapturedVars),
678  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
679 
680  SmallVector<Value *, 16> RealArgs;
681  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
682  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
683 
684  Builder.CreateCall(RTLFn, RealArgs);
685 
686  LLVM_DEBUG(dbgs() << "With fork_call placed: "
687  << *Builder.GetInsertBlock()->getParent() << "\n");
688 
689  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
690 
691  // Initialize the local TID stack location with the argument value.
692  Builder.SetInsertPoint(PrivTID);
693  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
694  Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
695 
696  // If no "if" clause was present we do not need the call created during
697  // outlining, otherwise we reuse it in the serialized parallel region.
698  if (!ElseTI) {
699  CI->eraseFromParent();
700  } else {
701 
702  // If an "if" clause was present we are now generating the serialized
703  // version into the "else" branch.
704  Builder.SetInsertPoint(ElseTI);
705 
706  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
707  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
708  Builder.CreateCall(
709  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
710  SerializedParallelCallArgs);
711 
712  // OutlinedFn(&GTid, &zero, CapturedStruct);
713  CI->removeFromParent();
714  Builder.Insert(CI);
715 
716  // __kmpc_end_serialized_parallel(&Ident, GTid);
717  Value *EndArgs[] = {Ident, ThreadID};
718  Builder.CreateCall(
719  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
720  EndArgs);
721 
722  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
723  << *Builder.GetInsertBlock()->getParent() << "\n");
724  }
725 
726  for (Instruction *I : ToBeDeleted)
727  I->eraseFromParent();
728  };
729 
730   // Adjust the finalization stack, verify the adjustment, and call the
731   // finalize function one last time to finalize values between the pre-fini
732   // block and the exit block if we left the parallel region "the normal way".
733  auto FiniInfo = FinalizationStack.pop_back_val();
734  (void)FiniInfo;
735  assert(FiniInfo.DK == OMPD_parallel &&
736  "Unexpected finalization stack state!");
737 
738  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
739 
740  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
741  FiniCB(PreFiniIP);
742 
743  OI.EntryBB = PRegEntryBB;
744  OI.ExitBB = PRegExitBB;
745 
746  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
747   SmallVector<BasicBlock *, 32> Blocks;
748   OI.collectBlocks(ParallelRegionBlockSet, Blocks);
749 
750  // Ensure a single exit node for the outlined region by creating one.
751  // We might have multiple incoming edges to the exit now due to finalizations,
752  // e.g., cancel calls that cause the control flow to leave the region.
753  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
754  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
755  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
756  Blocks.push_back(PRegOutlinedExitBB);
757 
758  CodeExtractorAnalysisCache CEAC(*OuterFn);
759  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
760  /* AggregateArgs */ false,
761  /* BlockFrequencyInfo */ nullptr,
762  /* BranchProbabilityInfo */ nullptr,
763  /* AssumptionCache */ nullptr,
764  /* AllowVarArgs */ true,
765  /* AllowAlloca */ true,
766  /* Suffix */ ".omp_par");
767 
768  // Find inputs to, outputs from the code region.
769  BasicBlock *CommonExit = nullptr;
770  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
771  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
772  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
773 
774  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
775 
776  FunctionCallee TIDRTLFn =
777  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
778 
779  auto PrivHelper = [&](Value &V) {
780  if (&V == TIDAddr || &V == ZeroAddr)
781  return;
782 
783     SetVector<Use *> Uses;
784     for (Use &U : V.uses())
785  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
786  if (ParallelRegionBlockSet.count(UserI->getParent()))
787  Uses.insert(&U);
788 
789  // __kmpc_fork_call expects extra arguments as pointers. If the input
790  // already has a pointer type, everything is fine. Otherwise, store the
791   // value onto the stack and load it back inside the to-be-outlined region. This
792  // will ensure only the pointer will be passed to the function.
793  // FIXME: if there are more than 15 trailing arguments, they must be
794  // additionally packed in a struct.
795  Value *Inner = &V;
796  if (!V.getType()->isPointerTy()) {
798  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
799 
800  Builder.restoreIP(OuterAllocaIP);
801  Value *Ptr =
802  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
803 
804  // Store to stack at end of the block that currently branches to the entry
805  // block of the to-be-outlined region.
806  Builder.SetInsertPoint(InsertBB,
807  InsertBB->getTerminator()->getIterator());
808  Builder.CreateStore(&V, Ptr);
809 
810  // Load back next to allocations in the to-be-outlined region.
811  Builder.restoreIP(InnerAllocaIP);
812  Inner = Builder.CreateLoad(V.getType(), Ptr);
813  }
814 
815  Value *ReplacementValue = nullptr;
816  CallInst *CI = dyn_cast<CallInst>(&V);
817  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
818  ReplacementValue = PrivTID;
819  } else {
820  Builder.restoreIP(
821  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
822  assert(ReplacementValue &&
823  "Expected copy/create callback to set replacement value!");
824  if (ReplacementValue == &V)
825  return;
826  }
827 
828  for (Use *UPtr : Uses)
829  UPtr->set(ReplacementValue);
830  };
831 
832  // Reset the inner alloca insertion as it will be used for loading the values
833  // wrapped into pointers before passing them into the to-be-outlined region.
834  // Configure it to insert immediately after the fake use of zero address so
835  // that they are available in the generated body and so that the
836  // OpenMP-related values (thread ID and zero address pointers) remain leading
837  // in the argument list.
838  InnerAllocaIP = IRBuilder<>::InsertPoint(
839  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
840 
841  // Reset the outer alloca insertion point to the entry of the relevant block
842  // in case it was invalidated.
843  OuterAllocaIP = IRBuilder<>::InsertPoint(
844  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
845 
846  for (Value *Input : Inputs) {
847  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
848  PrivHelper(*Input);
849  }
850  LLVM_DEBUG({
851  for (Value *Output : Outputs)
852  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
853  });
854  assert(Outputs.empty() &&
855  "OpenMP outlining should not produce live-out values!");
856 
857  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
858  LLVM_DEBUG({
859  for (auto *BB : Blocks)
860  dbgs() << " PBR: " << BB->getName() << "\n";
861  });
862 
863  // Register the outlined info.
864  addOutlineInfo(std::move(OI));
865 
866  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
867  UI->eraseFromParent();
868 
869  return AfterIP;
870 }
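// For illustration only (names made up, one captured variable %a): after
// finalize() has outlined the region and run the PostOutlineCB registered
// above, the outer function ends up with a call roughly like
//   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
//       @__kmpc_fork_call(%struct.ident_t* @loc, i32 1,
//                         void (i32*, i32*, ...)* bitcast (... @foo..omp_par ...),
//                         i32* %a)
// and @foo..omp_par receives the thread ID, the bound thread ID, and the
// captured pointers as arguments.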
871 
872 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
873   // Build call void __kmpc_flush(ident_t *loc)
874  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
875  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
876 
877  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
878 }
879 
880 void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
881   if (!updateToLocation(Loc))
882  return;
883  emitFlush(Loc);
884 }
885 
886 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
887   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
888  // global_tid);
889  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
890  Value *Ident = getOrCreateIdent(SrcLocStr);
891  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
892 
893  // Ignore return result until untied tasks are supported.
894  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
895  Args);
896 }
897 
898 void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
899   if (!updateToLocation(Loc))
900  return;
901  emitTaskwaitImpl(Loc);
902 }
903 
904 void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
905   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
906  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
907  Value *Ident = getOrCreateIdent(SrcLocStr);
908  Constant *I32Null = ConstantInt::getNullValue(Int32);
909  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
910 
911  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
912  Args);
913 }
914 
915 void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
916   if (!updateToLocation(Loc))
917  return;
918  emitTaskyieldImpl(Loc);
919 }
920 
921 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
922     const LocationDescription &Loc, InsertPointTy AllocaIP,
923     ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
924     FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
925  if (!updateToLocation(Loc))
926  return Loc.IP;
927 
928  auto FiniCBWrapper = [&](InsertPointTy IP) {
929  if (IP.getBlock()->end() != IP.getPoint())
930  return FiniCB(IP);
931     // This must be done, otherwise any nested constructs using FinalizeOMPRegion
932     // will fail because that function requires the Finalization Basic Block to
933     // have a terminator, which is already removed by EmitOMPRegionBody.
934     // IP is currently at the cancellation block.
935     // We need to backtrack to the condition block to fetch
936     // the exit block and create a branch from the cancellation
937     // block to the exit block.
939  Builder.restoreIP(IP);
940  auto *CaseBB = IP.getBlock()->getSinglePredecessor();
941  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
942  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
943  Instruction *I = Builder.CreateBr(ExitBB);
944  IP = InsertPointTy(I->getParent(), I->getIterator());
945  return FiniCB(IP);
946  };
947 
948  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
949 
950  // Each section is emitted as a switch case
951  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
952  // -> OMP.createSection() which generates the IR for each section
953  // Iterate through all sections and emit a switch construct:
954  // switch (IV) {
955  // case 0:
956  // <SectionStmt[0]>;
957  // break;
958  // ...
959  // case <NumSection> - 1:
960  // <SectionStmt[<NumSection> - 1]>;
961  // break;
962  // }
963  // ...
964  // section_loop.after:
965  // <FiniCB>;
966  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
967  auto *CurFn = CodeGenIP.getBlock()->getParent();
968  auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
969  auto *ForExitBB = CodeGenIP.getBlock()
970  ->getSinglePredecessor()
971  ->getTerminator()
972  ->getSuccessor(1);
973  SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
974  Builder.restoreIP(CodeGenIP);
975  unsigned CaseNumber = 0;
976  for (auto SectionCB : SectionCBs) {
977  auto *CaseBB = BasicBlock::Create(M.getContext(),
978  "omp_section_loop.body.case", CurFn);
979  SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
980  Builder.SetInsertPoint(CaseBB);
981  SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
982  CaseNumber++;
983  }
984     // Remove the existing terminator from the body BB since there can be no
985     // terminators after a switch/case.
986  CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
987  };
988   // Loop body ends here.
989   // LowerBound, UpperBound, and Stride for createCanonicalLoop.
990  Type *I32Ty = Type::getInt32Ty(M.getContext());
991  Value *LB = ConstantInt::get(I32Ty, 0);
992  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
993  Value *ST = ConstantInt::get(I32Ty, 1);
994  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
995  Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
996  Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
997  AllocaIP = Builder.saveIP();
998  InsertPointTy AfterIP =
999  applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
1000  BasicBlock *LoopAfterBB = AfterIP.getBlock();
1001  Instruction *SplitPos = LoopAfterBB->getTerminator();
1002  if (!isa_and_nonnull<BranchInst>(SplitPos))
1003  SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
1004  // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
1005   // which requires a BB with a branch.
1006  BasicBlock *ExitBB =
1007  LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
1008  SplitPos->eraseFromParent();
1009 
1010  // Apply the finalization callback in LoopAfterBB
1011  auto FiniInfo = FinalizationStack.pop_back_val();
1012  assert(FiniInfo.DK == OMPD_sections &&
1013  "Unexpected finalization stack state!");
1014  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
1015  FiniInfo.FiniCB(Builder.saveIP());
1016  Builder.SetInsertPoint(ExitBB);
1017 
1018  return Builder.saveIP();
1019 }
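// Conceptually, `#pragma omp sections` with N section callbacks is lowered to
// a statically scheduled canonical loop over [0, N) whose body is a switch on
// the induction variable; e.g. with three sections the switch dispatches to
// omp_section_loop.body.case blocks for the values 0, 1 and 2.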
1020 
1021 OpenMPIRBuilder::InsertPointTy
1022 OpenMPIRBuilder::createSection(const LocationDescription &Loc,
1023                                BodyGenCallbackTy BodyGenCB,
1024  FinalizeCallbackTy FiniCB) {
1025  if (!updateToLocation(Loc))
1026  return Loc.IP;
1027 
1028  auto FiniCBWrapper = [&](InsertPointTy IP) {
1029  if (IP.getBlock()->end() != IP.getPoint())
1030  return FiniCB(IP);
1031     // This must be done, otherwise any nested constructs using FinalizeOMPRegion
1032     // will fail because that function requires the Finalization Basic Block to
1033     // have a terminator, which is already removed by EmitOMPRegionBody.
1034     // IP is currently at the cancellation block.
1035     // We need to backtrack to the condition block to fetch
1036     // the exit block and create a branch from the cancellation
1037     // block to the exit block.
1039  Builder.restoreIP(IP);
1040  auto *CaseBB = Loc.IP.getBlock();
1041  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1042  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1043  Instruction *I = Builder.CreateBr(ExitBB);
1044  IP = InsertPointTy(I->getParent(), I->getIterator());
1045  return FiniCB(IP);
1046  };
1047 
1048  Directive OMPD = Directive::OMPD_sections;
1049  // Since we are using Finalization Callback here, HasFinalize
1050  // and IsCancellable have to be true
1051  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1052  /*Conditional*/ false, /*hasFinalize*/ true,
1053  /*IsCancellable*/ true);
1054 }
1055 
1056 /// Create a function with a unique name and a "void (i8*, i8*)" signature in
1057 /// the given module and return it.
1058 static Function *getFreshReductionFunc(Module &M) {
1059   Type *VoidTy = Type::getVoidTy(M.getContext());
1060  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
1061  auto *FuncTy =
1062  FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
1063   return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
1064                           M.getDataLayout().getDefaultGlobalsAddressSpace(),
1065  ".omp.reduction.func", &M);
1066 }
1067 
1068 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
1069     const LocationDescription &Loc, InsertPointTy AllocaIP,
1070  ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
1071  for (const ReductionInfo &RI : ReductionInfos) {
1072  (void)RI;
1073  assert(RI.Variable && "expected non-null variable");
1074  assert(RI.PrivateVariable && "expected non-null private variable");
1075  assert(RI.ReductionGen && "expected non-null reduction generator callback");
1076  assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
1077  "expected variables and their private equivalents to have the same "
1078  "type");
1079  assert(RI.Variable->getType()->isPointerTy() &&
1080  "expected variables to be pointers");
1081  }
1082 
1083  if (!updateToLocation(Loc))
1084  return InsertPointTy();
1085 
1086  BasicBlock *InsertBlock = Loc.IP.getBlock();
1087  BasicBlock *ContinuationBlock =
1088  InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
1089  InsertBlock->getTerminator()->eraseFromParent();
1090 
1091  // Create and populate array of type-erased pointers to private reduction
1092  // values.
1093  unsigned NumReductions = ReductionInfos.size();
1094  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
1095  Builder.restoreIP(AllocaIP);
1096  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
1097 
1098  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
1099 
1100  for (auto En : enumerate(ReductionInfos)) {
1101  unsigned Index = En.index();
1102  const ReductionInfo &RI = En.value();
1103  Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
1104  RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
1105  Value *Casted =
1106  Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
1107  "private.red.var." + Twine(Index) + ".casted");
1108  Builder.CreateStore(Casted, RedArrayElemPtr);
1109  }
1110 
1111  // Emit a call to the runtime function that orchestrates the reduction.
1112  // Declare the reduction function in the process.
1113  Function *Func = Builder.GetInsertBlock()->getParent();
1114  Module *Module = Func->getParent();
1115  Value *RedArrayPtr =
1116  Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
1117  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1118  bool CanGenerateAtomic =
1119  llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
1120  return RI.AtomicReductionGen;
1121  });
1122  Value *Ident = getOrCreateIdent(
1123  SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1124  : IdentFlag(0));
1125  Value *ThreadId = getOrCreateThreadID(Ident);
1126  Constant *NumVariables = Builder.getInt32(NumReductions);
1127  const DataLayout &DL = Module->getDataLayout();
1128  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
1129  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
1130  Function *ReductionFunc = getFreshReductionFunc(*Module);
1131  Value *Lock = getOMPCriticalRegionLock(".reduction");
1132  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
1133  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1134  : RuntimeFunction::OMPRTL___kmpc_reduce);
1135  CallInst *ReduceCall =
1136  Builder.CreateCall(ReduceFunc,
1137  {Ident, ThreadId, NumVariables, RedArraySize,
1138  RedArrayPtr, ReductionFunc, Lock},
1139  "reduce");
1140 
1141  // Create final reduction entry blocks for the atomic and non-atomic case.
1142  // Emit IR that dispatches control flow to one of the blocks based on the
1143  // reduction supporting the atomic mode.
1144  BasicBlock *NonAtomicRedBlock =
1145  BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
1146  BasicBlock *AtomicRedBlock =
1147  BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
1148  SwitchInst *Switch =
1149  Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
1150  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
1151  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
1152 
1153  // Populate the non-atomic reduction using the elementwise reduction function.
1154  // This loads the elements from the global and private variables and reduces
1155  // them before storing back the result to the global variable.
1156  Builder.SetInsertPoint(NonAtomicRedBlock);
1157  for (auto En : enumerate(ReductionInfos)) {
1158  const ReductionInfo &RI = En.value();
1159  Type *ValueType = RI.getElementType();
1160  Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
1161  "red.value." + Twine(En.index()));
1162  Value *PrivateRedValue =
1163  Builder.CreateLoad(ValueType, RI.PrivateVariable,
1164  "red.private.value." + Twine(En.index()));
1165  Value *Reduced;
1166  Builder.restoreIP(
1167  RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
1168  if (!Builder.GetInsertBlock())
1169  return InsertPointTy();
1170  Builder.CreateStore(Reduced, RI.Variable);
1171  }
1172  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
1173  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1174  : RuntimeFunction::OMPRTL___kmpc_end_reduce);
1175  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
1176  Builder.CreateBr(ContinuationBlock);
1177 
1178  // Populate the atomic reduction using the atomic elementwise reduction
1179  // function. There are no loads/stores here because they will be happening
1180  // inside the atomic elementwise reduction.
1181  Builder.SetInsertPoint(AtomicRedBlock);
1182  if (CanGenerateAtomic) {
1183  for (const ReductionInfo &RI : ReductionInfos) {
1184  Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
1185  RI.PrivateVariable));
1186  if (!Builder.GetInsertBlock())
1187  return InsertPointTy();
1188  }
1189  Builder.CreateBr(ContinuationBlock);
1190  } else {
1191  Builder.CreateUnreachable();
1192  }
1193 
1194  // Populate the outlined reduction function using the elementwise reduction
1195  // function. Partial values are extracted from the type-erased array of
1196  // pointers to private variables.
1197  BasicBlock *ReductionFuncBlock =
1198  BasicBlock::Create(Module->getContext(), "", ReductionFunc);
1199  Builder.SetInsertPoint(ReductionFuncBlock);
1200  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
1201  RedArrayTy->getPointerTo());
1202  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
1203  RedArrayTy->getPointerTo());
1204  for (auto En : enumerate(ReductionInfos)) {
1205  const ReductionInfo &RI = En.value();
1206  Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1207  RedArrayTy, LHSArrayPtr, 0, En.index());
1208  Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
1209  Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
1210  Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
1211  Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1212  RedArrayTy, RHSArrayPtr, 0, En.index());
1213  Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
1214  Value *RHSPtr =
1215  Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
1216  Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
1217  Value *Reduced;
1218  Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
1219  if (!Builder.GetInsertBlock())
1220  return InsertPointTy();
1221  Builder.CreateStore(Reduced, LHSPtr);
1222  }
1223  Builder.CreateRetVoid();
1224 
1225  Builder.SetInsertPoint(ContinuationBlock);
1226  return Builder.saveIP();
1227 }
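// The runtime protocol sketched above: __kmpc_reduce(_nowait) returns 1 if the
// calling thread should perform the non-atomic reduction (followed by
// __kmpc_end_reduce(_nowait)), 2 if it should use the atomic path, and 0 if it
// has nothing to do; the switch emitted above dispatches on exactly these
// values, falling through to the continuation block for 0.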
1228 
1229 OpenMPIRBuilder::InsertPointTy
1230 OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
1231                               BodyGenCallbackTy BodyGenCB,
1232  FinalizeCallbackTy FiniCB) {
1233 
1234  if (!updateToLocation(Loc))
1235  return Loc.IP;
1236 
1237  Directive OMPD = Directive::OMPD_master;
1238  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1239  Value *Ident = getOrCreateIdent(SrcLocStr);
1240  Value *ThreadId = getOrCreateThreadID(Ident);
1241  Value *Args[] = {Ident, ThreadId};
1242 
1243  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1244  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1245 
1246  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1247  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1248 
1249  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1250  /*Conditional*/ true, /*hasFinalize*/ true);
1251 }
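// A sketch of the guarded region produced for `master` (names illustrative):
//   %res = call i32 @__kmpc_master(%struct.ident_t* @loc, i32 %tid)
//   br i1 <%res != 0>, label %master.body, label %master.end
//   ...                       ; body generated by BodyGenCB
//   call void @__kmpc_end_master(%struct.ident_t* @loc, i32 %tid)
// Only the thread for which __kmpc_master returns non-zero executes the body.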
1252 
1253 OpenMPIRBuilder::InsertPointTy
1254 OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
1255                               BodyGenCallbackTy BodyGenCB,
1256  FinalizeCallbackTy FiniCB, Value *Filter) {
1257  if (!updateToLocation(Loc))
1258  return Loc.IP;
1259 
1260  Directive OMPD = Directive::OMPD_masked;
1261  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1262  Value *Ident = getOrCreateIdent(SrcLocStr);
1263  Value *ThreadId = getOrCreateThreadID(Ident);
1264  Value *Args[] = {Ident, ThreadId, Filter};
1265  Value *ArgsEnd[] = {Ident, ThreadId};
1266 
1267  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1268  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1269 
1270  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1271  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1272 
1273  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1274  /*Conditional*/ true, /*hasFinalize*/ true);
1275 }
1276 
1277 CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
1278     DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1279  BasicBlock *PostInsertBefore, const Twine &Name) {
1280  Module *M = F->getParent();
1281  LLVMContext &Ctx = M->getContext();
1282  Type *IndVarTy = TripCount->getType();
1283 
1284  // Create the basic block structure.
1285  BasicBlock *Preheader =
1286  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1287  BasicBlock *Header =
1288  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1289  BasicBlock *Cond =
1290  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1291  BasicBlock *Body =
1292  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1293  BasicBlock *Latch =
1294  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1295  BasicBlock *Exit =
1296  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1297  BasicBlock *After =
1298  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1299 
1300  // Use specified DebugLoc for new instructions.
1301  Builder.SetCurrentDebugLocation(DL);
1302 
1303  Builder.SetInsertPoint(Preheader);
1304  Builder.CreateBr(Header);
1305 
1306  Builder.SetInsertPoint(Header);
1307  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1308  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1309  Builder.CreateBr(Cond);
1310 
1311  Builder.SetInsertPoint(Cond);
1312  Value *Cmp =
1313  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1314  Builder.CreateCondBr(Cmp, Body, Exit);
1315 
1316  Builder.SetInsertPoint(Body);
1317  Builder.CreateBr(Latch);
1318 
1319  Builder.SetInsertPoint(Latch);
1320  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1321  "omp_" + Name + ".next", /*HasNUW=*/true);
1322  Builder.CreateBr(Header);
1323  IndVarPHI->addIncoming(Next, Latch);
1324 
1325  Builder.SetInsertPoint(Exit);
1326  Builder.CreateBr(After);
1327 
1328  // Remember and return the canonical control flow.
1329  LoopInfos.emplace_front();
1330  CanonicalLoopInfo *CL = &LoopInfos.front();
1331 
1332  CL->Preheader = Preheader;
1333  CL->Header = Header;
1334  CL->Cond = Cond;
1335  CL->Body = Body;
1336  CL->Latch = Latch;
1337  CL->Exit = Exit;
1338  CL->After = After;
1339 
1340 #ifndef NDEBUG
1341  CL->assertOK();
1342 #endif
1343  return CL;
1344 }
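// The skeleton built above has the shape (one edge per line):
//   preheader -> header -> cond
//   cond -> body (trip count not yet reached) | exit (otherwise)
//   body -> latch -> header
//   exit -> after
// with the induction variable as a PHI in the header counting up from 0.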
1345 
1346 CanonicalLoopInfo *
1347 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
1348                                      LoopBodyGenCallbackTy BodyGenCB,
1349  Value *TripCount, const Twine &Name) {
1350  BasicBlock *BB = Loc.IP.getBlock();
1351  BasicBlock *NextBB = BB->getNextNode();
1352 
1353  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1354  NextBB, NextBB, Name);
1355  BasicBlock *After = CL->getAfter();
1356 
1357  // If location is not set, don't connect the loop.
1358  if (updateToLocation(Loc)) {
1359  // Split the loop at the insertion point: Branch to the preheader and move
1360  // every following instruction to after the loop (the After BB). Also, the
1361  // new successor is the loop's after block.
1362  Builder.CreateBr(CL->Preheader);
1363  After->getInstList().splice(After->begin(), BB->getInstList(),
1364  Builder.GetInsertPoint(), BB->end());
1365  After->replaceSuccessorsPhiUsesWith(BB, After);
1366  }
1367 
1368  // Emit the body content. We do it after connecting the loop to the CFG to
1369   // avoid having the callback encounter degenerate BBs.
1370  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1371 
1372 #ifndef NDEBUG
1373  CL->assertOK();
1374 #endif
1375  return CL;
1376 }
1377 
1378 CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
1379     const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1380  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1381  InsertPointTy ComputeIP, const Twine &Name) {
1382 
1383  // Consider the following difficulties (assuming 8-bit signed integers):
1384  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1385  // DO I = 1, 100, 50
1386   //  * A \p Step of INT_MIN cannot be normalized to a positive direction:
1387  // DO I = 100, 0, -128
1388 
1389  // Start, Stop and Step must be of the same integer type.
1390  auto *IndVarTy = cast<IntegerType>(Start->getType());
1391  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1392  assert(IndVarTy == Step->getType() && "Step type mismatch");
1393 
1394  LocationDescription ComputeLoc =
1395  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1396  updateToLocation(ComputeLoc);
1397 
1398  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1399  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1400 
1401  // Like Step, but always positive.
1402  Value *Incr = Step;
1403 
1404  // Distance between Start and Stop; always positive.
1405  Value *Span;
1406 
1407   // Condition checking whether no iterations are executed at all, e.g. because
1408  // UB < LB.
1409  Value *ZeroCmp;
1410 
1411  if (IsSigned) {
1412     // Ensure that the increment is positive. If not, negate it and swap LB and UB.
1413  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1414  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1415  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1416  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1417  Span = Builder.CreateSub(UB, LB, "", false, true);
1418  ZeroCmp = Builder.CreateICmp(
1419  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1420  } else {
1421  Span = Builder.CreateSub(Stop, Start, "", true);
1422  ZeroCmp = Builder.CreateICmp(
1423  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1424  }
1425 
1426  Value *CountIfLooping;
1427  if (InclusiveStop) {
1428  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1429  } else {
1430  // Avoid incrementing past stop since it could overflow.
1431  Value *CountIfTwo = Builder.CreateAdd(
1432  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1433  Value *OneCmp = Builder.CreateICmp(
1434  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1435  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1436  }
1437  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1438  "omp_" + Name + ".tripcount");
1439 
1440  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1441  Builder.restoreIP(CodeGenIP);
1442  Value *Span = Builder.CreateMul(IV, Step);
1443  Value *IndVar = Builder.CreateAdd(Span, Start);
1444  BodyGenCB(Builder.saveIP(), IndVar);
1445  };
1446  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1447  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1448 }
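// Worked example of the trip-count logic above (illustrative, unsigned case):
//   Start = 0, Stop = 10, Step = 3, InclusiveStop = false
//     Span            = 10 - 0           = 10
//     CountIfTwo      = (10 - 1) / 3 + 1 = 4
//     OneCmp  (10<=3) = false            -> CountIfLooping = 4
//     ZeroCmp (10<=0) = false            -> TripCount      = 4
// i.e. the body runs for I = 0, 3, 6 and 9.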
1449 
1450 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1451 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1452 // runtime. Always interpret integers as unsigned similarly to
1453 // CanonicalLoopInfo.
1454 static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
1455                                                   OpenMPIRBuilder &OMPBuilder) {
1456  unsigned Bitwidth = Ty->getIntegerBitWidth();
1457  if (Bitwidth == 32)
1458  return OMPBuilder.getOrCreateRuntimeFunction(
1459  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1460  if (Bitwidth == 64)
1461  return OMPBuilder.getOrCreateRuntimeFunction(
1462  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1463  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1464 }
1465 
1466 // Sets the number of loop iterations to the given value. This value must be
1467 // valid in the condition block (i.e., defined in the preheader) and is
1468 // interpreted as an unsigned integer.
1469 static void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
1470   Instruction *CmpI = &CLI->getCond()->front();
1471  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1472  CmpI->setOperand(1, TripCount);
1473  CLI->assertOK();
1474 }
1475 
1476 OpenMPIRBuilder::InsertPointTy
1477 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1478                                           InsertPointTy AllocaIP,
1479  bool NeedsBarrier, Value *Chunk) {
1480  assert(CLI->isValid() && "Requires a valid canonical loop");
1481 
1482  // Set up the source location value for OpenMP runtime.
1483  Builder.restoreIP(CLI->getPreheaderIP());
1484  Builder.SetCurrentDebugLocation(DL);
1485 
1486  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
1487  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1488 
1489  // Declare useful OpenMP runtime functions.
1490  Value *IV = CLI->getIndVar();
1491  Type *IVTy = IV->getType();
1492  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1493  FunctionCallee StaticFini =
1494  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1495 
1496  // Allocate space for computed loop bounds as expected by the "init" function.
1497  Builder.restoreIP(AllocaIP);
1498  Type *I32Type = Type::getInt32Ty(M.getContext());
1499  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1500  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1501  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1502  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1503 
1504  // At the end of the preheader, prepare for calling the "init" function by
1505  // storing the current loop bounds into the allocated space. A canonical loop
1506  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1507  // and produces an inclusive upper bound.
1508  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1509  Constant *Zero = ConstantInt::get(IVTy, 0);
1510  Constant *One = ConstantInt::get(IVTy, 1);
1511  Builder.CreateStore(Zero, PLowerBound);
1512  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1513  Builder.CreateStore(UpperBound, PUpperBound);
1514  Builder.CreateStore(One, PStride);
1515 
1516  // FIXME: schedule(static) is NOT the same as schedule(static,1)
1517  if (!Chunk)
1518  Chunk = One;
1519 
1520  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1521 
1522  Constant *SchedulingType =
1523  ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
1524 
1525  // Call the "init" function and update the trip count of the loop with the
1526  // value it produced.
1527  Builder.CreateCall(StaticInit,
1528  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1529  PUpperBound, PStride, One, Chunk});
1530  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1531  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1532  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1533  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1534  setCanonicalLoopTripCount(CLI, TripCount);
1535 
1536  // Update all uses of the induction variable except the one in the condition
1537  // block that compares it with the actual upper bound, and the increment in
1538  // the latch block.
1539  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1540  // CanonicalLoopInfoUpdater interface.
1541  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1542  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1543  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1544  auto *Instr = dyn_cast<Instruction>(U.getUser());
1545  return !Instr ||
1546  (Instr->getParent() != CLI->getCond() &&
1547  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1548  });
1549 
1550  // In the "exit" block, call the "fini" function.
1551  Builder.SetInsertPoint(CLI->getExit(),
1552  CLI->getExit()->getTerminator()->getIterator());
1553  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1554 
1555  // Add the barrier if requested.
1556  if (NeedsBarrier)
1557  createBarrier(LocationDescription(Builder.saveIP(), DL),
1558  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1559  /* CheckCancelFlag */ false);
1560 
1561  InsertPointTy AfterIP = CLI->getAfterIP();
1562  CLI->invalidate();
1563 
1564  return AfterIP;
1565 }
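// Illustrative summary (a sketch, not part of the original source): with an
// i32 induction variable the preheader and exit now contain, roughly,
//   __kmpc_for_static_init_4u(loc, tid, /*schedtype=*/Static, p.lastiter,
//                             p.lowerbound, p.upperbound, p.stride,
//                             /*incr=*/1, chunk)
//   ... loop body, with the IV shifted by the thread-local lowerbound ...
//   __kmpc_for_static_fini(loc, tid)
//   __kmpc_barrier(...)              // only when NeedsBarrier is set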
1566 
1567 OpenMPIRBuilder::InsertPointTy
1568 OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1569  InsertPointTy AllocaIP, bool NeedsBarrier) {
1570  // Currently only supports static schedules.
1571  return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
1572 }
1573 
1574 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
1575 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1576 /// the runtime. Always interpret integers as unsigned similarly to
1577 /// CanonicalLoopInfo.
1578 static FunctionCallee
1579 getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1580  unsigned Bitwidth = Ty->getIntegerBitWidth();
1581  if (Bitwidth == 32)
1582  return OMPBuilder.getOrCreateRuntimeFunction(
1583  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1584  if (Bitwidth == 64)
1585  return OMPBuilder.getOrCreateRuntimeFunction(
1586  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
1587  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1588 }
1589 
1590 /// Returns an LLVM function to call for updating the next loop using OpenMP
1591 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1592 /// the runtime. Always interpret integers as unsigned similarly to
1593 /// CanonicalLoopInfo.
1594 static FunctionCallee
1595 getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1596  unsigned Bitwidth = Ty->getIntegerBitWidth();
1597  if (Bitwidth == 32)
1598  return OMPBuilder.getOrCreateRuntimeFunction(
1599  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1600  if (Bitwidth == 64)
1601  return OMPBuilder.getOrCreateRuntimeFunction(
1602  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
1603  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1604 }
1605 
1606 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
1607  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1608  OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
1609  assert(CLI->isValid() && "Requires a valid canonical loop");
1610 
1611  // Set up the source location value for OpenMP runtime.
1612  Builder.SetCurrentDebugLocation(DL);
1613 
1614  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
1615  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1616 
1617  // Declare useful OpenMP runtime functions.
1618  Value *IV = CLI->getIndVar();
1619  Type *IVTy = IV->getType();
1620  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
1621  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
1622 
1623  // Allocate space for computed loop bounds as expected by the "init" function.
1624  Builder.restoreIP(AllocaIP);
1625  Type *I32Type = Type::getInt32Ty(M.getContext());
1626  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1627  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1628  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1629  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1630 
1631  // At the end of the preheader, prepare for calling the "init" function by
1632  // storing the current loop bounds into the allocated space. A canonical loop
1633  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1634  // and produces an inclusive upper bound.
1635  BasicBlock *PreHeader = CLI->getPreheader();
1636  Builder.SetInsertPoint(PreHeader->getTerminator());
1637  Constant *One = ConstantInt::get(IVTy, 1);
1638  Builder.CreateStore(One, PLowerBound);
1639  Value *UpperBound = CLI->getTripCount();
1640  Builder.CreateStore(UpperBound, PUpperBound);
1641  Builder.CreateStore(One, PStride);
1642 
1643  BasicBlock *Header = CLI->getHeader();
1644  BasicBlock *Exit = CLI->getExit();
1645  BasicBlock *Cond = CLI->getCond();
1646  InsertPointTy AfterIP = CLI->getAfterIP();
1647 
1648  // The CLI will be "broken" in the code below, as the loop is no longer
1649  // a valid canonical loop.
1650 
1651  if (!Chunk)
1652  Chunk = One;
1653 
1654  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1655 
1656  Constant *SchedulingType =
1657  ConstantInt::get(I32Type, static_cast<int>(SchedType));
1658 
1659  // Call the "init" function.
1660  Builder.CreateCall(DynamicInit,
1661  {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1662  UpperBound, /* step */ One, Chunk});
1663 
1664  // An outer loop around the existing one.
1665  BasicBlock *OuterCond = BasicBlock::Create(
1666  PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
1667  PreHeader->getParent());
1668  // The result of __kmpc_dispatch_next is always i32, so the comparison below needs a 32-bit zero rather than an IVTy constant.
1669  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
1670  Value *Res =
1671  Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1672  PLowerBound, PUpperBound, PStride});
1673  Constant *Zero32 = ConstantInt::get(I32Type, 0);
1674  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
1675  Value *LowerBound =
1676  Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
1677  Builder.CreateCondBr(MoreWork, Header, Exit);
1678 
1679  // Change PHI-node in loop header to use outer cond rather than preheader,
1680  // and set IV to the LowerBound.
1681  Instruction *Phi = &Header->front();
1682  auto *PI = cast<PHINode>(Phi);
1683  PI->setIncomingBlock(0, OuterCond);
1684  PI->setIncomingValue(0, LowerBound);
1685 
1686  // Then set the pre-header to jump to the OuterCond
1687  Instruction *Term = PreHeader->getTerminator();
1688  auto *Br = cast<BranchInst>(Term);
1689  Br->setSuccessor(0, OuterCond);
1690 
1691  // Modify the inner condition:
1692  // * Use the UpperBound returned from the DynamicNext call.
1693  // * Jump to the outer loop once the current chunk of the inner loop is done.
1694  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
1695  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
1696  Instruction *Comp = &*Builder.GetInsertPoint();
1697  auto *CI = cast<CmpInst>(Comp);
1698  CI->setOperand(1, UpperBound);
1699  // Redirect the inner exit to branch to outer condition.
1700  Instruction *Branch = &Cond->back();
1701  auto *BI = cast<BranchInst>(Branch);
1702  assert(BI->getSuccessor(1) == Exit);
1703  BI->setSuccessor(1, OuterCond);
1704 
1705  // Add the barrier if requested.
1706  if (NeedsBarrier) {
1707  Builder.SetInsertPoint(&Exit->back());
1708  createBarrier(LocationDescription(Builder.saveIP(), DL),
1709  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1710  /* CheckCancelFlag */ false);
1711  }
1712 
1713  CLI->invalidate();
1714  return AfterIP;
1715 }
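// Illustrative control-flow sketch of the result (not part of the original
// source): the canonical loop is wrapped into an outer dispatch loop,
//
//   preheader:  __kmpc_dispatch_init_{4u,8u}(loc, tid, schedtype, lb=1,
//                                            ub=tripcount, step=1, chunk)
//               br outer.cond
//   outer.cond: more = __kmpc_dispatch_next_{4u,8u}(loc, tid, p.lastiter,
//                                                   p.lowerbound,
//                                                   p.upperbound, p.stride)
//               br (more != 0) ? header : exit
//   cond:       compares the IV against the upperbound of the current chunk
//               and, when the chunk is exhausted, branches back to outer.cond
//
// so the original loop body executes each chunk handed out by the runtime.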
1716 
1717 /// Make \p Source branch to \p Target.
1718 ///
1719 /// Handles two situations:
1720 /// * \p Source already has an unconditional branch.
1721 /// * \p Source is a degenerate block (no terminator because the BB is
1722 /// the current head of the IR construction).
1723 static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
1724  if (Instruction *Term = Source->getTerminator()) {
1725  auto *Br = cast<BranchInst>(Term);
1726  assert(!Br->isConditional() &&
1727  "BB's terminator must be an unconditional branch (or degenerate)");
1728  BasicBlock *Succ = Br->getSuccessor(0);
1729  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1730  Br->setSuccessor(0, Target);
1731  return;
1732  }
1733 
1734  auto *NewBr = BranchInst::Create(Target, Source);
1735  NewBr->setDebugLoc(DL);
1736 }
1737 
1738 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1739 /// after this \p OldTarget will be orphaned.
1740 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1741  BasicBlock *NewTarget, DebugLoc DL) {
1742  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1743  redirectTo(Pred, NewTarget, DL);
1744 }
1745 
1746 /// Determine which blocks in \p BBs are reachable from outside and remove the
1747 /// ones that are not reachable from the function.
1748 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
1749  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
1750  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1751  for (Use &U : BB->uses()) {
1752  auto *UseInst = dyn_cast<Instruction>(U.getUser());
1753  if (!UseInst)
1754  continue;
1755  if (BBsToErase.count(UseInst->getParent()))
1756  continue;
1757  return true;
1758  }
1759  return false;
1760  };
1761 
1762  while (true) {
1763  bool Changed = false;
1764  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1765  if (HasRemainingUses(BB)) {
1766  BBsToErase.erase(BB);
1767  Changed = true;
1768  }
1769  }
1770  if (!Changed)
1771  break;
1772  }
1773 
1774  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1775  DeleteDeadBlocks(BBVec);
1776 }
1777 
1778 CanonicalLoopInfo *
1779 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1780  InsertPointTy ComputeIP) {
1781  assert(Loops.size() >= 1 && "At least one loop required");
1782  size_t NumLoops = Loops.size();
1783 
1784  // Nothing to do if there is already just one loop.
1785  if (NumLoops == 1)
1786  return Loops.front();
1787 
1788  CanonicalLoopInfo *Outermost = Loops.front();
1789  CanonicalLoopInfo *Innermost = Loops.back();
1790  BasicBlock *OrigPreheader = Outermost->getPreheader();
1791  BasicBlock *OrigAfter = Outermost->getAfter();
1792  Function *F = OrigPreheader->getParent();
1793 
1794  // Setup the IRBuilder for inserting the trip count computation.
1795  Builder.SetCurrentDebugLocation(DL);
1796  if (ComputeIP.isSet())
1797  Builder.restoreIP(ComputeIP);
1798  else
1799  Builder.restoreIP(Outermost->getPreheaderIP());
1800 
1801  // Derive the collapsed loop's trip count.
1802  // TODO: Find common/largest indvar type.
1803  Value *CollapsedTripCount = nullptr;
1804  for (CanonicalLoopInfo *L : Loops) {
1805  assert(L->isValid() &&
1806  "All loops to collapse must be valid canonical loops");
1807  Value *OrigTripCount = L->getTripCount();
1808  if (!CollapsedTripCount) {
1809  CollapsedTripCount = OrigTripCount;
1810  continue;
1811  }
1812 
1813  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1814  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1815  {}, /*HasNUW=*/true);
1816  }
1817 
1818  // Create the collapsed loop control flow.
1819  CanonicalLoopInfo *Result =
1820  createLoopSkeleton(DL, CollapsedTripCount, F,
1821  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1822 
1823  // Build the collapsed loop body code.
1824  // Start with deriving the input loop induction variables from the collapsed
1825  // one, using a divmod scheme. To preserve the original loops' order, the
1826  // innermost loop uses the least significant bits.
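  // Illustrative example (not part of the original source): for two loops with
  // trip counts TC0 (outer) and TC1 (inner), the collapsed trip count is
  // TC0 * TC1 and a collapsed induction variable `iv` is decomposed as
  //   i1 = iv % TC1      // innermost, least significant "digits"
  //   i0 = iv / TC1      // outermost, remaining bits
  // e.g. TC0 = 3, TC1 = 4, iv = 7  ->  i0 = 1, i1 = 3.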
1827  Builder.restoreIP(Result->getBodyIP());
1828 
1829  Value *Leftover = Result->getIndVar();
1830  SmallVector<Value *> NewIndVars;
1831  NewIndVars.set_size(NumLoops);
1832  for (int i = NumLoops - 1; i >= 1; --i) {
1833  Value *OrigTripCount = Loops[i]->getTripCount();
1834 
1835  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1836  NewIndVars[i] = NewIndVar;
1837 
1838  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1839  }
1840  // Outermost loop gets all the remaining bits.
1841  NewIndVars[0] = Leftover;
1842 
1843  // Construct the loop body control flow.
1844  // We progressively construct the branch structure following the direction of
1845  // the control flow, from the leading in-between code, the loop nest body, the
1846  // trailing in-between code, and rejoining the collapsed loop's latch.
1847  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
1848  // the ContinueBlock is set, continue with that block. If ContinuePred, use
1849  // its predecessors as sources.
1850  BasicBlock *ContinueBlock = Result->getBody();
1851  BasicBlock *ContinuePred = nullptr;
1852  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1853  BasicBlock *NextSrc) {
1854  if (ContinueBlock)
1855  redirectTo(ContinueBlock, Dest, DL);
1856  else
1857  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1858 
1859  ContinueBlock = nullptr;
1860  ContinuePred = NextSrc;
1861  };
1862 
1863  // The code before the nested loop of each level.
1864  // Because we are sinking it into the nest, it will be executed more often
1865  // than in the original loop. More sophisticated schemes could keep track of what
1866  // the in-between code is and instantiate it only once per thread.
1867  for (size_t i = 0; i < NumLoops - 1; ++i)
1868  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1869 
1870  // Connect the loop nest body.
1871  ContinueWith(Innermost->getBody(), Innermost->getLatch());
1872 
1873  // The code after the nested loop at each level.
1874  for (size_t i = NumLoops - 1; i > 0; --i)
1875  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1876 
1877  // Connect the finished loop to the collapsed loop latch.
1878  ContinueWith(Result->getLatch(), nullptr);
1879 
1880  // Replace the input loops with the new collapsed loop.
1881  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1882  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1883 
1884  // Replace the input loop indvars with the derived ones.
1885  for (size_t i = 0; i < NumLoops; ++i)
1886  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1887 
1888  // Remove unused parts of the input loops.
1889  SmallVector<BasicBlock *, 12> OldControlBBs;
1890  OldControlBBs.reserve(6 * Loops.size());
1891  for (CanonicalLoopInfo *Loop : Loops)
1892  Loop->collectControlBlocks(OldControlBBs);
1893  removeUnusedBlocksFromParent(OldControlBBs);
1894 
1895  for (CanonicalLoopInfo *L : Loops)
1896  L->invalidate();
1897 
1898 #ifndef NDEBUG
1899  Result->assertOK();
1900 #endif
1901  return Result;
1902 }
1903 
1904 std::vector<CanonicalLoopInfo *>
1905 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1906  ArrayRef<Value *> TileSizes) {
1907  assert(TileSizes.size() == Loops.size() &&
1908  "Must pass as many tile sizes as there are loops");
1909  int NumLoops = Loops.size();
1910  assert(NumLoops >= 1 && "At least one loop to tile required");
1911 
1912  CanonicalLoopInfo *OutermostLoop = Loops.front();
1913  CanonicalLoopInfo *InnermostLoop = Loops.back();
1914  Function *F = OutermostLoop->getBody()->getParent();
1915  BasicBlock *InnerEnter = InnermostLoop->getBody();
1916  BasicBlock *InnerLatch = InnermostLoop->getLatch();
1917 
1918  // Collect original trip counts and induction variable to be accessible by
1919  // index. Also, the structure of the original loops is not preserved during
1920  // the construction of the tiled loops, so do it before we scavenge the BBs of
1921  // any original CanonicalLoopInfo.
1922  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1923  for (CanonicalLoopInfo *L : Loops) {
1924  assert(L->isValid() && "All input loops must be valid canonical loops");
1925  OrigTripCounts.push_back(L->getTripCount());
1926  OrigIndVars.push_back(L->getIndVar());
1927  }
1928 
1929  // Collect the code between loop headers. These may contain SSA definitions
1930  // that are used in the loop nest body. To be usable within the innermost
1931  // body, these BasicBlocks will be sunk into the loop nest body. That is,
1932  // these instructions may be executed more often than before the tiling.
1933  // TODO: It would be sufficient to only sink them into body of the
1934  // corresponding tile loop.
1935  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
1936  for (int i = 0; i < NumLoops - 1; ++i) {
1937  CanonicalLoopInfo *Surrounding = Loops[i];
1938  CanonicalLoopInfo *Nested = Loops[i + 1];
1939 
1940  BasicBlock *EnterBB = Surrounding->getBody();
1941  BasicBlock *ExitBB = Nested->getHeader();
1942  InbetweenCode.emplace_back(EnterBB, ExitBB);
1943  }
1944 
1945  // Compute the trip counts of the floor loops.
1946  Builder.SetCurrentDebugLocation(DL);
1947  Builder.restoreIP(OutermostLoop->getPreheaderIP());
1948  SmallVector<Value *, 4> FloorCount, FloorRems;
1949  for (int i = 0; i < NumLoops; ++i) {
1950  Value *TileSize = TileSizes[i];
1951  Value *OrigTripCount = OrigTripCounts[i];
1952  Type *IVType = OrigTripCount->getType();
1953 
1954  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
1955  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
1956 
1957  // 0 if tripcount divides the tilesize, 1 otherwise.
1958  // 1 means we need an additional iteration for a partial tile.
1959  //
1960  // Unfortunately we cannot just use the roundup-formula
1961  // (tripcount + tilesize - 1)/tilesize
1962  // because the summation might overflow. We do not want to introduce undefined
1963  // behavior when the untiled loop nest did not.
1964  Value *FloorTripOverflow =
1965  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
1966 
1967  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
1968  FloorTripCount =
1969  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
1970  "omp_floor" + Twine(i) + ".tripcount", true);
1971 
1972  // Remember some values for later use.
1973  FloorCount.push_back(FloorTripCount);
1974  FloorRems.push_back(FloorTripRem);
1975  }
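  // Worked example (illustrative, not part of the original source):
  // OrigTripCount = 10, TileSize = 4 gives
  //   FloorTripCount    = 10 / 4 = 2
  //   FloorTripRem      = 10 % 4 = 2
  //   FloorTripOverflow = 1                      // a partial tile remains
  // so the floor loop runs 2 + 1 = 3 times and the last tile covers only the
  // remaining 2 iterations.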
1976 
1977  // Generate the new loop nest, from the outermost to the innermost.
1978  std::vector<CanonicalLoopInfo *> Result;
1979  Result.reserve(NumLoops * 2);
1980 
1981  // The basic block of the surrounding loop that enters the generated loop
1982  // nest.
1983  BasicBlock *Enter = OutermostLoop->getPreheader();
1984 
1985  // The basic block of the surrounding loop where the inner code should
1986  // continue.
1987  BasicBlock *Continue = OutermostLoop->getAfter();
1988 
1989  // Where the next loop basic block should be inserted.
1990  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
1991 
1992  auto EmbeddNewLoop =
1993  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
1994  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
1995  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
1996  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
1997  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
1998  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
1999 
2000  // Setup the position where the next embedded loop connects to this loop.
2001  Enter = EmbeddedLoop->getBody();
2002  Continue = EmbeddedLoop->getLatch();
2003  OutroInsertBefore = EmbeddedLoop->getLatch();
2004  return EmbeddedLoop;
2005  };
2006 
2007  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
2008  const Twine &NameBase) {
2009  for (auto P : enumerate(TripCounts)) {
2010  CanonicalLoopInfo *EmbeddedLoop =
2011  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
2012  Result.push_back(EmbeddedLoop);
2013  }
2014  };
2015 
2016  EmbeddNewLoops(FloorCount, "floor");
2017 
2018  // Within the innermost floor loop, emit the code that computes the tile
2019  // sizes.
2020  Builder.SetInsertPoint(Enter->getTerminator());
2021  SmallVector<Value *, 4> TileCounts;
2022  for (int i = 0; i < NumLoops; ++i) {
2023  CanonicalLoopInfo *FloorLoop = Result[i];
2024  Value *TileSize = TileSizes[i];
2025 
2026  Value *FloorIsEpilogue =
2027  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
2028  Value *TileTripCount =
2029  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
2030 
2031  TileCounts.push_back(TileTripCount);
2032  }
2033 
2034  // Create the tile loops.
2035  EmbeddNewLoops(TileCounts, "tile");
2036 
2037  // Insert the inbetween code into the body.
2038  BasicBlock *BodyEnter = Enter;
2039  BasicBlock *BodyEntered = nullptr;
2040  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
2041  BasicBlock *EnterBB = P.first;
2042  BasicBlock *ExitBB = P.second;
2043 
2044  if (BodyEnter)
2045  redirectTo(BodyEnter, EnterBB, DL);
2046  else
2047  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
2048 
2049  BodyEnter = nullptr;
2050  BodyEntered = ExitBB;
2051  }
2052 
2053  // Append the original loop nest body into the generated loop nest body.
2054  if (BodyEnter)
2055  redirectTo(BodyEnter, InnerEnter, DL);
2056  else
2057  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
2058  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
2059 
2060  // Replace the original induction variable with an induction variable computed
2061  // from the tile and floor induction variables.
2062  Builder.restoreIP(Result.back()->getBodyIP());
2063  for (int i = 0; i < NumLoops; ++i) {
2064  CanonicalLoopInfo *FloorLoop = Result[i];
2065  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
2066  Value *OrigIndVar = OrigIndVars[i];
2067  Value *Size = TileSizes[i];
2068 
2069  Value *Scale =
2070  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
2071  Value *Shift =
2072  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
2073  OrigIndVar->replaceAllUsesWith(Shift);
2074  }
2075 
2076  // Remove unused parts of the original loops.
2077  SmallVector<BasicBlock *, 12> OldControlBBs;
2078  OldControlBBs.reserve(6 * Loops.size());
2079  for (CanonicalLoopInfo *Loop : Loops)
2080  Loop->collectControlBlocks(OldControlBBs);
2081  removeUnusedBlocksFromParent(OldControlBBs);
2082 
2083  for (CanonicalLoopInfo *L : Loops)
2084  L->invalidate();
2085 
2086 #ifndef NDEBUG
2087  for (CanonicalLoopInfo *GenL : Result)
2088  GenL->assertOK();
2089 #endif
2090  return Result;
2091 }
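// Illustrative summary (a sketch, not part of the original source): tiling the
// nest
//   for (i = 0; i < N; ++i)
//     for (j = 0; j < M; ++j)
//       body(i, j);
// with tile sizes {4, 8} returns four canonical loops
//   {floor0, floor1, tile0, tile1}
// and the body now runs with
//   i = floor0 * 4 + tile0    and    j = floor1 * 8 + tile1,
// where the trip counts of the tile loops shrink to the remainder on the last
// (partial) floor iteration.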
2092 
2093 /// Attach loop metadata \p Properties to the loop described by \p Loop. If the
2094 /// loop already has metadata, the loop properties are appended.
2095 static void addLoopMetadata(CanonicalLoopInfo *Loop,
2096  ArrayRef<Metadata *> Properties) {
2097  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
2098 
2099  // Nothing to do if no property to attach.
2100  if (Properties.empty())
2101  return;
2102 
2103  LLVMContext &Ctx = Loop->getFunction()->getContext();
2104  SmallVector<Metadata *> NewLoopProperties;
2105  NewLoopProperties.push_back(nullptr);
2106 
2107  // If the loop already has metadata, prepend it to the new metadata.
2108  BasicBlock *Latch = Loop->getLatch();
2109  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
2110  MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
2111  if (Existing)
2112  append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
2113 
2114  append_range(NewLoopProperties, Properties);
2115  MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
2116  LoopID->replaceOperandWith(0, LoopID);
2117 
2118  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
2119 }
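// Illustrative sketch of the produced metadata (assuming one pre-existing and
// one newly appended property; not part of the original source):
//   br ... !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}               ; first operand refers to itself
//   !1 = !{!"llvm.loop.unroll.enable"}        ; pre-existing property, kept
//   !2 = !{!"llvm.loop.unroll.count", i32 4}  ; newly appended property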
2120 
2122  LLVMContext &Ctx = Builder.getContext();
2123  addLoopMetadata(
2124  Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2125  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
2126 }
2127 
2129  LLVMContext &Ctx = Builder.getContext();
2130  addLoopMetadata(
2131  Loop, {
2132  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2133  });
2134 }
2135 
2136 /// Create the TargetMachine object to query the backend for optimization
2137 /// preferences.
2138 ///
2139 /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
2140 /// e.g. Clang does not pass it to its CodeGen layer and creates it only when
2141  /// needed for the LLVM pass pipeline. We use some default options to avoid
2142 /// having to pass too many settings from the frontend that probably do not
2143 /// matter.
2144 ///
2145 /// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
2146 /// method. If we are going to use TargetMachine for more purposes, especially
2147 /// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
2148  /// might become worth requiring front-ends to pass on their TargetMachine,
2149  /// or at least cache it between methods. Note that while frontends such as Clang
2150  /// have just a single main TargetMachine per translation unit, "target-cpu" and
2151  /// "target-features" that determine the TargetMachine are per-function and can
2152  /// be overridden using __attribute__((target("OPTIONS"))).
2153 static std::unique_ptr<TargetMachine>
2154 createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
2155  Module *M = F->getParent();
2156 
2157  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
2158  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
2159  const std::string &Triple = M->getTargetTriple();
2160 
2161  std::string Error;
2162  const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
2163  if (!TheTarget)
2164  return {};
2165 
2166  llvm::TargetOptions Options;
2167  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
2168  Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
2169  OptLevel));
2170 }
2171 
2172 /// Heuristically determine the best-performant unroll factor for \p CLI. This
2173 /// depends on the target processor. We are re-using the same heuristics as the
2174 /// LoopUnrollPass.
2175 static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
2176  Function *F = CLI->getFunction();
2177 
2178  // Assume the user requests the most aggressive unrolling, even if the rest of
2179  // the code is optimized using a lower setting.
2180  CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
2181  std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
2182 
2183  FunctionAnalysisManager FAM;
2184  FAM.registerPass([]() { return TargetLibraryAnalysis(); });
2185  FAM.registerPass([]() { return AssumptionAnalysis(); });
2186  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2187  FAM.registerPass([]() { return LoopAnalysis(); });
2188  FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
2189  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2190  TargetIRAnalysis TIRA;
2191  if (TM)
2192  TIRA = TargetIRAnalysis(
2193  [&](const Function &F) { return TM->getTargetTransformInfo(F); });
2194  FAM.registerPass([&]() { return TIRA; });
2195 
2196  TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
2197  ScalarEvolutionAnalysis SEA;
2198  ScalarEvolution &&SE = SEA.run(*F, FAM);
2199  DominatorTreeAnalysis DTA;
2200  DominatorTree &&DT = DTA.run(*F, FAM);
2201  LoopAnalysis LIA;
2202  LoopInfo &&LI = LIA.run(*F, FAM);
2203  AssumptionAnalysis ACT;
2204  AssumptionCache &&AC = ACT.run(*F, FAM);
2205  OptimizationRemarkEmitter ORE{F};
2206 
2207  Loop *L = LI.getLoopFor(CLI->getHeader());
2208  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
2209 
2210  TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
2211  L, SE, TTI,
2212  /*BlockFrequencyInfo=*/nullptr,
2213  /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
2214  /*UserThreshold=*/None,
2215  /*UserCount=*/None,
2216  /*UserAllowPartial=*/true,
2217  /*UserAllowRuntime=*/true,
2218  /*UserUpperBound=*/None,
2219  /*UserFullUnrollMaxCount=*/None);
2220 
2221  UP.Force = true;
2222 
2223  // Account for additional optimizations taking place before the LoopUnrollPass
2224  // would unroll the loop.
2225  UP.Threshold *= UnrollThresholdFactor;
2226  UP.PartialThreshold *= UnrollThresholdFactor;
2227 
2228  // Use normal unroll factors even if the rest of the code is optimized for
2229  // size.
2230  UP.OptSizeThreshold = UP.Threshold;
2231  UP.PartialOptSizeThreshold = UP.PartialThreshold;
2232 
2233  LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
2234  << " Threshold=" << UP.Threshold << "\n"
2235  << " PartialThreshold=" << UP.PartialThreshold << "\n"
2236  << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
2237  << " PartialOptSizeThreshold="
2238  << UP.PartialOptSizeThreshold << "\n");
2239 
2240  // Disable peeling.
2241  TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
2242  L, SE, TTI,
2243  /*UserAllowPeeling=*/false,
2244  /*UserAllowProfileBasedPeeling=*/false,
2245  /*UserUnrollingSpecficValues=*/false);
2246 
2248  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
2249 
2250  // Assume that reads and writes to stack variables can be eliminated by
2251  // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
2252  // size.
2253  for (BasicBlock *BB : L->blocks()) {
2254  for (Instruction &I : *BB) {
2255  Value *Ptr;
2256  if (auto *Load = dyn_cast<LoadInst>(&I)) {
2257  Ptr = Load->getPointerOperand();
2258  } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
2259  Ptr = Store->getPointerOperand();
2260  } else
2261  continue;
2262 
2263  Ptr = Ptr->stripPointerCasts();
2264 
2265  if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
2266  if (Alloca->getParent() == &F->getEntryBlock())
2267  EphValues.insert(&I);
2268  }
2269  }
2270  }
2271 
2272  unsigned NumInlineCandidates;
2273  bool NotDuplicatable;
2274  bool Convergent;
2275  unsigned LoopSize =
2276  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
2277  TTI, EphValues, UP.BEInsns);
2278  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
2279 
2280  // Loop is not unrollable if the loop contains certain instructions.
2281  if (NotDuplicatable || Convergent) {
2282  LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
2283  return 1;
2284  }
2285 
2286  // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
2287  // be able to use it.
2288  int TripCount = 0;
2289  int MaxTripCount = 0;
2290  bool MaxOrZero = false;
2291  unsigned TripMultiple = 0;
2292 
2293  bool UseUpperBound = false;
2294  computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
2295  MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
2296  UseUpperBound);
2297  unsigned Factor = UP.Count;
2298  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
2299 
2300  // This function returns 1 to signal to not unroll a loop.
2301  if (Factor == 0)
2302  return 1;
2303  return Factor;
2304 }
2305 
2306 void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
2307  int32_t Factor,
2308  CanonicalLoopInfo **UnrolledCLI) {
2309  assert(Factor >= 0 && "Unroll factor must not be negative");
2310 
2311  Function *F = Loop->getFunction();
2312  LLVMContext &Ctx = F->getContext();
2313 
2314  // If the unrolled loop is not used for another loop-associated directive, it
2315  // is sufficient to add metadata for the LoopUnrollPass.
2316  if (!UnrolledCLI) {
2317  SmallVector<Metadata *, 2> LoopMetadata;
2318  LoopMetadata.push_back(
2319  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
2320 
2321  if (Factor >= 1) {
2322  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2323  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2324  LoopMetadata.push_back(MDNode::get(
2325  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
2326  }
2327 
2328  addLoopMetadata(Loop, LoopMetadata);
2329  return;
2330  }
2331 
2332  // Heuristically determine the unroll factor.
2333  if (Factor == 0)
2334  Factor = computeHeuristicUnrollFactor(Loop);
2335 
2336  // No change required with unroll factor 1.
2337  if (Factor == 1) {
2338  *UnrolledCLI = Loop;
2339  return;
2340  }
2341 
2342  assert(Factor >= 2 &&
2343  "unrolling only makes sense with a factor of 2 or larger");
2344 
2345  Type *IndVarTy = Loop->getIndVarType();
2346 
2347  // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
2348  // unroll the inner loop.
2349  Value *FactorVal =
2350  ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
2351  /*isSigned=*/false));
2352  std::vector<CanonicalLoopInfo *> LoopNest =
2353  tileLoops(DL, {Loop}, {FactorVal});
2354  assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
2355  *UnrolledCLI = LoopNest[0];
2356  CanonicalLoopInfo *InnerLoop = LoopNest[1];
2357 
2358  // LoopUnrollPass can only fully unroll loops with constant trip count.
2359  // Unroll by the unroll factor with a fallback epilog for the remainder
2360  // iterations if necessary.
2361  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2362  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2363  addLoopMetadata(
2364  InnerLoop,
2365  {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2366  MDNode::get(
2367  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
2368 
2369 #ifndef NDEBUG
2370  (*UnrolledCLI)->assertOK();
2371 #endif
2372 }
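// Illustrative summary (not part of the original source): with Factor = 4 the
// loop
//   for (i = 0; i < N; ++i) body(i);
// is tiled into a floor loop and a tile loop,
//   for (i1 = 0; i1 < ceil(N / 4); ++i1)
//     for (i2 = 0; i2 < min(4, N - i1 * 4); ++i2)
//       body(i1 * 4 + i2);
// the inner (tile) loop is tagged with llvm.loop.unroll.count = 4 so the
// LoopUnrollPass flattens it later, and *UnrolledCLI is set to the outer
// (floor) loop for use by further loop-associated directives.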
2373 
2376  llvm::Value *BufSize, llvm::Value *CpyBuf,
2377  llvm::Value *CpyFn, llvm::Value *DidIt) {
2378  if (!updateToLocation(Loc))
2379  return Loc.IP;
2380 
2381  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2382  Value *Ident = getOrCreateIdent(SrcLocStr);
2383  Value *ThreadId = getOrCreateThreadID(Ident);
2384 
2385  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
2386 
2387  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
2388 
2389  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
2390  Builder.CreateCall(Fn, Args);
2391 
2392  return Builder.saveIP();
2393 }
2394 
2397  BodyGenCallbackTy BodyGenCB,
2398  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
2399 
2400  if (!updateToLocation(Loc))
2401  return Loc.IP;
2402 
2403  // If needed (i.e. not null), initialize `DidIt` with 0
2404  if (DidIt) {
2405  Builder.CreateStore(Builder.getInt32(0), DidIt);
2406  }
2407 
2408  Directive OMPD = Directive::OMPD_single;
2409  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2410  Value *Ident = getOrCreateIdent(SrcLocStr);
2411  Value *ThreadId = getOrCreateThreadID(Ident);
2412  Value *Args[] = {Ident, ThreadId};
2413 
2414  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
2415  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2416 
2417  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
2418  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2419 
2420  // generates the following:
2421  // if (__kmpc_single()) {
2422  // .... single region ...
2423  // __kmpc_end_single
2424  // }
2425 
2426  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2427  /*Conditional*/ true, /*hasFinalize*/ true);
2428 }
2429 
2431  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2432  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
2433 
2434  if (!updateToLocation(Loc))
2435  return Loc.IP;
2436 
2437  Directive OMPD = Directive::OMPD_critical;
2438  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2439  Value *Ident = getOrCreateIdent(SrcLocStr);
2440  Value *ThreadId = getOrCreateThreadID(Ident);
2441  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
2442  Value *Args[] = {Ident, ThreadId, LockVar};
2443 
2444  SmallVector<Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
2445  Function *RTFn = nullptr;
2446  if (HintInst) {
2447  // Add Hint to entry Args and create call
2448  EnterArgs.push_back(HintInst);
2449  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
2450  } else {
2451  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
2452  }
2453  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
2454 
2455  Function *ExitRTLFn =
2456  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
2457  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2458 
2459  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2460  /*Conditional*/ false, /*hasFinalize*/ true);
2461 }
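// Illustrative call sequence for the emitted critical region (a sketch, not
// part of the original source):
//   __kmpc_critical(loc, tid, @.gomp_critical_user_<name>.var)
//   ... region body generated by BodyGenCB ...
//   __kmpc_end_critical(loc, tid, @.gomp_critical_user_<name>.var)
// When a hint value is given, the entry call becomes
// __kmpc_critical_with_hint with the hint appended to the arguments.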
2462 
2465  InsertPointTy AllocaIP, unsigned NumLoops,
2466  ArrayRef<llvm::Value *> StoreValues,
2467  const Twine &Name, bool IsDependSource) {
2468  if (!updateToLocation(Loc))
2469  return Loc.IP;
2470 
2471  // Allocate space for vector and generate alloc instruction.
2472  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
2473  Builder.restoreIP(AllocaIP);
2474  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
2475  ArgsBase->setAlignment(Align(8));
2476  Builder.restoreIP(Loc.IP);
2477 
2478  // Store the index value with offset in depend vector.
2479  for (unsigned I = 0; I < NumLoops; ++I) {
2480  Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
2481  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
2482  Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
2483  }
2484 
2485  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
2486  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
2487 
2488  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2489  Value *Ident = getOrCreateIdent(SrcLocStr);
2490  Value *ThreadId = getOrCreateThreadID(Ident);
2491  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
2492 
2493  Function *RTLFn = nullptr;
2494  if (IsDependSource)
2495  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
2496  else
2497  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
2498  Builder.CreateCall(RTLFn, Args);
2499 
2500  return Builder.saveIP();
2501 }
2502 
2504  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2505  FinalizeCallbackTy FiniCB, bool IsThreads) {
2506  if (!updateToLocation(Loc))
2507  return Loc.IP;
2508 
2509  Directive OMPD = Directive::OMPD_ordered;
2510  Instruction *EntryCall = nullptr;
2511  Instruction *ExitCall = nullptr;
2512 
2513  if (IsThreads) {
2514  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2515  Value *Ident = getOrCreateIdent(SrcLocStr);
2516  Value *ThreadId = getOrCreateThreadID(Ident);
2517  Value *Args[] = {Ident, ThreadId};
2518 
2519  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
2520  EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2521 
2522  Function *ExitRTLFn =
2523  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
2524  ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2525  }
2526 
2527  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2528  /*Conditional*/ false, /*hasFinalize*/ true);
2529 }
2530 
2531 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
2532  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
2533  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
2534  bool HasFinalize, bool IsCancellable) {
2535 
2536  if (HasFinalize)
2537  FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
2538 
2539  // Create inlined region's entry and body blocks, in preparation
2540  // for conditional creation
2541  BasicBlock *EntryBB = Builder.GetInsertBlock();
2542  Instruction *SplitPos = EntryBB->getTerminator();
2543  if (!isa_and_nonnull<BranchInst>(SplitPos))
2544  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
2545  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
2546  BasicBlock *FiniBB =
2547  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
2548 
2549  Builder.SetInsertPoint(EntryBB->getTerminator());
2550  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
2551 
2552  // generate body
2553  BodyGenCB(/* AllocaIP */ InsertPointTy(),
2554  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
2555 
2556  // If we didn't emit a branch to FiniBB during body generation, it means
2557  // FiniBB is unreachable (e.g. while(1);). Stop generating all the
2558  // unreachable blocks, and remove anything we are not going to use.
2559  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
2560  if (SkipEmittingRegion) {
2561  FiniBB->eraseFromParent();
2562  ExitCall->eraseFromParent();
2563  // Discard finalization if we have it.
2564  if (HasFinalize) {
2565  assert(!FinalizationStack.empty() &&
2566  "Unexpected finalization stack state!");
2567  FinalizationStack.pop_back();
2568  }
2569  } else {
2570  // emit exit call and do any needed finalization.
2571  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
2572  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
2573  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
2574  "Unexpected control flow graph state!!");
2575  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
2576  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
2577  "Unexpected Control Flow State!");
2578  MergeBlockIntoPredecessor(FiniBB);
2579  }
2580 
2581  // If we are skipping the region of a non conditional, remove the exit
2582  // block, and clear the builder's insertion point.
2583  assert(SplitPos->getParent() == ExitBB &&
2584  "Unexpected Insertion point location!");
2585  if (!Conditional && SkipEmittingRegion) {
2586  ExitBB->eraseFromParent();
2587  Builder.ClearInsertionPoint();
2588  } else {
2589  auto merged = MergeBlockIntoPredecessor(ExitBB);
2590  BasicBlock *ExitPredBB = SplitPos->getParent();
2591  auto InsertBB = merged ? ExitPredBB : ExitBB;
2592  if (!isa_and_nonnull<BranchInst>(SplitPos))
2593  SplitPos->eraseFromParent();
2594  Builder.SetInsertPoint(InsertBB);
2595  }
2596 
2597  return Builder.saveIP();
2598 }
2599 
2600 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
2601  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
2602  // If there is nothing to do, return the current insertion point.
2603  if (!Conditional || !EntryCall)
2604  return Builder.saveIP();
2605 
2606  BasicBlock *EntryBB = Builder.GetInsertBlock();
2607  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
2608  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
2609  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
2610 
2611  // Emit thenBB and set the Builder's insertion point there for
2612  // body generation next. Place the block after the current block.
2613  Function *CurFn = EntryBB->getParent();
2614  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
2615 
2616  // Move Entry branch to end of ThenBB, and replace with conditional
2617  // branch (If-stmt)
2618  Instruction *EntryBBTI = EntryBB->getTerminator();
2619  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
2620  EntryBBTI->removeFromParent();
2621  Builder.SetInsertPoint(UI);
2622  Builder.Insert(EntryBBTI);
2623  UI->eraseFromParent();
2624  Builder.SetInsertPoint(ThenBB->getTerminator());
2625 
2626  // return an insertion point to ExitBB.
2627  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
2628 }
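// Illustrative shape of the conditional entry produced above (not part of the
// original source):
//   entry:
//     %res  = <entry runtime call>          ; e.g. __kmpc_single(loc, tid)
//     %cond = icmp ne %res, 0
//     br %cond, label %omp_region.body, label %omp_region.end
//   omp_region.body:
//     ; the directive body is generated here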
2629 
2630 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
2631  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
2632  bool HasFinalize) {
2633 
2634  Builder.restoreIP(FinIP);
2635 
2636  // If there is finalization to do, emit it before the exit call
2637  if (HasFinalize) {
2638  assert(!FinalizationStack.empty() &&
2639  "Unexpected finalization stack state!");
2640 
2641  FinalizationInfo Fi = FinalizationStack.pop_back_val();
2642  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
2643 
2644  Fi.FiniCB(FinIP);
2645 
2646  BasicBlock *FiniBB = FinIP.getBlock();
2647  Instruction *FiniBBTI = FiniBB->getTerminator();
2648 
2649  // set Builder IP for call creation
2650  Builder.SetInsertPoint(FiniBBTI);
2651  }
2652 
2653  if (!ExitCall)
2654  return Builder.saveIP();
2655 
2656  // Place the exit call as the last instruction before the finalization block terminator.
2657  ExitCall->removeFromParent();
2658  Builder.Insert(ExitCall);
2659 
2660  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
2661  ExitCall->getIterator());
2662 }
2663 
2665  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
2666  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
2667  if (!IP.isSet())
2668  return IP;
2669 
2671 
2672  // creates the following CFG structure
2673  // OMP_Entry : (MasterAddr != PrivateAddr)?
2674  // F T
2675  // | \
2676  // | copyin.not.master
2677  // | /
2678  // v /
2679  // copyin.not.master.end
2680  // |
2681  // v
2682  // OMP.Entry.Next
2683 
2684  BasicBlock *OMP_Entry = IP.getBlock();
2685  Function *CurFn = OMP_Entry->getParent();
2686  BasicBlock *CopyBegin =
2687  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
2688  BasicBlock *CopyEnd = nullptr;
2689 
2690  // If entry block is terminated, split to preserve the branch to following
2691  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
2692  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
2693  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
2694  "copyin.not.master.end");
2695  OMP_Entry->getTerminator()->eraseFromParent();
2696  } else {
2697  CopyEnd =
2698  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
2699  }
2700 
2701  Builder.SetInsertPoint(OMP_Entry);
2702  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
2703  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
2704  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
2705  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
2706 
2707  Builder.SetInsertPoint(CopyBegin);
2708  if (BranchtoEnd)
2709  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
2710 
2711  return Builder.saveIP();
2712 }
2713 
2716  std::string Name) {
2718  Builder.restoreIP(Loc.IP);
2719 
2720  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2721  Value *Ident = getOrCreateIdent(SrcLocStr);
2722  Value *ThreadId = getOrCreateThreadID(Ident);
2723  Value *Args[] = {ThreadId, Size, Allocator};
2724 
2725  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
2726 
2727  return Builder.CreateCall(Fn, Args, Name);
2728 }
2729 
2732  std::string Name) {
2734  Builder.restoreIP(Loc.IP);
2735 
2736  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2737  Value *Ident = getOrCreateIdent(SrcLocStr);
2738  Value *ThreadId = getOrCreateThreadID(Ident);
2739  Value *Args[] = {ThreadId, Addr, Allocator};
2740  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
2741  return Builder.CreateCall(Fn, Args, Name);
2742 }
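// Illustrative use of the two helpers above (a hypothetical sketch, not part
// of the original source): a frontend lowering an allocate/free pair could do
//   Value *Buf = OMPBuilder.createOMPAlloc(Loc, SizeVal, AllocatorVal, "buf");
//   // ... use Buf ...
//   OMPBuilder.createOMPFree(Loc, Buf, AllocatorVal, "buf_free");
// which produces calls to __kmpc_alloc and __kmpc_free carrying the current
// thread id; SizeVal and AllocatorVal are assumed to be values the frontend
// already computed.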
2743 
2745  const LocationDescription &Loc, llvm::Value *Pointer,
2748  Builder.restoreIP(Loc.IP);
2749 
2750  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2751  Value *Ident = getOrCreateIdent(SrcLocStr);
2752  Value *ThreadId = getOrCreateThreadID(Ident);
2753  Constant *ThreadPrivateCache =
2754  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
2755  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
2756 
2757  Function *Fn =
2758  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
2759 
2760  return Builder.CreateCall(Fn, Args);
2761 }
2762 
2765  bool RequiresFullRuntime) {
2766  if (!updateToLocation(Loc))
2767  return Loc.IP;
2768 
2769  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2770  Value *Ident = getOrCreateIdent(SrcLocStr);
2771  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
2772  IntegerType::getInt8Ty(Int8->getContext()),
2773  IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
2774  ConstantInt *UseGenericStateMachine =
2775  ConstantInt::getBool(Int32->getContext(), !IsSPMD);
2776  ConstantInt *RequiresFullRuntimeVal =
2777  ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2778 
2779  Function *Fn = getOrCreateRuntimeFunctionPtr(
2780  omp::RuntimeFunction::OMPRTL___kmpc_target_init);
2781 
2782  CallInst *ThreadKind = Builder.CreateCall(
2783  Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
2784 
2785  Value *ExecUserCode = Builder.CreateICmpEQ(
2786  ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
2787  "exec_user_code");
2788 
2789  // ThreadKind = __kmpc_target_init(...)
2790  // if (ThreadKind == -1)
2791  // user_code
2792  // else
2793  // return;
2794 
2795  auto *UI = Builder.CreateUnreachable();
2796  BasicBlock *CheckBB = UI->getParent();
2797  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
2798 
2799  BasicBlock *WorkerExitBB = BasicBlock::Create(
2800  CheckBB->getContext(), "worker.exit", CheckBB->getParent());
2801  Builder.SetInsertPoint(WorkerExitBB);
2802  Builder.CreateRetVoid();
2803 
2804  auto *CheckBBTI = CheckBB->getTerminator();
2805  Builder.SetInsertPoint(CheckBBTI);
2806  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
2807 
2808  CheckBBTI->eraseFromParent();
2809  UI->eraseFromParent();
2810 
2811  // Continue in the "user_code" block, see diagram above and in
2812  // openmp/libomptarget/deviceRTLs/common/include/target.h .
2813  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
2814 }
2815 
2817  bool IsSPMD,
2818  bool RequiresFullRuntime) {
2819  if (!updateToLocation(Loc))
2820  return;
2821 
2822  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2823  Value *Ident = getOrCreateIdent(SrcLocStr);
2824  ConstantInt *IsSPMDVal = ConstantInt::getSigned(
2825  IntegerType::getInt8Ty(Int8->getContext()),
2826  IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
2827  ConstantInt *RequiresFullRuntimeVal =
2828  ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2829 
2830  Function *Fn = getOrCreateRuntimeFunctionPtr(
2831  omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
2832 
2833  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
2834 }
2835 
2836 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
2837  StringRef FirstSeparator,
2838  StringRef Separator) {
2839  SmallString<128> Buffer;
2840  llvm::raw_svector_ostream OS(Buffer);
2841  StringRef Sep = FirstSeparator;
2842  for (StringRef Part : Parts) {
2843  OS << Sep << Part;
2844  Sep = Separator;
2845  }
2846  return OS.str().str();
2847 }
2848 
2849 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
2850  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2851  // TODO: Replace the twine arg with stringref to get rid of the conversion
2852  // logic. However, this is taken from the current implementation in clang as is.
2853  // Since this method is used in many places exclusively for OMP internal use
2854  // we will keep it as is temporarily until we move all users to the
2855  // builder and then, if possible, fix it everywhere in one go.
2856  SmallString<256> Buffer;
2857  llvm::raw_svector_ostream Out(Buffer);
2858  Out << Name;
2859  StringRef RuntimeName = Out.str();
2860  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2861  if (Elem.second) {
2862  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2863  "OMP internal variable has different type than requested");
2864  } else {
2865  // TODO: investigate the appropriate linkage type used for the global
2866  // variable for possibly changing that to internal or private, or maybe
2867  // create different versions of the function for different OMP internal
2868  // variables.
2869  Elem.second = new llvm::GlobalVariable(
2870  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
2871  llvm::Constant::getNullValue(Ty), Elem.first(),
2872  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
2873  AddressSpace);
2874  }
2875 
2876  return Elem.second;
2877 }
2878 
2879 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
2880  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2881  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
2882  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
2883 }
2884 
2887  std::string VarName) {
2888  llvm::Constant *MaptypesArrayInit =
2889  llvm::ConstantDataArray::get(M.getContext(), Mappings);
2890  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
2891  M, MaptypesArrayInit->getType(),
2892  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
2893  VarName);
2894  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2895  return MaptypesArrayGlobal;
2896 }
2897 
2899  InsertPointTy AllocaIP,
2900  unsigned NumOperands,
2901  struct MapperAllocas &MapperAllocas) {
2902  if (!updateToLocation(Loc))
2903  return;
2904 
2905  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2906  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2907  Builder.restoreIP(AllocaIP);
2908  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
2909  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
2910  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
2911  Builder.restoreIP(Loc.IP);
2912  MapperAllocas.ArgsBase = ArgsBase;
2913  MapperAllocas.Args = Args;
2914  MapperAllocas.ArgSizes = ArgSizes;
2915 }
2916 
2918  Function *MapperFunc, Value *SrcLocInfo,
2919  Value *MaptypesArg, Value *MapnamesArg,
2920  struct MapperAllocas &MapperAllocas,
2921  int64_t DeviceID, unsigned NumOperands) {
2922  if (!updateToLocation(Loc))
2923  return;
2924 
2925  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2926  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2927  Value *ArgsBaseGEP =
2928  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
2929  {Builder.getInt32(0), Builder.getInt32(0)});
2930  Value *ArgsGEP =
2931  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
2932  {Builder.getInt32(0), Builder.getInt32(0)});
2933  Value *ArgSizesGEP =
2934  Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
2935  {Builder.getInt32(0), Builder.getInt32(0)});
2936  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
2937  Builder.CreateCall(MapperFunc,
2938  {SrcLocInfo, Builder.getInt64(DeviceID),
2939  Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
2940  ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
2941 }
2942 
2943 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
2944  const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
2947  "Unexpected Atomic Ordering.");
2948 
2949  bool Flush = false;
2950  llvm::AtomicOrdering FlushAO = llvm::AtomicOrdering::Monotonic;
2951 
2952  switch (AK) {
2953  case Read:
2954  if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
2955  AO == AtomicOrdering::SequentiallyConsistent) {
2956  FlushAO = AtomicOrdering::Acquire;
2957  Flush = true;
2958  }
2959  break;
2960  case Write:
2961  case Update:
2962  if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
2963  AO == AtomicOrdering::SequentiallyConsistent) {
2964  FlushAO = AtomicOrdering::Release;
2965  Flush = true;
2966  }
2967  break;
2968  case Capture:
2969  switch (AO) {
2970  case AtomicOrdering::Acquire:
2971  FlushAO = AtomicOrdering::Acquire;
2972  Flush = true;
2973  break;
2974  case AtomicOrdering::Release:
2975  FlushAO = AtomicOrdering::Release;
2976  Flush = true;
2977  break;
2978  case AtomicOrdering::AcquireRelease:
2979  case AtomicOrdering::SequentiallyConsistent:
2980  FlushAO = AtomicOrdering::AcquireRelease;
2981  Flush = true;
2982  break;
2983  default:
2984  // do nothing - leave silently.
2985  break;
2986  }
2987  }
2988 
2989  if (Flush) {
2990  // The flush runtime call does not take a memory ordering yet; until it
2991  // does, we resolve which atomic ordering would be required but still issue
2992  // a plain flush call.
2993  // TODO: pass `FlushAO` after memory ordering support is added
2994  (void)FlushAO;
2995  emitFlush(Loc);
2996  }
2997 
2998  // for AO == AtomicOrdering::Monotonic and all other case combinations
2999  // do nothing
3000  return Flush;
3001 }
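// Summary of the mapping implemented above (illustrative, not part of the
// original source):
//   read    + acquire/acq_rel/seq_cst          -> flush with acquire semantics
//   write   + release/acq_rel/seq_cst          -> flush with release semantics
//   update  + release/acq_rel/seq_cst          -> flush with release semantics
//   capture + acquire/release/acq_rel/seq_cst  -> flush
//   monotonic (relaxed) orderings              -> no flush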
3002 
3006  AtomicOrdering AO) {
3007  if (!updateToLocation(Loc))
3008  return Loc.IP;
3009 
3010  Type *XTy = X.Var->getType();
3011  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3012  Type *XElemTy = XTy->getPointerElementType();
3013  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3014  XElemTy->isPointerTy()) &&
3015  "OMP atomic read expected a scalar type");
3016 
3017  Value *XRead = nullptr;
3018 
3019  if (XElemTy->isIntegerTy()) {
3020  LoadInst *XLD =
3021  Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
3022  XLD->setAtomic(AO);
3023  XRead = cast<Value>(XLD);
3024  } else {
3025  // We need to bitcast and perform atomic op as integer
3026  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3027  IntegerType *IntCastTy =
3028  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3029  Value *XBCast = Builder.CreateBitCast(
3030  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
3031  LoadInst *XLoad =
3032  Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
3033  XLoad->setAtomic(AO);
3034  if (XElemTy->isFloatingPointTy()) {
3035  XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
3036  } else {
3037  XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
3038  }
3039  }
3040  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
3041  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
3042  return Builder.saveIP();
3043 }
3044 
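// Emit an atomic write of Expr into X (scalar types only); non-integer scalars
// are stored through a bitcast to an equally sized integer.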
3045 OpenMPIRBuilder::InsertPointTy
3046 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
3047  AtomicOpValue &X, Value *Expr,
3048  AtomicOrdering AO) {
3049  if (!updateToLocation(Loc))
3050  return Loc.IP;
3051 
3052  Type *XTy = X.Var->getType();
3053  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3054  Type *XElemTy = XTy->getPointerElementType();
3055  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3056  XElemTy->isPointerTy()) &&
3057  "OMP atomic write expected a scalar type");
3058 
3059  if (XElemTy->isIntegerTy()) {
3060  StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
3061  XSt->setAtomic(AO);
3062  } else {
3063  // We need to bitcast and perform atomic op as integers
3064  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3065  IntegerType *IntCastTy =
3066  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3067  Value *XBCast = Builder.CreateBitCast(
3068  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
3069  Value *ExprCast =
3070  Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
3071  StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
3072  XSt->setAtomic(AO);
3073  }
3074 
3075  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
3076  return Builder.saveIP();
3077 }
3078 
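// Emit an atomic update of X with Expr using RMWOp. Simple integer updates lower
// to a single atomicrmw; everything else goes through the compare-exchange loop
// in emitAtomicUpdate.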
3079 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
3080  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3081  Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3082  AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
3083  if (!updateToLocation(Loc))
3084  return Loc.IP;
3085 
3086  LLVM_DEBUG({
3087  Type *XTy = X.Var->getType();
3088  assert(XTy->isPointerTy() &&
3089  "OMP Atomic expects a pointer to target memory");
3090  Type *XElemTy = XTy->getPointerElementType();
3091  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3092  XElemTy->isPointerTy()) &&
3093  "OMP atomic update expected a scalar type");
3094  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3095  (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
3096  "OpenMP atomic does not support LT or GT operations");
3097  });
3098 
3099  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
3100  IsXLHSInRHSPart);
3101  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
3102  return Builder.saveIP();
3103 }
3104 
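// Recompute the result of applying RMWOp to Src1 and Src2 as ordinary
// (non-atomic) instructions; used to materialize the updated value for capture
// clauses.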
3105 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3106  AtomicRMWInst::BinOp RMWOp) {
3107  switch (RMWOp) {
3108  case AtomicRMWInst::Add:
3109  return Builder.CreateAdd(Src1, Src2);
3110  case AtomicRMWInst::Sub:
3111  return Builder.CreateSub(Src1, Src2);
3112  case AtomicRMWInst::And:
3113  return Builder.CreateAnd(Src1, Src2);
3114  case AtomicRMWInst::Nand:
3115  return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
3116  case AtomicRMWInst::Or:
3117  return Builder.CreateOr(Src1, Src2);
3118  case AtomicRMWInst::Xor:
3119  return Builder.CreateXor(Src1, Src2);
3120  case AtomicRMWInst::Xchg:
3121  case AtomicRMWInst::FAdd:
3122  case AtomicRMWInst::FSub:
3123  case AtomicRMWInst::BAD_BINOP:
3124  case AtomicRMWInst::Max:
3125  case AtomicRMWInst::Min:
3126  case AtomicRMWInst::UMax:
3127  case AtomicRMWInst::UMin:
3128  llvm_unreachable("Unsupported atomic update operation");
3129  }
3130  llvm_unreachable("Unsupported atomic update operation");
3131 }
3132 
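// Shared helper for atomic update/capture: returns the pair (old value, updated
// value). When the operation cannot be expressed as a single atomicrmw, it falls
// back to a load / update / cmpxchg retry loop.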
3133 std::pair<Value *, Value *>
3134 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
3135  AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3136  AtomicUpdateCallbackTy &UpdateOp,
3137  bool VolatileX, bool IsXLHSInRHSPart) {
3138  Type *XElemTy = X->getType()->getPointerElementType();
3139 
3140  bool DoCmpExch =
3141  ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
3142  (RMWOp == AtomicRMWInst::FSub) ||
3143  (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
3144 
3145  std::pair<Value *, Value *> Res;
3146  if (XElemTy->isIntegerTy() && !DoCmpExch) {
3147  Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
3148  // Res.second is only needed for postfix captures; generate it anyway for
3149  // consistency with the else branch. Any DCE pass will remove it otherwise.
3150  Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
3151  } else {
3152  unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
3153  IntegerType *IntCastTy =
3154  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3155  Value *XBCast =
3156  Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3157  LoadInst *OldVal =
3158  Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
3159  OldVal->setAtomic(AO);
3160  // CurBB
3161  // | /---\
3162  // ContBB |
3163  // | \---/
3164  // ExitBB
3165  BasicBlock *CurBB = Builder.GetInsertBlock();
3166  Instruction *CurBBTI = CurBB->getTerminator();
3167  CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
3168  BasicBlock *ExitBB =
3169  CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
3170  BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
3171  X->getName() + ".atomic.cont");
3172  ContBB->getTerminator()->eraseFromParent();
3173  Builder.SetInsertPoint(ContBB);
3174  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
3175  PHI->addIncoming(OldVal, CurBB);
3176  AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
3177  NewAtomicAddr->setName(X->getName() + "x.new.val");
3178  NewAtomicAddr->moveBefore(AllocIP);
3179  IntegerType *NewAtomicCastTy =
3180  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3181  bool IsIntTy = XElemTy->isIntegerTy();
3182  Value *NewAtomicIntAddr =
3183  (IsIntTy)
3184  ? NewAtomicAddr
3185  : Builder.CreateBitCast(NewAtomicAddr,
3186  NewAtomicCastTy->getPointerTo(Addrspace));
3187  Value *OldExprVal = PHI;
3188  if (!IsIntTy) {
3189  if (XElemTy->isFloatingPointTy()) {
3190  OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
3191  X->getName() + ".atomic.fltCast");
3192  } else {
3193  OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
3194  X->getName() + ".atomic.ptrCast");
3195  }
3196  }
3197 
3198  Value *Upd = UpdateOp(OldExprVal, Builder);
3199  Builder.CreateStore(Upd, NewAtomicAddr);
3200  LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
3201  Value *XAddr =
3202  (IsIntTy)
3203  ? X
3204  : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3205  AtomicOrdering Failure =
3206  llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
3207  AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
3208  XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
3209  Result->setVolatile(VolatileX);
3210  Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
3211  Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
3212  PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
3213  Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
3214 
3215  Res.first = OldExprVal;
3216  Res.second = Upd;
3217 
3218  // Set the insertion point in the exit block.
3219  if (UnreachableInst *ExitTI =
3220  dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
3221  CurBBTI->eraseFromParent();
3222  Builder.SetInsertPoint(ExitBB);
3223  } else {
3224  Builder.SetInsertPoint(ExitTI);
3225  }
3226  }
3227 
3228  return Res;
3229 }
3230 
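// Emit the capture form of the atomic update: perform the update and store
// either the old value (postfix capture) or the updated value into V.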
3231 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
3232  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3233  AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3234  AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3235  bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
3236  if (!updateToLocation(Loc))
3237  return Loc.IP;
3238 
3239  LLVM_DEBUG({
3240  Type *XTy = X.Var->getType();
3241  assert(XTy->isPointerTy() &&
3242  "OMP Atomic expects a pointer to target memory");
3243  Type *XElemTy = XTy->getPointerElementType();
3244  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3245  XElemTy->isPointerTy()) &&
3246  "OMP atomic capture expected a scalar type");
3247  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3248  "OpenMP atomic does not support LT or GT operations");
3249  });
3250 
3251  // If the update expression is not based on 'x' (UpdateExpr is false), 'x' is
3252  // simply atomically overwritten with 'expr' via an exchange.
3253  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
3254  std::pair<Value *, Value *> Result =
3255  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
3256  X.IsVolatile, IsXLHSInRHSPart);
3257 
3258  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
3259  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
3260 
3261  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
3262  return Builder.saveIP();
3263 }
3264 
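// Build a private constant global array holding the given map-name constants,
// to be passed to the offloading runtime.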
3265 GlobalVariable *
3266 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
3267  std::string VarName) {
3268  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
3269  llvm::ArrayType::get(
3270  llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
3271  Names);
3272  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
3273  M, MapNamesArrayInit->getType(),
3274  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
3275  VarName);
3276  return MapNamesArrayGlobal;
3277 }
3278 
3279 // Create all simple and struct types exposed by the runtime and remember
3280 // their llvm::PointerTypes for easy access later.
3281 void OpenMPIRBuilder::initializeTypes(Module &M) {
3282  LLVMContext &Ctx = M.getContext();
3283  StructType *T;
3284 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
3285 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
3286  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
3287  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
3288 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
3289  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
3290  VarName##Ptr = PointerType::getUnqual(VarName);
3291 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
3292  T = StructType::getTypeByName(Ctx, StructName); \
3293  if (!T) \
3294  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
3295  VarName = T; \
3296  VarName##Ptr = PointerType::getUnqual(T);
3297 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3298 }
3299 
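// Collect all basic blocks of the outline region: walk the CFG from EntryBB,
// stopping at ExitBB, and record each visited block in BlockSet and BlockVector.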
3300 void OpenMPIRBuilder::OutlineInfo::collectBlocks(
3301  SmallPtrSetImpl<BasicBlock *> &BlockSet,
3302  SmallVectorImpl<BasicBlock *> &BlockVector) {
3303  SmallVector<BasicBlock *, 32> Worklist;
3304  BlockSet.insert(EntryBB);
3305  BlockSet.insert(ExitBB);
3306 
3307  Worklist.push_back(EntryBB);
3308  while (!Worklist.empty()) {
3309  BasicBlock *BB = Worklist.pop_back_val();
3310  BlockVector.push_back(BB);
3311  for (BasicBlock *SuccBB : successors(BB))
3312  if (BlockSet.insert(SuccBB).second)
3313  Worklist.push_back(SuccBB);
3314  }
3315 }
3316 
3317 void CanonicalLoopInfo::collectControlBlocks(
3318  SmallVectorImpl<BasicBlock *> &BBs) {
3319  // We only count those BBs as control blocks for which we do not need to
3320  // reverse the CFG, i.e. not the loop body, which can contain arbitrary
3321  // control flow. For consistency, this also means we do not add the Body
3322  // block, which is just the entry to the body code.
3323  BBs.reserve(BBs.size() + 6);
3324  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
3325 }
3326 
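// Consistency self-check of the canonical loop's control-flow structure; this is
// a no-op in builds without assertions (NDEBUG).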
3327 void CanonicalLoopInfo::assertOK() const {
3328 #ifndef NDEBUG
3329  // No constraints if this object currently does not describe a loop.
3330  if (!isValid())
3331  return;
3332 
3333  // Verify standard control-flow we use for OpenMP loops.
3334  assert(Preheader);
3335  assert(isa<BranchInst>(Preheader->getTerminator()) &&
3336  "Preheader must terminate with unconditional branch");
3337  assert(Preheader->getSingleSuccessor() == Header &&
3338  "Preheader must jump to header");
3339 
3340  assert(Header);
3341  assert(isa<BranchInst>(Header->getTerminator()) &&
3342  "Header must terminate with unconditional branch");
3343  assert(Header->getSingleSuccessor() == Cond &&
3344  "Header must jump to exiting block");
3345 
3346  assert(Cond);
3347  assert(Cond->getSinglePredecessor() == Header &&
3348  "Exiting block only reachable from header");
3349 
3350  assert(isa<BranchInst>(Cond->getTerminator()) &&
3351  "Exiting block must terminate with conditional branch");
3352  assert(size(successors(Cond)) == 2 &&
3353  "Exiting block must have two successors");
3354  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
3355  "Exiting block's first successor jump to the body");
3356  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
3357  "Exiting block's second successor must exit the loop");
3358 
3359  assert(Body);
3360  assert(Body->getSinglePredecessor() == Cond &&
3361  "Body only reachable from exiting block");
3362  assert(!isa<PHINode>(Body->front()));
3363 
3364  assert(Latch);
3365  assert(isa<BranchInst>(Latch->getTerminator()) &&
3366  "Latch must terminate with unconditional branch");
3367  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
3368  // TODO: To support simple redirecting of the end of the body code that has
3369  // multiple predecessors, introduce another auxiliary block like preheader/after.
3370  assert(Latch->getSinglePredecessor() != nullptr);
3371  assert(!isa<PHINode>(Latch->front()));
3372 
3373  assert(Exit);
3374  assert(isa<BranchInst>(Exit->getTerminator()) &&
3375  "Exit block must terminate with unconditional branch");
3376  assert(Exit->getSingleSuccessor() == After &&
3377  "Exit block must jump to after block");
3378 
3379  assert(After);
3380  assert(After->getSinglePredecessor() == Exit &&
3381  "After block only reachable from exit block");
3382  assert(After->empty() || !isa<PHINode>(After->front()));
3383 
3384  Instruction *IndVar = getIndVar();
3385  assert(IndVar && "Canonical induction variable not found?");
3386  assert(isa<IntegerType>(IndVar->getType()) &&
3387  "Induction variable must be an integer");
3388  assert(cast<PHINode>(IndVar)->getParent() == Header &&
3389  "Induction variable must be a PHI in the loop header");
3390  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
3391  assert(
3392  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
3393  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
3394 
3395  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
3396  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
3397  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
3398  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
3399  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
3400  ->isOne());
3401 
3402  Value *TripCount = getTripCount();
3403  assert(TripCount && "Loop trip count not found?");
3404  assert(IndVar->getType() == TripCount->getType() &&
3405  "Trip count and induction variable must have the same type");
3406 
3407  auto *CmpI = cast<CmpInst>(&Cond->front());
3408  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
3409  "Exit condition must be an unsigned less-than comparison");
3410  assert(CmpI->getOperand(0) == IndVar &&
3411  "Exit condition must compare the induction variable");
3412  assert(CmpI->getOperand(1) == TripCount &&
3413  "Exit condition must compare with the trip count");
3414 #endif
3415 }
3416 
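// Drop all block pointers so this object no longer describes a loop; isValid()
// returns false afterwards.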
3417 void CanonicalLoopInfo::invalidate() {
3418  Preheader = nullptr;
3419  Header = nullptr;
3420  Cond = nullptr;
3421  Body = nullptr;
3422  Latch = nullptr;
3423  Exit = nullptr;
3424  After = nullptr;
3425 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:1516
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:482
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:461
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:2744
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2372
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:150
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2093
addLoopMetadata
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Definition: OMPIRBuilder.cpp:2095
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:761
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:457
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createSection
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
Definition: OMPIRBuilder.cpp:1022
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:2430
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:1485
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
createTargetMachine
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOpt::Level OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
Definition: OMPIRBuilder.cpp:2154
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::OpenMPIRBuilder::createLoopSkeleton
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
Definition: OMPIRBuilder.cpp:1277
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
llvm::Function::empty
bool empty() const
Definition: Function.h:729
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:700
T
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:495
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1352
llvm::OpenMPIRBuilder::ReductionInfo::getElementType
Type * getElementType() const
Returns the type of the element being reduced.
Definition: OMPIRBuilder.h:556
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:752
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1589
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:738
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:307
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:1905
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:2031
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:707
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:53
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:461
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2532
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:357
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:743
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2233
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:101
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
Error.h
OptimizationRemarkEmitter.h
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:466
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:771
ScalarEvolution.h
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:133
llvm::OpenMPIRBuilder::AtomicOpValue
a struct to pack relevant information while generating atomic Ops
Definition: OMPIRBuilder.h:1190
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:539
llvm::CanonicalLoopInfo::getAfterIP
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Definition: OMPIRBuilder.h:1528
getTripCount
static const SCEV * getTripCount(const SCEV *BECount, Type *IntPtr, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute trip count from the backedge taken count.
Definition: LoopIdiomRecognize.cpp:1047
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:385
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Optional
Definition: APInt.h:33
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
CodeExtractor.h
llvm::OpenMPIRBuilder::ReductionInfo::Variable
Value * Variable
Reduction variable of pointer type.
Definition: OMPIRBuilder.h:561
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:63
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:751
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:890
llvm::CanonicalLoopInfo::getFunction
Function * getFunction() const
Definition: OMPIRBuilder.h:1533
llvm::OpenMPIRBuilder::AtomicOpValue::Var
Value * Var
Definition: OMPIRBuilder.h:1191
llvm::BasicBlock::hasNPredecessors
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:290
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1318
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::OpenMPIRBuilder::createAtomicCapture
InsertPointTy createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
Definition: OMPIRBuilder.cpp:3231
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::TargetRegistry::lookupTarget
static const Target * lookupTarget(const std::string &Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Definition: TargetRegistry.cpp:62
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:904
llvm::OpenMPIRBuilder::createReductions
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
Definition: OMPIRBuilder.cpp:1068
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::StoreInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:384
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:1779
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1135
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2045
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:306
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:589
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:1502
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
llvm::OpenMPIRBuilder::createOrderedDepend
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
Definition: OMPIRBuilder.cpp:2464
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Lock
static sys::Mutex Lock
Definition: NVPTXUtilities.cpp:39
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr, bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:159
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:172
CommandLine.h
CodeMetrics.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:771
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1600
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr()
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:333
TargetMachine.h
llvm::OpenMPIRBuilder::emitMapperCall
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
Definition: OMPIRBuilder.cpp:2917
llvm::OpenMPIRBuilder::getOrCreateIdent
Value * getOrCreateIdent(Constant *SrcLocStr, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:258
OMPIRBuilder.h
llvm::omp::OMP_TGT_EXEC_MODE_GENERIC
@ OMP_TGT_EXEC_MODE_GENERIC
Definition: OMPConstants.h:136
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:3327
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:1453
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:2730
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:497
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:747
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::OpenMPIRBuilder::createMapperAllocas
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Definition: OMPIRBuilder.cpp:2898
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:1740
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:364
llvm::OpenMPIRBuilder::createAtomicRead
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Definition: OMPIRBuilder.cpp:3004
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
IP
Definition: NVPTXLowerArgs.cpp:166
TargetLibraryInfo.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:253
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:244
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::CanonicalLoopInfo::isValid
bool isValid() const
Returns whether this object currently represents the IR of a loop.
Definition: OMPIRBuilder.h:1433
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:647
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::pdb::Int8
@ Int8
Definition: PDBTypes.h:396
MDBuilder.h
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:748
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
setCanonicalLoopTripCount
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
Definition: OMPIRBuilder.cpp:1469
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1268
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:376
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:1522
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:783
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:1748
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:74
llvm::OpenMPIRBuilder::MapperAllocas::Args
AllocaInst * Args
Definition: OMPIRBuilder.h:800
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
Definition: OMPConstants.h:74
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:740
llvm::OpenMPIRBuilder::unrollLoopFull
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
Definition: OMPIRBuilder.cpp:2121
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:886
llvm::OpenMPIRBuilder::createAtomicUpdate
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
Definition: OMPIRBuilder.cpp:3079
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:244
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
Definition: OMPIRBuilder.cpp:2714
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:59
llvm::None
const NoneType None
Definition: None.h:23
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:742
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::OpenMPIRBuilder::applyDynamicWorkshareLoop
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, omp::OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a dynamically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1606
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
llvm::SmallString< 128 >
CFG.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:761
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:316
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:54
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:915
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:760
llvm::cl::opt< bool >
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.h:1439
llvm::ClrHandlerType::Filter
@ Filter
llvm::OpenMPIRBuilder::createOffloadMaptypes
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
Definition: OMPIRBuilder.cpp:2886
llvm::OpenMPIRBuilder::AtomicOpValue::IsVolatile
bool IsVolatile
Definition: OMPIRBuilder.h:1193
llvm::OpenMPIRBuilder::createSections
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
Definition: OMPIRBuilder.cpp:921
llvm::OpenMPIRBuilder::MapperAllocas::ArgsBase
AllocaInst * ArgsBase
Definition: OMPIRBuilder.h:799
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::OpenMPIRBuilder::ReductionInfo::PrivateVariable
Value * PrivateVariable
Thread-private partial reduction variable.
Definition: OMPIRBuilder.h:564
llvm::OpenMPIRBuilder::createOrderedThreadsSimd
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Definition: OMPIRBuilder.cpp:2503
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:182
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:744
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::ScalarEvolutionAnalysis::run
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
Definition: ScalarEvolution.cpp:13182
llvm::AssumptionAnalysis::run
AssumptionCache run(Function &F, FunctionAnalysisManager &)
Definition: AssumptionCache.cpp:260
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2798
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3139
DebugInfo.h
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:432
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:469
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:1347
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:756
llvm::OpenMPIRBuilder::createTargetInit
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
The omp target interface.
Definition: OMPIRBuilder.cpp:2764
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:593
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:750
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:508
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:139
llvm::omp::OMPScheduleType::Static
@ Static
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:158
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::OpenMPIRBuilder::unrollLoopHeuristic
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
Definition: OMPIRBuilder.cpp:2128
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:640
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:150
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:1230
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition: BasicBlockUtils.cpp:1465
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:880
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1324
llvm::DominatorTreeAnalysis::run
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Definition: Dominators.cpp:360
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:137
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:758
llvm::CanonicalLoopInfo
Class to represented the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:1407
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Triple.h
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:750
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1581
llvm::OpenMPIRBuilder::createGlobalFlag
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a global flag Namein the module with initial value Value.
Definition: OMPIRBuilder.cpp:248
TargetOptions.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:180
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:746
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
llvm::OpenMPIRBuilder::createOffloadMapnames
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Definition: OMPIRBuilder.cpp:3266
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:691
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:276
llvm::GlobalValue::WeakODRLinkage
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:53
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
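For reference, a typical call that appends a fresh block to an existing function (Ctx and Fn are assumed to exist, and Fn is assumed to return void):
  BasicBlock *Body = BasicBlock::Create(Ctx, "omp.region.body", Fn);
  IRBuilder<> B(Body);
  B.CreateRetVoid();   // give the new block a terminator so Fn stays well formed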
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:870
uint32_t
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1797
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantDataArray::getString
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3066
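A small sketch of wrapping such a string constant in a private global, e.g. for an ident_t-style source-location string (the contents and name are illustrative):
  LLVMContext &Ctx = M.getContext();
  Constant *LocStr =
      ConstantDataArray::getString(Ctx, ";unknown;unknown;0;0;;", /*AddNull=*/true);
  auto *GV = new GlobalVariable(M, LocStr->getType(), /*isConstant=*/true,
                                GlobalValue::PrivateLinkage, LocStr, ".str.loc");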
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:588
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
getKmpcForDynamicNextForType
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
Definition: OMPIRBuilder.cpp:1595
llvm::OpenMPIRBuilder::MapperAllocas::ArgSizes
AllocaInst * ArgSizes
Definition: OMPIRBuilder.h:801
llvm::OpenMPIRBuilder::OutlineInfo::PostOutlineCB
PostOutlineCBTy PostOutlineCB
Definition: OMPIRBuilder.h:760
llvm::LoadInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:253
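A sketch of marking an ordinary load atomic after creation, assuming Builder and Ptr (a naturally aligned i32 pointer) exist:
  LoadInst *LD = Builder.CreateLoad(Builder.getInt32Ty(), Ptr, "x.atomic.load");
  LD->setAtomic(AtomicOrdering::Monotonic);   // relaxed atomic, default (system) scope
  LD->setAlignment(Align(4));                 // atomic accesses need an explicit natural alignment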
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::MDNode::getDistinct
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1241
llvm::OpenMPIRBuilder::unrollLoopPartial
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
Definition: OMPIRBuilder.cpp:2306
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::OpenMPIRBuilder::createMasked
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Definition: OMPIRBuilder.cpp:1254
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::OpenMPIRBuilder::applyWorkshareLoop
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier)
Modifies the canonical loop to be a workshare loop.
Definition: OMPIRBuilder.cpp:1568
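A hedged sketch of the usual driver sequence; CLI is assumed to come from createCanonicalLoop, AllocaIP to point into the function's entry block, and DL, OMPBuilder and Builder to be the caller's:
  OpenMPIRBuilder::InsertPointTy AfterIP =
      OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
  Builder.restoreIP(AfterIP);   // continue emitting code after the workshare loop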
llvm::GlobalValue::CommonLinkage
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:58
llvm::LoopAnalysis::run
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopInfo.cpp:962
computeHeuristicUnrollFactor
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
Definition: OMPIRBuilder.cpp:2175
llvm::PointerUnion< const Value *, const PseudoSourceValue * >
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::CanonicalLoopInfo::getExit
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:1475
llvm::AtomicOrdering::Release
@ Release
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
llvm::ConstantAsMetadata
Definition: Metadata.h:412
llvm::OpenMPIRBuilder::createTaskwait
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
Definition: OMPIRBuilder.cpp:898
redirectTo
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Definition: OMPIRBuilder.cpp:1723
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OpenMPIRBuilder::ReductionInfo::ReductionGen
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Definition: OMPIRBuilder.h:569
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::AtomicRMWInst::FSub
@ FSub
*p = old - v
Definition: Instructions.h:766
llvm::OpenMPIRBuilder::createAtomicWrite
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for: X = Expr (scalar data types only).
Definition: OMPIRBuilder.cpp:3046
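A minimal sketch, assuming XAddr is the address of the shared scalar, ExprVal the value to store, Loc a LocationDescription, and that AtomicOpValue exposes Var/IsVolatile as in this version of the builder:
  OpenMPIRBuilder::AtomicOpValue X;
  X.Var = XAddr;          // address of the scalar being written
  X.IsVolatile = false;   // remaining flags keep their defaults
  Builder.restoreIP(
      OMPBuilder.createAtomicWrite(Loc, X, ExprVal, AtomicOrdering::Monotonic));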
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:887
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:756
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:746
llvm::CanonicalLoopInfo::invalidate
void invalidate()
Invalidate this loop.
Definition: OMPIRBuilder.cpp:3417
llvm::MCID::Branch
@ Branch
Definition: MCInstrDesc.h:156
llvm::Module::getContext
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:261
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:939
llvm::OpenMPIRBuilder::createCancel
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
Definition: OMPIRBuilder.cpp:419
llvm::omp::GV
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
Definition: OMPGridValues.h:57
PassManager.h
OptimisticAttributes
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:776
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:177
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1288
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1151
llvm::CanonicalLoopInfo::getHeader
BasicBlock * getHeader() const
The header is the entry for each iteration.
Definition: OMPIRBuilder.h:1446
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:296
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:845
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
UnrollThresholdFactor
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
llvm::omp::OMP_TGT_EXEC_MODE_SPMD
@ OMP_TGT_EXEC_MODE_SPMD
Definition: OMPConstants.h:137
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
getKmpcForDynamicInitForType
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
Definition: OMPIRBuilder.cpp:1579
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::MDNode::replaceOperandWith
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:877
llvm::OpenMPIRBuilder::createTargetDeinit
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
Create a runtime call for kmpc_target_deinit.
Definition: OMPIRBuilder.cpp:2816
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::OpenMPIRBuilder::ReductionInfo
Information about an OpenMP reduction.
Definition: OMPIRBuilder.h:548
llvm::AllocaInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:124
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::OpenMPIRBuilder::initialize
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
Definition: OMPIRBuilder.cpp:157
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:217
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::CanonicalLoopInfo::getLatch
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:1469
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:369
ModuleUtils.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:440
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::remarks::Type::Failure
@ Failure
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
getFreshReductionFunc
Function * getFreshReductionFunc(Module &M)
Create a function with a unique name and a "void (i8*, i8*)" signature in the given module and return...
Definition: OMPIRBuilder.cpp:1058
llvm::AtomicRMWInst::FAdd
@ FAdd
*p = old + v
Definition: Instructions.h:763
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::OpenMPIRBuilder::emitBarrierImpl
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
Definition: OMPIRBuilder.cpp:372
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::PHINode
Definition: Instructions.h:2648
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:325
llvm::Function::removeFromParent
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:361
llvm::SmallVectorImpl< uint64_t >
llvm::AtomicRMWInst::BAD_BINOP
@ BAD_BINOP
Definition: Instructions.h:770
llvm::CodeExtractor::isEligible
bool isEligible() const
Test whether this code extractor is eligible.
Definition: CodeExtractor.cpp:620
llvm::CanonicalLoopInfo::getTripCount
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:1493
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::PassInstrumentationAnalysis
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Definition: PassManager.h:603
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:313
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:454
llvm::OpenMPIRBuilder::emitFlush
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
Definition: OMPIRBuilder.cpp:872
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1469
llvm::OpenMPIRBuilder::OutlineInfo::collectBlocks
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Definition: OMPIRBuilder.cpp:3300
llvm::OpenMPIRBuilder::MapperAllocas
Definition: OMPIRBuilder.h:798
llvm::OpenMPIRBuilder::applyStaticWorkshareLoop
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1477
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4724
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3227
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4294
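For example, dispatching on a hypothetical i32 selector Sel with blocks DefaultBB, Case0BB and Case1BB created elsewhere:
  SwitchInst *SI = Builder.CreateSwitch(Sel, DefaultBB, /*NumCases=*/2);
  SI->addCase(Builder.getInt32(0), Case0BB);
  SI->addCase(Builder.getInt32(1), Case1BB);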
llvm::cl::desc
Definition: CommandLine.h:412
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:2396
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:758
llvm::OpenMPIRBuilder::createCopyinClauseBlocks
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
Definition: OMPIRBuilder.cpp:2664
llvm::OpenMPIRBuilder::createCopyPrivate
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
Definition: OMPIRBuilder.cpp:2375
llvm::SetVector< Value * >
llvm::omp::OMPScheduleType
OMPScheduleType
Definition: OMPConstants.h:113
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
Definition: OMPIRBuilder.cpp:92
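A hedged sketch of declaring and calling one of the runtime entry points; Ident and ThreadId are assumed to have been materialized earlier by the caller:
  FunctionCallee Barrier =
      OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_barrier);
  Builder.CreateCall(Barrier, {Ident, ThreadId});   // __kmpc_barrier(ident_t *, i32)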
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:839
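A small sketch, assuming BB, SplitPt and DT exist; passing the dominator tree is optional but keeps that analysis up to date:
  BasicBlock *Tail = SplitBlock(BB, SplitPt, &DT, /*LI=*/nullptr,
                                /*MSSAU=*/nullptr, "split.tail");
  // BB now ends in an unconditional branch to Tail, which holds SplitPt onward.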
Value.h
llvm::OpenMPIRBuilder::ReductionInfo::AtomicReductionGen
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Definition: OMPIRBuilder.h:575
TargetRegistry.h
llvm::CanonicalLoopInfo::getBody
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:1461
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:674
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:521
getKmpcForStaticInitForType
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Definition: OMPIRBuilder.cpp:1454
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:671
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:152
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::Target::createTargetMachine
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM=None, CodeGenOpt::Level OL=CodeGenOpt::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Definition: TargetRegistry.h:449
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::OpenMPIRBuilder::LocationDescription::IP
InsertPointTy IP
Definition: OMPIRBuilder.h:157
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:754