1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
12 ///
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Triple.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/DebugInfo.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/MDBuilder.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/IR/Value.h"
30 #include "llvm/Support/Error.h"
39 
40 #include <sstream>
41 
42 #define DEBUG_TYPE "openmp-ir-builder"
43 
44 using namespace llvm;
45 using namespace omp;
46 
47 static cl::opt<bool>
48  OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
49  cl::desc("Use optimistic attributes describing "
50  "'as-if' properties of runtime calls."),
51  cl::init(false));
52 
54  "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
55  cl::desc("Factor for the unroll threshold to account for code "
56  "simplifications still taking place"),
57  cl::init(1.5));
58 
60  LLVMContext &Ctx = Fn.getContext();
61 
62  // Get the function's current attributes.
63  auto Attrs = Fn.getAttributes();
64  auto FnAttrs = Attrs.getFnAttrs();
65  auto RetAttrs = Attrs.getRetAttrs();
66  SmallVector<AttributeSet, 4> ArgAttrs;
67  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
68  ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
69 
70 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
71 #include "llvm/Frontend/OpenMP/OMPKinds.def"
72 
73  // Add attributes to the function declaration.
74  switch (FnID) {
75 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
76  case Enum: \
77  FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
78  RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
79  for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
80  ArgAttrs[ArgNo] = \
81  ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
82  Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
83  break;
84 #include "llvm/Frontend/OpenMP/OMPKinds.def"
85  default:
86  // Attributes are optional.
87  break;
88  }
89 }
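// Illustrative expansion (entry and attribute-set names are hypothetical, not
// quoted from OMPKinds.def): an entry of the shape
//   OMP_RTL_ATTRS(OMPRTL___kmpc_barrier, FnAttrsExample, AttributeSet(),
//                 ParamAttrs(ReadOnlyPtrAttrsExample))
// becomes a case that merges the function-level set into FnAttrs, the return
// set into RetAttrs, and each parameter set into the matching ArgAttrs slot
// before re-installing the combined AttributeList on the declaration.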
90 
93  FunctionType *FnTy = nullptr;
94  Function *Fn = nullptr;
95 
96  // Try to find the declaration in the module first.
97  switch (FnID) {
98 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
99  case Enum: \
100  FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
101  IsVarArg); \
102  Fn = M.getFunction(Str); \
103  break;
104 #include "llvm/Frontend/OpenMP/OMPKinds.def"
105  }
106 
107  if (!Fn) {
108  // Create a new declaration if we need one.
109  switch (FnID) {
110 #define OMP_RTL(Enum, Str, ...) \
111  case Enum: \
112  Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
113  break;
114 #include "llvm/Frontend/OpenMP/OMPKinds.def"
115  }
116 
117  // Add information if the runtime function takes a callback function
118  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
119  if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
120  LLVMContext &Ctx = Fn->getContext();
121  MDBuilder MDB(Ctx);
122  // Annotate the callback behavior of the runtime function:
123  // - The callback callee is argument number 2 (microtask).
124  // - The first two arguments of the callback callee are unknown (-1).
125  // - All variadic arguments to the runtime function are passed to the
126  // callback callee.
127  Fn->addMetadata(
128  LLVMContext::MD_callback,
130  2, {-1, -1}, /* VarArgsArePassed */ true)}));
131  }
132  }
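// With this annotation in place, the declaration of e.g. __kmpc_fork_call
// roughly looks like (sketch):
//   declare !callback !0 void @__kmpc_fork_call(%struct.ident_t*, i32,
//                                               void (i32*, i32*, ...)*, ...)
//   !0 = !{!1}
//   !1 = !{i64 2, i64 -1, i64 -1, i1 true}
// i.e. argument 2 is the callback callee, its first two parameters are
// unknown, and the variadic arguments are forwarded to it.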
133 
134  LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
135  << " with type " << *Fn->getFunctionType() << "\n");
136  addAttributes(FnID, *Fn);
137 
138  } else {
139  LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
140  << " with type " << *Fn->getFunctionType() << "\n");
141  }
142 
143  assert(Fn && "Failed to create OpenMP runtime function");
144 
145  // Cast the function to the expected type if necessary
146  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
147  return {FnTy, C};
148 }
149 
151  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
152  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
153  assert(Fn && "Failed to create OpenMP runtime function pointer");
154  return Fn;
155 }
156 
157 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
158 
159 void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
160  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
161  SmallVector<BasicBlock *, 32> Blocks;
162  SmallVector<OutlineInfo, 16> DeferredOutlines;
163  for (OutlineInfo &OI : OutlineInfos) {
164  // Skip functions that have not been finalized yet; this may happen with
165  // nested function generation.
166  if (Fn && OI.getFunction() != Fn) {
167  DeferredOutlines.push_back(OI);
168  continue;
169  }
170 
171  ParallelRegionBlockSet.clear();
172  Blocks.clear();
173  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
174 
175  Function *OuterFn = OI.getFunction();
176  CodeExtractorAnalysisCache CEAC(*OuterFn);
177  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
178  /* AggregateArgs */ false,
179  /* BlockFrequencyInfo */ nullptr,
180  /* BranchProbabilityInfo */ nullptr,
181  /* AssumptionCache */ nullptr,
182  /* AllowVarArgs */ true,
183  /* AllowAlloca */ true,
184  /* Suffix */ ".omp_par");
185 
186  LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
187  LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
188  << " Exit: " << OI.ExitBB->getName() << "\n");
189  assert(Extractor.isEligible() &&
190  "Expected OpenMP outlining to be possible!");
191 
192  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
193 
194  LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
195  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
196  assert(OutlinedFn->getReturnType()->isVoidTy() &&
197  "OpenMP outlined functions should not return a value!");
198 
199  // For compatibility with the clang CG we move the outlined function after
200  // the one with the parallel region.
201  OutlinedFn->removeFromParent();
202  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
203 
204  // Remove the artificial entry introduced by the extractor right away; we
205  // made our own entry block after all.
206  {
207  BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
208  assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
209  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
210  if (AllowExtractorSinking) {
211  // Move instructions from the to-be-deleted ArtificialEntry to the entry
212  // basic block of the parallel region. CodeExtractor may have sunk
213  // allocas/bitcasts for values that are solely used in the outlined
214  // region and do not escape.
215  assert(!ArtificialEntry.empty() &&
216  "Expected instructions to sink in the outlined region");
217  for (BasicBlock::iterator It = ArtificialEntry.begin(),
218  End = ArtificialEntry.end();
219  It != End;) {
220  Instruction &I = *It;
221  It++;
222 
223  if (I.isTerminator())
224  continue;
225 
226  I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
227  }
228  }
229  OI.EntryBB->moveBefore(&ArtificialEntry);
230  ArtificialEntry.eraseFromParent();
231  }
232  assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
233  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
234 
235  // Run a user callback, e.g. to add attributes.
236  if (OI.PostOutlineCB)
237  OI.PostOutlineCB(*OutlinedFn);
238  }
239 
240  // Remove work items that have been completed.
241  OutlineInfos = std::move(DeferredOutlines);
242 }
243 
245  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
246 }
247 
249  IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
250  auto *GV = new GlobalVariable(
251  M, I32Ty,
252  /* isConstant = */ true, GlobalValue::PrivateLinkage,
253  ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind");
254 
255  llvm::appendToUsed(M, {GV});
256 
257  return GV;
258 }
259 
261  IdentFlag LocFlags,
262  unsigned Reserve2Flags) {
263  // Enable "C-mode".
264  LocFlags |= OMP_IDENT_FLAG_KMPC;
265 
266  Value *&Ident =
267  IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
268  if (!Ident) {
269  Constant *I32Null = ConstantInt::getNullValue(Int32);
270  Constant *IdentData[] = {
271  I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
272  ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
273  Constant *Initializer =
274  ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
275 
276  // Look for an existing encoding of the location + flags. This is not needed
277  // but minimizes the difference to the existing solution while we transition.
278  for (GlobalVariable &GV : M.getGlobalList())
279  if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
280  if (GV.getInitializer() == Initializer)
281  return Ident = &GV;
282 
283  auto *GV = new GlobalVariable(M, OpenMPIRBuilder::Ident,
284  /* isConstant = */ true,
285  GlobalValue::PrivateLinkage, Initializer);
286  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
287  GV->setAlignment(Align(8));
288  Ident = GV;
289  }
290  return Builder.CreatePointerCast(Ident, IdentPtr);
291 }
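// The global created here typically looks like (values illustrative):
//   @0 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 0, i8* getelementptr (... @.str ...) },
//            align 8
// where the second field carries OMP_IDENT_FLAG_KMPC (2) plus any additional
// location flags that were requested.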
292 
294  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
295  if (!SrcLocStr) {
296  Constant *Initializer =
297  ConstantDataArray::getString(M.getContext(), LocStr);
298 
299  // Look for an existing encoding of the location. This is not needed but
300  // minimizes the difference to the existing solution while we transition.
301  for (GlobalVariable &GV : M.getGlobalList())
302  if (GV.isConstant() && GV.hasInitializer() &&
303  GV.getInitializer() == Initializer)
304  return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
305 
306  SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
307  /* AddressSpace */ 0, &M);
308  }
309  return SrcLocStr;
310 }
311 
313  StringRef FileName,
314  unsigned Line,
315  unsigned Column) {
316  SmallString<128> Buffer;
317  Buffer.push_back(';');
318  Buffer.append(FileName);
319  Buffer.push_back(';');
320  Buffer.append(FunctionName);
321  Buffer.push_back(';');
322  Buffer.append(std::to_string(Line));
323  Buffer.push_back(';');
324  Buffer.append(std::to_string(Column));
325  Buffer.push_back(';');
326  Buffer.push_back(';');
327  return getOrCreateSrcLocStr(Buffer.str());
328 }
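// The buffer assembled above follows the ";file;function;line;column;;"
// convention used by the OpenMP runtime, e.g. (illustrative):
//   ";example.c;foo;4;7;;"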
329 
331  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
332 }
333 
335  DILocation *DIL = DL.get();
336  if (!DIL)
337  return getOrCreateDefaultSrcLocStr();
338  StringRef FileName = M.getName();
339  if (DIFile *DIF = DIL->getFile())
340  if (Optional<StringRef> Source = DIF->getSource())
341  FileName = *Source;
342  StringRef Function = DIL->getScope()->getSubprogram()->getName();
343  if (Function.empty() && F)
344  Function = F->getName();
345  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
346  DIL->getColumn());
347 }
348 
349 Constant *
351  return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
352 }
353 
355  return Builder.CreateCall(
356  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
357  "omp_global_thread_num");
358 }
359 
362  bool ForceSimpleCall, bool CheckCancelFlag) {
363  if (!updateToLocation(Loc))
364  return Loc.IP;
365  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
366 }
367 
370  bool ForceSimpleCall, bool CheckCancelFlag) {
371  // Build call __kmpc_cancel_barrier(loc, thread_id) or
372  // __kmpc_barrier(loc, thread_id);
373 
374  IdentFlag BarrierLocFlags;
375  switch (Kind) {
376  case OMPD_for:
377  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
378  break;
379  case OMPD_sections:
380  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
381  break;
382  case OMPD_single:
383  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
384  break;
385  case OMPD_barrier:
386  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
387  break;
388  default:
389  BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
390  break;
391  }
392 
393  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
394  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
395  getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
396 
397  // If we are in a cancellable parallel region, barriers are cancellation
398  // points.
399  // TODO: Check why we would force simple calls or ignore the cancel flag.
400  bool UseCancelBarrier =
401  !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
402 
403  Value *Result =
404  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
405  UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
406  : OMPRTL___kmpc_barrier),
407  Args);
408 
409  if (UseCancelBarrier && CheckCancelFlag)
410  emitCancelationCheckImpl(Result, OMPD_parallel);
411 
412  return Builder.saveIP();
413 }
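// The call emitted above is roughly (sketch):
//   %r = call i32 @__kmpc_cancel_barrier(%struct.ident_t* %loc, i32 %gtid)
// when the enclosing parallel region is cancellable, or
//   call void @__kmpc_barrier(%struct.ident_t* %loc, i32 %gtid)
// otherwise; only the cancellable form has its result checked via
// emitCancelationCheckImpl.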
414 
417  Value *IfCondition,
418  omp::Directive CanceledDirective) {
419  if (!updateToLocation(Loc))
420  return Loc.IP;
421 
422  // LLVM utilities prefer blocks that have terminators.
423  auto *UI = Builder.CreateUnreachable();
424 
425  Instruction *ThenTI = UI, *ElseTI = nullptr;
426  if (IfCondition)
427  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
428  Builder.SetInsertPoint(ThenTI);
429 
430  Value *CancelKind = nullptr;
431  switch (CanceledDirective) {
432 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
433  case DirectiveEnum: \
434  CancelKind = Builder.getInt32(Value); \
435  break;
436 #include "llvm/Frontend/OpenMP/OMPKinds.def"
437  default:
438  llvm_unreachable("Unknown cancel kind!");
439  }
440 
441  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
442  Value *Ident = getOrCreateIdent(SrcLocStr);
443  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
444  Value *Result = Builder.CreateCall(
445  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
446  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
447  if (CanceledDirective == OMPD_parallel) {
449  Builder.restoreIP(IP);
450  createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
451  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
452  /* CheckCancelFlag */ false);
453  }
454  };
455 
456  // The actual cancel logic is shared with others, e.g., cancel_barriers.
457  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
458 
459  // Update the insertion point and remove the terminator we introduced.
460  Builder.SetInsertPoint(UI->getParent());
461  UI->eraseFromParent();
462 
463  return Builder.saveIP();
464 }
465 
467  omp::Directive CanceledDirective,
468  FinalizeCallbackTy ExitCB) {
469  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
470  "Unexpected cancellation!");
471 
472  // For a cancel barrier we create two new blocks.
473  BasicBlock *BB = Builder.GetInsertBlock();
474  BasicBlock *NonCancellationBlock;
475  if (Builder.GetInsertPoint() == BB->end()) {
476  // TODO: This branch will not be needed once we have moved to the
477  // OpenMPIRBuilder codegen completely.
478  NonCancellationBlock = BasicBlock::Create(
479  BB->getContext(), BB->getName() + ".cont", BB->getParent());
480  } else {
481  NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
482  BB->getTerminator()->eraseFromParent();
483  Builder.SetInsertPoint(BB);
484  }
485  BasicBlock *CancellationBlock = BasicBlock::Create(
486  BB->getContext(), BB->getName() + ".cncl", BB->getParent());
487 
488  // Jump to them based on the return value.
489  Value *Cmp = Builder.CreateIsNull(CancelFlag);
490  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
491  /* TODO weight */ nullptr, nullptr);
492 
493  // From the cancellation block we finalize all variables and go to the
494  // post finalization block that is known to the FiniCB callback.
495  Builder.SetInsertPoint(CancellationBlock);
496  if (ExitCB)
497  ExitCB(Builder.saveIP());
498  auto &FI = FinalizationStack.back();
499  FI.FiniCB(Builder.saveIP());
500 
501  // The continuation block is where code generation continues.
502  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
503 }
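// The control flow materialized above, roughly:
//   %cmp = icmp eq i32 %cancel.flag, 0
//   br i1 %cmp, label %bb.cont, label %bb.cncl
// bb.cncl:                    ; finalize and branch to the post-fini block
// bb.cont:                    ; normal code generation continues here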
504 
506  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
507  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
508  FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
509  omp::ProcBindKind ProcBind, bool IsCancellable) {
510  if (!updateToLocation(Loc))
511  return Loc.IP;
512 
513  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
514  Value *Ident = getOrCreateIdent(SrcLocStr);
515  Value *ThreadID = getOrCreateThreadID(Ident);
516 
517  if (NumThreads) {
518  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
519  Value *Args[] = {
520  Ident, ThreadID,
521  Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
522  Builder.CreateCall(
523  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
524  }
525 
526  if (ProcBind != OMP_PROC_BIND_default) {
527  // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
528  Value *Args[] = {
529  Ident, ThreadID,
530  ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
531  Builder.CreateCall(
532  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
533  }
534 
535  BasicBlock *InsertBB = Builder.GetInsertBlock();
536  Function *OuterFn = InsertBB->getParent();
537 
538  // Save the outer alloca block because the insertion iterator may get
539  // invalidated and we still need this later.
540  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
541 
542  // Vector to remember instructions we used only during the modeling but which
543  // we want to delete at the end.
544  SmallVector<Instruction *, 4> ToBeDeleted;
545 
546  // Change the location to the outer alloca insertion point to create and
547  // initialize the allocas we pass into the parallel region.
548  Builder.restoreIP(OuterAllocaIP);
549  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
550  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
551 
552  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
553  // program; otherwise we only need them for modeling purposes to get the
554  // associated arguments in the outlined function. In the former case,
555  // initialize the allocas properly; in the latter case, delete them later.
556  if (IfCondition) {
557  Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
558  Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
559  } else {
560  ToBeDeleted.push_back(TIDAddr);
561  ToBeDeleted.push_back(ZeroAddr);
562  }
563 
564  // Create an artificial insertion point that will also ensure the blocks we
565  // are about to split are not degenerate.
566  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
567 
568  Instruction *ThenTI = UI, *ElseTI = nullptr;
569  if (IfCondition)
570  SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
571 
572  BasicBlock *ThenBB = ThenTI->getParent();
573  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
574  BasicBlock *PRegBodyBB =
575  PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
576  BasicBlock *PRegPreFiniBB =
577  PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
578  BasicBlock *PRegExitBB =
579  PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
580 
581  auto FiniCBWrapper = [&](InsertPointTy IP) {
582  // Hide "open-ended" blocks from the given FiniCB by setting the right jump
583  // target to the region exit block.
584  if (IP.getBlock()->end() == IP.getPoint()) {
586  Builder.restoreIP(IP);
587  Instruction *I = Builder.CreateBr(PRegExitBB);
588  IP = InsertPointTy(I->getParent(), I->getIterator());
589  }
590  assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
591  IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
592  "Unexpected insertion point for finalization call!");
593  return FiniCB(IP);
594  };
595 
596  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
597 
598  // Generate the privatization allocas in the block that will become the entry
599  // of the outlined function.
600  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
601  InsertPointTy InnerAllocaIP = Builder.saveIP();
602 
603  AllocaInst *PrivTIDAddr =
604  Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
605  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");
606 
607  // Add some fake uses for OpenMP provided arguments.
608  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
609  Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
610  "zero.addr.use");
611  ToBeDeleted.push_back(ZeroAddrUse);
612 
613  // ThenBB
614  // |
615  // V
616  // PRegionEntryBB <- Privatization allocas are placed here.
617  // |
618  // V
619  // PRegionBodyBB <- BodyGen is invoked here.
620  // |
621  // V
622  // PRegPreFiniBB <- The block we will start finalization from.
623  // |
624  // V
625  // PRegionExitBB <- A common exit to simplify block collection.
626  //
627 
628  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
629 
630  // Let the caller create the body.
631  assert(BodyGenCB && "Expected body generation callback!");
632  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
633  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);
634 
635  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
636 
637  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
638  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
639  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
640  llvm::LLVMContext &Ctx = F->getContext();
641  MDBuilder MDB(Ctx);
642  // Annotate the callback behavior of the __kmpc_fork_call:
643  // - The callback callee is argument number 2 (microtask).
644  // - The first two arguments of the callback callee are unknown (-1).
645  // - All variadic arguments to the __kmpc_fork_call are passed to the
646  // callback callee.
647  F->addMetadata(
648  llvm::LLVMContext::MD_callback,
650  Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
651  /* VarArgsArePassed */ true)}));
652  }
653  }
654 
655  OutlineInfo OI;
656  OI.PostOutlineCB = [=](Function &OutlinedFn) {
657  // Add some known attributes.
658  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
659  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
660  OutlinedFn.addFnAttr(Attribute::NoUnwind);
661  OutlinedFn.addFnAttr(Attribute::NoRecurse);
662 
663  assert(OutlinedFn.arg_size() >= 2 &&
664  "Expected at least tid and bounded tid as arguments");
665  unsigned NumCapturedVars =
666  OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
667 
668  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
669  CI->getParent()->setName("omp_parallel");
670  Builder.SetInsertPoint(CI);
671 
672  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
673  Value *ForkCallArgs[] = {
674  Ident, Builder.getInt32(NumCapturedVars),
675  Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
676 
677  SmallVector<Value *, 16> RealArgs;
678  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
679  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
680 
681  Builder.CreateCall(RTLFn, RealArgs);
682 
683  LLVM_DEBUG(dbgs() << "With fork_call placed: "
684  << *Builder.GetInsertBlock()->getParent() << "\n");
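// For a region capturing two values the resulting call is roughly (sketch):
//   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
//       @__kmpc_fork_call(%struct.ident_t* %ident, i32 2,
//                         void (i32*, i32*, ...)* %outlined.casted,
//                         i32* %a.reloaded, float* %b)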
685 
686  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
687 
688  // Initialize the local TID stack location with the argument value.
689  Builder.SetInsertPoint(PrivTID);
690  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
691  Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
692 
693  // If no "if" clause was present we do not need the call created during
694  // outlining, otherwise we reuse it in the serialized parallel region.
695  if (!ElseTI) {
696  CI->eraseFromParent();
697  } else {
698 
699  // If an "if" clause was present we are now generating the serialized
700  // version into the "else" branch.
701  Builder.SetInsertPoint(ElseTI);
702 
703  // Build calls __kmpc_serialized_parallel(&Ident, GTid);
704  Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
705  Builder.CreateCall(
706  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
707  SerializedParallelCallArgs);
708 
709  // OutlinedFn(&GTid, &zero, CapturedStruct);
710  CI->removeFromParent();
711  Builder.Insert(CI);
712 
713  // __kmpc_end_serialized_parallel(&Ident, GTid);
714  Value *EndArgs[] = {Ident, ThreadID};
715  Builder.CreateCall(
716  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
717  EndArgs);
718 
719  LLVM_DEBUG(dbgs() << "With serialized parallel region: "
720  << *Builder.GetInsertBlock()->getParent() << "\n");
721  }
722 
723  for (Instruction *I : ToBeDeleted)
724  I->eraseFromParent();
725  };
726 
727  // Adjust the finalization stack, verify the adjustment, and call the
728  // finalize function one last time to finalize values between the pre-fini
729  // block and the exit block if we left the parallel region "the normal way".
730  auto FiniInfo = FinalizationStack.pop_back_val();
731  (void)FiniInfo;
732  assert(FiniInfo.DK == OMPD_parallel &&
733  "Unexpected finalization stack state!");
734 
735  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
736 
737  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
738  FiniCB(PreFiniIP);
739 
740  OI.EntryBB = PRegEntryBB;
741  OI.ExitBB = PRegExitBB;
742 
743  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
744  SmallVector<BasicBlock *, 32> Blocks;
745  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
746 
747  // Ensure a single exit node for the outlined region by creating one.
748  // We might have multiple incoming edges to the exit now due to finalizations,
749  // e.g., cancel calls that cause the control flow to leave the region.
750  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
751  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
752  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
753  Blocks.push_back(PRegOutlinedExitBB);
754 
755  CodeExtractorAnalysisCache CEAC(*OuterFn);
756  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
757  /* AggregateArgs */ false,
758  /* BlockFrequencyInfo */ nullptr,
759  /* BranchProbabilityInfo */ nullptr,
760  /* AssumptionCache */ nullptr,
761  /* AllowVarArgs */ true,
762  /* AllowAlloca */ true,
763  /* Suffix */ ".omp_par");
764 
765  // Find the inputs to and outputs from the code region.
766  BasicBlock *CommonExit = nullptr;
767  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
768  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
769  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
770 
771  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
772 
773  FunctionCallee TIDRTLFn =
774  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
775 
776  auto PrivHelper = [&](Value &V) {
777  if (&V == TIDAddr || &V == ZeroAddr)
778  return;
779 
780  SetVector<Use *> Uses;
781  for (Use &U : V.uses())
782  if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
783  if (ParallelRegionBlockSet.count(UserI->getParent()))
784  Uses.insert(&U);
785 
786  // __kmpc_fork_call expects extra arguments as pointers. If the input
787  // already has a pointer type, everything is fine. Otherwise, store the
788  // value onto the stack and load it back inside the to-be-outlined region.
789  // This ensures that only the pointer is passed to the function.
790  // FIXME: if there are more than 15 trailing arguments, they must be
791  // additionally packed in a struct.
792  Value *Inner = &V;
793  if (!V.getType()->isPointerTy()) {
795  LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
796 
797  Builder.restoreIP(OuterAllocaIP);
798  Value *Ptr =
799  Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
800 
801  // Store to the stack at the end of the block that currently branches to the
802  // entry block of the to-be-outlined region.
803  Builder.SetInsertPoint(InsertBB,
804  InsertBB->getTerminator()->getIterator());
805  Builder.CreateStore(&V, Ptr);
806 
807  // Load back next to allocations in the to-be-outlined region.
808  Builder.restoreIP(InnerAllocaIP);
809  Inner = Builder.CreateLoad(V.getType(), Ptr);
810  }
811 
812  Value *ReplacementValue = nullptr;
813  CallInst *CI = dyn_cast<CallInst>(&V);
814  if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
815  ReplacementValue = PrivTID;
816  } else {
817  Builder.restoreIP(
818  PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
819  assert(ReplacementValue &&
820  "Expected copy/create callback to set replacement value!");
821  if (ReplacementValue == &V)
822  return;
823  }
824 
825  for (Use *UPtr : Uses)
826  UPtr->set(ReplacementValue);
827  };
828 
829  // Reset the inner alloca insertion as it will be used for loading the values
830  // wrapped into pointers before passing them into the to-be-outlined region.
831  // Configure it to insert immediately after the fake use of zero address so
832  // that they are available in the generated body and so that the
833  // OpenMP-related values (thread ID and zero address pointers) remain leading
834  // in the argument list.
835  InnerAllocaIP = IRBuilder<>::InsertPoint(
836  ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
837 
838  // Reset the outer alloca insertion point to the entry of the relevant block
839  // in case it was invalidated.
840  OuterAllocaIP = IRBuilder<>::InsertPoint(
841  OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
842 
843  for (Value *Input : Inputs) {
844  LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
845  PrivHelper(*Input);
846  }
847  LLVM_DEBUG({
848  for (Value *Output : Outputs)
849  LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
850  });
851  assert(Outputs.empty() &&
852  "OpenMP outlining should not produce live-out values!");
853 
854  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
855  LLVM_DEBUG({
856  for (auto *BB : Blocks)
857  dbgs() << " PBR: " << BB->getName() << "\n";
858  });
859 
860  // Register the outlined info.
861  addOutlineInfo(std::move(OI));
862 
863  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
864  UI->eraseFromParent();
865 
866  return AfterIP;
867 }
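// Typical use from a frontend (illustrative sketch; callback bodies elided
// and variable names are placeholders):
//   OpenMPIRBuilder OMPBuilder(M);
//   OMPBuilder.initialize();
//   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
//                        BasicBlock &ContinuationBB) { /* emit region body */ };
//   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
//                     Value &Orig, Value &Inner, Value *&ReplVal) {
//     ReplVal = &Inner; /* or emit a private copy */ return CodeGenIP;
//   };
//   auto FiniCB = [&](InsertPointTy CodeGenIP) { /* emit finalization */ };
//   Builder.restoreIP(OMPBuilder.createParallel(
//       Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, /*IfCondition=*/nullptr,
//       /*NumThreads=*/nullptr, OMP_PROC_BIND_default, /*IsCancellable=*/false));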
868 
870  // Build call void __kmpc_flush(ident_t *loc)
871  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
872  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
873 
874  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
875 }
876 
878  if (!updateToLocation(Loc))
879  return;
880  emitFlush(Loc);
881 }
882 
884  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
885  // global_tid);
886  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
887  Value *Ident = getOrCreateIdent(SrcLocStr);
888  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
889 
890  // Ignore return result until untied tasks are supported.
891  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
892  Args);
893 }
894 
896  if (!updateToLocation(Loc))
897  return;
898  emitTaskwaitImpl(Loc);
899 }
900 
902  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
903  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
904  Value *Ident = getOrCreateIdent(SrcLocStr);
905  Constant *I32Null = ConstantInt::getNullValue(Int32);
906  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
907 
908  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
909  Args);
910 }
911 
913  if (!updateToLocation(Loc))
914  return;
915  emitTaskyieldImpl(Loc);
916 }
917 
919  const LocationDescription &Loc, InsertPointTy AllocaIP,
921  FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
922  if (!updateToLocation(Loc))
923  return Loc.IP;
924 
925  auto FiniCBWrapper = [&](InsertPointTy IP) {
926  if (IP.getBlock()->end() != IP.getPoint())
927  return FiniCB(IP);
928  // This must be done; otherwise any nested constructs using FinalizeOMPRegion
929  // will fail because that function requires the Finalization Basic Block to
930  // have a terminator, which is already removed by EmitOMPRegionBody.
931  // IP is currently at the cancellation block.
932  // We need to backtrack to the condition block to fetch
933  // the exit block and create a branch from the cancellation
934  // block to the exit block.
936  Builder.restoreIP(IP);
937  auto *CaseBB = IP.getBlock()->getSinglePredecessor();
938  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
939  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
940  Instruction *I = Builder.CreateBr(ExitBB);
941  IP = InsertPointTy(I->getParent(), I->getIterator());
942  return FiniCB(IP);
943  };
944 
945  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
946 
947  // Each section is emitted as a switch case.
948  // Each finalization callback is handled from clang.EmitOMPSectionDirective()
949  // -> OMP.createSection(), which generates the IR for each section.
950  // Iterate through all sections and emit a switch construct:
951  // switch (IV) {
952  // case 0:
953  // <SectionStmt[0]>;
954  // break;
955  // ...
956  // case <NumSection> - 1:
957  // <SectionStmt[<NumSection> - 1]>;
958  // break;
959  // }
960  // ...
961  // section_loop.after:
962  // <FiniCB>;
963  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
964  auto *CurFn = CodeGenIP.getBlock()->getParent();
965  auto *ForIncBB = CodeGenIP.getBlock()->getSingleSuccessor();
966  auto *ForExitBB = CodeGenIP.getBlock()
967  ->getSinglePredecessor()
968  ->getTerminator()
969  ->getSuccessor(1);
970  SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, ForIncBB);
971  Builder.restoreIP(CodeGenIP);
972  unsigned CaseNumber = 0;
973  for (auto SectionCB : SectionCBs) {
974  auto *CaseBB = BasicBlock::Create(M.getContext(),
975  "omp_section_loop.body.case", CurFn);
976  SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
977  Builder.SetInsertPoint(CaseBB);
978  SectionCB(InsertPointTy(), Builder.saveIP(), *ForExitBB);
979  CaseNumber++;
980  }
981  // Remove the existing terminator from the body BB since there can be no
982  // terminator after the switch/case.
983  CodeGenIP.getBlock()->getTerminator()->eraseFromParent();
984  };
985  // Loop body ends here.
986  // LowerBound, UpperBound, and Stride for createCanonicalLoop.
987  Type *I32Ty = Type::getInt32Ty(M.getContext());
988  Value *LB = ConstantInt::get(I32Ty, 0);
989  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
990  Value *ST = ConstantInt::get(I32Ty, 1);
991  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
992  Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
993  InsertPointTy AfterIP =
994  applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
995  BasicBlock *LoopAfterBB = AfterIP.getBlock();
996  Instruction *SplitPos = LoopAfterBB->getTerminator();
997  if (!isa_and_nonnull<BranchInst>(SplitPos))
998  SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
999  // ExitBB is placed after LoopAfterBB because LoopAfterBB is used for the
1000  // FinalizationCB, which requires a BB with a branch.
1001  BasicBlock *ExitBB =
1002  LoopAfterBB->splitBasicBlock(SplitPos, "omp_sections.end");
1003  SplitPos->eraseFromParent();
1004 
1005  // Apply the finalization callback in LoopAfterBB
1006  auto FiniInfo = FinalizationStack.pop_back_val();
1007  assert(FiniInfo.DK == OMPD_sections &&
1008  "Unexpected finalization stack state!");
1009  Builder.SetInsertPoint(LoopAfterBB->getTerminator());
1010  FiniInfo.FiniCB(Builder.saveIP());
1011  Builder.SetInsertPoint(ExitBB);
1012 
1013  return Builder.saveIP();
1014 }
1015 
1018  BodyGenCallbackTy BodyGenCB,
1019  FinalizeCallbackTy FiniCB) {
1020  if (!updateToLocation(Loc))
1021  return Loc.IP;
1022 
1023  auto FiniCBWrapper = [&](InsertPointTy IP) {
1024  if (IP.getBlock()->end() != IP.getPoint())
1025  return FiniCB(IP);
1026  // This must be done; otherwise any nested constructs using FinalizeOMPRegion
1027  // will fail because that function requires the Finalization Basic Block to
1028  // have a terminator, which is already removed by EmitOMPRegionBody.
1029  // IP is currently at the cancellation block.
1030  // We need to backtrack to the condition block to fetch
1031  // the exit block and create a branch from the cancellation
1032  // block to the exit block.
1034  Builder.restoreIP(IP);
1035  auto *CaseBB = Loc.IP.getBlock();
1036  auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1037  auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1038  Instruction *I = Builder.CreateBr(ExitBB);
1039  IP = InsertPointTy(I->getParent(), I->getIterator());
1040  return FiniCB(IP);
1041  };
1042 
1043  Directive OMPD = Directive::OMPD_sections;
1044  // Since we are using a finalization callback here, HasFinalize
1045  // and IsCancellable have to be true.
1046  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
1047  /*Conditional*/ false, /*hasFinalize*/ true,
1048  /*IsCancellable*/ true);
1049 }
1050 
1051 /// Create a function with a unique name and a "void (i8*, i8*)" signature in
1052 /// the given module and return it.
1054  Type *VoidTy = Type::getVoidTy(M.getContext());
1055  Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
1056  auto *FuncTy =
1057  FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
1059  M.getDataLayout().getDefaultGlobalsAddressSpace(),
1060  ".omp.reduction.func", &M);
1061 }
1062 
1064  const LocationDescription &Loc, InsertPointTy AllocaIP,
1065  ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
1066  for (const ReductionInfo &RI : ReductionInfos) {
1067  (void)RI;
1068  assert(RI.Variable && "expected non-null variable");
1069  assert(RI.PrivateVariable && "expected non-null private variable");
1070  assert(RI.ReductionGen && "expected non-null reduction generator callback");
1071  assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
1072  "expected variables and their private equivalents to have the same "
1073  "type");
1074  assert(RI.Variable->getType()->isPointerTy() &&
1075  "expected variables to be pointers");
1076  }
1077 
1078  if (!updateToLocation(Loc))
1079  return InsertPointTy();
1080 
1081  BasicBlock *InsertBlock = Loc.IP.getBlock();
1082  BasicBlock *ContinuationBlock =
1083  InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
1084  InsertBlock->getTerminator()->eraseFromParent();
1085 
1086  // Create and populate array of type-erased pointers to private reduction
1087  // values.
1088  unsigned NumReductions = ReductionInfos.size();
1089  Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
1090  Builder.restoreIP(AllocaIP);
1091  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
1092 
1093  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
1094 
1095  for (auto En : enumerate(ReductionInfos)) {
1096  unsigned Index = En.index();
1097  const ReductionInfo &RI = En.value();
1098  Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
1099  RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
1100  Value *Casted =
1101  Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
1102  "private.red.var." + Twine(Index) + ".casted");
1103  Builder.CreateStore(Casted, RedArrayElemPtr);
1104  }
1105 
1106  // Emit a call to the runtime function that orchestrates the reduction.
1107  // Declare the reduction function in the process.
1108  Function *Func = Builder.GetInsertBlock()->getParent();
1109  Module *Module = Func->getParent();
1110  Value *RedArrayPtr =
1111  Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
1112  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1113  bool CanGenerateAtomic =
1114  llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
1115  return RI.AtomicReductionGen;
1116  });
1117  Value *Ident = getOrCreateIdent(
1118  SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1119  : IdentFlag(0));
1120  Value *ThreadId = getOrCreateThreadID(Ident);
1121  Constant *NumVariables = Builder.getInt32(NumReductions);
1122  const DataLayout &DL = Module->getDataLayout();
1123  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
1124  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
1125  Function *ReductionFunc = getFreshReductionFunc(*Module);
1126  Value *Lock = getOMPCriticalRegionLock(".reduction");
1127  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
1128  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1129  : RuntimeFunction::OMPRTL___kmpc_reduce);
1130  CallInst *ReduceCall =
1131  Builder.CreateCall(ReduceFunc,
1132  {Ident, ThreadId, NumVariables, RedArraySize,
1133  RedArrayPtr, ReductionFunc, Lock},
1134  "reduce");
1135 
1136  // Create final reduction entry blocks for the atomic and non-atomic case.
1137  // Emit IR that dispatches control flow to one of the blocks based on the
1138  // reduction supporting the atomic mode.
1139  BasicBlock *NonAtomicRedBlock =
1140  BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
1141  BasicBlock *AtomicRedBlock =
1142  BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
1143  SwitchInst *Switch =
1144  Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
1145  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
1146  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
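// The dispatch built here is roughly (sketch):
//   %r = call i32 @__kmpc_reduce(%struct.ident_t* %loc, i32 %tid, i32 %n,
//                                i64 %size, i8* %red.array.ptr,
//                                void (i8*, i8*)* @.omp.reduction.func,
//                                [8 x i32]* %lock)
//   switch i32 %r, label %reduce.finalize [
//     i32 1, label %reduce.switch.nonatomic
//     i32 2, label %reduce.switch.atomic
//   ]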
1147 
1148  // Populate the non-atomic reduction using the elementwise reduction function.
1149  // This loads the elements from the global and private variables and reduces
1150  // them before storing back the result to the global variable.
1151  Builder.SetInsertPoint(NonAtomicRedBlock);
1152  for (auto En : enumerate(ReductionInfos)) {
1153  const ReductionInfo &RI = En.value();
1154  Type *ValueType = RI.getElementType();
1155  Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
1156  "red.value." + Twine(En.index()));
1157  Value *PrivateRedValue =
1158  Builder.CreateLoad(ValueType, RI.PrivateVariable,
1159  "red.private.value." + Twine(En.index()));
1160  Value *Reduced;
1161  Builder.restoreIP(
1162  RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
1163  if (!Builder.GetInsertBlock())
1164  return InsertPointTy();
1165  Builder.CreateStore(Reduced, RI.Variable);
1166  }
1167  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
1168  IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1169  : RuntimeFunction::OMPRTL___kmpc_end_reduce);
1170  Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
1171  Builder.CreateBr(ContinuationBlock);
1172 
1173  // Populate the atomic reduction using the atomic elementwise reduction
1174  // function. There are no loads/stores here because they will be happening
1175  // inside the atomic elementwise reduction.
1176  Builder.SetInsertPoint(AtomicRedBlock);
1177  if (CanGenerateAtomic) {
1178  for (const ReductionInfo &RI : ReductionInfos) {
1179  Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
1180  RI.PrivateVariable));
1181  if (!Builder.GetInsertBlock())
1182  return InsertPointTy();
1183  }
1184  Builder.CreateBr(ContinuationBlock);
1185  } else {
1186  Builder.CreateUnreachable();
1187  }
1188 
1189  // Populate the outlined reduction function using the elementwise reduction
1190  // function. Partial values are extracted from the type-erased array of
1191  // pointers to private variables.
1192  BasicBlock *ReductionFuncBlock =
1193  BasicBlock::Create(Module->getContext(), "", ReductionFunc);
1194  Builder.SetInsertPoint(ReductionFuncBlock);
1195  Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
1196  RedArrayTy->getPointerTo());
1197  Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
1198  RedArrayTy->getPointerTo());
1199  for (auto En : enumerate(ReductionInfos)) {
1200  const ReductionInfo &RI = En.value();
1201  Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1202  RedArrayTy, LHSArrayPtr, 0, En.index());
1203  Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
1204  Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
1205  Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
1206  Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
1207  RedArrayTy, RHSArrayPtr, 0, En.index());
1208  Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
1209  Value *RHSPtr =
1210  Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
1211  Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
1212  Value *Reduced;
1213  Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
1214  if (!Builder.GetInsertBlock())
1215  return InsertPointTy();
1216  Builder.CreateStore(Reduced, LHSPtr);
1217  }
1218  Builder.CreateRetVoid();
1219 
1220  Builder.SetInsertPoint(ContinuationBlock);
1221  return Builder.saveIP();
1222 }
1223 
1226  BodyGenCallbackTy BodyGenCB,
1227  FinalizeCallbackTy FiniCB) {
1228 
1229  if (!updateToLocation(Loc))
1230  return Loc.IP;
1231 
1232  Directive OMPD = Directive::OMPD_master;
1233  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1234  Value *Ident = getOrCreateIdent(SrcLocStr);
1235  Value *ThreadId = getOrCreateThreadID(Ident);
1236  Value *Args[] = {Ident, ThreadId};
1237 
1238  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1239  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1240 
1241  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1242  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
1243 
1244  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1245  /*Conditional*/ true, /*hasFinalize*/ true);
1246 }
1247 
1250  BodyGenCallbackTy BodyGenCB,
1251  FinalizeCallbackTy FiniCB, Value *Filter) {
1252  if (!updateToLocation(Loc))
1253  return Loc.IP;
1254 
1255  Directive OMPD = Directive::OMPD_masked;
1256  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
1257  Value *Ident = getOrCreateIdent(SrcLocStr);
1258  Value *ThreadId = getOrCreateThreadID(Ident);
1259  Value *Args[] = {Ident, ThreadId, Filter};
1260  Value *ArgsEnd[] = {Ident, ThreadId};
1261 
1262  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1263  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
1264 
1265  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1266  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
1267 
1268  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1269  /*Conditional*/ true, /*hasFinalize*/ true);
1270 }
1271 
1273  DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
1274  BasicBlock *PostInsertBefore, const Twine &Name) {
1275  Module *M = F->getParent();
1276  LLVMContext &Ctx = M->getContext();
1277  Type *IndVarTy = TripCount->getType();
1278 
1279  // Create the basic block structure.
1280  BasicBlock *Preheader =
1281  BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
1282  BasicBlock *Header =
1283  BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
1284  BasicBlock *Cond =
1285  BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
1286  BasicBlock *Body =
1287  BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
1288  BasicBlock *Latch =
1289  BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
1290  BasicBlock *Exit =
1291  BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
1292  BasicBlock *After =
1293  BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
1294 
1295  // Use specified DebugLoc for new instructions.
1296  Builder.SetCurrentDebugLocation(DL);
1297 
1298  Builder.SetInsertPoint(Preheader);
1299  Builder.CreateBr(Header);
1300 
1301  Builder.SetInsertPoint(Header);
1302  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
1303  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
1304  Builder.CreateBr(Cond);
1305 
1306  Builder.SetInsertPoint(Cond);
1307  Value *Cmp =
1308  Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
1309  Builder.CreateCondBr(Cmp, Body, Exit);
1310 
1311  Builder.SetInsertPoint(Body);
1312  Builder.CreateBr(Latch);
1313 
1314  Builder.SetInsertPoint(Latch);
1315  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
1316  "omp_" + Name + ".next", /*HasNUW=*/true);
1317  Builder.CreateBr(Header);
1318  IndVarPHI->addIncoming(Next, Latch);
1319 
1320  Builder.SetInsertPoint(Exit);
1321  Builder.CreateBr(After);
1322 
1323  // Remember and return the canonical control flow.
1324  LoopInfos.emplace_front();
1325  CanonicalLoopInfo *CL = &LoopInfos.front();
1326 
1327  CL->Preheader = Preheader;
1328  CL->Header = Header;
1329  CL->Cond = Cond;
1330  CL->Body = Body;
1331  CL->Latch = Latch;
1332  CL->Exit = Exit;
1333  CL->After = After;
1334 
1335 #ifndef NDEBUG
1336  CL->assertOK();
1337 #endif
1338  return CL;
1339 }
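// The skeleton produced above, as a CFG sketch:
//   preheader -> header -> cond --(iv ult tripcount)--> body -> inc -> header
//                               `--(otherwise)--------> exit -> after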
1340 
1343  LoopBodyGenCallbackTy BodyGenCB,
1344  Value *TripCount, const Twine &Name) {
1345  BasicBlock *BB = Loc.IP.getBlock();
1346  BasicBlock *NextBB = BB->getNextNode();
1347 
1348  CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
1349  NextBB, NextBB, Name);
1350  BasicBlock *After = CL->getAfter();
1351 
1352  // If location is not set, don't connect the loop.
1353  if (updateToLocation(Loc)) {
1354  // Split the loop at the insertion point: Branch to the preheader and move
1355  // every following instruction to after the loop (the After BB). Also, the
1356  // new successor is the loop's after block.
1357  Builder.CreateBr(CL->Preheader);
1358  After->getInstList().splice(After->begin(), BB->getInstList(),
1359  Builder.GetInsertPoint(), BB->end());
1360  After->replaceSuccessorsPhiUsesWith(BB, After);
1361  }
1362 
1363  // Emit the body content. We do it after connecting the loop to the CFG so
1364  // that the callback does not encounter degenerate BBs.
1365  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
1366 
1367 #ifndef NDEBUG
1368  CL->assertOK();
1369 #endif
1370  return CL;
1371 }
1372 
1374  const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
1375  Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
1376  InsertPointTy ComputeIP, const Twine &Name) {
1377 
1378  // Consider the following difficulties (assuming 8-bit signed integers):
1379  // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1380  // DO I = 1, 100, 50
1381  // * A \p Step of INT_MIN cannot be normalized to a positive direction:
1382  // DO I = 100, 0, -128
1383 
1384  // Start, Stop and Step must be of the same integer type.
1385  auto *IndVarTy = cast<IntegerType>(Start->getType());
1386  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
1387  assert(IndVarTy == Step->getType() && "Step type mismatch");
1388 
1389  LocationDescription ComputeLoc =
1390  ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
1391  updateToLocation(ComputeLoc);
1392 
1393  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
1394  ConstantInt *One = ConstantInt::get(IndVarTy, 1);
1395 
1396  // Like Step, but always positive.
1397  Value *Incr = Step;
1398 
1399  // Distance between Start and Stop; always positive.
1400  Value *Span;
1401 
1402  // Condition checking whether no iterations are executed at all, e.g., because
1403  // UB < LB.
1404  Value *ZeroCmp;
1405 
1406  if (IsSigned) {
1407  // Ensure that increment is positive. If not, negate and invert LB and UB.
1408  Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
1409  Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
1410  Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
1411  Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
1412  Span = Builder.CreateSub(UB, LB, "", false, true);
1413  ZeroCmp = Builder.CreateICmp(
1414  InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
1415  } else {
1416  Span = Builder.CreateSub(Stop, Start, "", true);
1417  ZeroCmp = Builder.CreateICmp(
1418  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
1419  }
1420 
1421  Value *CountIfLooping;
1422  if (InclusiveStop) {
1423  CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
1424  } else {
1425  // Avoid incrementing past stop since it could overflow.
1426  Value *CountIfTwo = Builder.CreateAdd(
1427  Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
1428  Value *OneCmp = Builder.CreateICmp(
1429  InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
1430  CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
1431  }
1432  Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1433  "omp_" + Name + ".tripcount");
1434 
1435  auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
1436  Builder.restoreIP(CodeGenIP);
1437  Value *Span = Builder.CreateMul(IV, Step);
1438  Value *IndVar = Builder.CreateAdd(Span, Start);
1439  BodyGenCB(Builder.saveIP(), IndVar);
1440  };
1441  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
1442  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
1443 }
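// Worked example for the unsigned, exclusive-stop case: Start=0, Stop=10,
// Step=3 gives Span=10 and CountIfTwo=(10-1)/3+1=4; because Span > Incr the
// trip count is 4, covering the user-visible values 0, 3, 6 and 9.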
1444 
1445 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1446 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1447 // runtime. Always interpret integers as unsigned similarly to
1448 // CanonicalLoopInfo.
1450  OpenMPIRBuilder &OMPBuilder) {
1451  unsigned Bitwidth = Ty->getIntegerBitWidth();
1452  if (Bitwidth == 32)
1453  return OMPBuilder.getOrCreateRuntimeFunction(
1454  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1455  if (Bitwidth == 64)
1456  return OMPBuilder.getOrCreateRuntimeFunction(
1457  M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1458  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1459 }
1460 
1461 // Sets the number of loop iterations to the given value. This value must be
1462 // valid in the condition block (i.e., defined in the preheader) and is
1463 // interpreted as an unsigned integer.
1465  Instruction *CmpI = &CLI->getCond()->front();
1466  assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1467  CmpI->setOperand(1, TripCount);
1468  CLI->assertOK();
1469 }
1470 
1473  InsertPointTy AllocaIP,
1474  bool NeedsBarrier, Value *Chunk) {
1475  assert(CLI->isValid() && "Requires a valid canonical loop");
1476 
1477  // Set up the source location value for OpenMP runtime.
1478  Builder.restoreIP(CLI->getPreheaderIP());
1479  Builder.SetCurrentDebugLocation(DL);
1480 
1481  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
1482  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1483 
1484  // Declare useful OpenMP runtime functions.
1485  Value *IV = CLI->getIndVar();
1486  Type *IVTy = IV->getType();
1487  FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
1488  FunctionCallee StaticFini =
1489  getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1490 
1491  // Allocate space for computed loop bounds as expected by the "init" function.
1492  Builder.restoreIP(AllocaIP);
1493  Type *I32Type = Type::getInt32Ty(M.getContext());
1494  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1495  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1496  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1497  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1498 
1499  // At the end of the preheader, prepare for calling the "init" function by
1500  // storing the current loop bounds into the allocated space. A canonical loop
1501  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1502  // and produces an inclusive upper bound.
1503  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
1504  Constant *Zero = ConstantInt::get(IVTy, 0);
1505  Constant *One = ConstantInt::get(IVTy, 1);
1506  Builder.CreateStore(Zero, PLowerBound);
1507  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
1508  Builder.CreateStore(UpperBound, PUpperBound);
1509  Builder.CreateStore(One, PStride);
1510 
1511  // FIXME: schedule(static) is NOT the same as schedule(static,1)
1512  if (!Chunk)
1513  Chunk = One;
1514 
1515  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1516 
1517  Constant *SchedulingType =
1518  ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
1519 
1520  // Call the "init" function and update the trip count of the loop with the
1521  // value it produced.
1522  Builder.CreateCall(StaticInit,
1523  {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1524  PUpperBound, PStride, One, Chunk});
1525  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
1526  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
1527  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
1528  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
1529  setCanonicalLoopTripCount(CLI, TripCount);
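  // Worked example (illustrative values only): if the original trip count is
  // 128 and __kmpc_for_static_init hands this thread the inclusive bounds
  // [32, 63], the loads above give LowerBound = 32 and InclusiveUpperBound = 63,
  // so the thread-local trip count becomes 63 - 32 + 1 = 32; the IV rewrite
  // below then shifts the canonical IV by LowerBound = 32.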
1530 
1531  // Update all uses of the induction variable except the one in the condition
1532  // block that compares it with the actual upper bound, and the increment in
1533  // the latch block.
1534  // TODO: this can eventually move to CanonicalLoopInfo or to a new
1535  // CanonicalLoopInfoUpdater interface.
1536  Builder.SetInsertPoint(CLI->getBody(), CLI->getBody()->getFirstInsertionPt());
1537  Value *UpdatedIV = Builder.CreateAdd(IV, LowerBound);
1538  IV->replaceUsesWithIf(UpdatedIV, [&](Use &U) {
1539  auto *Instr = dyn_cast<Instruction>(U.getUser());
1540  return !Instr ||
1541  (Instr->getParent() != CLI->getCond() &&
1542  Instr->getParent() != CLI->getLatch() && Instr != UpdatedIV);
1543  });
1544 
1545  // In the "exit" block, call the "fini" function.
1546  Builder.SetInsertPoint(CLI->getExit(),
1547  CLI->getExit()->getTerminator()->getIterator());
1548  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1549 
1550  // Add the barrier if requested.
1551  if (NeedsBarrier)
1552  createBarrier(LocationDescription(Builder.saveIP(), DL),
1553  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1554  /* CheckCancelFlag */ false);
1555 
1556  InsertPointTy AfterIP = CLI->getAfterIP();
1557  CLI->invalidate();
1558 
1559  return AfterIP;
1560 }
1561 
1562 OpenMPIRBuilder::InsertPointTy
1563 OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
1564  InsertPointTy AllocaIP, bool NeedsBarrier) {
1565  // Currently only supports static schedules.
1566  return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
1567 }
1568 
1569 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
1570 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1571 /// the runtime. Always interpret integers as unsigned similarly to
1572 /// CanonicalLoopInfo.
1573 static FunctionCallee
1574 getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1575  unsigned Bitwidth = Ty->getIntegerBitWidth();
1576  if (Bitwidth == 32)
1577  return OMPBuilder.getOrCreateRuntimeFunction(
1578  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
1579  if (Bitwidth == 64)
1580  return OMPBuilder.getOrCreateRuntimeFunction(
1581  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
1582  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1583 }
1584 
1585 /// Returns an LLVM function to call for fetching the next chunk of loop
1586 /// iterations when using OpenMP dynamic scheduling, depending on `type`. Only
1587 /// i32 and i64 are supported by the runtime. Always interpret integers as
1588 /// unsigned similarly to CanonicalLoopInfo.
1589 static FunctionCallee
1590 getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
1591  unsigned Bitwidth = Ty->getIntegerBitWidth();
1592  if (Bitwidth == 32)
1593  return OMPBuilder.getOrCreateRuntimeFunction(
1594  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
1595  if (Bitwidth == 64)
1596  return OMPBuilder.getOrCreateRuntimeFunction(
1597  M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
1598  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1599 }
1600 
1601 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
1602  DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
1603  OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
1604  assert(CLI->isValid() && "Requires a valid canonical loop");
1605 
1606  // Set up the source location value for OpenMP runtime.
1607  Builder.SetCurrentDebugLocation(DL);
1608 
1609  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
1610  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
1611 
1612  // Declare useful OpenMP runtime functions.
1613  Value *IV = CLI->getIndVar();
1614  Type *IVTy = IV->getType();
1615  FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
1616  FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
1617 
1618  // Allocate space for computed loop bounds as expected by the "init" function.
1619  Builder.restoreIP(AllocaIP);
1620  Type *I32Type = Type::getInt32Ty(M.getContext());
1621  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
1622  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
1623  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
1624  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
1625 
1626  // At the end of the preheader, prepare for calling the "init" function by
1627  // storing the current loop bounds into the allocated space. A canonical loop
1628  // always iterates from 0 to trip-count with step 1. Note that "init" expects
1629  // and produces an inclusive upper bound.
1630  BasicBlock *PreHeader = CLI->getPreheader();
1631  Builder.SetInsertPoint(PreHeader->getTerminator());
1632  Constant *One = ConstantInt::get(IVTy, 1);
1633  Builder.CreateStore(One, PLowerBound);
1634  Value *UpperBound = CLI->getTripCount();
1635  Builder.CreateStore(UpperBound, PUpperBound);
1636  Builder.CreateStore(One, PStride);
1637 
1638  BasicBlock *Header = CLI->getHeader();
1639  BasicBlock *Exit = CLI->getExit();
1640  BasicBlock *Cond = CLI->getCond();
1641  InsertPointTy AfterIP = CLI->getAfterIP();
1642 
1643  // The CLI will be "broken" in the code below, as the loop is no longer
1644  // a valid canonical loop.
1645 
1646  if (!Chunk)
1647  Chunk = One;
1648 
1649  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1650 
1651  Constant *SchedulingType =
1652  ConstantInt::get(I32Type, static_cast<int>(SchedType));
1653 
1654  // Call the "init" function.
1655  Builder.CreateCall(DynamicInit,
1656  {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
1657  UpperBound, /* step */ One, Chunk});
1658 
1659  // An outer loop around the existing one.
1660  BasicBlock *OuterCond = BasicBlock::Create(
1661  PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
1662  PreHeader->getParent());
1663  // The flag returned by __kmpc_dispatch_next is always an i32, so compare it against an i32 zero rather than a constant of type IVTy.
1664  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
1665  Value *Res =
1666  Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
1667  PLowerBound, PUpperBound, PStride});
1668  Constant *Zero32 = ConstantInt::get(I32Type, 0);
1669  Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
1670  Value *LowerBound =
1671  Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
1672  Builder.CreateCondBr(MoreWork, Header, Exit);
1673 
1674  // Change PHI-node in loop header to use outer cond rather than preheader,
1675  // and set IV to the LowerBound.
1676  Instruction *Phi = &Header->front();
1677  auto *PI = cast<PHINode>(Phi);
1678  PI->setIncomingBlock(0, OuterCond);
1679  PI->setIncomingValue(0, LowerBound);
1680 
1681  // Then set the pre-header to jump to the OuterCond
1682  Instruction *Term = PreHeader->getTerminator();
1683  auto *Br = cast<BranchInst>(Term);
1684  Br->setSuccessor(0, OuterCond);
1685 
1686  // Modify the inner condition:
1687  // * Use the UpperBound returned from the DynamicNext call.
1688  // * Jump to the outer loop when done with one of the inner loops.
1689  Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
1690  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
1691  Instruction *Comp = &*Builder.GetInsertPoint();
1692  auto *CI = cast<CmpInst>(Comp);
1693  CI->setOperand(1, UpperBound);
1694  // Redirect the inner exit to branch to outer condition.
1695  Instruction *Branch = &Cond->back();
1696  auto *BI = cast<BranchInst>(Branch);
1697  assert(BI->getSuccessor(1) == Exit);
1698  BI->setSuccessor(1, OuterCond);
1699 
1700  // Add the barrier if requested.
1701  if (NeedsBarrier) {
1702  Builder.SetInsertPoint(&Exit->back());
1703  createBarrier(LocationDescription(Builder.saveIP(), DL),
1704  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
1705  /* CheckCancelFlag */ false);
1706  }
1707 
1708  CLI->invalidate();
1709  return AfterIP;
1710 }
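// Net effect of the rewiring above (a sketch): the preheader now calls
// __kmpc_dispatch_init and jumps to the new outer.cond block; outer.cond calls
// __kmpc_dispatch_next and either enters the original loop with the chunk's
// bounds or exits when no work is left; the inner condition compares the IV
// against the chunk's upper bound and branches back to outer.cond once the
// chunk is finished.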
1711 
1712 /// Make \p Source branch to \p Target.
1713 ///
1714 /// Handles two situations:
1715 /// * \p Source already has an unconditional branch.
1716 /// * \p Source is a degenerate block (no terminator because the BB is
1717 /// the current head of the IR construction).
1718 static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
1719  if (Instruction *Term = Source->getTerminator()) {
1720  auto *Br = cast<BranchInst>(Term);
1721  assert(!Br->isConditional() &&
1722  "BB's terminator must be an unconditional branch (or degenerate)");
1723  BasicBlock *Succ = Br->getSuccessor(0);
1724  Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
1725  Br->setSuccessor(0, Target);
1726  return;
1727  }
1728 
1729  auto *NewBr = BranchInst::Create(Target, Source);
1730  NewBr->setDebugLoc(DL);
1731 }
1732 
1733 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1734 /// after this \p OldTarget will be orphaned.
1735 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
1736  BasicBlock *NewTarget, DebugLoc DL) {
1737  for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
1738  redirectTo(Pred, NewTarget, DL);
1739 }
1740 
1741 /// Determine which blocks in \p BBs are reachable from outside, and erase the
1742 /// unreachable ones from their parent function.
1743 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
1744  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
1745  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
1746  for (Use &U : BB->uses()) {
1747  auto *UseInst = dyn_cast<Instruction>(U.getUser());
1748  if (!UseInst)
1749  continue;
1750  if (BBsToErase.count(UseInst->getParent()))
1751  continue;
1752  return true;
1753  }
1754  return false;
1755  };
1756 
1757  while (true) {
1758  bool Changed = false;
1759  for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
1760  if (HasRemainingUses(BB)) {
1761  BBsToErase.erase(BB);
1762  Changed = true;
1763  }
1764  }
1765  if (!Changed)
1766  break;
1767  }
1768 
1769  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
1770  DeleteDeadBlocks(BBVec);
1771 }
1772 
1773 CanonicalLoopInfo *
1774 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1775  InsertPointTy ComputeIP) {
1776  assert(Loops.size() >= 1 && "At least one loop required");
1777  size_t NumLoops = Loops.size();
1778 
1779  // Nothing to do if there is already just one loop.
1780  if (NumLoops == 1)
1781  return Loops.front();
1782 
1783  CanonicalLoopInfo *Outermost = Loops.front();
1784  CanonicalLoopInfo *Innermost = Loops.back();
1785  BasicBlock *OrigPreheader = Outermost->getPreheader();
1786  BasicBlock *OrigAfter = Outermost->getAfter();
1787  Function *F = OrigPreheader->getParent();
1788 
1789  // Setup the IRBuilder for inserting the trip count computation.
1790  Builder.SetCurrentDebugLocation(DL);
1791  if (ComputeIP.isSet())
1792  Builder.restoreIP(ComputeIP);
1793  else
1794  Builder.restoreIP(Outermost->getPreheaderIP());
1795 
1796  // Derive the collapsed loop's trip count.
1797  // TODO: Find common/largest indvar type.
1798  Value *CollapsedTripCount = nullptr;
1799  for (CanonicalLoopInfo *L : Loops) {
1800  assert(L->isValid() &&
1801  "All loops to collapse must be valid canonical loops");
1802  Value *OrigTripCount = L->getTripCount();
1803  if (!CollapsedTripCount) {
1804  CollapsedTripCount = OrigTripCount;
1805  continue;
1806  }
1807 
1808  // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
1809  CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
1810  {}, /*HasNUW=*/true);
1811  }
1812 
1813  // Create the collapsed loop control flow.
1814  CanonicalLoopInfo *Result =
1815  createLoopSkeleton(DL, CollapsedTripCount, F,
1816  OrigPreheader->getNextNode(), OrigAfter, "collapsed");
1817 
1818  // Build the collapsed loop body code.
1819  // Start with deriving the input loop induction variables from the collapsed
1820  // one, using a divmod scheme. To preserve the original loops' order, the
1821  // innermost loop uses the least significant bits.
1822  Builder.restoreIP(Result->getBodyIP());
1823 
1824  Value *Leftover = Result->getIndVar();
1825  SmallVector<Value *> NewIndVars;
1826  NewIndVars.set_size(NumLoops);
1827  for (int i = NumLoops - 1; i >= 1; --i) {
1828  Value *OrigTripCount = Loops[i]->getTripCount();
1829 
1830  Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
1831  NewIndVars[i] = NewIndVar;
1832 
1833  Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
1834  }
1835  // Outermost loop gets all the remaining bits.
1836  NewIndVars[0] = Leftover;
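  // Worked divmod example (illustrative trip counts only): for three loops
  // with trip counts 4, 5 and 6 the collapsed trip count is 120. For the
  // collapsed IV 87 the code above derives
  //   innermost = 87 % 6 = 3,  leftover = 87 / 6 = 14,
  //   middle    = 14 % 5 = 4,  leftover = 14 / 5 = 2,
  //   outermost = 2,
  // which corresponds to 2*(5*6) + 4*6 + 3 = 87.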
1837 
1838  // Construct the loop body control flow.
1839  // We progressively construct the branch structure following the direction of
1840  // the control flow, from the leading in-between code, the loop nest body, the
1841  // trailing in-between code, and rejoining the collapsed loop's latch.
1842  // ContinueBlock and ContinuePred keep track of the source(s) of the next edge. If
1843  // the ContinueBlock is set, continue with that block. If ContinuePred, use
1844  // its predecessors as sources.
1845  BasicBlock *ContinueBlock = Result->getBody();
1846  BasicBlock *ContinuePred = nullptr;
1847  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
1848  BasicBlock *NextSrc) {
1849  if (ContinueBlock)
1850  redirectTo(ContinueBlock, Dest, DL);
1851  else
1852  redirectAllPredecessorsTo(ContinuePred, Dest, DL);
1853 
1854  ContinueBlock = nullptr;
1855  ContinuePred = NextSrc;
1856  };
1857 
1858  // The code before the nested loop of each level.
1859  // Because we are sinking it into the nest, it will be executed more often
1860  // than in the original loop. More sophisticated schemes could keep track of what
1861  // the in-between code is and instantiate it only once per thread.
1862  for (size_t i = 0; i < NumLoops - 1; ++i)
1863  ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
1864 
1865  // Connect the loop nest body.
1866  ContinueWith(Innermost->getBody(), Innermost->getLatch());
1867 
1868  // The code after the nested loop at each level.
1869  for (size_t i = NumLoops - 1; i > 0; --i)
1870  ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
1871 
1872  // Connect the finished loop to the collapsed loop latch.
1873  ContinueWith(Result->getLatch(), nullptr);
1874 
1875  // Replace the input loops with the new collapsed loop.
1876  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
1877  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
1878 
1879  // Replace the input loop indvars with the derived ones.
1880  for (size_t i = 0; i < NumLoops; ++i)
1881  Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
1882 
1883  // Remove unused parts of the input loops.
1884  SmallVector<BasicBlock *, 12> OldControlBBs;
1885  OldControlBBs.reserve(6 * Loops.size());
1886  for (CanonicalLoopInfo *Loop : Loops)
1887  Loop->collectControlBlocks(OldControlBBs);
1888  removeUnusedBlocksFromParent(OldControlBBs);
1889 
1890  for (CanonicalLoopInfo *L : Loops)
1891  L->invalidate();
1892 
1893 #ifndef NDEBUG
1894  Result->assertOK();
1895 #endif
1896  return Result;
1897 }
1898 
1899 std::vector<CanonicalLoopInfo *>
1900 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
1901  ArrayRef<Value *> TileSizes) {
1902  assert(TileSizes.size() == Loops.size() &&
1903  "Must pass as many tile sizes as there are loops");
1904  int NumLoops = Loops.size();
1905  assert(NumLoops >= 1 && "At least one loop to tile required");
1906 
1907  CanonicalLoopInfo *OutermostLoop = Loops.front();
1908  CanonicalLoopInfo *InnermostLoop = Loops.back();
1909  Function *F = OutermostLoop->getBody()->getParent();
1910  BasicBlock *InnerEnter = InnermostLoop->getBody();
1911  BasicBlock *InnerLatch = InnermostLoop->getLatch();
1912 
1913  // Collect original trip counts and induction variables to be accessible by
1914  // index. Also, the structure of the original loops is not preserved during
1915  // the construction of the tiled loops, so do it before we scavenge the BBs of
1916  // any original CanonicalLoopInfo.
1917  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
1918  for (CanonicalLoopInfo *L : Loops) {
1919  assert(L->isValid() && "All input loops must be valid canonical loops");
1920  OrigTripCounts.push_back(L->getTripCount());
1921  OrigIndVars.push_back(L->getIndVar());
1922  }
1923 
1924  // Collect the code between loop headers. These may contain SSA definitions
1925  // that are used in the loop nest body. To be usable within the innermost
1926  // body, these BasicBlocks will be sunk into the loop nest body. That is,
1927  // these instructions may be executed more often than before the tiling.
1928  // TODO: It would be sufficient to only sink them into body of the
1929  // corresponding tile loop.
1930  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
1931  for (int i = 0; i < NumLoops - 1; ++i) {
1932  CanonicalLoopInfo *Surrounding = Loops[i];
1933  CanonicalLoopInfo *Nested = Loops[i + 1];
1934 
1935  BasicBlock *EnterBB = Surrounding->getBody();
1936  BasicBlock *ExitBB = Nested->getHeader();
1937  InbetweenCode.emplace_back(EnterBB, ExitBB);
1938  }
1939 
1940  // Compute the trip counts of the floor loops.
1941  Builder.SetCurrentDebugLocation(DL);
1942  Builder.restoreIP(OutermostLoop->getPreheaderIP());
1943  SmallVector<Value *, 4> FloorCount, FloorRems;
1944  for (int i = 0; i < NumLoops; ++i) {
1945  Value *TileSize = TileSizes[i];
1946  Value *OrigTripCount = OrigTripCounts[i];
1947  Type *IVType = OrigTripCount->getType();
1948 
1949  Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
1950  Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
1951 
1952  // 0 if the tilesize divides the tripcount, 1 otherwise.
1953  // 1 means we need an additional iteration for a partial tile.
1954  //
1955  // Unfortunately we cannot just use the roundup-formula
1956  // (tripcount + tilesize - 1)/tilesize
1957  // because the summation might overflow. We do not want to introduce undefined
1958  // behavior when the untiled loop nest did not.
1959  Value *FloorTripOverflow =
1960  Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
1961 
1962  FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
1963  FloorTripCount =
1964  Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
1965  "omp_floor" + Twine(i) + ".tripcount", true);
1966 
1967  // Remember some values for later use.
1968  FloorCount.push_back(FloorTripCount);
1969  FloorRems.push_back(FloorTripRem);
1970  }
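  // Worked example (illustrative values): an original trip count of 10 with a
  // tile size of 4 gives FloorTripCount = 10 / 4 = 2 and FloorTripRem = 2;
  // since the remainder is non-zero, the floor loop gets 2 + 1 = 3 iterations,
  // the last of which covers the partial tile.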
1971 
1972  // Generate the new loop nest, from the outermost to the innermost.
1973  std::vector<CanonicalLoopInfo *> Result;
1974  Result.reserve(NumLoops * 2);
1975 
1976  // The basic block of the surrounding loop that enters the generated loop
1977  // nest.
1978  BasicBlock *Enter = OutermostLoop->getPreheader();
1979 
1980  // The basic block of the surrounding loop where the inner code should
1981  // continue.
1982  BasicBlock *Continue = OutermostLoop->getAfter();
1983 
1984  // Where the next loop basic block should be inserted.
1985  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
1986 
1987  auto EmbeddNewLoop =
1988  [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
1989  Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
1990  CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
1991  DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
1992  redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
1993  redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
1994 
1995  // Setup the position where the next embedded loop connects to this loop.
1996  Enter = EmbeddedLoop->getBody();
1997  Continue = EmbeddedLoop->getLatch();
1998  OutroInsertBefore = EmbeddedLoop->getLatch();
1999  return EmbeddedLoop;
2000  };
2001 
2002  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
2003  const Twine &NameBase) {
2004  for (auto P : enumerate(TripCounts)) {
2005  CanonicalLoopInfo *EmbeddedLoop =
2006  EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
2007  Result.push_back(EmbeddedLoop);
2008  }
2009  };
2010 
2011  EmbeddNewLoops(FloorCount, "floor");
2012 
2013  // Within the innermost floor loop, emit the code that computes the tile
2014  // sizes.
2015  Builder.SetInsertPoint(Enter->getTerminator());
2016  SmallVector<Value *, 4> TileCounts;
2017  for (int i = 0; i < NumLoops; ++i) {
2018  CanonicalLoopInfo *FloorLoop = Result[i];
2019  Value *TileSize = TileSizes[i];
2020 
2021  Value *FloorIsEpilogue =
2022  Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
2023  Value *TileTripCount =
2024  Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
2025 
2026  TileCounts.push_back(TileTripCount);
2027  }
2028 
2029  // Create the tile loops.
2030  EmbeddNewLoops(TileCounts, "tile");
2031 
2032  // Insert the inbetween code into the body.
2033  BasicBlock *BodyEnter = Enter;
2034  BasicBlock *BodyEntered = nullptr;
2035  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
2036  BasicBlock *EnterBB = P.first;
2037  BasicBlock *ExitBB = P.second;
2038 
2039  if (BodyEnter)
2040  redirectTo(BodyEnter, EnterBB, DL);
2041  else
2042  redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
2043 
2044  BodyEnter = nullptr;
2045  BodyEntered = ExitBB;
2046  }
2047 
2048  // Append the original loop nest body into the generated loop nest body.
2049  if (BodyEnter)
2050  redirectTo(BodyEnter, InnerEnter, DL);
2051  else
2052  redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
2053  redirectAllPredecessorsTo(InnerLatch, Continue, DL);
2054 
2055  // Replace the original induction variable with an induction variable computed
2056  // from the tile and floor induction variables.
2057  Builder.restoreIP(Result.back()->getBodyIP());
2058  for (int i = 0; i < NumLoops; ++i) {
2059  CanonicalLoopInfo *FloorLoop = Result[i];
2060  CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
2061  Value *OrigIndVar = OrigIndVars[i];
2062  Value *Size = TileSizes[i];
2063 
2064  Value *Scale =
2065  Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
2066  Value *Shift =
2067  Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
2068  OrigIndVar->replaceAllUsesWith(Shift);
2069  }
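  // Worked example (illustrative values): with a tile size of 4, a floor IV of
  // 2 and a tile IV of 1, the original induction variable is reconstructed as
  // 4 * 2 + 1 = 9.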
2070 
2071  // Remove unused parts of the original loops.
2072  SmallVector<BasicBlock *, 12> OldControlBBs;
2073  OldControlBBs.reserve(6 * Loops.size());
2074  for (CanonicalLoopInfo *Loop : Loops)
2075  Loop->collectControlBlocks(OldControlBBs);
2076  removeUnusedBlocksFromParent(OldControlBBs);
2077 
2078  for (CanonicalLoopInfo *L : Loops)
2079  L->invalidate();
2080 
2081 #ifndef NDEBUG
2082  for (CanonicalLoopInfo *GenL : Result)
2083  GenL->assertOK();
2084 #endif
2085  return Result;
2086 }
2087 
2088 /// Attach loop metadata \p Properties to the loop described by \p Loop. If the
2089 /// loop already has metadata, the loop properties are appended.
2090 static void addLoopMetadata(CanonicalLoopInfo *Loop,
2091  ArrayRef<Metadata *> Properties) {
2092  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
2093 
2094  // Nothing to do if no property to attach.
2095  if (Properties.empty())
2096  return;
2097 
2098  LLVMContext &Ctx = Loop->getFunction()->getContext();
2099  SmallVector<Metadata *> NewLoopProperties;
2100  NewLoopProperties.push_back(nullptr);
2101 
2102  // If the loop already has metadata, prepend it to the new metadata.
2103  BasicBlock *Latch = Loop->getLatch();
2104  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
2105  MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
2106  if (Existing)
2107  append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
2108 
2109  append_range(NewLoopProperties, Properties);
2110  MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
2111  LoopID->replaceOperandWith(0, LoopID);
2112 
2113  Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
2114 }
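// For illustration (a sketch of the resulting IR, not emitted verbatim by this
// function): attaching {"llvm.loop.unroll.enable"} to a loop without prior
// metadata yields roughly
//
//   br i1 %cond, label %body, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.loop.unroll.enable"}
//
// where the first operand of !0 points back at !0 itself, as required for
// loop metadata.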
2115 
2116 void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
2117  LLVMContext &Ctx = Builder.getContext();
2118  addLoopMetadata(
2119  Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2120  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
2121 }
2122 
2123 void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
2124  LLVMContext &Ctx = Builder.getContext();
2125  addLoopMetadata(
2126  Loop, {
2127  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2128  });
2129 }
2130 
2131 /// Create the TargetMachine object to query the backend for optimization
2132 /// preferences.
2133 ///
2134 /// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
2135 /// e.g. Clang does not pass it to its CodeGen layer and creates it only when
2136 /// needed for the LLVM pass pipeline. We use some default options to avoid
2137 /// having to pass too many settings from the frontend that probably do not
2138 /// matter.
2139 ///
2140 /// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
2141 /// method. If we are going to use TargetMachine for more purposes, especially
2142 /// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
2143 /// might be worth requiring front-ends to pass on their TargetMachine,
2144 /// or at least cache it between methods. Note that while frontends such as Clang
2145 /// have just a single main TargetMachine per translation unit, "target-cpu" and
2146 /// "target-features" that determine the TargetMachine are per-function and can
2147 /// be overrided using __attribute__((target("OPTIONS"))).
2148 static std::unique_ptr<TargetMachine>
2149 createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
2150  Module *M = F->getParent();
2151 
2152  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
2153  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
2154  const std::string &Triple = M->getTargetTriple();
2155 
2156  std::string Error;
2158  if (!TheTarget)
2159  return {};
2160 
2162  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
2163  Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
2164  OptLevel));
2165 }
2166 
2167 /// Heuristically determine the best-performing unroll factor for \p CLI. This
2168 /// depends on the target processor. We are re-using the same heuristics as the
2169 /// LoopUnrollPass.
2170 static unsigned computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
2171  Function *F = CLI->getFunction();
2172 
2173  // Assume the user requests the most aggressive unrolling, even if the rest of
2174  // the code is optimized using a lower setting.
2175  CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
2176  std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
2177 
2178  FunctionAnalysisManager FAM;
2179  FAM.registerPass([]() { return TargetLibraryAnalysis(); });
2180  FAM.registerPass([]() { return AssumptionAnalysis(); });
2181  FAM.registerPass([]() { return DominatorTreeAnalysis(); });
2182  FAM.registerPass([]() { return LoopAnalysis(); });
2183  FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
2184  FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
2185  TargetIRAnalysis TIRA;
2186  if (TM)
2187  TIRA = TargetIRAnalysis(
2188  [&](const Function &F) { return TM->getTargetTransformInfo(F); });
2189  FAM.registerPass([&]() { return TIRA; });
2190 
2191  TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
2192  ScalarEvolutionAnalysis SEA;
2193  ScalarEvolution &&SE = SEA.run(*F, FAM);
2194  DominatorTreeAnalysis DTA;
2195  DominatorTree &&DT = DTA.run(*F, FAM);
2196  LoopAnalysis LIA;
2197  LoopInfo &&LI = LIA.run(*F, FAM);
2198  AssumptionAnalysis ACT;
2199  AssumptionCache &&AC = ACT.run(*F, FAM);
2200  OptimizationRemarkEmitter ORE{F};
2201 
2202  Loop *L = LI.getLoopFor(CLI->getHeader());
2203  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
2204 
2204 
2205  TargetTransformInfo::UnrollingPreferences UP =
2206  gatherUnrollingPreferences(L, SE, TTI,
2207  /*BlockFrequencyInfo=*/nullptr,
2208  /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
2209  /*UserThreshold=*/None,
2210  /*UserCount=*/None,
2211  /*UserAllowPartial=*/true,
2212  /*UserAllowRuntime=*/true,
2213  /*UserUpperBound=*/None,
2214  /*UserFullUnrollMaxCount=*/None);
2215 
2216  UP.Force = true;
2217 
2218  // Account for additional optimizations taking place before the LoopUnrollPass
2219  // would unroll the loop.
2220  UP.Threshold *= UnrollThresholdFactor;
2221  UP.PartialThreshold *= UnrollThresholdFactor;
2222 
2223  // Use normal unroll factors even if the rest of the code is optimized for
2224  // size.
2225  UP.OptSizeThreshold = UP.Threshold;
2226  UP.PartialOptSizeThreshold = UP.PartialThreshold;
2227 
2228  LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
2229  << " Threshold=" << UP.Threshold << "\n"
2230  << " PartialThreshold=" << UP.PartialThreshold << "\n"
2231  << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
2232  << " PartialOptSizeThreshold="
2233  << UP.PartialOptSizeThreshold << "\n");
2234 
2235  // Disable peeling.
2236  TargetTransformInfo::PeelingPreferences PP =
2237  gatherPeelingPreferences(L, SE, TTI,
2238  /*UserAllowPeeling=*/false,
2239  /*UserAllowProfileBasedPeeling=*/false,
2240  /*UserUnrollingSpecficValues=*/false);
2241 
2242  SmallPtrSet<const Value *, 32> EphValues;
2243  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
2244 
2245  // Assume that reads and writes to stack variables can be eliminated by
2246  // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
2247  // size.
2248  for (BasicBlock *BB : L->blocks()) {
2249  for (Instruction &I : *BB) {
2250  Value *Ptr;
2251  if (auto *Load = dyn_cast<LoadInst>(&I)) {
2252  Ptr = Load->getPointerOperand();
2253  } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
2254  Ptr = Store->getPointerOperand();
2255  } else
2256  continue;
2257 
2258  Ptr = Ptr->stripPointerCasts();
2259 
2260  if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
2261  if (Alloca->getParent() == &F->getEntryBlock())
2262  EphValues.insert(&I);
2263  }
2264  }
2265  }
2266 
2267  unsigned NumInlineCandidates;
2268  bool NotDuplicatable;
2269  bool Convergent;
2270  unsigned LoopSize =
2271  ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
2272  TTI, EphValues, UP.BEInsns);
2273  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
2274 
2275  // Loop is not unrollable if the loop contains certain instructions.
2276  if (NotDuplicatable || Convergent) {
2277  LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
2278  return 1;
2279  }
2280 
2281  // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
2282  // be able to use it.
2283  int TripCount = 0;
2284  int MaxTripCount = 0;
2285  bool MaxOrZero = false;
2286  unsigned TripMultiple = 0;
2287 
2288  bool UseUpperBound = false;
2289  computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
2290  MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
2291  UseUpperBound);
2292  unsigned Factor = UP.Count;
2293  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
2294 
2295  // This function returns 1 to signal that the loop should not be unrolled.
2296  if (Factor == 0)
2297  return 1;
2298  return Factor;
2299 }
2300 
2301 void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
2302  int32_t Factor,
2303  CanonicalLoopInfo **UnrolledCLI) {
2304  assert(Factor >= 0 && "Unroll factor must not be negative");
2305 
2306  Function *F = Loop->getFunction();
2307  LLVMContext &Ctx = F->getContext();
2308 
2309  // If the unrolled loop is not used for another loop-associated directive, it
2310  // is sufficient to add metadata for the LoopUnrollPass.
2311  if (!UnrolledCLI) {
2312  SmallVector<Metadata *, 2> LoopMetadata;
2313  LoopMetadata.push_back(
2314  MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
2315 
2316  if (Factor >= 1) {
2317  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2318  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2319  LoopMetadata.push_back(MDNode::get(
2320  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
2321  }
2322 
2323  addLoopMetadata(Loop, LoopMetadata);
2324  return;
2325  }
2326 
2327  // Heuristically determine the unroll factor.
2328  if (Factor == 0)
2329  Factor = computeHeuristicUnrollFactor(Loop);
2330 
2331  // No change required with unroll factor 1.
2332  if (Factor == 1) {
2333  *UnrolledCLI = Loop;
2334  return;
2335  }
2336 
2337  assert(Factor >= 2 &&
2338  "unrolling only makes sense with a factor of 2 or larger");
2339 
2340  Type *IndVarTy = Loop->getIndVarType();
2341 
2342  // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
2343  // unroll the inner loop.
2344  Value *FactorVal =
2345  ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
2346  /*isSigned=*/false));
2347  std::vector<CanonicalLoopInfo *> LoopNest =
2348  tileLoops(DL, {Loop}, {FactorVal});
2349  assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
2350  *UnrolledCLI = LoopNest[0];
2351  CanonicalLoopInfo *InnerLoop = LoopNest[1];
2352 
2353  // LoopUnrollPass can only fully unroll loops with constant trip count.
2354  // Unroll by the unroll factor with a fallback epilog for the remainder
2355  // iterations if necessary.
2356  ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
2357  ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
2358  addLoopMetadata(
2359  InnerLoop,
2360  {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
2361  MDNode::get(
2362  Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
2363 
2364 #ifndef NDEBUG
2365  (*UnrolledCLI)->assertOK();
2366 #endif
2367 }
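// Summary of the partial-unroll strategy above (a sketch with an example
// factor): for Factor = 4, the loop is tiled into an outer "floor" loop and an
// inner "tile" loop of at most 4 iterations; the inner loop is then tagged
// with llvm.loop.unroll.enable and llvm.loop.unroll.count 4 so that
// LoopUnrollPass can unroll it fully, while *UnrolledCLI is set to the outer
// floor loop for any enclosing loop-associated directive.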
2368 
2369 OpenMPIRBuilder::InsertPointTy
2370 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
2371  llvm::Value *BufSize, llvm::Value *CpyBuf,
2372  llvm::Value *CpyFn, llvm::Value *DidIt) {
2373  if (!updateToLocation(Loc))
2374  return Loc.IP;
2375 
2376  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2377  Value *Ident = getOrCreateIdent(SrcLocStr);
2378  Value *ThreadId = getOrCreateThreadID(Ident);
2379 
2380  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
2381 
2382  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
2383 
2384  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
2385  Builder.CreateCall(Fn, Args);
2386 
2387  return Builder.saveIP();
2388 }
2389 
2390 OpenMPIRBuilder::InsertPointTy
2391 OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
2392  BodyGenCallbackTy BodyGenCB,
2393  FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {
2394 
2395  if (!updateToLocation(Loc))
2396  return Loc.IP;
2397 
2398  // If needed (i.e. not null), initialize `DidIt` with 0
2399  if (DidIt) {
2400  Builder.CreateStore(Builder.getInt32(0), DidIt);
2401  }
2402 
2403  Directive OMPD = Directive::OMPD_single;
2404  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2405  Value *Ident = getOrCreateIdent(SrcLocStr);
2406  Value *ThreadId = getOrCreateThreadID(Ident);
2407  Value *Args[] = {Ident, ThreadId};
2408 
2409  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
2410  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2411 
2412  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
2413  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2414 
2415  // generates the following:
2416  // if (__kmpc_single()) {
2417  // .... single region ...
2418  // __kmpc_end_single
2419  // }
2420 
2421  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2422  /*Conditional*/ true, /*hasFinalize*/ true);
2423 }
2424 
2425 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
2426  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2427  FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
2428 
2429  if (!updateToLocation(Loc))
2430  return Loc.IP;
2431 
2432  Directive OMPD = Directive::OMPD_critical;
2433  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2434  Value *Ident = getOrCreateIdent(SrcLocStr);
2435  Value *ThreadId = getOrCreateThreadID(Ident);
2436  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
2437  Value *Args[] = {Ident, ThreadId, LockVar};
2438 
2439  SmallVector<Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
2440  Function *RTFn = nullptr;
2441  if (HintInst) {
2442  // Add Hint to entry Args and create call
2443  EnterArgs.push_back(HintInst);
2444  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
2445  } else {
2446  RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
2447  }
2448  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
2449 
2450  Function *ExitRTLFn =
2451  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
2452  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2453 
2454  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2455  /*Conditional*/ false, /*hasFinalize*/ true);
2456 }
2457 
2458 OpenMPIRBuilder::InsertPointTy
2459 OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
2460  InsertPointTy AllocaIP, unsigned NumLoops,
2461  ArrayRef<llvm::Value *> StoreValues,
2462  const Twine &Name, bool IsDependSource) {
2463  if (!updateToLocation(Loc))
2464  return Loc.IP;
2465 
2466  // Allocate space for vector and generate alloc instruction.
2467  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
2468  Builder.restoreIP(AllocaIP);
2469  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
2470  ArgsBase->setAlignment(Align(8));
2471  Builder.restoreIP(Loc.IP);
2472 
2473  // Store the index values at their corresponding offsets in the depend vector.
2474  for (unsigned I = 0; I < NumLoops; ++I) {
2475  Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
2476  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
2477  Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
2478  }
2479 
2480  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
2481  ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
2482 
2483  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2484  Value *Ident = getOrCreateIdent(SrcLocStr);
2485  Value *ThreadId = getOrCreateThreadID(Ident);
2486  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
2487 
2488  Function *RTLFn = nullptr;
2489  if (IsDependSource)
2490  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
2491  else
2492  RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
2493  Builder.CreateCall(RTLFn, Args);
2494 
2495  return Builder.saveIP();
2496 }
2497 
2498 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
2499  const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
2500  FinalizeCallbackTy FiniCB, bool IsThreads) {
2501  if (!updateToLocation(Loc))
2502  return Loc.IP;
2503 
2504  Directive OMPD = Directive::OMPD_ordered;
2505  Instruction *EntryCall = nullptr;
2506  Instruction *ExitCall = nullptr;
2507 
2508  if (IsThreads) {
2509  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2510  Value *Ident = getOrCreateIdent(SrcLocStr);
2511  Value *ThreadId = getOrCreateThreadID(Ident);
2512  Value *Args[] = {Ident, ThreadId};
2513 
2514  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
2515  EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2516 
2517  Function *ExitRTLFn =
2518  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
2519  ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2520  }
2521 
2522  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2523  /*Conditional*/ false, /*hasFinalize*/ true);
2524 }
2525 
2526 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
2527  Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
2528  BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
2529  bool HasFinalize, bool IsCancellable) {
2530 
2531  if (HasFinalize)
2532  FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
2533 
2534  // Create inlined region's entry and body blocks, in preparation
2535  // for conditional creation
2536  BasicBlock *EntryBB = Builder.GetInsertBlock();
2537  Instruction *SplitPos = EntryBB->getTerminator();
2538  if (!isa_and_nonnull<BranchInst>(SplitPos))
2539  SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
2540  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
2541  BasicBlock *FiniBB =
2542  EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
2543 
2544  Builder.SetInsertPoint(EntryBB->getTerminator());
2545  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
2546 
2547  // generate body
2548  BodyGenCB(/* AllocaIP */ InsertPointTy(),
2549  /* CodeGenIP */ Builder.saveIP(), *FiniBB);
2550 
2551  // If we didn't emit a branch to FiniBB during body generation, it means
2552  // FiniBB is unreachable (e.g. while(1);). Stop generating the unreachable
2553  // blocks and remove anything we are not going to use.
2554  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
2555  if (SkipEmittingRegion) {
2556  FiniBB->eraseFromParent();
2557  ExitCall->eraseFromParent();
2558  // Discard finalization if we have it.
2559  if (HasFinalize) {
2560  assert(!FinalizationStack.empty() &&
2561  "Unexpected finalization stack state!");
2562  FinalizationStack.pop_back();
2563  }
2564  } else {
2565  // emit exit call and do any needed finalization.
2566  auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
2567  assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
2568  FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
2569  "Unexpected control flow graph state!!");
2570  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
2571  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
2572  "Unexpected Control Flow State!");
2573  MergeBlockIntoPredecessor(FiniBB);
2574  }
2575 
2576  // If we are skipping the region of a non-conditional directive, remove the
2577  // exit block and clear the builder's insertion point.
2578  assert(SplitPos->getParent() == ExitBB &&
2579  "Unexpected Insertion point location!");
2580  if (!Conditional && SkipEmittingRegion) {
2581  ExitBB->eraseFromParent();
2582  Builder.ClearInsertionPoint();
2583  } else {
2584  auto merged = MergeBlockIntoPredecessor(ExitBB);
2585  BasicBlock *ExitPredBB = SplitPos->getParent();
2586  auto InsertBB = merged ? ExitPredBB : ExitBB;
2587  if (!isa_and_nonnull<BranchInst>(SplitPos))
2588  SplitPos->eraseFromParent();
2589  Builder.SetInsertPoint(InsertBB);
2590  }
2591 
2592  return Builder.saveIP();
2593 }
2594 
2595 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
2596  Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
2597  // If there is nothing to do, return the current insertion point.
2598  if (!Conditional || !EntryCall)
2599  return Builder.saveIP();
2600 
2601  BasicBlock *EntryBB = Builder.GetInsertBlock();
2602  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
2603  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
2604  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
2605 
2606  // Emit thenBB and set the Builder's insertion point there for
2607  // body generation next. Place the block after the current block.
2608  Function *CurFn = EntryBB->getParent();
2609  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);
2610 
2611  // Move the entry branch to the end of ThenBB and replace it with a
2612  // conditional branch (if-statement).
2613  Instruction *EntryBBTI = EntryBB->getTerminator();
2614  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
2615  EntryBBTI->removeFromParent();
2616  Builder.SetInsertPoint(UI);
2617  Builder.Insert(EntryBBTI);
2618  UI->eraseFromParent();
2619  Builder.SetInsertPoint(ThenBB->getTerminator());
2620 
2621  // Return an insertion point into ExitBB.
2622  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
2623 }
2624 
2625 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
2626  omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
2627  bool HasFinalize) {
2628 
2629  Builder.restoreIP(FinIP);
2630 
2631  // If there is finalization to do, emit it before the exit call
2632  if (HasFinalize) {
2633  assert(!FinalizationStack.empty() &&
2634  "Unexpected finalization stack state!");
2635 
2636  FinalizationInfo Fi = FinalizationStack.pop_back_val();
2637  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
2638 
2639  Fi.FiniCB(FinIP);
2640 
2641  BasicBlock *FiniBB = FinIP.getBlock();
2642  Instruction *FiniBBTI = FiniBB->getTerminator();
2643 
2644  // Set the Builder's insertion point for creating the exit call.
2645  Builder.SetInsertPoint(FiniBBTI);
2646  }
2647 
2648  if (!ExitCall)
2649  return Builder.saveIP();
2650 
2651  // Place the exit call as the last instruction before the finalization block's terminator.
2652  ExitCall->removeFromParent();
2653  Builder.Insert(ExitCall);
2654 
2655  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
2656  ExitCall->getIterator());
2657 }
2658 
2659 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
2660  InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
2661  llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
2662  if (!IP.isSet())
2663  return IP;
2664 
2665  IRBuilder<>::InsertPointGuard IPG(Builder);
2666 
2667  // creates the following CFG structure
2668  // OMP_Entry : (MasterAddr != PrivateAddr)?
2669  // F T
2670  // | \
2671  // | copyin.not.master
2672  // | /
2673  // v /
2674  // copyin.not.master.end
2675  // |
2676  // v
2677  // OMP.Entry.Next
2678 
2679  BasicBlock *OMP_Entry = IP.getBlock();
2680  Function *CurFn = OMP_Entry->getParent();
2681  BasicBlock *CopyBegin =
2682  BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
2683  BasicBlock *CopyEnd = nullptr;
2684 
2685  // If the entry block is terminated, split it to preserve the branch to the
2686  // following basic block (i.e. OMP.Entry.Next); otherwise, leave everything as is.
2687  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
2688  CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
2689  "copyin.not.master.end");
2690  OMP_Entry->getTerminator()->eraseFromParent();
2691  } else {
2692  CopyEnd =
2693  BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
2694  }
2695 
2696  Builder.SetInsertPoint(OMP_Entry);
2697  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
2698  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
2699  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
2700  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
2701 
2702  Builder.SetInsertPoint(CopyBegin);
2703  if (BranchtoEnd)
2704  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
2705 
2706  return Builder.saveIP();
2707 }
2708 
2709 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
2710  Value *Size, Value *Allocator,
2711  std::string Name) {
2712  IRBuilder<>::InsertPointGuard IPG(Builder);
2713  Builder.restoreIP(Loc.IP);
2714 
2715  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2716  Value *Ident = getOrCreateIdent(SrcLocStr);
2717  Value *ThreadId = getOrCreateThreadID(Ident);
2718  Value *Args[] = {ThreadId, Size, Allocator};
2719 
2720  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
2721 
2722  return Builder.CreateCall(Fn, Args, Name);
2723 }
2724 
2725 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
2726  Value *Addr, Value *Allocator,
2727  std::string Name) {
2728  IRBuilder<>::InsertPointGuard IPG(Builder);
2729  Builder.restoreIP(Loc.IP);
2730 
2731  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2732  Value *Ident = getOrCreateIdent(SrcLocStr);
2733  Value *ThreadId = getOrCreateThreadID(Ident);
2734  Value *Args[] = {ThreadId, Addr, Allocator};
2735  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
2736  return Builder.CreateCall(Fn, Args, Name);
2737 }
2738 
2739 CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
2740  const LocationDescription &Loc, llvm::Value *Pointer,
2741  llvm::ConstantInt *Size, const llvm::Twine &Name) {
2742  IRBuilder<>::InsertPointGuard IPG(Builder);
2743  Builder.restoreIP(Loc.IP);
2744 
2745  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2746  Value *Ident = getOrCreateIdent(SrcLocStr);
2747  Value *ThreadId = getOrCreateThreadID(Ident);
2748  Constant *ThreadPrivateCache =
2749  getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
2750  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
2751 
2752  Function *Fn =
2753  getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
2754 
2755  return Builder.CreateCall(Fn, Args);
2756 }
2757 
2758 OpenMPIRBuilder::InsertPointTy
2759 OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
2760  if (!updateToLocation(Loc))
2761  return Loc.IP;
2762 
2763  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2764  Value *Ident = getOrCreateIdent(SrcLocStr);
2765  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
2766  ConstantInt *UseGenericStateMachine =
2767  ConstantInt::getBool(Int32->getContext(), !IsSPMD);
2768  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2769 
2770  Function *Fn = getOrCreateRuntimeFunctionPtr(
2771  omp::RuntimeFunction::OMPRTL___kmpc_target_init);
2772 
2773  CallInst *ThreadKind =
2774  Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
2775 
2776  Value *ExecUserCode = Builder.CreateICmpEQ(
2777  ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
2778 
2779  // ThreadKind = __kmpc_target_init(...)
2780  // if (ThreadKind == -1)
2781  // user_code
2782  // else
2783  // return;
2784 
2785  auto *UI = Builder.CreateUnreachable();
2786  BasicBlock *CheckBB = UI->getParent();
2787  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
2788 
2789  BasicBlock *WorkerExitBB = BasicBlock::Create(
2790  CheckBB->getContext(), "worker.exit", CheckBB->getParent());
2791  Builder.SetInsertPoint(WorkerExitBB);
2792  Builder.CreateRetVoid();
2793 
2794  auto *CheckBBTI = CheckBB->getTerminator();
2795  Builder.SetInsertPoint(CheckBBTI);
2796  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
2797 
2798  CheckBBTI->eraseFromParent();
2799  UI->eraseFromParent();
2800 
2801  // Continue in the "user_code" block, see diagram above and in
2802  // openmp/libomptarget/deviceRTLs/common/include/target.h .
2803  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
2804 }
2805 
2806 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
2807  bool IsSPMD, bool RequiresFullRuntime) {
2808  if (!updateToLocation(Loc))
2809  return;
2810 
2811  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
2812  Value *Ident = getOrCreateIdent(SrcLocStr);
2813  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
2814  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
2815 
2816  Function *Fn = getOrCreateRuntimeFunctionPtr(
2817  omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
2818 
2819  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
2820 }
2821 
2822 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
2823  StringRef FirstSeparator,
2824  StringRef Separator) {
2825  SmallString<128> Buffer;
2826  llvm::raw_svector_ostream OS(Buffer);
2827  StringRef Sep = FirstSeparator;
2828  for (StringRef Part : Parts) {
2829  OS << Sep << Part;
2830  Sep = Separator;
2831  }
2832  return OS.str().str();
2833 }
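// Example (illustrative arguments): getNameWithSeparators({"foo", "var"}, ".",
// ".") produces ".foo.var"; the first separator is emitted once before the
// first part and the regular separator before every remaining part.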
2834 
2835 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
2836  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2837  // TODO: Replace the twine arg with stringref to get rid of the conversion
2838  // logic. However, this is taken from the current implementation in clang as is.
2839  // Since this method is used in many places exclusively for OMP internal use,
2840  // we will keep it as is temporarily until we move all users to the
2841  // builder and then, if possible, fix it everywhere in one go.
2842  SmallString<256> Buffer;
2843  llvm::raw_svector_ostream Out(Buffer);
2844  Out << Name;
2845  StringRef RuntimeName = Out.str();
2846  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2847  if (Elem.second) {
2848  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2849  "OMP internal variable has different type than requested");
2850  } else {
2851  // TODO: investigate the appropriate linkage type used for the global
2852  // variable for possibly changing that to internal or private, or maybe
2853  // create different versions of the function for different OMP internal
2854  // variables.
2855  Elem.second = new llvm::GlobalVariable(
2856  M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
2857  llvm::Constant::getNullValue(Ty), Elem.first(),
2858  /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
2859  AddressSpace);
2860  }
2861 
2862  return Elem.second;
2863 }
2864 
2865 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
2866  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2867  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
2868  return getOrCreateOMPInternalVariable(KmpCriticalNameTy, Name);
2869 }
2870 
2871 GlobalVariable *
2872 OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
2873  std::string VarName) {
2874  llvm::Constant *MaptypesArrayInit =
2875  llvm::ConstantDataArray::get(M.getContext(), Mappings);
2876  auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
2877  M, MaptypesArrayInit->getType(),
2878  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
2879  VarName);
2880  MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
2881  return MaptypesArrayGlobal;
2882 }
2883 
2884 void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
2885  InsertPointTy AllocaIP,
2886  unsigned NumOperands,
2887  struct MapperAllocas &MapperAllocas) {
2888  if (!updateToLocation(Loc))
2889  return;
2890 
2891  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2892  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2893  Builder.restoreIP(AllocaIP);
2894  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
2895  AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
2896  AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
2897  Builder.restoreIP(Loc.IP);
2898  MapperAllocas.ArgsBase = ArgsBase;
2899  MapperAllocas.Args = Args;
2900  MapperAllocas.ArgSizes = ArgSizes;
2901 }
2902 
2903 void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
2904  Function *MapperFunc, Value *SrcLocInfo,
2905  Value *MaptypesArg, Value *MapnamesArg,
2906  struct MapperAllocas &MapperAllocas,
2907  int64_t DeviceID, unsigned NumOperands) {
2908  if (!updateToLocation(Loc))
2909  return;
2910 
2911  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
2912  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
2913  Value *ArgsBaseGEP =
2914  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
2915  {Builder.getInt32(0), Builder.getInt32(0)});
2916  Value *ArgsGEP =
2917  Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
2918  {Builder.getInt32(0), Builder.getInt32(0)});
2919  Value *ArgSizesGEP =
2920  Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
2921  {Builder.getInt32(0), Builder.getInt32(0)});
2922  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
2923  Builder.CreateCall(MapperFunc,
2924  {SrcLocInfo, Builder.getInt64(DeviceID),
2925  Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
2926  ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
2927 }
2928 
2929 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
2930  const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
2933  "Unexpected Atomic Ordering.");
2934 
2935  bool Flush = false;
2936  llvm::AtomicOrdering FlushAO = llvm::AtomicOrdering::Monotonic;
2937 
2938  switch (AK) {
2939  case Read:
2940  if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
2941  AO == AtomicOrdering::SequentiallyConsistent) {
2942  FlushAO = AtomicOrdering::Acquire;
2943  Flush = true;
2944  }
2945  break;
2946  case Write:
2947  case Update:
2948  if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
2949  AO == AtomicOrdering::SequentiallyConsistent) {
2950  FlushAO = AtomicOrdering::Release;
2951  Flush = true;
2952  }
2953  break;
2954  case Capture:
2955  switch (AO) {
2956  case AtomicOrdering::Acquire:
2957  FlushAO = AtomicOrdering::Acquire;
2958  Flush = true;
2959  break;
2960  case AtomicOrdering::Release:
2961  FlushAO = AtomicOrdering::Release;
2962  Flush = true;
2963  break;
2964  case AtomicOrdering::AcquireRelease:
2965  case AtomicOrdering::SequentiallyConsistent:
2966  FlushAO = AtomicOrdering::AcquireRelease;
2967  Flush = true;
2968  break;
2969  default:
2970  // do nothing - leave silently.
2971  break;
2972  }
2973  }
2974 
2975  if (Flush) {
2976  // The flush runtime call does not yet take a memory ordering argument. Until
2977  // it does, we still work out which atomic ordering would apply, but issue a
2978  // plain flush call.
2979  // TODO: pass `FlushAO` after memory ordering support is added
2980  (void)FlushAO;
2981  emitFlush(Loc);
2982  }
2983 
2984  // For AO == AtomicOrdering::Monotonic and all other combinations,
2985  // do nothing.
2986  return Flush;
2987 }
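// Illustrative behavior (derived from the switch above): an atomic read with
// acquire, acq_rel or seq_cst ordering is followed by a flush (conceptually an
// acquire fence), an atomic write or update with release, acq_rel or seq_cst
// ordering gets a release-like flush, and monotonic accesses emit no flush at
// all.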
2988 
2989 OpenMPIRBuilder::InsertPointTy
2990 OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
2991  AtomicOpValue &X, AtomicOpValue &V,
2992  AtomicOrdering AO) {
2993  if (!updateToLocation(Loc))
2994  return Loc.IP;
2995 
2996  Type *XTy = X.Var->getType();
2997  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
2998  Type *XElemTy = XTy->getPointerElementType();
2999  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3000  XElemTy->isPointerTy()) &&
3001  "OMP atomic read expected a scalar type");
3002 
3003  Value *XRead = nullptr;
3004 
3005  if (XElemTy->isIntegerTy()) {
3006  LoadInst *XLD =
3007  Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
3008  XLD->setAtomic(AO);
3009  XRead = cast<Value>(XLD);
3010  } else {
3011  // We need to bitcast and perform atomic op as integer
3012  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3013  IntegerType *IntCastTy =
3014  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3015  Value *XBCast = Builder.CreateBitCast(
3016  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
3017  LoadInst *XLoad =
3018  Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
3019  XLoad->setAtomic(AO);
3020  if (XElemTy->isFloatingPointTy()) {
3021  XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
3022  } else {
3023  XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
3024  }
3025  }
3026  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
3027  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
3028  return Builder.saveIP();
3029 }
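
A minimal sketch of driving createAtomicRead to lower an atomic read such as `v = x;`. The OMPBuilder, Builder, Loc and the two pointer values are assumed to be supplied by the frontend; relaxed (Monotonic) ordering is just one possible choice.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

// Hypothetical lowering of '#pragma omp atomic read' for a scalar.
static void lowerAtomicReadSketch(OpenMPIRBuilder &OMPBuilder,
                                  IRBuilder<> &Builder,
                                  const OpenMPIRBuilder::LocationDescription &Loc,
                                  Value *XAddr, Value *VAddr) {
  OpenMPIRBuilder::AtomicOpValue X, V;
  X.Var = XAddr;
  X.IsVolatile = false;
  V.Var = VAddr;
  V.IsVolatile = false;
  // The returned insertion point is where the caller continues emitting code.
  Builder.restoreIP(
      OMPBuilder.createAtomicRead(Loc, X, V, AtomicOrdering::Monotonic));
}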
3030 
3031 OpenMPIRBuilder::InsertPointTy
3032 OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
3033  AtomicOpValue &X, Value *Expr,
3034  AtomicOrdering AO) {
3035  if (!updateToLocation(Loc))
3036  return Loc.IP;
3037 
3038  Type *XTy = X.Var->getType();
3039  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
3040  Type *XElemTy = XTy->getPointerElementType();
3041  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3042  XElemTy->isPointerTy()) &&
3043  "OMP atomic write expected a scalar type");
3044 
3045  if (XElemTy->isIntegerTy()) {
3046  StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
3047  XSt->setAtomic(AO);
3048  } else {
3049  // We need to bitcast and perform atomic op as integers
3050  unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3051  IntegerType *IntCastTy =
3052  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3053  Value *XBCast = Builder.CreateBitCast(
3054  X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
3055  Value *ExprCast =
3056  Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
3057  StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
3058  XSt->setAtomic(AO);
3059  }
3060 
3061  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
3062  return Builder.saveIP();
3063 }
3064 
3065 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
3066  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3067  Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3068  AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
3069  if (!updateToLocation(Loc))
3070  return Loc.IP;
3071 
3072  LLVM_DEBUG({
3073  Type *XTy = X.Var->getType();
3074  assert(XTy->isPointerTy() &&
3075  "OMP Atomic expects a pointer to target memory");
3076  Type *XElemTy = XTy->getPointerElementType();
3077  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3078  XElemTy->isPointerTy()) &&
3079  "OMP atomic update expected a scalar type");
3080  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3081  (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
3082  "OpenMP atomic does not support LT or GT operations");
3083  });
3084 
3085  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
3086  IsXLHSInRHSPart);
3087  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
3088  return Builder.saveIP();
3089 }
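
A minimal sketch of lowering `x += expr` (integer case) through createAtomicUpdate. OMPBuilder, Builder, Loc, AllocIP, XAddr and Expr are assumed to come from the caller. The callback recomputes the update in terms of the previous value of x; it is only materialized on the compare-exchange path, while the integer fast path becomes a single atomicrmw.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

// Hypothetical lowering of '#pragma omp atomic update' for 'x += expr'.
static void lowerAtomicAddSketch(OpenMPIRBuilder &OMPBuilder,
                                 IRBuilder<> &Builder,
                                 const OpenMPIRBuilder::LocationDescription &Loc,
                                 Instruction *AllocIP, Value *XAddr,
                                 Value *Expr) {
  OpenMPIRBuilder::AtomicOpValue X;
  X.Var = XAddr;
  // Keep the lambda alive in a named local; function_ref does not own it.
  auto UpdateFn = [Expr](Value *XOld, IRBuilder<> &IRB) -> Value * {
    return IRB.CreateAdd(XOld, Expr); // recompute x + expr from the old value
  };
  OpenMPIRBuilder::AtomicUpdateCallbackTy UpdateOp = UpdateFn;
  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
      Loc, AllocIP, X, Expr, AtomicOrdering::Monotonic, AtomicRMWInst::Add,
      UpdateOp, /*IsXLHSInRHSPart=*/true));
}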
3090 
3091 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
3092  AtomicRMWInst::BinOp RMWOp) {
3093  switch (RMWOp) {
3094  case AtomicRMWInst::Add:
3095  return Builder.CreateAdd(Src1, Src2);
3096  case AtomicRMWInst::Sub:
3097  return Builder.CreateSub(Src1, Src2);
3098  case AtomicRMWInst::And:
3099  return Builder.CreateAnd(Src1, Src2);
3100  case AtomicRMWInst::Nand:
3101  return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
3102  case AtomicRMWInst::Or:
3103  return Builder.CreateOr(Src1, Src2);
3104  case AtomicRMWInst::Xor:
3105  return Builder.CreateXor(Src1, Src2);
3106  case AtomicRMWInst::Xchg:
3107  case AtomicRMWInst::FAdd:
3108  case AtomicRMWInst::FSub:
3109  case AtomicRMWInst::BAD_BINOP:
3110  case AtomicRMWInst::Max:
3111  case AtomicRMWInst::Min:
3112  case AtomicRMWInst::UMax:
3113  case AtomicRMWInst::UMin:
3114  llvm_unreachable("Unsupported atomic update operation");
3115  }
3116  llvm_unreachable("Unsupported atomic update operation");
3117 }
3118 
3119 std::pair<Value *, Value *>
3120 OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
3121  AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
3122  AtomicUpdateCallbackTy &UpdateOp,
3123  bool VolatileX, bool IsXLHSInRHSPart) {
3124  Type *XElemTy = X->getType()->getPointerElementType();
3125 
3126  bool DoCmpExch =
3127  ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
3128  (RMWOp == AtomicRMWInst::FSub) ||
3129  (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);
3130 
3131  std::pair<Value *, Value *> Res;
3132  if (XElemTy->isIntegerTy() && !DoCmpExch) {
3133  Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
3134  // not needed except in case of postfix captures. Generate anyway for
3135  // consistency with the else part. Will be removed with any DCE pass.
3136  Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
3137  } else {
3138  unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
3139  IntegerType *IntCastTy =
3140  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3141  Value *XBCast =
3142  Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3143  LoadInst *OldVal =
3144  Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
3145  OldVal->setAtomic(AO);
3146  // CurBB
3147  // | /---\
3148  // ContBB |
3149  // | \---/
3150  // ExitBB
3151  BasicBlock *CurBB = Builder.GetInsertBlock();
3152  Instruction *CurBBTI = CurBB->getTerminator();
3153  CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
3154  BasicBlock *ExitBB =
3155  CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
3156  BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
3157  X->getName() + ".atomic.cont");
3158  ContBB->getTerminator()->eraseFromParent();
3159  Builder.SetInsertPoint(ContBB);
3160  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
3161  PHI->addIncoming(OldVal, CurBB);
3162  AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
3163  NewAtomicAddr->setName(X->getName() + "x.new.val");
3164  NewAtomicAddr->moveBefore(AllocIP);
3165  IntegerType *NewAtomicCastTy =
3166  IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
3167  bool IsIntTy = XElemTy->isIntegerTy();
3168  Value *NewAtomicIntAddr =
3169  (IsIntTy)
3170  ? NewAtomicAddr
3171  : Builder.CreateBitCast(NewAtomicAddr,
3172  NewAtomicCastTy->getPointerTo(Addrspace));
3173  Value *OldExprVal = PHI;
3174  if (!IsIntTy) {
3175  if (XElemTy->isFloatingPointTy()) {
3176  OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
3177  X->getName() + ".atomic.fltCast");
3178  } else {
3179  OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
3180  X->getName() + ".atomic.ptrCast");
3181  }
3182  }
3183 
3184  Value *Upd = UpdateOp(OldExprVal, Builder);
3185  Builder.CreateStore(Upd, NewAtomicAddr);
3186  LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
3187  Value *XAddr =
3188  (IsIntTy)
3189  ? X
3190  : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
3191  AtomicOrdering Failure =
3192  llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
3193  AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
3194  XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
3195  Result->setVolatile(VolatileX);
3196  Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
3197  Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
3198  PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
3199  Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
3200 
3201  Res.first = OldExprVal;
3202  Res.second = Upd;
3203 
3204  // set Insertion point in exit block
3205  if (UnreachableInst *ExitTI =
3206  dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
3207  CurBBTI->eraseFromParent();
3208  Builder.SetInsertPoint(ExitBB);
3209  } else {
3210  Builder.SetInsertPoint(ExitBB->getTerminator());
3211  }
3212  }
3213 
3214  return Res;
3215 }
3216 
3217 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
3218  const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
3219  AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
3220  AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
3221  bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
3222  if (!updateToLocation(Loc))
3223  return Loc.IP;
3224 
3225  LLVM_DEBUG({
3226  Type *XTy = X.Var->getType();
3227  assert(XTy->isPointerTy() &&
3228  "OMP Atomic expects a pointer to target memory");
3229  Type *XElemTy = XTy->getPointerElementType();
3230  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
3231  XElemTy->isPointerTy()) &&
3232  "OMP atomic capture expected a scalar type");
3233  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
3234  "OpenMP atomic does not support LT or GT operations");
3235  });
3236 
3237  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
3238  // 'x' is simply atomically rewritten with 'expr'.
3239  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
3240  std::pair<Value *, Value *> Result =
3241  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
3242  X.IsVolatile, IsXLHSInRHSPart);
3243 
3244  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
3245  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
3246 
3247  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
3248  return Builder.saveIP();
3249 }
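
A minimal sketch of a postfix capture, i.e. `{ v = x; x += expr; }`. With IsPostfixUpdate set, the old value of x (Result.first inside the implementation) is what ends up stored into v. All inputs are assumed to come from the caller, as in the previous sketches.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

// Hypothetical lowering of '#pragma omp atomic capture' (postfix form).
static void lowerAtomicCaptureSketch(
    OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
    const OpenMPIRBuilder::LocationDescription &Loc, Instruction *AllocIP,
    Value *XAddr, Value *VAddr, Value *Expr) {
  OpenMPIRBuilder::AtomicOpValue X, V;
  X.Var = XAddr;
  V.Var = VAddr;
  auto UpdateFn = [Expr](Value *XOld, IRBuilder<> &IRB) -> Value * {
    return IRB.CreateAdd(XOld, Expr);
  };
  OpenMPIRBuilder::AtomicUpdateCallbackTy UpdateOp = UpdateFn;
  Builder.restoreIP(OMPBuilder.createAtomicCapture(
      Loc, AllocIP, X, V, Expr, AtomicOrdering::Monotonic, AtomicRMWInst::Add,
      UpdateOp, /*UpdateExpr=*/true, /*IsPostfixUpdate=*/true,
      /*IsXLHSInRHSPart=*/true));
}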
3250 
3251 GlobalVariable *
3252 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
3253  std::string VarName) {
3254  llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
3255  llvm::ArrayType::get(
3256  llvm::Type::getInt8Ty(M.getContext())->getPointerTo(), Names.size()),
3257  Names);
3258  auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
3259  M, MapNamesArrayInit->getType(),
3260  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
3261  VarName);
3262  return MapNamesArrayGlobal;
3263 }
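
A minimal sketch of building the two offload-info globals that typically back the MaptypesArg/MapnamesArg operands used by emitMapperCall further up. The flag value 0x3 (to | from) and the global names are illustrative, and the default source-location string is used here only as a stand-in for real per-variable name constants.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

// Hypothetical construction of the offload map-types and map-names globals
// for two mapped variables.
static void buildOffloadInfoSketch(OpenMPIRBuilder &OMPBuilder) {
  SmallVector<uint64_t, 4> MapTypeFlags = {0x3, 0x3};
  GlobalVariable *MapTypesGV =
      OMPBuilder.createOffloadMaptypes(MapTypeFlags, ".offload_maptypes");

  SmallVector<Constant *, 4> MapNames(2, OMPBuilder.getOrCreateDefaultSrcLocStr());
  GlobalVariable *MapNamesGV =
      OMPBuilder.createOffloadMapnames(MapNames, ".offload_mapnames");

  (void)MapTypesGV; // a real caller would GEP into these globals to form the
  (void)MapNamesGV; // maptypes/mapnames arguments of the mapper call
}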
3264 
3265 // Create all simple and struct types exposed by the runtime and remember
3266 // the llvm::PointerTypes of them for easy access later.
3267 void OpenMPIRBuilder::initializeTypes(Module &M) {
3268  LLVMContext &Ctx = M.getContext();
3269  StructType *T;
3270 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
3271 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
3272  VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
3273  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
3274 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
3275  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
3276  VarName##Ptr = PointerType::getUnqual(VarName);
3277 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
3278  T = StructType::getTypeByName(Ctx, StructName); \
3279  if (!T) \
3280  T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
3281  VarName = T; \
3282  VarName##Ptr = PointerType::getUnqual(T);
3283 #include "llvm/Frontend/OpenMP/OMPKinds.def"
3284 }
3285 
3286 void OpenMPIRBuilder::OutlineInfo::collectBlocks(
3287  SmallPtrSetImpl<BasicBlock *> &BlockSet,
3288  SmallVectorImpl<BasicBlock *> &BlockVector) {
3289  SmallVector<BasicBlock *, 32> Worklist;
3290  BlockSet.insert(EntryBB);
3291  BlockSet.insert(ExitBB);
3292 
3293  Worklist.push_back(EntryBB);
3294  while (!Worklist.empty()) {
3295  BasicBlock *BB = Worklist.pop_back_val();
3296  BlockVector.push_back(BB);
3297  for (BasicBlock *SuccBB : successors(BB))
3298  if (BlockSet.insert(SuccBB).second)
3299  Worklist.push_back(SuccBB);
3300  }
3301 }
3302 
3303 void CanonicalLoopInfo::collectControlBlocks(
3304  SmallVectorImpl<BasicBlock *> &BBs) {
3305  // We only count those BBs as control blocks for which we do not need to
3306  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
3307  // flow. For consistency, this also means we do not add the Body block, which
3308  // is just the entry to the body code.
3309  BBs.reserve(BBs.size() + 6);
3310  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
3311 }
3312 
3313 void CanonicalLoopInfo::assertOK() const {
3314 #ifndef NDEBUG
3315  // No constraints if this object currently does not describe a loop.
3316  if (!isValid())
3317  return;
3318 
3319  // Verify standard control-flow we use for OpenMP loops.
3320  assert(Preheader);
3321  assert(isa<BranchInst>(Preheader->getTerminator()) &&
3322  "Preheader must terminate with unconditional branch");
3323  assert(Preheader->getSingleSuccessor() == Header &&
3324  "Preheader must jump to header");
3325 
3326  assert(Header);
3327  assert(isa<BranchInst>(Header->getTerminator()) &&
3328  "Header must terminate with unconditional branch");
3329  assert(Header->getSingleSuccessor() == Cond &&
3330  "Header must jump to exiting block");
3331 
3332  assert(Cond);
3333  assert(Cond->getSinglePredecessor() == Header &&
3334  "Exiting block only reachable from header");
3335 
3336  assert(isa<BranchInst>(Cond->getTerminator()) &&
3337  "Exiting block must terminate with conditional branch");
3338  assert(size(successors(Cond)) == 2 &&
3339  "Exiting block must have two successors");
3340  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
3341  "Exiting block's first successor jump to the body");
3342  assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
3343  "Exiting block's second successor must exit the loop");
3344 
3345  assert(Body);
3346  assert(Body->getSinglePredecessor() == Cond &&
3347  "Body only reachable from exiting block");
3348  assert(!isa<PHINode>(Body->front()));
3349 
3350  assert(Latch);
3351  assert(isa<BranchInst>(Latch->getTerminator()) &&
3352  "Latch must terminate with unconditional branch");
3353  assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
3354  // TODO: To support simple redirecting of the end of the body code that has
3355  // multiple predecessors, introduce another auxiliary basic block like preheader and after.
3356  assert(Latch->getSinglePredecessor() != nullptr);
3357  assert(!isa<PHINode>(Latch->front()));
3358 
3359  assert(Exit);
3360  assert(isa<BranchInst>(Exit->getTerminator()) &&
3361  "Exit block must terminate with unconditional branch");
3362  assert(Exit->getSingleSuccessor() == After &&
3363  "Exit block must jump to after block");
3364 
3365  assert(After);
3366  assert(After->getSinglePredecessor() == Exit &&
3367  "After block only reachable from exit block");
3368  assert(After->empty() || !isa<PHINode>(After->front()));
3369 
3370  Instruction *IndVar = getIndVar();
3371  assert(IndVar && "Canonical induction variable not found?");
3372  assert(isa<IntegerType>(IndVar->getType()) &&
3373  "Induction variable must be an integer");
3374  assert(cast<PHINode>(IndVar)->getParent() == Header &&
3375  "Induction variable must be a PHI in the loop header");
3376  assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
3377  assert(
3378  cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
3379  assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
3380 
3381  auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
3382  assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
3383  assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
3384  assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
3385  assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
3386  ->isOne());
3387 
3388  Value *TripCount = getTripCount();
3389  assert(TripCount && "Loop trip count not found?");
3390  assert(IndVar->getType() == TripCount->getType() &&
3391  "Trip count and induction variable must have the same type");
3392 
3393  auto *CmpI = cast<CmpInst>(&Cond->front());
3394  assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
3395  "Exit condition must be an unsigned less-than comparison");
3396  assert(CmpI->getOperand(0) == IndVar &&
3397  "Exit condition must compare the induction variable");
3398  assert(CmpI->getOperand(1) == TripCount &&
3399  "Exit condition must compare with the trip count");
3400 #endif
3401 }
3402 
3403 void CanonicalLoopInfo::invalidate() {
3404  Preheader = nullptr;
3405  Header = nullptr;
3406  Cond = nullptr;
3407  Body = nullptr;
3408  Latch = nullptr;
3409  Exit = nullptr;
3410  After = nullptr;
3411 }
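
A minimal sketch of the structure assertOK() verifies: createCanonicalLoop builds the preheader/header/cond/body/latch/exit/after skeleton, and the body callback only fills in the body block. OMPBuilder, Builder and Loc are assumed to be set up by the caller; the constant trip count of 128 is illustrative.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

// Hypothetical construction of a canonical OpenMP loop followed by the
// consistency self-check defined above.
static void buildCheckedLoopSketch(
    OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
    const OpenMPIRBuilder::LocationDescription &Loc) {
  auto BodyGenCB = [&Builder](OpenMPIRBuilder::InsertPointTy CodeGenIP,
                              Value *IV) {
    Builder.restoreIP(CodeGenIP);
    // ... emit one iteration of the loop body here, using IV as the 0-based
    //     canonical induction variable ...
  };
  Value *TripCount = Builder.getInt32(128);
  CanonicalLoopInfo *CLI =
      OMPBuilder.createCanonicalLoop(Loc, BodyGenCB, TripCount, "sketch.loop");
  CLI->assertOK();                      // no-op unless assertions are enabled
  Builder.restoreIP(CLI->getAfterIP()); // continue emitting after the loop
}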
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::CanonicalLoopInfo::getPreheaderIP
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
Definition: OMPIRBuilder.h:1515
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
AssumptionCache.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:457
llvm::OpenMPIRBuilder::createCachedThreadPrivate
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
Definition: OMPIRBuilder.cpp:2739
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::OpenMPIRBuilder::LocationDescription
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:150
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2037
addLoopMetadata
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Definition: OMPIRBuilder.cpp:2090
llvm::OpenMPIRBuilder::OutlineInfo::ExitBB
BasicBlock * ExitBB
Definition: OMPIRBuilder.h:762
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:453
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::OpenMPIRBuilder::createSection
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
Definition: OMPIRBuilder.cpp:1017
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::OpenMPIRBuilder::createCritical
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
Definition: OMPIRBuilder.cpp:2425
llvm::CanonicalLoopInfo::getAfter
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Definition: OMPIRBuilder.h:1484
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:255
createTargetMachine
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOpt::Level OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
Definition: OMPIRBuilder.cpp:2149
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::TargetOptions
Definition: TargetOptions.h:124
llvm::OpenMPIRBuilder::createLoopSkeleton
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
Definition: OMPIRBuilder.cpp:1272
llvm::cl::Prefix
@ Prefix
Definition: CommandLine.h:164
llvm::Function::empty
bool empty() const
Definition: Function.h:739
llvm::Function::getBasicBlockList
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
Definition: Function.h:710
llvm::CodeExtractor::findAllocas
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Definition: CodeExtractor.cpp:495
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::ConstantStruct::get
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1327
llvm::OpenMPIRBuilder::ReductionInfo::getElementType
Type * getElementType() const
Returns the type of the element being reduced.
Definition: OMPIRBuilder.h:556
llvm::AtomicRMWInst::Xor
@ Xor
*p = old ^ v
Definition: Instructions.h:752
llvm::CodeExtractor::extractCodeRegion
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Definition: CodeExtractor.cpp:1590
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::GlobalValue::NotThreadLocal
@ NotThreadLocal
Definition: GlobalValue.h:179
llvm::AtomicRMWInst::BinOp
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:738
llvm::ilist_node_with_parent::getNextNode
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:288
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::BasicBlock::empty
bool empty() const
Definition: BasicBlock.h:307
llvm::OpenMPIRBuilder::tileLoops
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
Definition: OMPIRBuilder.cpp:1900
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1977
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:717
llvm::OpenMPIRBuilder::InsertPointTy
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:53
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:457
llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2647
llvm::OpenMPIRBuilder::getOrCreateThreadID
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
Definition: OMPIRBuilder.cpp:354
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2208
llvm::DeleteDeadBlocks
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Definition: BasicBlockUtils.cpp:94
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:325
Error.h
OptimizationRemarkEmitter.h
llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::tgtok::VarName
@ VarName
Definition: TGLexer.h:71
llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loo...
Definition: TargetTransformInfo.h:462
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:781
ScalarEvolution.h
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
llvm::OpenMPIRBuilder::AtomicOpValue
a struct to pack relevant information while generating atomic Ops
Definition: OMPIRBuilder.h:1189
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::CanonicalLoopInfo::getAfterIP
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
Definition: OMPIRBuilder.h:1527
getTripCount
static const SCEV * getTripCount(const SCEV *BECount, Type *IntPtr, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)
Compute trip count from the backedge taken count.
Definition: LoopIdiomRecognize.cpp:1050
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:381
llvm::GlobalValue::UnnamedAddr::Global
@ Global
llvm::ApproximateLoopSize
unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &EphValues, unsigned BEInsns)
ApproximateLoopSize - Approximate the size of the loop.
Definition: LoopUnrollPass.cpp:667
llvm::Optional
Definition: APInt.h:33
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::ConstantAsMetadata::get
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:419
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::MCID::Convergent
@ Convergent
Definition: MCInstrDesc.h:182
CodeExtractor.h
llvm::OpenMPIRBuilder::ReductionInfo::Variable
Value * Variable
Reduction variable of pointer type.
Definition: OMPIRBuilder.h:561
llvm::OpenMPIRBuilder::FinalizeCallbackTy
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:63
llvm::CmpInst::ICMP_SLE
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:750
llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:894
llvm::CanonicalLoopInfo::getFunction
Function * getFunction() const
Definition: OMPIRBuilder.h:1532
llvm::OpenMPIRBuilder::AtomicOpValue::Var
Value * Var
Definition: OMPIRBuilder.h:1190
llvm::BasicBlock::hasNPredecessors
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:286
llvm::CallBase::arg_begin
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1303
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::OpenMPIRBuilder::createAtomicCapture
InsertPointTy createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
Definition: OMPIRBuilder.cpp:3217
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::TargetRegistry::lookupTarget
static const Target * lookupTarget(const std::string &Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Definition: TargetRegistry.cpp:62
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:201
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::OpenMPIRBuilder::emitTaskyieldImpl
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
Definition: OMPIRBuilder.cpp:901
llvm::OpenMPIRBuilder::createReductions
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
Definition: OMPIRBuilder.cpp:1063
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::StoreInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:384
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1203
llvm::OpenMPIRBuilder::collapseLoops
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
Definition: OMPIRBuilder.cpp:1774
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1105
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::ConstantExpr::getPointerCast
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2020
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:302
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:579
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::CanonicalLoopInfo::getIndVar
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Definition: OMPIRBuilder.h:1501
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional< unsigned > UserThreshold, Optional< unsigned > UserCount, Optional< bool > UserAllowPartial, Optional< bool > UserRuntime, Optional< bool > UserUpperBound, Optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
Definition: LoopUnrollPass.cpp:185
llvm::OpenMPIRBuilder::createOrderedDepend
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
Definition: OMPIRBuilder.cpp:2459
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::Lock
static sys::Mutex Lock
Definition: NVPTXUtilities.cpp:39
llvm::OpenMPIRBuilder::finalize
void finalize(Function *Fn=nullptr, bool AllowExtractorSinking=false)
Finalize the underlying module, e.g., by outlining regions.
Definition: OMPIRBuilder.cpp:159
Mappings
Inject TLI Mappings
Definition: InjectTLIMappings.cpp:172
CommandLine.h
CodeMetrics.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:765
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1547
llvm::OpenMPIRBuilder::getOrCreateDefaultSrcLocStr
Constant * getOrCreateDefaultSrcLocStr()
Return the (LLVM-IR) string describing the default source location.
Definition: OMPIRBuilder.cpp:330
TargetMachine.h
llvm::OpenMPIRBuilder::emitMapperCall
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
Definition: OMPIRBuilder.cpp:2903
llvm::OpenMPIRBuilder::getOrCreateIdent
Value * getOrCreateIdent(Constant *SrcLocStr, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
Definition: OMPIRBuilder.cpp:260
OMPIRBuilder.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CanonicalLoopInfo::assertOK
void assertOK() const
Consistency self-check.
Definition: OMPIRBuilder.cpp:3313
llvm::CanonicalLoopInfo::getCond
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
Definition: OMPIRBuilder.h:1452
llvm::OpenMPIRBuilder
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:29
llvm::OpenMPIRBuilder::createOMPFree
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Definition: OMPIRBuilder.cpp:2725
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:493
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:746
TileSize
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::OpenMPIRBuilder::createMapperAllocas
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Definition: OMPIRBuilder.cpp:2884
redirectAllPredecessorsTo
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
Definition: OMPIRBuilder.cpp:1735
llvm::OpenMPIRBuilder::createBarrier
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
Definition: OMPIRBuilder.cpp:361
llvm::OpenMPIRBuilder::createAtomicRead
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
Definition: OMPIRBuilder.cpp:2990
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
IP
Definition: NVPTXLowerArgs.cpp:166
TargetLibraryInfo.h
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::OpenMPIRBuilder::~OpenMPIRBuilder
~OpenMPIRBuilder()
Definition: OMPIRBuilder.cpp:244
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::CanonicalLoopInfo::isValid
bool isValid() const
Returns whether this object currently represents the IR of a loop.
Definition: OMPIRBuilder.h:1432
llvm::CodeExtractor::findInputsOutputs
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
Definition: CodeExtractor.cpp:647
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
MDBuilder.h
llvm::AtomicOrdering::Acquire
@ Acquire
llvm::AtomicRMWInst::Nand
@ Nand
*p = ~(old & v)
Definition: Instructions.h:748
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
setCanonicalLoopTripCount
void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount)
Definition: OMPIRBuilder.cpp:1464
llvm::GlobalObject::addMetadata
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1268
llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:364
llvm::AssumptionAnalysis::run
AssumptionCache run(Function &F, FunctionAnalysisManager &)
Definition: AssumptionCache.h:177
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::CanonicalLoopInfo::getBodyIP
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
Definition: OMPIRBuilder.h:1521
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
removeUnusedBlocksFromParent
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
Definition: OMPIRBuilder.cpp:1743
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:74
llvm::OpenMPIRBuilder::MapperAllocas::Args
AllocaInst * Args
Definition: OMPIRBuilder.h:801
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::omp::IdentFlag
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their defintion in openmp/runtime/src/kmp...
Definition: OMPConstants.h:74
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::AtomicRMWInst::Xchg
@ Xchg
*p = v
Definition: Instructions.h:740
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::OpenMPIRBuilder::unrollLoopFull
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
Definition: OMPIRBuilder.cpp:2116
llvm::OpenMPIRBuilder::emitTaskwaitImpl
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Definition: OMPIRBuilder.cpp:883
llvm::OpenMPIRBuilder::createAtomicUpdate
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
Definition: OMPIRBuilder.cpp:3065
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::OpenMPIRBuilder::createOMPAlloc
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
Definition: OMPIRBuilder.cpp:2709
llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::OpenMPIRBuilder::addAttributes
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Definition: OMPIRBuilder.cpp:59
llvm::None
const NoneType None
Definition: None.h:23
llvm::AtomicRMWInst::Add
@ Add
*p = old + v
Definition: Instructions.h:742
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::OpenMPIRBuilder::applyDynamicWorkshareLoop
InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, omp::OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a dynamically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1601
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
llvm::SmallString< 128 >
CFG.h
llvm::Twine::str
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
llvm::OpenMPIRBuilder::OutlineInfo::EntryBB
BasicBlock * EntryBB
Definition: OMPIRBuilder.h:762
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:326
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::omp::RuntimeFunction
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:54
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition: AtomicOrdering.h:56
llvm::OpenMPIRBuilder::createTaskyield
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
Definition: OMPIRBuilder.cpp:912
llvm::AtomicRMWInst::UMin
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:760
llvm::cl::opt< bool >
llvm::CanonicalLoopInfo::getPreheader
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Definition: OMPIRBuilder.h:1438
llvm::ClrHandlerType::Filter
@ Filter
llvm::OpenMPIRBuilder::createOffloadMaptypes
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
Definition: OMPIRBuilder.cpp:2872
llvm::OpenMPIRBuilder::AtomicOpValue::IsVolatile
bool IsVolatile
Definition: OMPIRBuilder.h:1192
llvm::OpenMPIRBuilder::createSections
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
Definition: OMPIRBuilder.cpp:918
llvm::OpenMPIRBuilder::MapperAllocas::ArgsBase
AllocaInst * ArgsBase
Definition: OMPIRBuilder.h:800
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::OpenMPIRBuilder::ReductionInfo::PrivateVariable
Value * PrivateVariable
Thread-private partial reduction variable.
Definition: OMPIRBuilder.h:564
llvm::OpenMPIRBuilder::createOrderedThreadsSimd
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
Definition: OMPIRBuilder.cpp:2498
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:181
llvm::AtomicRMWInst::Sub
@ Sub
*p = old - v
Definition: Instructions.h:744
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::ScalarEvolutionAnalysis::run
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
Definition: ScalarEvolution.cpp:12886
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2775
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3116
DebugInfo.h
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::OpenMPIRBuilder::emitCancelationCheckImpl
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Definition: OMPIRBuilder.cpp:466
llvm::OpenMPIRBuilder::createCanonicalLoop
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.cpp:1342
llvm::AtomicRMWInst::Min
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:756
llvm::OpenMPIRBuilder::createTargetInit
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
The omp target interface.
Definition: OMPIRBuilder.cpp:2759
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::appendToUsed
void appendToUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.used list.
Definition: ModuleUtils.cpp:106
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:572
llvm::AtomicRMWInst::Or
@ Or
*p = old | v
Definition: Instructions.h:750
llvm::OpenMPIRBuilder::createParallel
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
Definition: OMPIRBuilder.cpp:505
llvm::MDString::get
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:473
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:138
llvm::omp::OMPScheduleType::Static
@ Static
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MDBuilder::createCallbackEncoding
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::OpenMPIRBuilder::LocationDescription::DL
DebugLoc DL
Definition: OMPIRBuilder.h:158
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::OpenMPIRBuilder::unrollLoopHeuristic
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
Definition: OMPIRBuilder.cpp:2123
llvm::ArrayType::get
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:602
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
Definition: OMPIRBuilder.cpp:150
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::OpenMPIRBuilder::createMaster
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
Definition: OMPIRBuilder.cpp:1225
llvm::MDNode
Metadata node.
Definition: Metadata.h:901
llvm::SplitBlockAndInsertIfThenElse
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition: BasicBlockUtils.cpp:1439
llvm::CodeGenOpt::Aggressive
@ Aggressive
Definition: CodeGen.h:56
llvm::OpenMPIRBuilder::createDebugKind
GlobalValue * createDebugKind(unsigned DebugLevel)
Create a global value containing the DebugLevel to control debuggin in the module.
Definition: OMPIRBuilder.cpp:248
llvm::OpenMPIRBuilder::createFlush
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Definition: OMPIRBuilder.cpp:877
llvm::CallBase::arg_end
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1309
llvm::DominatorTreeAnalysis::run
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Definition: Dominators.cpp:360
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Function::getFunction
const Function & getFunction() const
Definition: Function.h:136
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::OpenMPIRBuilder::OutlineInfo
Helper that contains information about regions we need to outline during finalization.
Definition: OMPIRBuilder.h:759
llvm::CanonicalLoopInfo
Class to represented the control flow structure of an OpenMP canonical loop.
Definition: OMPIRBuilder.h:1406
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Triple.h
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:749
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1528
TargetOptions.h
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:173
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:243
llvm::OpenMPIRBuilder::createOffloadMapnames
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Definition: OMPIRBuilder.cpp:3252
llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:691
llvm::BasicBlock::getUniquePredecessor
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:272
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:776
uint32_t
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1744
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantDataArray::getString
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:3041
llvm::GlobalObject::hasMetadata
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:582
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
getKmpcForDynamicNextForType
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
Definition: OMPIRBuilder.cpp:1590
llvm::OpenMPIRBuilder::MapperAllocas::ArgSizes
AllocaInst * ArgSizes
Definition: OMPIRBuilder.h:802
llvm::OpenMPIRBuilder::OutlineInfo::PostOutlineCB
PostOutlineCBTy PostOutlineCB
Definition: OMPIRBuilder.h:761
llvm::LoadInst::setAtomic
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:253
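A small sketch, assuming Builder, an integer type Int32Ty, and a pointer value Ptr (all placeholders):
  LoadInst *AtomicLoad = Builder.CreateLoad(Int32Ty, Ptr, "x.val");
  // Only the ordering is set here; the sync scope keeps its SyncScope::System default.
  AtomicLoad->setAtomic(AtomicOrdering::Monotonic);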
llvm::CodeGenOpt::Level
Level
Definition: CodeGen.h:52
llvm::MDNode::getDistinct
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1211
llvm::OpenMPIRBuilder::unrollLoopPartial
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
Definition: OMPIRBuilder.cpp:2301
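A usage sketch, assuming OMPBuilder, a DebugLoc DL, and a CanonicalLoopInfo *CLI for an existing canonical loop (placeholder names):
  CanonicalLoopInfo *UnrolledCLI = nullptr;
  OMPBuilder.unrollLoopPartial(DL, CLI, /*Factor=*/4, &UnrolledCLI);
  // Passing nullptr for the last argument is also possible when the unrolled
  // loop does not need to be referenced afterwards.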
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::OpenMPIRBuilder::createMasked
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
Definition: OMPIRBuilder.cpp:1249
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::OpenMPIRBuilder::applyWorkshareLoop
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier)
Modifies the canonical loop to be a workshare loop.
Definition: OMPIRBuilder.cpp:1563
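A sketch under assumed names: OMPBuilder, Builder, DL, a CanonicalLoopInfo *CLI, and an alloca insertion point AllocaIP in the function entry block:
  OpenMPIRBuilder::InsertPointTy AfterIP =
      OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
  Builder.restoreIP(AfterIP);  // continue emitting code after the workshare loop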
llvm::GlobalValue::CommonLinkage
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:58
llvm::LoopAnalysis::run
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopInfo.cpp:961
computeHeuristicUnrollFactor
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performing unroll factor for CLI.
Definition: OMPIRBuilder.cpp:2170
llvm::PointerUnion< const Value *, const PseudoSourceValue * >
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:675
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:308
llvm::CanonicalLoopInfo::getExit
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
Definition: OMPIRBuilder.h:1474
llvm::AtomicOrdering::Release
@ Release
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
llvm::ConstantAsMetadata
Definition: Metadata.h:412
llvm::OpenMPIRBuilder::createTaskwait
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
Definition: OMPIRBuilder.cpp:895
redirectTo
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
Definition: OMPIRBuilder.cpp:1718
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::OpenMPIRBuilder::ReductionInfo::ReductionGen
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Definition: OMPIRBuilder.h:569
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:348
llvm::AtomicRMWInst::FSub
@ FSub
*p = old - v
Definition: Instructions.h:766
llvm::OpenMPIRBuilder::createAtomicWrite
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for: X = Expr (only scalar data types).
Definition: OMPIRBuilder.cpp:3032
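A sketch with placeholder names (OMPBuilder, Builder, Loc, a pointer XAddr to the scalar location, and a value ExprVal to store); the remaining AtomicOpValue fields keep their defaults here:
  OpenMPIRBuilder::AtomicOpValue X;
  X.Var = XAddr;  // address of the scalar 'X'
  Builder.restoreIP(
      OMPBuilder.createAtomicWrite(Loc, X, ExprVal, AtomicOrdering::Release));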
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:862
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:766
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
llvm::AtomicRMWInst::And
@ And
*p = old & v
Definition: Instructions.h:746
llvm::CanonicalLoopInfo::invalidate
void invalidate()
Invalidate this loop.
Definition: OMPIRBuilder.cpp:3403
llvm::MCID::Branch
@ Branch
Definition: MCInstrDesc.h:156
llvm::Module::getContext
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:261
llvm::OpenMPIRBuilder::createCancel
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
Definition: OMPIRBuilder.cpp:416
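A sketch, assuming OMPBuilder, Builder, and a LocationDescription Loc (placeholders); a null IfCondition requests an unconditional cancellation of the named construct:
  Builder.restoreIP(OMPBuilder.createCancel(
      Loc, /*IfCondition=*/nullptr, omp::Directive::OMPD_parallel));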
llvm::omp::GV
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...
Definition: OMPGridValues.h:57
PassManager.h
OptimisticAttributes
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:738
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:176
llvm::ConstantArray::get
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1263
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::TargetIRAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &)
Definition: TargetTransformInfo.cpp:1133
llvm::CanonicalLoopInfo::getHeader
BasicBlock * getHeader() const
The header is the entry for each iteration.
Definition: OMPIRBuilder.h:1445
llvm::OpenMPIRBuilder::getOrCreateSrcLocStr
Constant * getOrCreateSrcLocStr(StringRef LocStr)
Return the (LLVM-IR) string describing the source location LocStr.
Definition: OMPIRBuilder.cpp:293
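A sketch using the overload shown above; the string follows the ";file;function;line;column;;" layout the OpenMP runtime expects, with placeholder contents, and the result is typically fed into an ident_t via getOrCreateIdent:
  Constant *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(";unknown;unknown;0;0;;");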
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
llvm::AnalysisManager::registerPass
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:847
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
UnrollThresholdFactor
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
getKmpcForDynamicInitForType
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
Definition: OMPIRBuilder.cpp:1574
llvm::MDNode::replaceOperandWith
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:877
llvm::OpenMPIRBuilder::createTargetDeinit
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime)
Create a runtime call for kmpc_target_deinit.
Definition: OMPIRBuilder.cpp:2806
llvm::MDBuilder
Definition: MDBuilder.h:35
llvm::OpenMPIRBuilder::ReductionInfo
Information about an OpenMP reduction.
Definition: OMPIRBuilder.h:548
llvm::AllocaInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:124
llvm::BasicBlock::back
const Instruction & back() const
Definition: BasicBlock.h:310
llvm::OpenMPIRBuilder::initialize
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
Definition: OMPIRBuilder.cpp:157
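A sketch of the usual setup, assuming a Module M; calling finalize() at the end outlines any pending regions:
  OpenMPIRBuilder OMPBuilder(M);
  OMPBuilder.initialize();  // must run before the first create*/apply* call
  // ... emit OpenMP constructs through OMPBuilder ...
  OMPBuilder.finalize();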
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:186
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:216
llvm::GlobalValue::PrivateLinkage
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:56
llvm::CanonicalLoopInfo::getLatch
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
Definition: OMPIRBuilder.h:1468
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Type::getPointerElementType
Type * getPointerElementType() const
Definition: Type.h:380
ModuleUtils.h
llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:436
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::remarks::Type::Failure
@ Failure
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
getFreshReductionFunc
Function * getFreshReductionFunc(Module &M)
Create a function with a unique name and a "void (i8*, i8*)" signature in the given module and return...
Definition: OMPIRBuilder.cpp:1053
llvm::AtomicRMWInst::FAdd
@ FAdd
*p = old + v
Definition: Instructions.h:763
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::OpenMPIRBuilder::emitBarrierImpl
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
Definition: OMPIRBuilder.cpp:369
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
llvm::PHINode
Definition: Instructions.h:2625
llvm::BasicBlock::removePredecessor
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:321
llvm::Function::removeFromParent
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:361
llvm::SmallVectorImpl< uint64_t >
llvm::AtomicRMWInst::BAD_BINOP
@ BAD_BINOP
Definition: Instructions.h:770
llvm::CodeExtractor::isEligible
bool isEligible() const
Test whether this code extractor is eligible.
Definition: CodeExtractor.cpp:620
llvm::CanonicalLoopInfo::getTripCount
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
Definition: OMPIRBuilder.h:1492
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::PassInstrumentationAnalysis
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Definition: PassManager.h:605
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:275
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:450
llvm::OpenMPIRBuilder::emitFlush
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
Definition: OMPIRBuilder.cpp:869
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::OpenMPIRBuilder::OutlineInfo::collectBlocks
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Definition: OMPIRBuilder.cpp:3286
llvm::OpenMPIRBuilder::MapperAllocas
Definition: OMPIRBuilder.h:799
llvm::OpenMPIRBuilder::applyStaticWorkshareLoop
InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk=nullptr)
Modifies the canonical loop to be a statically-scheduled workshare loop.
Definition: OMPIRBuilder.cpp:1472
BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4707
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:528
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3204
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4146
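A short sketch, assuming Builder, an i32 selector Sel, and pre-created blocks DefaultBB, Case0BB, and Case1BB (all placeholders):
  SwitchInst *SI = Builder.CreateSwitch(Sel, DefaultBB, /*NumCases=*/2);
  SI->addCase(Builder.getInt32(0), Case0BB);
  SI->addCase(Builder.getInt32(1), Case1BB);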
llvm::cl::desc
Definition: CommandLine.h:414
llvm::OpenMPIRBuilder::createSingle
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, llvm::Value *DidIt)
Generator for '#omp single'.
Definition: OMPIRBuilder.cpp:2391
llvm::AtomicRMWInst::UMax
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:758
llvm::OpenMPIRBuilder::createCopyinClauseBlocks
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
Definition: OMPIRBuilder.cpp:2659
llvm::OpenMPIRBuilder::createCopyPrivate
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
Definition: OMPIRBuilder.cpp:2370
llvm::SetVector< Value * >
llvm::omp::OMPScheduleType
OMPScheduleType
Definition: OMPConstants.h:113
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
Definition: OMPIRBuilder.cpp:92
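A sketch, assuming OMPBuilder, Builder, the Module M, and already-computed Ident and ThreadId values (placeholder names); __kmpc_barrier takes an ident_t pointer and the global thread id:
  FunctionCallee BarrierFn = OMPBuilder.getOrCreateRuntimeFunction(
      M, omp::RuntimeFunction::OMPRTL___kmpc_barrier);
  Builder.CreateCall(BarrierFn, {Ident, ThreadId});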
LoopPeel.h
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
Value.h
llvm::OpenMPIRBuilder::ReductionInfo::AtomicReductionGen
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Definition: OMPIRBuilder.h:575
TargetRegistry.h
llvm::CanonicalLoopInfo::getBody
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
Definition: OMPIRBuilder.h:1460
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:674
llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:521
getKmpcForStaticInitForType
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Definition: OMPIRBuilder.cpp:1449
llvm::Directive
Definition: DirectiveEmitter.h:100
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, Optional< bool > UserAllowPeeling, Optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:612
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::AtomicOrdering::NotAtomic
@ NotAtomic
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::Instruction::moveBefore
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:97
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::Target::createTargetMachine
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM=None, CodeGenOpt::Level OL=CodeGenOpt::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Definition: TargetRegistry.h:449
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::OpenMPIRBuilder::LocationDescription::IP
InsertPointTy IP
Definition: OMPIRBuilder.h:157
llvm::AtomicRMWInst::Max
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:754