LLVM  15.0.0git
ExpandVectorPredication.cpp
Go to the documentation of this file.
1 //===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for vector predication intrinsics, allowing
10 // targets to enable vector predication until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/Statistic.h"
18 #include "llvm/CodeGen/Passes.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/InstIterator.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/Intrinsics.h"
26 #include "llvm/InitializePasses.h"
27 #include "llvm/Pass.h"
29 #include "llvm/Support/Compiler.h"
30 #include "llvm/Support/Debug.h"
31 
32 using namespace llvm;
33 
36 
37 // Keep this in sync with TargetTransformInfo::VPLegalization.
38 #define VPINTERNAL_VPLEGAL_CASES \
39  VPINTERNAL_CASE(Legal) \
40  VPINTERNAL_CASE(Discard) \
41  VPINTERNAL_CASE(Convert)
42 
43 #define VPINTERNAL_CASE(X) "|" #X
44 
45 // Override options.
47  "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
48  cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
49  ". If non-empty, ignore "
50  "TargetTransformInfo and "
51  "always use this transformation for the %evl parameter (Used in "
52  "testing)."));
53 
55  "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
56  cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
57  ". If non-empty, Ignore "
58  "TargetTransformInfo and "
59  "always use this transformation for the %mask parameter (Used in "
60  "testing)."));
61 
62 #undef VPINTERNAL_CASE
63 #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
64 
65 static VPTransform parseOverrideOption(const std::string &TextOpt) {
67 }
68 
69 #undef VPINTERNAL_VPLEGAL_CASES
70 
71 // Whether any override options are set.
72 static bool anyExpandVPOverridesSet() {
73  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
74 }
75 
76 #define DEBUG_TYPE "expandvp"
77 
78 STATISTIC(NumFoldedVL, "Number of folded vector length params");
79 STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
80 
81 ///// Helpers {
82 
83 /// \returns Whether the vector mask \p MaskVal has all lane bits set.
84 static bool isAllTrueMask(Value *MaskVal) {
85  auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
86  return ConstVec && ConstVec->isAllOnesValue();
87 }
88 
89 /// \returns A non-excepting divisor constant for this type.
90 static Constant *getSafeDivisor(Type *DivTy) {
91  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
92  return ConstantInt::get(DivTy, 1u, false);
93 }
94 
95 /// Transfer operation properties from \p OldVPI to \p NewVal.
96 static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
97  auto *NewInst = dyn_cast<Instruction>(&NewVal);
98  if (!NewInst || !isa<FPMathOperator>(NewVal))
99  return;
100 
101  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
102  if (!OldFMOp)
103  return;
104 
105  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
106 }
107 
108 /// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
109 /// OldVP gets erased.
110 static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
111  transferDecorations(NewOp, OldOp);
112  OldOp.replaceAllUsesWith(&NewOp);
113  OldOp.eraseFromParent();
114 }
115 
116 static bool maySpeculateLanes(VPIntrinsic &VPI) {
117  // The result of VP reductions depends on the mask and evl.
118  if (isa<VPReductionIntrinsic>(VPI))
119  return false;
120  // Fallback to whether the intrinsic is speculatable.
122  unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
123  return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
124  cast<Operator>(&VPI));
125 }
126 
127 //// } Helpers
128 
129 namespace {
130 
131 // Expansion pass state at function scope.
132 struct CachingVPExpander {
133  Function &F;
134  const TargetTransformInfo &TTI;
135 
136  /// \returns A (fixed length) vector with ascending integer indices
137  /// (<0, 1, ..., NumElems-1>).
138  /// \p Builder
139  /// Used for instruction creation.
140  /// \p LaneTy
141  /// Integer element type of the result vector.
142  /// \p NumElems
143  /// Number of vector elements.
144  Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
145  unsigned NumElems);
146 
147  /// \returns A bitmask that is true where the lane position is less-than \p
148  /// EVLParam
149  ///
150  /// \p Builder
151  /// Used for instruction creation.
152 /// \p EVLParam
153  /// The explicit vector length parameter to test against the lane
154  /// positions.
155  /// \p ElemCount
156  /// Static (potentially scalable) number of vector elements.
157  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
158  ElementCount ElemCount);
159 
160  Value *foldEVLIntoMask(VPIntrinsic &VPI);
161 
162  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
163  /// length of the operation.
164  void discardEVLParameter(VPIntrinsic &PI);
165 
166 /// \brief Lower this VP binary operator to an unpredicated binary operator.
167  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
168  VPIntrinsic &PI);
169 
170  /// \brief Lower this VP reduction to a call to an unpredicated reduction
171  /// intrinsic.
172  Value *expandPredicationInReduction(IRBuilder<> &Builder,
174 
175 /// \brief Query TTI and expand the vector predication in \p PI accordingly.
176  Value *expandPredication(VPIntrinsic &PI);
177 
178  /// \brief Determine how and whether the VPIntrinsic \p VPI shall be
179  /// expanded. This overrides TTI with the cl::opts listed at the top of this
180  /// file.
181  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
182  bool UsingTTIOverrides;
183 
184 public:
185  CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
186  : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
187 
188  bool expandVectorPredication();
189 };
190 
191 //// CachingVPExpander {
192 
193 Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
194  unsigned NumElems) {
195  // TODO add caching
196  SmallVector<Constant *, 16> ConstElems;
197 
198  for (unsigned Idx = 0; Idx < NumElems; ++Idx)
199  ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
200 
201  return ConstantVector::get(ConstElems);
202 }
203 
204 Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
205  Value *EVLParam,
206  ElementCount ElemCount) {
207  // TODO add caching
208  // Scalable vector %evl conversion.
209  if (ElemCount.isScalable()) {
210  auto *M = Builder.GetInsertBlock()->getModule();
211  Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
212  Function *ActiveMaskFunc = Intrinsic::getDeclaration(
213  M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
214  // `get_active_lane_mask` performs an implicit less-than comparison.
215  Value *ConstZero = Builder.getInt32(0);
216  return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
217  }
218 
219  // Fixed vector %evl conversion.
220  Type *LaneTy = EVLParam->getType();
221  unsigned NumElems = ElemCount.getFixedValue();
222  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
223  Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
224  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
225 }
226 
227 Value *
228 CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
229  VPIntrinsic &VPI) {
231  "Implicitly dropping %evl in non-speculatable operator!");
232 
233  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
235 
236  Value *Op0 = VPI.getOperand(0);
237  Value *Op1 = VPI.getOperand(1);
238  Value *Mask = VPI.getMaskParam();
239 
240  // Blend in safe operands.
241  if (Mask && !isAllTrueMask(Mask)) {
242  switch (OC) {
243  default:
244  // Can safely ignore the predicate.
245  break;
246 
247  // Division operators need a safe divisor on masked-off lanes (1).
248  case Instruction::UDiv:
249  case Instruction::SDiv:
250  case Instruction::URem:
251  case Instruction::SRem:
252  // 2nd operand must not be zero.
253  Value *SafeDivisor = getSafeDivisor(VPI.getType());
254  Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
255  }
256  }
257 
258  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());
259 
260  replaceOperation(*NewBinOp, VPI);
261  return NewBinOp;
262 }
263 
264 static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
265  Type *EltTy) {
266  bool Negative = false;
267  unsigned EltBits = EltTy->getScalarSizeInBits();
268  switch (VPI.getIntrinsicID()) {
269  default:
270  llvm_unreachable("Expecting a VP reduction intrinsic");
271  case Intrinsic::vp_reduce_add:
272  case Intrinsic::vp_reduce_or:
273  case Intrinsic::vp_reduce_xor:
274  case Intrinsic::vp_reduce_umax:
275  return Constant::getNullValue(EltTy);
276  case Intrinsic::vp_reduce_mul:
277  return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
278  case Intrinsic::vp_reduce_and:
279  case Intrinsic::vp_reduce_umin:
280  return ConstantInt::getAllOnesValue(EltTy);
281  case Intrinsic::vp_reduce_smin:
282  return ConstantInt::get(EltTy->getContext(),
283  APInt::getSignedMaxValue(EltBits));
284  case Intrinsic::vp_reduce_smax:
285  return ConstantInt::get(EltTy->getContext(),
286  APInt::getSignedMinValue(EltBits));
287  case Intrinsic::vp_reduce_fmax:
288  Negative = true;
290  case Intrinsic::vp_reduce_fmin: {
291  FastMathFlags Flags = VPI.getFastMathFlags();
292  const fltSemantics &Semantics = EltTy->getFltSemantics();
293  return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
294  : !Flags.noInfs()
295  ? ConstantFP::getInfinity(EltTy, Negative)
296  : ConstantFP::get(EltTy,
297  APFloat::getLargest(Semantics, Negative));
298  }
299  case Intrinsic::vp_reduce_fadd:
300  return ConstantFP::getNegativeZero(EltTy);
301  case Intrinsic::vp_reduce_fmul:
302  return ConstantFP::get(EltTy, 1.0);
303  }
304 }
305 
306 Value *
307 CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
308  VPReductionIntrinsic &VPI) {
310  "Implicitly dropping %evl in non-speculatable operator!");
311 
312  Value *Mask = VPI.getMaskParam();
313  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());
314 
315  // Insert neutral element in masked-out positions
316  if (Mask && !isAllTrueMask(Mask)) {
317  auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
318  auto *NeutralVector = Builder.CreateVectorSplat(
319  cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
320  RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
321  }
322 
323  Value *Reduction;
324  Value *Start = VPI.getOperand(VPI.getStartParamPos());
325 
326  switch (VPI.getIntrinsicID()) {
327  default:
328  llvm_unreachable("Impossible reduction kind");
329  case Intrinsic::vp_reduce_add:
330  Reduction = Builder.CreateAddReduce(RedOp);
331  Reduction = Builder.CreateAdd(Reduction, Start);
332  break;
333  case Intrinsic::vp_reduce_mul:
334  Reduction = Builder.CreateMulReduce(RedOp);
335  Reduction = Builder.CreateMul(Reduction, Start);
336  break;
337  case Intrinsic::vp_reduce_and:
338  Reduction = Builder.CreateAndReduce(RedOp);
339  Reduction = Builder.CreateAnd(Reduction, Start);
340  break;
341  case Intrinsic::vp_reduce_or:
342  Reduction = Builder.CreateOrReduce(RedOp);
343  Reduction = Builder.CreateOr(Reduction, Start);
344  break;
345  case Intrinsic::vp_reduce_xor:
346  Reduction = Builder.CreateXorReduce(RedOp);
347  Reduction = Builder.CreateXor(Reduction, Start);
348  break;
349  case Intrinsic::vp_reduce_smax:
350  Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
351  Reduction =
352  Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
353  break;
354  case Intrinsic::vp_reduce_smin:
355  Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
356  Reduction =
357  Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
358  break;
359  case Intrinsic::vp_reduce_umax:
360  Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
361  Reduction =
362  Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
363  break;
364  case Intrinsic::vp_reduce_umin:
365  Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
366  Reduction =
367  Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
368  break;
369  case Intrinsic::vp_reduce_fmax:
370  Reduction = Builder.CreateFPMaxReduce(RedOp);
372  Reduction =
373  Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
374  break;
375  case Intrinsic::vp_reduce_fmin:
376  Reduction = Builder.CreateFPMinReduce(RedOp);
378  Reduction =
379  Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
380  break;
381  case Intrinsic::vp_reduce_fadd:
382  Reduction = Builder.CreateFAddReduce(Start, RedOp);
383  break;
384  case Intrinsic::vp_reduce_fmul:
385  Reduction = Builder.CreateFMulReduce(Start, RedOp);
386  break;
387  }
388 
390  return Reduction;
391 }
392 
393 void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
394  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
395 
396  if (VPI.canIgnoreVectorLengthParam())
397  return;
398 
399  Value *EVLParam = VPI.getVectorLengthParam();
400  if (!EVLParam)
401  return;
402 
403  ElementCount StaticElemCount = VPI.getStaticVectorLength();
404  Value *MaxEVL = nullptr;
406  if (StaticElemCount.isScalable()) {
407  // TODO add caching
408  auto *M = VPI.getModule();
409  Function *VScaleFunc =
410  Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
412  Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
413  Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
414  MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
415  /*NUW*/ true, /*NSW*/ false);
416  } else {
417  MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
418  }
419  VPI.setVectorLengthParam(MaxEVL);
420 }
421 
422 Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
423  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
424 
425  IRBuilder<> Builder(&VPI);
426 
427  // Ineffective %evl parameter and so nothing to do here.
428  if (VPI.canIgnoreVectorLengthParam())
429  return &VPI;
430 
431  // Only VP intrinsics can have an %evl parameter.
432  Value *OldMaskParam = VPI.getMaskParam();
433  Value *OldEVLParam = VPI.getVectorLengthParam();
434  assert(OldMaskParam && "no mask param to fold the vl param into");
435  assert(OldEVLParam && "no EVL param to fold away");
436 
437  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
438  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');
439 
440  // Convert the %evl predication into vector mask predication.
441  ElementCount ElemCount = VPI.getStaticVectorLength();
442  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
443  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
444  VPI.setMaskParam(NewMaskParam);
445 
446  // Drop the %evl parameter.
447  discardEVLParameter(VPI);
449  "transformation did not render the evl param ineffective!");
450 
451  // Reassess the modified instruction.
452  return &VPI;
453 }
454 
455 Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
456  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
457 
458  IRBuilder<> Builder(&VPI);
459 
460  // Try lowering to a LLVM instruction first.
461  auto OC = VPI.getFunctionalOpcode();
462 
463  if (OC && Instruction::isBinaryOp(*OC))
464  return expandPredicationInBinaryOperator(Builder, VPI);
465 
466  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
467  return expandPredicationInReduction(Builder, *VPRI);
468 
469  return &VPI;
470 }
471 
472 //// } CachingVPExpander
473 
474 struct TransformJob {
475  VPIntrinsic *PI;
477  TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
478  : PI(PI), Strategy(InitStrat) {}
479 
480  bool isDone() const { return Strategy.shouldDoNothing(); }
481 };
482 
483 void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
484  // Operations with speculatable lanes do not strictly need predication.
485  if (maySpeculateLanes(VPI)) {
486  // Converting a speculatable VP intrinsic means dropping %mask and %evl.
487  // No need to expand %evl into the %mask only to ignore that code.
488  if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
490  return;
491  }
492 
493  // We have to preserve the predicating effect of %evl for this
494  // non-speculatable VP intrinsic.
495  // 1) Never discard %evl.
496  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
497  // %evl gets folded into %mask.
498  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
499  (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
501  }
502 }
503 
505 CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
506  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
507  if (LLVM_LIKELY(!UsingTTIOverrides)) {
508  // No overrides - we are in production.
509  return VPStrat;
510  }
511 
512  // Overrides set - we are in testing, the following does not need to be
513  // efficient.
515  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
516  return VPStrat;
517 }
518 
519 /// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
520 bool CachingVPExpander::expandVectorPredication() {
522 
523  // Collect all VPIntrinsics that need expansion and determine their expansion
524  // strategy.
525  for (auto &I : instructions(F)) {
526  auto *VPI = dyn_cast<VPIntrinsic>(&I);
527  if (!VPI)
528  continue;
529  auto VPStrat = getVPLegalizationStrategy(*VPI);
530  sanitizeStrategy(*VPI, VPStrat);
531  if (!VPStrat.shouldDoNothing())
532  Worklist.emplace_back(VPI, VPStrat);
533  }
534  if (Worklist.empty())
535  return false;
536 
537  // Transform all VPIntrinsics on the worklist.
538  LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
539  << " instructions ::::\n");
540  for (TransformJob Job : Worklist) {
541  // Transform the EVL parameter.
542  switch (Job.Strategy.EVLParamStrategy) {
544  break;
546  discardEVLParameter(*Job.PI);
547  break;
549  if (foldEVLIntoMask(*Job.PI))
550  ++NumFoldedVL;
551  break;
552  }
553  Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
554 
555  // Replace with a non-predicated operation.
556  switch (Job.Strategy.OpStrategy) {
558  break;
560  llvm_unreachable("Invalid strategy for operators.");
562  expandPredication(*Job.PI);
563  ++NumLoweredVPOps;
564  break;
565  }
566  Job.Strategy.OpStrategy = VPLegalization::Legal;
567 
568  assert(Job.isDone() && "incomplete transformation");
569  }
570 
571  return true;
572 }
573 class ExpandVectorPredication : public FunctionPass {
574 public:
575  static char ID;
576  ExpandVectorPredication() : FunctionPass(ID) {
578  }
579 
580  bool runOnFunction(Function &F) override {
581  const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
582  CachingVPExpander VPExpander(F, *TTI);
583  return VPExpander.expandVectorPredication();
584  }
585 
586  void getAnalysisUsage(AnalysisUsage &AU) const override {
588  AU.setPreservesCFG();
589  }
590 };
591 } // namespace
592 
594 INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
595  "Expand vector predication intrinsics", false, false)
598 INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
599  "Expand vector predication intrinsics", false, false)
600 
602  return new ExpandVectorPredication();
603 }
604 
607  const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
608  CachingVPExpander VPExpander(F, TTI);
609  if (!VPExpander.expandVectorPredication())
610  return PreservedAnalyses::all();
612  PA.preserveSet<CFGAnalyses>();
613  return PA;
614 }
transferDecorations
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI)
Transfer operation properties from OldVPI to NewVal.
Definition: ExpandVectorPredication.cpp:96
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2479
llvm::initializeExpandVectorPredicationPass
void initializeExpandVectorPredicationPass(PassRegistry &)
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
expandvp
expandvp
Definition: ExpandVectorPredication.cpp:598
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1418
IntrinsicInst.h
llvm::ElementCount
Definition: TypeSize.h:404
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
InstIterator.h
llvm::Function
Definition: Function.h:60
Pass.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::TargetTransformInfo::getVPLegalizationStrategy
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
Definition: TargetTransformInfo.cpp:1093
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
Definition: Type.cpp:67
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::VPIntrinsic::setVectorLengthParam
void setVectorLengthParam(Value *)
Definition: IntrinsicInst.cpp:332
llvm::IRBuilder<>
ValueTracking.h
llvm::VPIntrinsic::canIgnoreVectorLengthParam
bool canIgnoreVectorLengthParam() const
Definition: IntrinsicInst.cpp:447
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::TargetTransformInfo::VPLegalization
Definition: TargetTransformInfo.h:1448
llvm::APInt::getSignedMaxValue
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetTransformInfo::VPLegalization::Convert
@ Convert
Definition: TargetTransformInfo.h:1455
llvm::Optional< unsigned >
parseOverrideOption
static VPTransform parseOverrideOption(const std::string &TextOpt)
Definition: ExpandVectorPredication.cpp:65
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::ConstantFP::getNegativeZero
static Constant * getNegativeZero(Type *Ty)
Definition: Constants.h:293
llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition: FMF.h:67
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2137
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::VPIntrinsic::setMaskParam
void setMaskParam(Value *)
Definition: IntrinsicInst.cpp:321
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
intrinsics
Expand vector predication intrinsics
Definition: ExpandVectorPredication.cpp:599
Constants.h
llvm::VPReductionIntrinsic
This represents vector predication reduction intrinsics.
Definition: IntrinsicInst.h:455
Intrinsics.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:395
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
llvm::isSafeToSpeculativelyExecuteWithOpcode
bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Operator *Inst, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
Definition: ValueTracking.cpp:4698
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
llvm::VPIntrinsic::getVectorLengthParam
Value * getVectorLengthParam() const
Definition: IntrinsicInst.cpp:326
llvm::TargetTransformInfo::VPLegalization::EVLParamStrategy
VPTransform EVLParamStrategy
Definition: TargetTransformInfo.h:1462
Passes.h
llvm::VPIntrinsic::getFunctionalOpcode
Optional< unsigned > getFunctionalOpcode() const
Definition: IntrinsicInst.h:446
ExpandVectorPredication.h
llvm::TargetTransformInfo::VPLegalization::VPTransform
VPTransform
Definition: TargetTransformInfo.h:1449
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::APFloat
Definition: APFloat.h:701
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::ConstantFP::getQNaN
static Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
Definition: Constants.cpp:1027
llvm::VPIntrinsic::getStaticVectorLength
ElementCount getStaticVectorLength() const
Definition: IntrinsicInst.cpp:298
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
EVLTransformOverride
static cl::opt< std::string > EVLTransformOverride("expandvp-override-evl-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, ignore " "TargetTransformInfo and " "always use this transformation for the %evl parameter (Used in " "testing)."))
llvm::TargetTransformInfo::VPLegalization::OpStrategy
VPTransform OpStrategy
Definition: TargetTransformInfo.h:1468
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
replaceOperation
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp)
Transfer all properties from OldOp to NewOp and replace all uses.
Definition: ExpandVectorPredication.cpp:110
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::TargetTransformInfo::VPLegalization::shouldDoNothing
bool shouldDoNothing() const
Definition: TargetTransformInfo.h:1470
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
anyExpandVPOverridesSet
static bool anyExpandVPOverridesSet()
Definition: ExpandVectorPredication.cpp:72
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetTransformInfo::VPLegalization::Discard
@ Discard
Definition: TargetTransformInfo.h:1453
llvm::SystemZISD::OC
@ OC
Definition: SystemZISelLowering.h:122
llvm::createExpandVectorPredicationPass
FunctionPass * createExpandVectorPredicationPass()
This pass expands the vector predication intrinsics into unpredicated instructions with selects or ju...
Definition: ExpandVectorPredication.cpp:601
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition: Instruction.h:162
predication
loop predication
Definition: LoopPredication.cpp:354
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:296
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2127
llvm::Instruction::getFastMathFlags
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Definition: Instruction.cpp:289
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
llvm::LinearPolySize::getFixedValue
ScalarTy getFixedValue() const
Definition: TypeSize.h:312
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
Compiler.h
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1397
llvm::VPReductionIntrinsic::getStartParamPos
unsigned getStartParamPos() const
Definition: IntrinsicInst.cpp:629
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
MaskTransformOverride
static cl::opt< std::string > MaskTransformOverride("expandvp-override-mask-transform", cl::init(""), cl::Hidden, cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES ". If non-empty, Ignore " "TargetTransformInfo and " "always use this transformation for the %mask parameter (Used in " "testing)."))
llvm::ExpandVectorPredicationPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: ExpandVectorPredication.cpp:606
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", "Expand vector predication intrinsics", false, false) INITIALIZE_PASS_END(ExpandVectorPredication
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2142
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1296
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::VPIntrinsic
This is the common base class for vector predication intrinsics.
Definition: IntrinsicInst.h:391
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:197
VPLegalization
TargetTransformInfo::VPLegalization VPLegalization
Definition: ExpandVectorPredication.cpp:34
llvm::VPReductionIntrinsic::getVectorParamPos
unsigned getVectorParamPos() const
Definition: IntrinsicInst.cpp:625
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in the specified type.
Definition: Constants.cpp:975
maySpeculateLanes
static bool maySpeculateLanes(VPIntrinsic &VPI)
Definition: ExpandVectorPredication.cpp:116
llvm::fltSemantics
Definition: APFloat.cpp:54
Function.h
VPINTERNAL_VPLEGAL_CASES
#define VPINTERNAL_VPLEGAL_CASES
Definition: ExpandVectorPredication.cpp:38
llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:199
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:786
Instructions.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::TargetTransformInfo::VPLegalization::Legal
@ Legal
Definition: TargetTransformInfo.h:1451
getSafeDivisor
static Constant * getSafeDivisor(Type *DivTy)
Definition: ExpandVectorPredication.cpp:90
TargetTransformInfo.h
LLVM_LIKELY
#define LLVM_LIKELY(EXPR)
Definition: Compiler.h:219
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::VPIntrinsic::getMaskParam
Value * getMaskParam() const
Definition: IntrinsicInst.cpp:315
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:405
isAllTrueMask
static bool isAllTrueMask(Value *MaskVal)
Definition: ExpandVectorPredication.cpp:84
Reduction
loop Loop Strength Reduction
Definition: LoopStrengthReduce.cpp:6711
llvm::Optional::value_or
constexpr T value_or(U &&alt) const &
Definition: Optional.h:318
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:668
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2132
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38