LLVM  9.0.0svn
ARMCodeGenPrepare.cpp
Go to the documentation of this file.
1 //===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass inserts intrinsics to handle small types that would otherwise be
11 /// promoted during legalization. Here we can manually promote types or insert
12 /// intrinsics which can handle narrow types that aren't supported by the
13 /// register classes.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
38 
39 #define DEBUG_TYPE "arm-codegenprepare"
40 
41 using namespace llvm;
42 
43 static cl::opt<bool>
44 DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true),
45  cl::desc("Disable ARM specific CodeGenPrepare pass"));
46 
47 static cl::opt<bool>
48 EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false),
49  cl::desc("Use DSP instructions for scalar operations"));
50 
51 static cl::opt<bool>
52 EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
53  cl::desc("Use DSP instructions for scalar operations\
54  with immediate operands"));
55 
56 // The goal of this pass is to enable more efficient code generation for
57 // operations on narrow types (i.e. types with < 32-bits) and this is a
58 // motivating IR code example:
59 //
60 // define hidden i32 @cmp(i8 zeroext) {
61 // %2 = add i8 %0, -49
62 // %3 = icmp ult i8 %2, 3
63 // ..
64 // }
65 //
66 // The issue here is that i8 is type-legalized to i32 because i8 is not a
67 // legal type. Thus, arithmetic is done in integer-precision, but then the
68 // byte value is masked out as follows:
69 //
70 // t19: i32 = add t4, Constant:i32<-49>
71 // t24: i32 = and t19, Constant:i32<255>
72 //
73 // Consequently, we generate code like this:
74 //
75 // subs r0, #49
76 // uxtb r1, r0
77 // cmp r1, #3
78 //
79 // This shows that masking out the byte value results in generation of
80 // the UXTB instruction. This is not optimal as r0 already contains the byte
81 // value we need, and so instead we can just generate:
82 //
83 // sub.w r1, r0, #49
84 // cmp r1, #3
85 //
86 // We achieve this by type promoting the IR to i32 like so for this example:
87 //
88 // define i32 @cmp(i8 zeroext %c) {
89 // %0 = zext i8 %c to i32
90 // %c.off = add i32 %0, -49
91 // %1 = icmp ult i32 %c.off, 3
92 // ..
93 // }
94 //
95 // For this to be valid and legal, we need to prove that the i32 add is
96 // producing the same value as the i8 addition, and that e.g. no overflow
97 // happens.
98 //
99 // A brief sketch of the algorithm and some terminology.
100 // We pattern match interesting IR patterns:
101 // - which have "sources": instructions producing narrow values (i8, i16), and
102 // - they have "sinks": instructions consuming these narrow values.
103 //
104 // We collect all instructions connecting sources and sinks in a worklist, so
105 // that we can mutate these instructions and perform type promotion when it is
106 // legal to do so.
107 
108 namespace {
109 class IRPromoter {
110  SmallPtrSet<Value*, 8> NewInsts;
111  SmallPtrSet<Instruction*, 4> InstsToRemove;
113  SmallPtrSet<Value*, 8> Promoted;
114  Module *M = nullptr;
115  LLVMContext &Ctx;
116  // The type we promote to: always i32
117  IntegerType *ExtTy = nullptr;
118  // The type of the value that the search began from, either i8 or i16.
119  // This defines the max range of the values that we allow in the promoted
120  // tree.
121  IntegerType *OrigTy = nullptr;
122  SetVector<Value*> *Visited;
123  SmallPtrSetImpl<Value*> *Sources;
125  SmallPtrSetImpl<Instruction*> *SafeToPromote;
126 
127  void ReplaceAllUsersOfWith(Value *From, Value *To);
128  void PrepareConstants(void);
129  void ExtendSources(void);
130  void ConvertTruncs(void);
131  void PromoteTree(void);
132  void TruncateSinks(void);
133  void Cleanup(void);
134 
135 public:
136  IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
137  ExtTy(Type::getInt32Ty(Ctx)) { }
138 
139 
140  void Mutate(Type *OrigTy,
141  SetVector<Value*> &Visited,
142  SmallPtrSetImpl<Value*> &Sources,
144  SmallPtrSetImpl<Instruction*> &SafeToPromote);
145 };
146 
147 class ARMCodeGenPrepare : public FunctionPass {
148  const ARMSubtarget *ST = nullptr;
149  IRPromoter *Promoter = nullptr;
150  std::set<Value*> AllVisited;
151  SmallPtrSet<Instruction*, 8> SafeToPromote;
152 
153  bool isSafeOverflow(Instruction *I);
154  bool isSupportedValue(Value *V);
155  bool isLegalToPromote(Value *V);
156  bool TryToPromote(Value *V);
157 
158 public:
159  static char ID;
160  static unsigned TypeSize;
161  Type *OrigTy = nullptr;
162 
163  ARMCodeGenPrepare() : FunctionPass(ID) {}
164 
165  void getAnalysisUsage(AnalysisUsage &AU) const override {
167  }
168 
169  StringRef getPassName() const override { return "ARM IR optimizations"; }
170 
171  bool doInitialization(Module &M) override;
172  bool runOnFunction(Function &F) override;
173  bool doFinalization(Module &M) override;
174 };
175 
176 }
177 
178 static bool GenerateSignBits(Value *V) {
179  if (auto *Arg = dyn_cast<Argument>(V))
180  return Arg->hasSExtAttr();
181 
182  if (!isa<Instruction>(V))
183  return false;
184 
185  unsigned Opc = cast<Instruction>(V)->getOpcode();
186  return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
187  Opc == Instruction::SRem || Opc == Instruction::SExt ||
188  Opc == Instruction::SIToFP;
189 }
190 
191 static bool EqualTypeSize(Value *V) {
192  return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
193 }
194 
195 static bool LessOrEqualTypeSize(Value *V) {
196  return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
197 }
198 
199 static bool GreaterThanTypeSize(Value *V) {
200  return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
201 }
202 
203 static bool LessThanTypeSize(Value *V) {
204  return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize;
205 }
206 
207 /// Some instructions can use 8- and 16-bit operands, and we don't need to
208 /// promote anything larger. We disallow booleans to make life easier when
209 /// dealing with icmps but allow any other integer that is <= 16 bits. Void
210 /// types are accepted so we can handle switches.
211 static bool isSupportedType(Value *V) {
212  Type *Ty = V->getType();
213 
214  // Allow voids and pointers, these won't be promoted.
215  if (Ty->isVoidTy() || Ty->isPointerTy())
216  return true;
217 
218  if (auto *Ld = dyn_cast<LoadInst>(V))
219  Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
220 
221  if (!isa<IntegerType>(Ty) ||
222  cast<IntegerType>(V->getType())->getBitWidth() == 1)
223  return false;
224 
225  return LessOrEqualTypeSize(V);
226 }
227 
228 /// Return true if the given value is a source in the use-def chain, producing
229 /// a narrow 'TypeSize' value. These values will be zext to start the promotion
230 /// of the tree to i32. We guarantee that these won't populate the upper bits
231 /// of the register. ZExt on the loads will be free, and the same for call
232 /// return values because we only accept ones that guarantee a zeroext ret val.
233 /// Many arguments will have the zeroext attribute too, so those would be free
234 /// too.
235 static bool isSource(Value *V) {
236  if (!isa<IntegerType>(V->getType()))
237  return false;
238 
239  // TODO Allow zext to be sources.
240  if (isa<Argument>(V))
241  return true;
242  else if (isa<LoadInst>(V))
243  return true;
244  else if (isa<BitCastInst>(V))
245  return true;
246  else if (auto *Call = dyn_cast<CallInst>(V))
247  return Call->hasRetAttr(Attribute::AttrKind::ZExt);
248  else if (auto *Trunc = dyn_cast<TruncInst>(V))
249  return EqualTypeSize(Trunc);
250  return false;
251 }
252 
253 /// Return true if V will require any promoted values to be truncated for the
254 /// the IR to remain valid. We can't mutate the value type of these
255 /// instructions.
256 static bool isSink(Value *V) {
257  // TODO The truncate also isn't actually necessary because we would already
258  // proved that the data value is kept within the range of the original data
259  // type.
260 
261  // Sinks are:
262  // - points where the value in the register is being observed, such as an
263  // icmp, switch or store.
264  // - points where value types have to match, such as calls and returns.
265  // - zext are included to ease the transformation and are generally removed
266  // later on.
267  if (auto *Store = dyn_cast<StoreInst>(V))
268  return LessOrEqualTypeSize(Store->getValueOperand());
269  if (auto *Return = dyn_cast<ReturnInst>(V))
270  return LessOrEqualTypeSize(Return->getReturnValue());
271  if (auto *ZExt = dyn_cast<ZExtInst>(V))
272  return GreaterThanTypeSize(ZExt);
273  if (auto *Switch = dyn_cast<SwitchInst>(V))
274  return LessThanTypeSize(Switch->getCondition());
275  if (auto *ICmp = dyn_cast<ICmpInst>(V))
276  return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
277 
278  return isa<CallInst>(V);
279 }
280 
281 /// Return whether the instruction can be promoted within any modifications to
282 /// its operands or result.
283 bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
284  // FIXME Do we need NSW too?
285  if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
286  return true;
287 
288  // We can support a, potentially, overflowing instruction (I) if:
289  // - It is only used by an unsigned icmp.
290  // - The icmp uses a constant.
291  // - The overflowing value (I) is decreasing, i.e would underflow - wrapping
292  // around zero to become a larger number than before.
293  // - The underflowing instruction (I) also uses a constant.
294  //
295  // We can then use the two constants to calculate whether the result would
296  // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
297  // just underflows the range, the icmp would give the same result whether the
298  // result has been truncated or not. We calculate this by:
299  // - Zero extending both constants, if needed, to 32-bits.
300  // - Take the absolute value of I's constant, adding this to the icmp const.
301  // - Check that this value is not out of range for small type. If it is, it
302  // means that it has underflowed enough to wrap around the icmp constant.
303  //
304  // For example:
305  //
306  // %sub = sub i8 %a, 2
307  // %cmp = icmp ule i8 %sub, 254
308  //
309  // If %a = 0, %sub = -2 == FE == 254
310  // But if this is evalulated as a i32
311  // %sub = -2 == FF FF FF FE == 4294967294
312  // So the unsigned compares (i8 and i32) would not yield the same result.
313  //
314  // Another way to look at it is:
315  // %a - 2 <= 254
316  // %a + 2 <= 254 + 2
317  // %a <= 256
318  // And we can't represent 256 in the i8 format, so we don't support it.
319  //
320  // Whereas:
321  //
322  // %sub i8 %a, 1
323  // %cmp = icmp ule i8 %sub, 254
324  //
325  // If %a = 0, %sub = -1 == FF == 255
326  // As i32:
327  // %sub = -1 == FF FF FF FF == 4294967295
328  //
329  // In this case, the unsigned compare results would be the same and this
330  // would also be true for ult, uge and ugt:
331  // - (255 < 254) == (0xFFFFFFFF < 254) == false
332  // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
333  // - (255 > 254) == (0xFFFFFFFF > 254) == true
334  // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
335  //
336  // To demonstrate why we can't handle increasing values:
337  //
338  // %add = add i8 %a, 2
339  // %cmp = icmp ult i8 %add, 127
340  //
341  // If %a = 254, %add = 256 == (i8 1)
342  // As i32:
343  // %add = 256
344  //
345  // (1 < 127) != (256 < 127)
346 
347  unsigned Opc = I->getOpcode();
348  if (Opc != Instruction::Add && Opc != Instruction::Sub)
349  return false;
350 
351  if (!I->hasOneUse() ||
352  !isa<ICmpInst>(*I->user_begin()) ||
353  !isa<ConstantInt>(I->getOperand(1)))
354  return false;
355 
356  ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
357  bool NegImm = OverflowConst->isNegative();
358  bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
359  ((Opc == Instruction::Add) && NegImm);
360  if (!IsDecreasing)
361  return false;
362 
363  // Don't support an icmp that deals with sign bits.
364  auto *CI = cast<ICmpInst>(*I->user_begin());
365  if (CI->isSigned() || CI->isEquality())
366  return false;
367 
368  ConstantInt *ICmpConst = nullptr;
369  if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
370  ICmpConst = Const;
371  else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
372  ICmpConst = Const;
373  else
374  return false;
375 
376  // Now check that the result can't wrap on itself.
377  APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
378  ICmpConst->getValue().zext(32) : ICmpConst->getValue();
379 
380  Total += OverflowConst->getValue().getBitWidth() < 32 ?
381  OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
382 
383  APInt Max = APInt::getAllOnesValue(ARMCodeGenPrepare::TypeSize);
384 
385  if (Total.getBitWidth() > Max.getBitWidth()) {
386  if (Total.ugt(Max.zext(Total.getBitWidth())))
387  return false;
388  } else if (Max.getBitWidth() > Total.getBitWidth()) {
389  if (Total.zext(Max.getBitWidth()).ugt(Max))
390  return false;
391  } else if (Total.ugt(Max))
392  return false;
393 
394  LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
395  return true;
396 }
397 
398 static bool shouldPromote(Value *V) {
399  if (!isa<IntegerType>(V->getType()) || isSink(V))
400  return false;
401 
402  if (isSource(V))
403  return true;
404 
405  auto *I = dyn_cast<Instruction>(V);
406  if (!I)
407  return false;
408 
409  if (isa<ICmpInst>(I))
410  return false;
411 
412  return true;
413 }
414 
415 /// Return whether we can safely mutate V's type to ExtTy without having to be
416 /// concerned with zero extending or truncation.
417 static bool isPromotedResultSafe(Value *V) {
418  if (!isa<Instruction>(V))
419  return true;
420 
421  if (GenerateSignBits(V))
422  return false;
423 
424  return !isa<OverflowingBinaryOperator>(V);
425 }
426 
427 /// Return the intrinsic for the instruction that can perform the same
428 /// operation but on a narrow type. This is using the parallel dsp intrinsics
429 /// on scalar values.
431  // Whether we use the signed or unsigned versions of these intrinsics
432  // doesn't matter because we're not using the GE bits that they set in
433  // the APSR.
434  switch(I->getOpcode()) {
435  default:
436  break;
437  case Instruction::Add:
438  return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
439  Intrinsic::arm_uadd8;
440  case Instruction::Sub:
441  return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
442  Intrinsic::arm_usub8;
443  }
444  llvm_unreachable("unhandled opcode for narrow intrinsic");
445 }
446 
447 void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
449  Instruction *InstTo = dyn_cast<Instruction>(To);
450  bool ReplacedAll = true;
451 
452  LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To
453  << "\n");
454 
455  for (Use &U : From->uses()) {
456  auto *User = cast<Instruction>(U.getUser());
457  if (InstTo && User->isIdenticalTo(InstTo)) {
458  ReplacedAll = false;
459  continue;
460  }
461  Users.push_back(User);
462  }
463 
464  for (auto *U : Users)
465  U->replaceUsesOfWith(From, To);
466 
467  if (ReplacedAll)
468  if (auto *I = dyn_cast<Instruction>(From))
469  InstsToRemove.insert(I);
470 }
471 
472 void IRPromoter::PrepareConstants() {
473  IRBuilder<> Builder{Ctx};
474  // First step is to prepare the instructions for mutation. Most constants
475  // just need to be zero extended into their new type, but complications arise
476  // because:
477  // - For nuw binary operators, negative immediates would need sign extending;
478  // however, instead we'll change them to positive and zext them. We can do
479  // this because:
480  // > The operators that can wrap are: add, sub, mul and shl.
481  // > shl interprets its second operand as unsigned and if the first operand
482  // is an immediate, it will need zext to be nuw.
483  // > I'm assuming mul has to interpret immediates as unsigned for nuw.
484  // > Which leaves the nuw add and sub to be handled; as with shl, if an
485  // immediate is used as operand 0, it will need zext to be nuw.
486  // - We also allow add and sub to safely overflow in certain circumstances
487  // and only when the value (operand 0) is being decreased.
488  //
489  // For adds and subs, that are either nuw or safely wrap and use a negative
490  // immediate as operand 1, we create an equivalent instruction using a
491  // positive immediate. That positive immediate can then be zext along with
492  // all the other immediates later.
493  for (auto *V : *Visited) {
494  if (!isa<Instruction>(V))
495  continue;
496 
497  auto *I = cast<Instruction>(V);
498  if (SafeToPromote->count(I)) {
499 
500  if (!isa<OverflowingBinaryOperator>(I))
501  continue;
502 
503  if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
504  if (!Const->isNegative())
505  continue;
506 
507  unsigned Opc = I->getOpcode();
508  if (Opc != Instruction::Add && Opc != Instruction::Sub)
509  continue;
510 
511  LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
512  auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
513  Builder.SetInsertPoint(I);
514  Value *NewVal = Opc == Instruction::Sub ?
515  Builder.CreateAdd(I->getOperand(0), NewConst) :
516  Builder.CreateSub(I->getOperand(0), NewConst);
517  LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
518 
519  if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
520  NewInst->copyIRFlags(I);
521  NewInsts.insert(NewInst);
522  }
523  InstsToRemove.insert(I);
524  I->replaceAllUsesWith(NewVal);
525  }
526  }
527  }
528  for (auto *I : NewInsts)
529  Visited->insert(I);
530 }
531 
532 void IRPromoter::ExtendSources() {
533  IRBuilder<> Builder{Ctx};
534 
535  auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
536  assert(V->getType() != ExtTy && "zext already extends to i32");
537  LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
538  Builder.SetInsertPoint(InsertPt);
539  if (auto *I = dyn_cast<Instruction>(V))
540  Builder.SetCurrentDebugLocation(I->getDebugLoc());
541 
542  Value *ZExt = Builder.CreateZExt(V, ExtTy);
543  if (auto *I = dyn_cast<Instruction>(ZExt)) {
544  if (isa<Argument>(V))
545  I->moveBefore(InsertPt);
546  else
547  I->moveAfter(InsertPt);
548  NewInsts.insert(I);
549  }
550 
551  ReplaceAllUsersOfWith(V, ZExt);
552  };
553 
554  // Now, insert extending instructions between the sources and their users.
555  LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n");
556  for (auto V : *Sources) {
557  LLVM_DEBUG(dbgs() << " - " << *V << "\n");
558  if (auto *I = dyn_cast<Instruction>(V))
559  InsertZExt(I, I);
560  else if (auto *Arg = dyn_cast<Argument>(V)) {
561  BasicBlock &BB = Arg->getParent()->front();
562  InsertZExt(Arg, &*BB.getFirstInsertionPt());
563  } else {
564  llvm_unreachable("unhandled source that needs extending");
565  }
566  Promoted.insert(V);
567  }
568 }
569 
570 void IRPromoter::PromoteTree() {
571  LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
572 
573  IRBuilder<> Builder{Ctx};
574 
575  // Mutate the types of the instructions within the tree. Here we handle
576  // constant operands.
577  for (auto *V : *Visited) {
578  if (Sources->count(V))
579  continue;
580 
581  auto *I = cast<Instruction>(V);
582  if (Sinks->count(I))
583  continue;
584 
585  for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
586  Value *Op = I->getOperand(i);
587  if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
588  continue;
589 
590  if (auto *Const = dyn_cast<ConstantInt>(Op)) {
591  Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
592  I->setOperand(i, NewConst);
593  } else if (isa<UndefValue>(Op))
594  I->setOperand(i, UndefValue::get(ExtTy));
595  }
596 
597  if (shouldPromote(I)) {
598  I->mutateType(ExtTy);
599  Promoted.insert(I);
600  }
601  }
602 
603  // Finally, any instructions that should be promoted but haven't yet been,
604  // need to be handled using intrinsics.
605  for (auto *V : *Visited) {
606  auto *I = dyn_cast<Instruction>(V);
607  if (!I)
608  continue;
609 
610  if (Sources->count(I) || Sinks->count(I))
611  continue;
612 
613  if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
614  continue;
615 
616  assert(EnableDSP && "DSP intrinisc insertion not enabled!");
617 
618  // Replace unsafe instructions with appropriate intrinsic calls.
619  LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
620  << *I << "\n");
621  Function *DSPInst =
623  Builder.SetInsertPoint(I);
624  Builder.SetCurrentDebugLocation(I->getDebugLoc());
625  Value *Args[] = { I->getOperand(0), I->getOperand(1) };
626  CallInst *Call = Builder.CreateCall(DSPInst, Args);
627  NewInsts.insert(Call);
628  ReplaceAllUsersOfWith(I, Call);
629  }
630 }
631 
632 void IRPromoter::TruncateSinks() {
633  LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n");
634 
635  IRBuilder<> Builder{Ctx};
636 
637  auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
638  if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
639  return nullptr;
640 
641  if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V))
642  return nullptr;
643 
644  LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
645  << *V << "\n");
646  Builder.SetInsertPoint(cast<Instruction>(V));
647  auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
648  if (Trunc)
649  NewInsts.insert(Trunc);
650  return Trunc;
651  };
652 
653  // Fix up any stores or returns that use the results of the promoted
654  // chain.
655  for (auto I : *Sinks) {
656  LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n");
657 
658  // Handle calls separately as we need to iterate over arg operands.
659  if (auto *Call = dyn_cast<CallInst>(I)) {
660  for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
661  Value *Arg = Call->getArgOperand(i);
662  Type *Ty = TruncTysMap[Call][i];
663  if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
664  Trunc->moveBefore(Call);
665  Call->setArgOperand(i, Trunc);
666  }
667  }
668  continue;
669  }
670 
671  // Special case switches because we need to truncate the condition.
672  if (auto *Switch = dyn_cast<SwitchInst>(I)) {
673  Type *Ty = TruncTysMap[Switch][0];
674  if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
675  Trunc->moveBefore(Switch);
676  Switch->setCondition(Trunc);
677  }
678  continue;
679  }
680 
681  // Now handle the others.
682  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
683  Type *Ty = TruncTysMap[I][i];
684  if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
685  Trunc->moveBefore(I);
686  I->setOperand(i, Trunc);
687  }
688  }
689  }
690 }
691 
692 void IRPromoter::Cleanup() {
693  LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
694  // Some zexts will now have become redundant, along with their trunc
695  // operands, so remove them
696  for (auto V : *Visited) {
697  if (!isa<ZExtInst>(V))
698  continue;
699 
700  auto ZExt = cast<ZExtInst>(V);
701  if (ZExt->getDestTy() != ExtTy)
702  continue;
703 
704  Value *Src = ZExt->getOperand(0);
705  if (ZExt->getSrcTy() == ZExt->getDestTy()) {
706  LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt
707  << "\n");
708  ReplaceAllUsersOfWith(ZExt, Src);
709  continue;
710  }
711 
712  // Unless they produce a value that is narrower than ExtTy, we can
713  // replace the result of the zext with the input of a newly inserted
714  // trunc.
715  if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
716  Src->getType() == OrigTy) {
717  auto *Trunc = cast<TruncInst>(Src);
718  assert(Trunc->getOperand(0)->getType() == ExtTy &&
719  "expected inserted trunc to be operating on i32");
720  ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
721  }
722  }
723 
724  for (auto *I : InstsToRemove) {
725  LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
726  I->dropAllReferences();
727  I->eraseFromParent();
728  }
729 
730  InstsToRemove.clear();
731  NewInsts.clear();
732  TruncTysMap.clear();
733  Promoted.clear();
734 }
735 
736 void IRPromoter::ConvertTruncs() {
737  LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
738  IRBuilder<> Builder{Ctx};
739 
740  for (auto *V : *Visited) {
741  if (!isa<TruncInst>(V) || Sources->count(V))
742  continue;
743 
744  auto *Trunc = cast<TruncInst>(V);
745  Builder.SetInsertPoint(Trunc);
746  IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
747  IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
748 
749  unsigned NumBits = DestTy->getScalarSizeInBits();
750  ConstantInt *Mask =
751  ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
752  Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
753 
754  if (auto *I = dyn_cast<Instruction>(Masked))
755  NewInsts.insert(I);
756 
757  ReplaceAllUsersOfWith(Trunc, Masked);
758  }
759 }
760 
761 void IRPromoter::Mutate(Type *OrigTy,
762  SetVector<Value*> &Visited,
763  SmallPtrSetImpl<Value*> &Sources,
765  SmallPtrSetImpl<Instruction*> &SafeToPromote) {
766  LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
767  << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
768 
769  assert(isa<IntegerType>(OrigTy) && "expected integer type");
770  this->OrigTy = cast<IntegerType>(OrigTy);
771  assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() &&
772  "original type not smaller than extended type");
773 
774  this->Visited = &Visited;
775  this->Sources = &Sources;
776  this->Sinks = &Sinks;
777  this->SafeToPromote = &SafeToPromote;
778 
779  // Cache original types of the values that will likely need truncating
780  for (auto *I : Sinks) {
781  if (auto *Call = dyn_cast<CallInst>(I)) {
782  for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
783  Value *Arg = Call->getArgOperand(i);
784  TruncTysMap[Call].push_back(Arg->getType());
785  }
786  } else if (auto *Switch = dyn_cast<SwitchInst>(I))
787  TruncTysMap[I].push_back(Switch->getCondition()->getType());
788  else {
789  for (unsigned i = 0; i < I->getNumOperands(); ++i)
790  TruncTysMap[I].push_back(I->getOperand(i)->getType());
791  }
792  }
793  for (auto *V : Visited) {
794  if (!isa<TruncInst>(V) || Sources.count(V))
795  continue;
796  auto *Trunc = cast<TruncInst>(V);
797  TruncTysMap[Trunc].push_back(Trunc->getDestTy());
798  }
799 
800  // Convert adds and subs using negative immediates to equivalent instructions
801  // that use positive constants.
802  PrepareConstants();
803 
804  // Insert zext instructions between sources and their users.
805  ExtendSources();
806 
807  // Promote visited instructions, mutating their types in place. Also insert
808  // DSP intrinsics, if enabled, for adds and subs which would be unsafe to
809  // promote.
810  PromoteTree();
811 
812  // Convert any truncs, that aren't sources, into AND masks.
813  ConvertTruncs();
814 
815  // Insert trunc instructions for use by calls, stores etc...
816  TruncateSinks();
817 
818  // Finally, remove unecessary zexts and truncs, delete old instructions and
819  // clear the data structures.
820  Cleanup();
821 
822  LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
823 }
824 
825 /// We accept most instructions, as well as Arguments and ConstantInsts. We
826 /// Disallow casts other than zext and truncs and only allow calls if their
827 /// return value is zeroext. We don't allow opcodes that can introduce sign
828 /// bits.
829 bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
830  if (auto *I = dyn_cast<ICmpInst>(V)) {
831  // Now that we allow small types than TypeSize, only allow icmp of
832  // TypeSize because they will require a trunc to be legalised.
833  // TODO: Allow icmp of smaller types, and calculate at the end
834  // whether the transform would be beneficial.
835  if (isa<PointerType>(I->getOperand(0)->getType()))
836  return true;
837  return EqualTypeSize(I->getOperand(0));
838  }
839 
840  if (GenerateSignBits(V)) {
841  LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
842  return false;
843  }
844 
845  // Memory instructions
846  if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
847  return true;
848 
849  // Branches and targets.
850  if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
851  return true;
852 
853  // Non-instruction values that we can handle.
854  if ((isa<Constant>(V) && !isa<ConstantExpr>(V)) || isa<Argument>(V))
855  return isSupportedType(V);
856 
857  if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
858  isa<LoadInst>(V))
859  return isSupportedType(V);
860 
861  if (auto *Cast = dyn_cast<CastInst>(V))
862  return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
863 
864  // Special cases for calls as we need to check for zeroext
865  // TODO We should accept calls even if they don't have zeroext, as they can
866  // still be sinks.
867  if (auto *Call = dyn_cast<CallInst>(V))
868  return isSupportedType(Call) &&
869  Call->hasRetAttr(Attribute::AttrKind::ZExt);
870 
871  if (!isa<BinaryOperator>(V))
872  return false;
873 
874  if (!isSupportedType(V))
875  return false;
876 
877  return true;
878 }
879 
880 /// Check that the type of V would be promoted and that the original type is
881 /// smaller than the targeted promoted type. Check that we're not trying to
882 /// promote something larger than our base 'TypeSize' type.
884 
885  auto *I = dyn_cast<Instruction>(V);
886  if (!I)
887  return true;
888 
889  if (SafeToPromote.count(I))
890  return true;
891 
892  if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
893  SafeToPromote.insert(I);
894  return true;
895  }
896 
897  if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
898  return false;
899 
900  // If promotion is not safe, can we use a DSP instruction to natively
901  // handle the narrow type?
902  if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
903  return false;
904 
905  if (ST->isThumb() && !ST->hasThumb2())
906  return false;
907 
908  // TODO
909  // Would it be profitable? For Thumb code, these parallel DSP instructions
910  // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
911  // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
912  // halved. They also do not take immediates as operands.
913  for (auto &Op : I->operands()) {
914  if (isa<Constant>(Op)) {
915  if (!EnableDSPWithImms)
916  return false;
917  }
918  }
919  LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n");
920  return true;
921 }
922 
/// Try to promote the narrow-typed use-def tree that contains V.
///
/// Records V's type in OrigTy and its bit width in TypeSize, and gives up
/// unless that width is between 8 and 16 bits inclusive. Starting from V, a
/// worklist walk collects operands and users into CurrentVisited and notes
/// which visited values are sources and which are sinks. If the walk completes
/// and at least two visited values are neither a source nor a sink, the
/// collected sets are handed to Promoter->Mutate().
///
/// \returns true if Promoter->Mutate() was called, false if the attempt was
/// abandoned.
///
/// NOTE(review): listing line 939 is missing from this extract. 'Sinks' is
/// used below without a visible declaration - presumably it was declared on
/// that line; confirm against the original file.
923 bool ARMCodeGenPrepare::TryToPromote(Value *V) {
924  OrigTy = V->getType();
925  TypeSize = OrigTy->getPrimitiveSizeInBits();
926  if (TypeSize > 16 || TypeSize < 8)
927  return false;
928 
// Discard the safety results recorded during any previous attempt.
929  SafeToPromote.clear();
930 
931  if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
932  return false;
933 
934  LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
935  << TypeSize << "\n");
936 
937  SetVector<Value*> WorkList;
938  SmallPtrSet<Value*, 8> Sources;
940  SetVector<Value*> CurrentVisited;
941  WorkList.insert(V);
942 
943  // Return true if V was added to the worklist as a supported instruction,
944  // if it was already visited, or if we don't need to explore it (e.g.
945  // pointer values and GEPs), and false otherwise.
946  auto AddLegalInst = [&](Value *V) {
947  if (CurrentVisited.count(V))
948  return true;
949 
950  // Ignore GEPs because they don't need promoting and the constant indices
951  // will prevent the transformation.
952  if (isa<GetElementPtrInst>(V))
953  return true;
954 
955  if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
956  LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
957  return false;
958  }
959 
960  WorkList.insert(V);
961  return true;
962  };
963 
964  // Iterate through, and add to, a tree of operands and users in the use-def.
965  while (!WorkList.empty()) {
966  Value *V = WorkList.back();
967  WorkList.pop_back();
968  if (CurrentVisited.count(V))
969  continue;
970 
971  // Ignore non-instructions, other than arguments.
972  if (!isa<Instruction>(V) && !isSource(V))
973  continue;
974 
975  // If we've already visited this value from somewhere, bail now because
976  // the tree has already been explored.
977  // TODO: This could limit the transform, ie if we try to promote something
978  // from an i8 and fail first, before trying an i16.
979  if (AllVisited.count(V))
980  return false;
981 
982  CurrentVisited.insert(V);
983  AllVisited.insert(V);
984 
985  // Calls can be both sources and sinks.
986  if (isSink(V))
987  Sinks.insert(cast<Instruction>(V));
988 
989  if (isSource(V))
990  Sources.insert(V);
991 
// Operands are only explored for values that are neither a sink nor a
// source; the walk stops going upwards at those boundaries.
992  if (!isSink(V) && !isSource(V)) {
993  if (auto *I = dyn_cast<Instruction>(V)) {
994  // Visit operands of any instruction visited.
995  for (auto &U : I->operands()) {
996  if (!AddLegalInst(U))
997  return false;
998  }
999  }
1000  }
1001 
1002  // Don't visit users of a node which isn't going to be mutated unless it's a
1003  // source.
1004  if (isSource(V) || shouldPromote(V)) {
1005  for (Use &U : V->uses()) {
1006  if (!AddLegalInst(U.getUser()))
1007  return false;
1008  }
1009  }
1010  }
1011 
1012  LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
1013  for (auto *I : CurrentVisited)
1014  I->dump();
1015  );
// Count the visited values that are neither a source nor a sink.
1016  unsigned ToPromote = 0;
1017  for (auto *V : CurrentVisited) {
1018  if (Sources.count(V))
1019  continue;
1020  if (Sinks.count(cast<Instruction>(V)))
1021  continue;
1022  ++ToPromote;
1023  }
1024 
// Fewer than two such values: give up (presumably not worth transforming -
// the threshold is not explained in this file extract).
1025  if (ToPromote < 2)
1026  return false;
1027 
1028  Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
1029  return true;
1030 }
1031 
/// Allocate the IRPromoter for module M and store it in Promoter.
///
/// The object is created with a plain 'new'; the matching 'delete' is in
/// doFinalization().
///
/// \returns false.
1032 bool ARMCodeGenPrepare::doInitialization(Module &M) {
1033  Promoter = new IRPromoter(&M);
1034  return false;
1035 }
1036 
/// NOTE(review): the signature line (listing line 1037) is missing from this
/// extract. The body reads like ARMCodeGenPrepare::runOnFunction(Function &F)
/// - confirm against the original file.
///
/// Does nothing when skipFunction(F) is true or DisableCGP is set. Otherwise
/// stores the ARMSubtarget for F in ST, then walks every instruction of every
/// basic block. For each non-signed integer icmp that is not already in
/// AllVisited, TryToPromote() is called on each operand that is an
/// instruction.
///
/// \returns true if any TryToPromote() call returned true.
1038  if (skipFunction(F) || DisableCGP)
1039  return false;
1040 
// NOTE(review): TPC is the address of getAnalysis's result; if that result is
// a reference, the null check below can never fire - confirm.
1041  auto *TPC = &getAnalysis<TargetPassConfig>();
1042  if (!TPC)
1043  return false;
1044 
1045  const TargetMachine &TM = TPC->getTM<TargetMachine>();
1046  ST = &TM.getSubtarget<ARMSubtarget>(F);
1047  bool MadeChange = false;
1048  LLVM_DEBUG(dbgs() << "ARM CGP: Running on " << F.getName() << "\n");
1049 
1050  // Search up from icmps to try to promote their operands.
// NOTE(review): TryToPromote() runs while this instruction list is being
// iterated; whether Promoter->Mutate() can insert or erase instructions in
// the current block is not visible here - confirm the iteration stays valid.
1051  for (BasicBlock &BB : F) {
1052  auto &Insts = BB.getInstList();
1053  for (auto &I : Insts) {
1054  if (AllVisited.count(&I))
1055  continue;
1056 
1057  if (isa<ICmpInst>(I)) {
1058  auto &CI = cast<ICmpInst>(I);
1059 
1060  // Skip signed or pointer compares
1061  if (CI.isSigned() || !isa<IntegerType>(CI.getOperand(0)->getType()))
1062  continue;
1063 
1064  LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
1065 
// Note: the 'I' declared in this loop shadows the loop variable 'I' above.
1066  for (auto &Op : CI.operands()) {
1067  if (auto *I = dyn_cast<Instruction>(Op))
1068  MadeChange |= TryToPromote(I);
1069  }
1070  }
1071  }
// Inside LLVM_DEBUG: verify F after each basic block and abort with a fatal
// error if verifyFunction() reports it as broken.
1072  LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
1073  dbgs() << F;
1074  report_fatal_error("Broken function after type promotion");
1075  });
1076  }
1077  if (MadeChange)
1078  LLVM_DEBUG(dbgs() << "After ARMCodeGenPrepare: " << F << "\n");
1079 
1080  return MadeChange;
1081 }
1082 
/// Delete the IRPromoter that doInitialization() allocated.
///
/// NOTE(review): Promoter is not reset after the delete, so it keeps the old
/// address until doInitialization() assigns it again.
///
/// \returns false.
1083 bool ARMCodeGenPrepare::doFinalization(Module &M) {
1084  delete Promoter;
1085  return false;
1086 }
1087 
// Pass registration under the DEBUG_TYPE name, described as
// "ARM IR optimizations".
// NOTE(review): listing line 1090 is missing from this extract; presumably it
// held the start of the matching INITIALIZE_PASS_END(...) invocation whose
// trailing arguments appear on line 1091 - confirm against the original file.
1088 INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE,
1089  "ARM IR optimizations", false, false)
1091  false, false)
1092 
// Definitions of the static members ID and TypeSize, both initialised to zero.
1093 char ARMCodeGenPrepare::ID = 0;
1094 unsigned ARMCodeGenPrepare::TypeSize = 0;
1095 
// NOTE(review): the signature line (listing line 1096) is missing from this
// extract; presumably this is the body of createARMCodeGenPreparePass() -
// confirm against the original file.
// Returns a newly allocated ARMCodeGenPrepare.
1097  return new ARMCodeGenPrepare();
1098 }
APInt abs() const
Get the absolute value.
Definition: APInt.h:1799
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
iterator_range< use_iterator > uses()
Definition: Value.h:354
static bool isSupportedType(Value *V)
Some instructions can use 8- and 16-bit operands, and we don't need to promote anything larger...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
static Intrinsic::ID getNarrowIntrinsic(Instruction *I)
Return the intrinsic for the instruction that can perform the same operation but on a narrow type...
void dropAllReferences()
Drop all references to operands.
Definition: User.h:294
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:857
This class represents a function call, abstracting a target machine's calling convention.
bool isLegalToPromote(CallSite CS, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
F(f)
iv Induction Variable Users
Definition: IVUsers.cpp:51
const T & back() const
Return the last element of the SetVector.
Definition: SetVector.h:128
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:4362
AnalysisUsage & addRequired()
INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations", false, false) INITIALIZE_PASS_END(ARMCodeGenPrepare
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
static bool isPromotedResultSafe(Value *V)
Return whether we can safely mutate V's type to ExtTy without having to be concerned with zero extend...
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
This file contains the simple types necessary to represent the attributes associated with functions a...
void pop_back()
Remove the last element of the SetVector.
Definition: SetVector.h:221
Target-Independent Code Generator Pass Configuration Options.
static bool shouldPromote(Value *V)
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1674
static bool isSource(Value *V)
Return true if the given value is a source in the use-def chain, producing a narrow 'TypeSize' value...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
static bool LessOrEqualTypeSize(Value *V)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
static bool GenerateSignBits(Value *V)
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
static bool GreaterThanTypeSize(Value *V)
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1022
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:126
Value * getOperand(unsigned i) const
Definition: User.h:169
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:216
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
op_range operands()
Definition: User.h:237
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
static bool EqualTypeSize(Value *V)
Class to represent integer types.
Definition: DerivedTypes.h:39
static cl::opt< bool > EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false), cl::desc("Use DSP instructions for scalar operations\ with immediate operands"))
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1424
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isNegative() const
Definition: Constants.h:187
unsigned getNumOperands() const
Definition: User.h:191
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
BlockVerifier::State From
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
static cl::opt< bool > DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true), cl::desc("Disable ARM specific CodeGenPrepare pass"))
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:69
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:529
#define DEBUG_TYPE
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1254
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
void clear()
Definition: ilist.h:307
static bool LessThanTypeSize(Value *V)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
static cl::opt< bool > EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false), cl::desc("Use DSP instructions for scalar operations"))
#define I(x, y, z)
Definition: MD5.cpp:58
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
bool verifyFunction(const Function &F, raw_ostream *OS=nullptr)
Check a function for errors, useful for use when debugging a pass.
Definition: Verifier.cpp:4940
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:603
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:90
user_iterator user_begin()
Definition: Value.h:375
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:114
LLVM Value Representation.
Definition: Value.h:72
FunctionPass * createARMCodeGenPreparePass()
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
ARM IR optimizations
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
Statically lint checks LLVM IR
Definition: Lint.cpp:192
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static bool isSink(Value *V)
Return true if V will require any promoted values to be truncated for the IR to remain valid...