LLVM  13.0.0git
TruncInstCombine.cpp
Go to the documentation of this file.
1 //===- TruncInstCombine.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // TruncInstCombine - looks for expression dags post-dominated by TruncInst and
10 // for each eligible dag, it will create a reduced bit-width expression, replace
11 // the old expression with this new one and remove the old expression.
12 // Eligible expression dag is such that:
13 // 1. Contains only supported instructions.
14 // 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value.
15 // 3. Can be evaluated into type with reduced legal bit-width.
16 // 4. All instructions in the dag must not have users outside the dag.
17 // The only exception is for {ZExt, SExt}Inst with operand type equal to
18 // the new reduced type evaluated in (3).
19 //
20 // The motivation for this optimization is that evaluating and expression using
21 // smaller bit-width is preferable, especially for vectorization where we can
22 // fit more values in one vectorized instruction. In addition, this optimization
23 // may decrease the number of cast instructions, but will not increase it.
24 //
25 //===----------------------------------------------------------------------===//
26 
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/Statistic.h"
32 #include "llvm/IR/DataLayout.h"
33 #include "llvm/IR/Dominators.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/Instruction.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "aggressive-instcombine"
40 
41 STATISTIC(
42  NumDAGsReduced,
43  "Number of truncations eliminated by reducing bit width of expression DAG");
44 STATISTIC(NumInstrsReduced,
45  "Number of instructions whose bit width was reduced");
46 
47 /// Given an instruction and a container, it fills all the relevant operands of
48 /// that instruction, with respect to the Trunc expression dag optimizaton.
50  unsigned Opc = I->getOpcode();
51  switch (Opc) {
52  case Instruction::Trunc:
53  case Instruction::ZExt:
54  case Instruction::SExt:
55  // These CastInst are considered leaves of the evaluated expression, thus,
56  // their operands are not relevent.
57  break;
58  case Instruction::Add:
59  case Instruction::Sub:
60  case Instruction::Mul:
61  case Instruction::And:
62  case Instruction::Or:
63  case Instruction::Xor:
64  Ops.push_back(I->getOperand(0));
65  Ops.push_back(I->getOperand(1));
66  break;
68  Ops.push_back(I->getOperand(1));
69  Ops.push_back(I->getOperand(2));
70  break;
71  default:
72  llvm_unreachable("Unreachable!");
73  }
74 }
75 
76 bool TruncInstCombine::buildTruncExpressionDag() {
77  SmallVector<Value *, 8> Worklist;
79  // Clear old expression dag.
80  InstInfoMap.clear();
81 
82  Worklist.push_back(CurrentTruncInst->getOperand(0));
83 
84  while (!Worklist.empty()) {
85  Value *Curr = Worklist.back();
86 
87  if (isa<Constant>(Curr)) {
88  Worklist.pop_back();
89  continue;
90  }
91 
92  auto *I = dyn_cast<Instruction>(Curr);
93  if (!I)
94  return false;
95 
96  if (!Stack.empty() && Stack.back() == I) {
97  // Already handled all instruction operands, can remove it from both the
98  // Worklist and the Stack, and add it to the instruction info map.
99  Worklist.pop_back();
100  Stack.pop_back();
101  // Insert I to the Info map.
102  InstInfoMap.insert(std::make_pair(I, Info()));
103  continue;
104  }
105 
106  if (InstInfoMap.count(I)) {
107  Worklist.pop_back();
108  continue;
109  }
110 
111  // Add the instruction to the stack before start handling its operands.
112  Stack.push_back(I);
113 
114  unsigned Opc = I->getOpcode();
115  switch (Opc) {
116  case Instruction::Trunc:
117  case Instruction::ZExt:
118  case Instruction::SExt:
119  // trunc(trunc(x)) -> trunc(x)
120  // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
121  // trunc(ext(x)) -> trunc(x) if the source type is larger than the new
122  // dest
123  break;
124  case Instruction::Add:
125  case Instruction::Sub:
126  case Instruction::Mul:
127  case Instruction::And:
128  case Instruction::Or:
129  case Instruction::Xor:
130  case Instruction::Select: {
133  append_range(Worklist, Operands);
134  break;
135  }
136  default:
137  // TODO: Can handle more cases here:
138  // 1. shufflevector, extractelement, insertelement
139  // 2. udiv, urem
140  // 3. shl, lshr, ashr
141  // 4. phi node(and loop handling)
142  // ...
143  return false;
144  }
145  }
146  return true;
147 }
148 
149 unsigned TruncInstCombine::getMinBitWidth() {
150  SmallVector<Value *, 8> Worklist;
152 
153  Value *Src = CurrentTruncInst->getOperand(0);
154  Type *DstTy = CurrentTruncInst->getType();
155  unsigned TruncBitWidth = DstTy->getScalarSizeInBits();
156  unsigned OrigBitWidth =
157  CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
158 
159  if (isa<Constant>(Src))
160  return TruncBitWidth;
161 
162  Worklist.push_back(Src);
163  InstInfoMap[cast<Instruction>(Src)].ValidBitWidth = TruncBitWidth;
164 
165  while (!Worklist.empty()) {
166  Value *Curr = Worklist.back();
167 
168  if (isa<Constant>(Curr)) {
169  Worklist.pop_back();
170  continue;
171  }
172 
173  // Otherwise, it must be an instruction.
174  auto *I = cast<Instruction>(Curr);
175 
176  auto &Info = InstInfoMap[I];
177 
180 
181  if (!Stack.empty() && Stack.back() == I) {
182  // Already handled all instruction operands, can remove it from both, the
183  // Worklist and the Stack, and update MinBitWidth.
184  Worklist.pop_back();
185  Stack.pop_back();
186  for (auto *Operand : Operands)
187  if (auto *IOp = dyn_cast<Instruction>(Operand))
188  Info.MinBitWidth =
189  std::max(Info.MinBitWidth, InstInfoMap[IOp].MinBitWidth);
190  continue;
191  }
192 
193  // Add the instruction to the stack before start handling its operands.
194  Stack.push_back(I);
195  unsigned ValidBitWidth = Info.ValidBitWidth;
196 
197  // Update minimum bit-width before handling its operands. This is required
198  // when the instruction is part of a loop.
199  Info.MinBitWidth = std::max(Info.MinBitWidth, Info.ValidBitWidth);
200 
201  for (auto *Operand : Operands)
202  if (auto *IOp = dyn_cast<Instruction>(Operand)) {
203  // If we already calculated the minimum bit-width for this valid
204  // bit-width, or for a smaller valid bit-width, then just keep the
205  // answer we already calculated.
206  unsigned IOpBitwidth = InstInfoMap.lookup(IOp).ValidBitWidth;
207  if (IOpBitwidth >= ValidBitWidth)
208  continue;
209  InstInfoMap[IOp].ValidBitWidth = ValidBitWidth;
210  Worklist.push_back(IOp);
211  }
212  }
213  unsigned MinBitWidth = InstInfoMap.lookup(cast<Instruction>(Src)).MinBitWidth;
214  assert(MinBitWidth >= TruncBitWidth);
215 
216  if (MinBitWidth > TruncBitWidth) {
217  // In this case reducing expression with vector type might generate a new
218  // vector type, which is not preferable as it might result in generating
219  // sub-optimal code.
220  if (DstTy->isVectorTy())
221  return OrigBitWidth;
222  // Use the smallest integer type in the range [MinBitWidth, OrigBitWidth).
223  Type *Ty = DL.getSmallestLegalIntType(DstTy->getContext(), MinBitWidth);
224  // Update minimum bit-width with the new destination type bit-width if
225  // succeeded to find such, otherwise, with original bit-width.
226  MinBitWidth = Ty ? Ty->getScalarSizeInBits() : OrigBitWidth;
227  } else { // MinBitWidth == TruncBitWidth
228  // In this case the expression can be evaluated with the trunc instruction
229  // destination type, and trunc instruction can be omitted. However, we
230  // should not perform the evaluation if the original type is a legal scalar
231  // type and the target type is illegal.
232  bool FromLegal = MinBitWidth == 1 || DL.isLegalInteger(OrigBitWidth);
233  bool ToLegal = MinBitWidth == 1 || DL.isLegalInteger(MinBitWidth);
234  if (!DstTy->isVectorTy() && FromLegal && !ToLegal)
235  return OrigBitWidth;
236  }
237  return MinBitWidth;
238 }
239 
240 Type *TruncInstCombine::getBestTruncatedType() {
241  if (!buildTruncExpressionDag())
242  return nullptr;
243 
244  // We don't want to duplicate instructions, which isn't profitable. Thus, we
245  // can't shrink something that has multiple users, unless all users are
246  // post-dominated by the trunc instruction, i.e., were visited during the
247  // expression evaluation.
248  unsigned DesiredBitWidth = 0;
249  for (auto Itr : InstInfoMap) {
250  Instruction *I = Itr.first;
251  if (I->hasOneUse())
252  continue;
253  bool IsExtInst = (isa<ZExtInst>(I) || isa<SExtInst>(I));
254  for (auto *U : I->users())
255  if (auto *UI = dyn_cast<Instruction>(U))
256  if (UI != CurrentTruncInst && !InstInfoMap.count(UI)) {
257  if (!IsExtInst)
258  return nullptr;
259  // If this is an extension from the dest type, we can eliminate it,
260  // even if it has multiple users. Thus, update the DesiredBitWidth and
261  // validate all extension instructions agrees on same DesiredBitWidth.
262  unsigned ExtInstBitWidth =
263  I->getOperand(0)->getType()->getScalarSizeInBits();
264  if (DesiredBitWidth && DesiredBitWidth != ExtInstBitWidth)
265  return nullptr;
266  DesiredBitWidth = ExtInstBitWidth;
267  }
268  }
269 
270  unsigned OrigBitWidth =
271  CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
272 
273  // Calculate minimum allowed bit-width allowed for shrinking the currently
274  // visited truncate's operand.
275  unsigned MinBitWidth = getMinBitWidth();
276 
277  // Check that we can shrink to smaller bit-width than original one and that
278  // it is similar to the DesiredBitWidth is such exists.
279  if (MinBitWidth >= OrigBitWidth ||
280  (DesiredBitWidth && DesiredBitWidth != MinBitWidth))
281  return nullptr;
282 
283  return IntegerType::get(CurrentTruncInst->getContext(), MinBitWidth);
284 }
285 
286 /// Given a reduced scalar type \p Ty and a \p V value, return a reduced type
287 /// for \p V, according to its type, if it vector type, return the vector
288 /// version of \p Ty, otherwise return \p Ty.
289 static Type *getReducedType(Value *V, Type *Ty) {
290  assert(Ty && !Ty->isVectorTy() && "Expect Scalar Type");
291  if (auto *VTy = dyn_cast<VectorType>(V->getType()))
292  return VectorType::get(Ty, VTy->getElementCount());
293  return Ty;
294 }
295 
296 Value *TruncInstCombine::getReducedOperand(Value *V, Type *SclTy) {
297  Type *Ty = getReducedType(V, SclTy);
298  if (auto *C = dyn_cast<Constant>(V)) {
299  C = ConstantExpr::getIntegerCast(C, Ty, false);
300  // If we got a constantexpr back, try to simplify it with DL info.
301  return ConstantFoldConstant(C, DL, &TLI);
302  }
303 
304  auto *I = cast<Instruction>(V);
305  Info Entry = InstInfoMap.lookup(I);
306  assert(Entry.NewValue);
307  return Entry.NewValue;
308 }
309 
310 void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
311  NumInstrsReduced += InstInfoMap.size();
312  for (auto &Itr : InstInfoMap) { // Forward
313  Instruction *I = Itr.first;
314  TruncInstCombine::Info &NodeInfo = Itr.second;
315 
316  assert(!NodeInfo.NewValue && "Instruction has been evaluated");
317 
319  Value *Res = nullptr;
320  unsigned Opc = I->getOpcode();
321  switch (Opc) {
322  case Instruction::Trunc:
323  case Instruction::ZExt:
324  case Instruction::SExt: {
325  Type *Ty = getReducedType(I, SclTy);
326  // If the source type of the cast is the type we're trying for then we can
327  // just return the source. There's no need to insert it because it is not
328  // new.
329  if (I->getOperand(0)->getType() == Ty) {
330  assert(!isa<TruncInst>(I) && "Cannot reach here with TruncInst");
331  NodeInfo.NewValue = I->getOperand(0);
332  continue;
333  }
334  // Otherwise, must be the same type of cast, so just reinsert a new one.
335  // This also handles the case of zext(trunc(x)) -> zext(x).
336  Res = Builder.CreateIntCast(I->getOperand(0), Ty,
337  Opc == Instruction::SExt);
338 
339  // Update Worklist entries with new value if needed.
340  // There are three possible changes to the Worklist:
341  // 1. Update Old-TruncInst -> New-TruncInst.
342  // 2. Remove Old-TruncInst (if New node is not TruncInst).
343  // 3. Add New-TruncInst (if Old node was not TruncInst).
344  auto *Entry = find(Worklist, I);
345  if (Entry != Worklist.end()) {
346  if (auto *NewCI = dyn_cast<TruncInst>(Res))
347  *Entry = NewCI;
348  else
349  Worklist.erase(Entry);
350  } else if (auto *NewCI = dyn_cast<TruncInst>(Res))
351  Worklist.push_back(NewCI);
352  break;
353  }
354  case Instruction::Add:
355  case Instruction::Sub:
356  case Instruction::Mul:
357  case Instruction::And:
358  case Instruction::Or:
359  case Instruction::Xor: {
360  Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
361  Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
362  Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
363  break;
364  }
365  case Instruction::Select: {
366  Value *Op0 = I->getOperand(0);
367  Value *LHS = getReducedOperand(I->getOperand(1), SclTy);
368  Value *RHS = getReducedOperand(I->getOperand(2), SclTy);
369  Res = Builder.CreateSelect(Op0, LHS, RHS);
370  break;
371  }
372  default:
373  llvm_unreachable("Unhandled instruction");
374  }
375 
376  NodeInfo.NewValue = Res;
377  if (auto *ResI = dyn_cast<Instruction>(Res))
378  ResI->takeName(I);
379  }
380 
381  Value *Res = getReducedOperand(CurrentTruncInst->getOperand(0), SclTy);
382  Type *DstTy = CurrentTruncInst->getType();
383  if (Res->getType() != DstTy) {
384  IRBuilder<> Builder(CurrentTruncInst);
385  Res = Builder.CreateIntCast(Res, DstTy, false);
386  if (auto *ResI = dyn_cast<Instruction>(Res))
387  ResI->takeName(CurrentTruncInst);
388  }
389  CurrentTruncInst->replaceAllUsesWith(Res);
390 
391  // Erase old expression dag, which was replaced by the reduced expression dag.
392  // We iterate backward, which means we visit the instruction before we visit
393  // any of its operands, this way, when we get to the operand, we already
394  // removed the instructions (from the expression dag) that uses it.
395  CurrentTruncInst->eraseFromParent();
396  for (auto I = InstInfoMap.rbegin(), E = InstInfoMap.rend(); I != E; ++I) {
397  // We still need to check that the instruction has no users before we erase
398  // it, because {SExt, ZExt}Inst Instruction might have other users that was
399  // not reduced, in such case, we need to keep that instruction.
400  if (I->first->use_empty())
401  I->first->eraseFromParent();
402  }
403 }
404 
406  bool MadeIRChange = false;
407 
408  // Collect all TruncInst in the function into the Worklist for evaluating.
409  for (auto &BB : F) {
410  // Ignore unreachable basic block.
411  if (!DT.isReachableFromEntry(&BB))
412  continue;
413  for (auto &I : BB)
414  if (auto *CI = dyn_cast<TruncInst>(&I))
415  Worklist.push_back(CI);
416  }
417 
418  // Process all TruncInst in the Worklist, for each instruction:
419  // 1. Check if it dominates an eligible expression dag to be reduced.
420  // 2. Create a reduced expression dag and replace the old one with it.
421  while (!Worklist.empty()) {
422  CurrentTruncInst = Worklist.pop_back_val();
423 
424  if (Type *NewDstSclTy = getBestTruncatedType()) {
425  LLVM_DEBUG(
426  dbgs() << "ICE: TruncInstCombine reducing type of expression dag "
427  "dominated by: "
428  << CurrentTruncInst << '\n');
429  ReduceExpressionDag(NewDstSclTy);
430  ++NumDAGsReduced;
431  MadeIRChange = true;
432  }
433  }
434 
435  return MadeIRChange;
436 }
llvm
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:704
llvm::Function
Definition: Function.h:61
llvm::SmallVector< Value *, 8 >
Statistic.h
llvm::IRBuilder<>
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:634
ConstantFolding.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::TruncInstCombine::run
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
Definition: TruncInstCombine.cpp:405
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Instruction.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
getReducedType
static Type * getReducedType(Value *V, Type *Ty)
Given a reduced scalar type Ty and a V value, return a reduced type for V, according to its type,...
Definition: TruncInstCombine.cpp:289
getRelevantOperands
static void getRelevantOperands(Instruction *I, SmallVectorImpl< Value * > &Ops)
Given an instruction and a container, it fills all the relevant operands of that instruction,...
Definition: TruncInstCombine.cpp:49
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:147
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
AggressiveInstCombineInternal.h
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1502
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::pdb::PDB_MemoryType::Stack
@ Stack
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
DataLayout.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:517
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1672
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:965
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2058
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::DataLayout::isLegalInteger
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:263
llvm::DataLayout::getSmallestLegalIntType
Type * getSmallestLegalIntType(LLVMContext &C, unsigned Width=0) const
Returns the smallest integer type with size at least as big as Width bits.
Definition: DataLayout.cpp:853
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:773
Dominators.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::SmallVectorImpl< Value * >
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1258
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:269
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:367
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:628