LLVM 18.0.0git
TypePromotion.cpp
Go to the documentation of this file.
1//===----- TypePromotion.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This is an opcode based type promotion pass for small types that would
11/// otherwise be promoted during legalisation. This works around the limitations
12/// of selection dag for cyclic regions. The search begins from icmp
13/// instructions operands where a tree, consisting of non-wrapping or safe
14/// wrapping instructions, is built, checked and promoted if possible.
15///
16//===----------------------------------------------------------------------===//
17
19#include "llvm/ADT/SetVector.h"
20#include "llvm/ADT/StringRef.h"
23#include "llvm/CodeGen/Passes.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstrTypes.h"
32#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Type.h"
35#include "llvm/IR/Value.h"
37#include "llvm/Pass.h"
41
42#define DEBUG_TYPE "type-promotion"
43#define PASS_NAME "Type Promotion"
44
45using namespace llvm;
46
47static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
48 cl::init(false),
49 cl::desc("Disable type promotion pass"));
50
51// The goal of this pass is to enable more efficient code generation for
52// operations on narrow types (i.e. types with < 32-bits) and this is a
53// motivating IR code example:
54//
55// define hidden i32 @cmp(i8 zeroext) {
56// %2 = add i8 %0, -49
57// %3 = icmp ult i8 %2, 3
58// ..
59// }
60//
61// The issue here is that i8 is type-legalized to i32 because i8 is not a
62// legal type. Thus, arithmetic is done in integer-precision, but then the
63// byte value is masked out as follows:
64//
65// t19: i32 = add t4, Constant:i32<-49>
66// t24: i32 = and t19, Constant:i32<255>
67//
68// Consequently, we generate code like this:
69//
70// subs r0, #49
71// uxtb r1, r0
72// cmp r1, #3
73//
74// This shows that masking out the byte value results in generation of
75// the UXTB instruction. This is not optimal as r0 already contains the byte
76// value we need, and so instead we can just generate:
77//
78// sub.w r1, r0, #49
79// cmp r1, #3
80//
81// We achieve this by type promoting the IR to i32 like so for this example:
82//
83// define i32 @cmp(i8 zeroext %c) {
84// %0 = zext i8 %c to i32
85// %c.off = add i32 %0, -49
86// %1 = icmp ult i32 %c.off, 3
87// ..
88// }
89//
90// For this to be valid and legal, we need to prove that the i32 add is
91// producing the same value as the i8 addition, and that e.g. no overflow
92// happens.
93//
94// A brief sketch of the algorithm and some terminology.
95// We pattern match interesting IR patterns:
96// - which have "sources": instructions producing narrow values (i8, i16), and
97// - they have "sinks": instructions consuming these narrow values.
98//
99// We collect all instruction connecting sources and sinks in a worklist, so
100// that we can mutate these instruction and perform type promotion when it is
101// legal to do so.
102
103namespace {
104class IRPromoter {
105 LLVMContext &Ctx;
106 unsigned PromotedWidth = 0;
107 SetVector<Value *> &Visited;
108 SetVector<Value *> &Sources;
111 SmallPtrSetImpl<Instruction *> &InstsToRemove;
112 IntegerType *ExtTy = nullptr;
116
117 void ReplaceAllUsersOfWith(Value *From, Value *To);
118 void ExtendSources();
119 void ConvertTruncs();
120 void PromoteTree();
121 void TruncateSinks();
122 void Cleanup();
123
124public:
125 IRPromoter(LLVMContext &C, unsigned Width, SetVector<Value *> &visited,
128 SmallPtrSetImpl<Instruction *> &instsToRemove)
129 : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources),
130 Sinks(sinks), SafeWrap(wrap), InstsToRemove(instsToRemove) {
131 ExtTy = IntegerType::get(Ctx, PromotedWidth);
132 }
133
134 void Mutate();
135};
136
137class TypePromotionImpl {
138 unsigned TypeSize = 0;
139 LLVMContext *Ctx = nullptr;
140 unsigned RegisterBitWidth = 0;
141 SmallPtrSet<Value *, 16> AllVisited;
142 SmallPtrSet<Instruction *, 8> SafeToPromote;
144 SmallPtrSet<Instruction *, 4> InstsToRemove;
145
146 // Does V have the same size result type as TypeSize.
147 bool EqualTypeSize(Value *V);
148 // Does V have the same size, or narrower, result type as TypeSize.
149 bool LessOrEqualTypeSize(Value *V);
150 // Does V have a result type that is wider than TypeSize.
151 bool GreaterThanTypeSize(Value *V);
152 // Does V have a result type that is narrower than TypeSize.
153 bool LessThanTypeSize(Value *V);
154 // Should V be a leaf in the promote tree?
155 bool isSource(Value *V);
156 // Should V be a root in the promotion tree?
157 bool isSink(Value *V);
158 // Should we change the result type of V? It will result in the users of V
159 // being visited.
160 bool shouldPromote(Value *V);
161 // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
162 // result won't affect the computation?
163 bool isSafeWrap(Instruction *I);
164 // Can V have its integer type promoted, or can the type be ignored.
165 bool isSupportedType(Value *V);
166 // Is V an instruction with a supported opcode or another value that we can
167 // handle, such as constants and basic blocks.
168 bool isSupportedValue(Value *V);
169 // Is V an instruction thats result can trivially promoted, or has safe
170 // wrapping.
171 bool isLegalToPromote(Value *V);
172 bool TryToPromote(Value *V, unsigned PromotedWidth, const LoopInfo &LI);
173
174public:
175 bool run(Function &F, const TargetMachine *TM,
176 const TargetTransformInfo &TTI, const LoopInfo &LI);
177};
178
179class TypePromotionLegacy : public FunctionPass {
180public:
181 static char ID;
182
183 TypePromotionLegacy() : FunctionPass(ID) {}
184
185 void getAnalysisUsage(AnalysisUsage &AU) const override {
189 AU.setPreservesCFG();
191 }
192
193 StringRef getPassName() const override { return PASS_NAME; }
194
195 bool runOnFunction(Function &F) override;
196};
197
198} // namespace
199
201 unsigned Opc = I->getOpcode();
202 return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
203 Opc == Instruction::SRem || Opc == Instruction::SExt;
204}
205
206bool TypePromotionImpl::EqualTypeSize(Value *V) {
207 return V->getType()->getScalarSizeInBits() == TypeSize;
208}
209
210bool TypePromotionImpl::LessOrEqualTypeSize(Value *V) {
211 return V->getType()->getScalarSizeInBits() <= TypeSize;
212}
213
214bool TypePromotionImpl::GreaterThanTypeSize(Value *V) {
215 return V->getType()->getScalarSizeInBits() > TypeSize;
216}
217
218bool TypePromotionImpl::LessThanTypeSize(Value *V) {
219 return V->getType()->getScalarSizeInBits() < TypeSize;
220}
221
222/// Return true if the given value is a source in the use-def chain, producing
223/// a narrow 'TypeSize' value. These values will be zext to start the promotion
224/// of the tree to i32. We guarantee that these won't populate the upper bits
225/// of the register. ZExt on the loads will be free, and the same for call
226/// return values because we only accept ones that guarantee a zeroext ret val.
227/// Many arguments will have the zeroext attribute too, so those would be free
228/// too.
229bool TypePromotionImpl::isSource(Value *V) {
230 if (!isa<IntegerType>(V->getType()))
231 return false;
232
233 // TODO Allow zext to be sources.
234 if (isa<Argument>(V))
235 return true;
236 else if (isa<LoadInst>(V))
237 return true;
238 else if (auto *Call = dyn_cast<CallInst>(V))
239 return Call->hasRetAttr(Attribute::AttrKind::ZExt);
240 else if (auto *Trunc = dyn_cast<TruncInst>(V))
241 return EqualTypeSize(Trunc);
242 return false;
243}
244
245/// Return true if V will require any promoted values to be truncated for the
246/// the IR to remain valid. We can't mutate the value type of these
247/// instructions.
248bool TypePromotionImpl::isSink(Value *V) {
249 // TODO The truncate also isn't actually necessary because we would already
250 // proved that the data value is kept within the range of the original data
251 // type. We currently remove any truncs inserted for handling zext sinks.
252
253 // Sinks are:
254 // - points where the value in the register is being observed, such as an
255 // icmp, switch or store.
256 // - points where value types have to match, such as calls and returns.
257 // - zext are included to ease the transformation and are generally removed
258 // later on.
259 if (auto *Store = dyn_cast<StoreInst>(V))
260 return LessOrEqualTypeSize(Store->getValueOperand());
261 if (auto *Return = dyn_cast<ReturnInst>(V))
262 return LessOrEqualTypeSize(Return->getReturnValue());
263 if (auto *ZExt = dyn_cast<ZExtInst>(V))
264 return GreaterThanTypeSize(ZExt);
265 if (auto *Switch = dyn_cast<SwitchInst>(V))
266 return LessThanTypeSize(Switch->getCondition());
267 if (auto *ICmp = dyn_cast<ICmpInst>(V))
268 return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
269
270 return isa<CallInst>(V);
271}
272
273/// Return whether this instruction can safely wrap.
274bool TypePromotionImpl::isSafeWrap(Instruction *I) {
275 // We can support a potentially wrapping instruction (I) if:
276 // - It is only used by an unsigned icmp.
277 // - The icmp uses a constant.
278 // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
279 // around zero to become a larger number than before.
280 // - The wrapping instruction (I) also uses a constant.
281 //
282 // We can then use the two constants to calculate whether the result would
283 // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
284 // just underflows the range, the icmp would give the same result whether the
285 // result has been truncated or not. We calculate this by:
286 // - Zero extending both constants, if needed, to RegisterBitWidth.
287 // - Take the absolute value of I's constant, adding this to the icmp const.
288 // - Check that this value is not out of range for small type. If it is, it
289 // means that it has underflowed enough to wrap around the icmp constant.
290 //
291 // For example:
292 //
293 // %sub = sub i8 %a, 2
294 // %cmp = icmp ule i8 %sub, 254
295 //
296 // If %a = 0, %sub = -2 == FE == 254
297 // But if this is evalulated as a i32
298 // %sub = -2 == FF FF FF FE == 4294967294
299 // So the unsigned compares (i8 and i32) would not yield the same result.
300 //
301 // Another way to look at it is:
302 // %a - 2 <= 254
303 // %a + 2 <= 254 + 2
304 // %a <= 256
305 // And we can't represent 256 in the i8 format, so we don't support it.
306 //
307 // Whereas:
308 //
309 // %sub i8 %a, 1
310 // %cmp = icmp ule i8 %sub, 254
311 //
312 // If %a = 0, %sub = -1 == FF == 255
313 // As i32:
314 // %sub = -1 == FF FF FF FF == 4294967295
315 //
316 // In this case, the unsigned compare results would be the same and this
317 // would also be true for ult, uge and ugt:
318 // - (255 < 254) == (0xFFFFFFFF < 254) == false
319 // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320 // - (255 > 254) == (0xFFFFFFFF > 254) == true
321 // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
322 //
323 // To demonstrate why we can't handle increasing values:
324 //
325 // %add = add i8 %a, 2
326 // %cmp = icmp ult i8 %add, 127
327 //
328 // If %a = 254, %add = 256 == (i8 1)
329 // As i32:
330 // %add = 256
331 //
332 // (1 < 127) != (256 < 127)
333
334 unsigned Opc = I->getOpcode();
335 if (Opc != Instruction::Add && Opc != Instruction::Sub)
336 return false;
337
338 if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
339 !isa<ConstantInt>(I->getOperand(1)))
340 return false;
341
342 // Don't support an icmp that deals with sign bits.
343 auto *CI = cast<ICmpInst>(*I->user_begin());
344 if (CI->isSigned() || CI->isEquality())
345 return false;
346
347 ConstantInt *ICmpConstant = nullptr;
348 if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
349 ICmpConstant = Const;
350 else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
351 ICmpConstant = Const;
352 else
353 return false;
354
355 const APInt &ICmpConst = ICmpConstant->getValue();
356 APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
357 if (Opc == Instruction::Sub)
358 OverflowConst = -OverflowConst;
359 if (!OverflowConst.isNonPositive())
360 return false;
361
362 // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
363 // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
364 // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
365 if (OverflowConst.sgt(ICmpConst)) {
366 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
367 << "const of " << *I << "\n");
368 SafeWrap.insert(I);
369 return true;
370 } else {
371 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
372 << "const of " << *I << " and " << *CI << "\n");
373 SafeWrap.insert(I);
374 SafeWrap.insert(CI);
375 return true;
376 }
377 return false;
378}
379
380bool TypePromotionImpl::shouldPromote(Value *V) {
381 if (!isa<IntegerType>(V->getType()) || isSink(V))
382 return false;
383
384 if (isSource(V))
385 return true;
386
387 auto *I = dyn_cast<Instruction>(V);
388 if (!I)
389 return false;
390
391 if (isa<ICmpInst>(I))
392 return false;
393
394 return true;
395}
396
397/// Return whether we can safely mutate V's type to ExtTy without having to be
398/// concerned with zero extending or truncation.
400 if (GenerateSignBits(I))
401 return false;
402
403 if (!isa<OverflowingBinaryOperator>(I))
404 return true;
405
406 return I->hasNoUnsignedWrap();
407}
408
409void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
411 Instruction *InstTo = dyn_cast<Instruction>(To);
412 bool ReplacedAll = true;
413
414 LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
415 << "\n");
416
417 for (Use &U : From->uses()) {
418 auto *User = cast<Instruction>(U.getUser());
419 if (InstTo && User->isIdenticalTo(InstTo)) {
420 ReplacedAll = false;
421 continue;
422 }
423 Users.push_back(User);
424 }
425
426 for (auto *U : Users)
427 U->replaceUsesOfWith(From, To);
428
429 if (ReplacedAll)
430 if (auto *I = dyn_cast<Instruction>(From))
431 InstsToRemove.insert(I);
432}
433
434void IRPromoter::ExtendSources() {
435 IRBuilder<> Builder{Ctx};
436
437 auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
438 assert(V->getType() != ExtTy && "zext already extends to i32");
439 LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
440 Builder.SetInsertPoint(InsertPt);
441 if (auto *I = dyn_cast<Instruction>(V))
442 Builder.SetCurrentDebugLocation(I->getDebugLoc());
443
444 Value *ZExt = Builder.CreateZExt(V, ExtTy);
445 if (auto *I = dyn_cast<Instruction>(ZExt)) {
446 if (isa<Argument>(V))
447 I->moveBefore(InsertPt);
448 else
449 I->moveAfter(InsertPt);
450 NewInsts.insert(I);
451 }
452
453 ReplaceAllUsersOfWith(V, ZExt);
454 };
455
456 // Now, insert extending instructions between the sources and their users.
457 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
458 for (auto *V : Sources) {
459 LLVM_DEBUG(dbgs() << " - " << *V << "\n");
460 if (auto *I = dyn_cast<Instruction>(V))
461 InsertZExt(I, I);
462 else if (auto *Arg = dyn_cast<Argument>(V)) {
463 BasicBlock &BB = Arg->getParent()->front();
464 InsertZExt(Arg, &*BB.getFirstInsertionPt());
465 } else {
466 llvm_unreachable("unhandled source that needs extending");
467 }
468 Promoted.insert(V);
469 }
470}
471
472void IRPromoter::PromoteTree() {
473 LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
474
475 // Mutate the types of the instructions within the tree. Here we handle
476 // constant operands.
477 for (auto *V : Visited) {
478 if (Sources.count(V))
479 continue;
480
481 auto *I = cast<Instruction>(V);
482 if (Sinks.count(I))
483 continue;
484
485 for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
486 Value *Op = I->getOperand(i);
487 if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
488 continue;
489
490 if (auto *Const = dyn_cast<ConstantInt>(Op)) {
491 // For subtract, we don't need to sext the constant. We only put it in
492 // SafeWrap because SafeWrap.size() is used elsewhere.
493 // For cmp, we need to sign extend a constant appearing in either
494 // operand. For add, we should only sign extend the RHS.
495 Constant *NewConst =
496 ConstantInt::get(Const->getContext(),
497 (SafeWrap.contains(I) &&
498 (I->getOpcode() == Instruction::ICmp || i == 1) &&
499 I->getOpcode() != Instruction::Sub)
500 ? Const->getValue().sext(PromotedWidth)
501 : Const->getValue().zext(PromotedWidth));
502 I->setOperand(i, NewConst);
503 } else if (isa<UndefValue>(Op))
504 I->setOperand(i, ConstantInt::get(ExtTy, 0));
505 }
506
507 // Mutate the result type, unless this is an icmp or switch.
508 if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
509 I->mutateType(ExtTy);
510 Promoted.insert(I);
511 }
512 }
513}
514
515void IRPromoter::TruncateSinks() {
516 LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
517
518 IRBuilder<> Builder{Ctx};
519
520 auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
521 if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
522 return nullptr;
523
524 if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
525 return nullptr;
526
527 LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
528 << *V << "\n");
529 Builder.SetInsertPoint(cast<Instruction>(V));
530 auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
531 if (Trunc)
532 NewInsts.insert(Trunc);
533 return Trunc;
534 };
535
536 // Fix up any stores or returns that use the results of the promoted
537 // chain.
538 for (auto *I : Sinks) {
539 LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
540
541 // Handle calls separately as we need to iterate over arg operands.
542 if (auto *Call = dyn_cast<CallInst>(I)) {
543 for (unsigned i = 0; i < Call->arg_size(); ++i) {
544 Value *Arg = Call->getArgOperand(i);
545 Type *Ty = TruncTysMap[Call][i];
546 if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
547 Trunc->moveBefore(Call);
548 Call->setArgOperand(i, Trunc);
549 }
550 }
551 continue;
552 }
553
554 // Special case switches because we need to truncate the condition.
555 if (auto *Switch = dyn_cast<SwitchInst>(I)) {
556 Type *Ty = TruncTysMap[Switch][0];
557 if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
558 Trunc->moveBefore(Switch);
559 Switch->setCondition(Trunc);
560 }
561 continue;
562 }
563
564 // Don't insert a trunc for a zext which can still legally promote.
565 // Nor insert a trunc when the input value to that trunc has the same width
566 // as the zext we are inserting it for. When this happens the input operand
567 // for the zext will be promoted to the same width as the zext's return type
568 // rendering that zext unnecessary. This zext gets removed before the end
569 // of the pass.
570 if (auto ZExt = dyn_cast<ZExtInst>(I))
571 if (ZExt->getType()->getScalarSizeInBits() >= PromotedWidth)
572 continue;
573
574 // Now handle the others.
575 for (unsigned i = 0; i < I->getNumOperands(); ++i) {
576 Type *Ty = TruncTysMap[I][i];
577 if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
578 Trunc->moveBefore(I);
579 I->setOperand(i, Trunc);
580 }
581 }
582 }
583}
584
585void IRPromoter::Cleanup() {
586 LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
587 // Some zexts will now have become redundant, along with their trunc
588 // operands, so remove them.
589 for (auto *V : Visited) {
590 if (!isa<ZExtInst>(V))
591 continue;
592
593 auto ZExt = cast<ZExtInst>(V);
594 if (ZExt->getDestTy() != ExtTy)
595 continue;
596
597 Value *Src = ZExt->getOperand(0);
598 if (ZExt->getSrcTy() == ZExt->getDestTy()) {
599 LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
600 << "\n");
601 ReplaceAllUsersOfWith(ZExt, Src);
602 continue;
603 }
604
605 // We've inserted a trunc for a zext sink, but we already know that the
606 // input is in range, negating the need for the trunc.
607 if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
608 auto *Trunc = cast<TruncInst>(Src);
609 assert(Trunc->getOperand(0)->getType() == ExtTy &&
610 "expected inserted trunc to be operating on i32");
611 ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
612 }
613 }
614
615 for (auto *I : InstsToRemove) {
616 LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
617 I->dropAllReferences();
618 }
619}
620
621void IRPromoter::ConvertTruncs() {
622 LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
623 IRBuilder<> Builder{Ctx};
624
625 for (auto *V : Visited) {
626 if (!isa<TruncInst>(V) || Sources.count(V))
627 continue;
628
629 auto *Trunc = cast<TruncInst>(V);
630 Builder.SetInsertPoint(Trunc);
631 IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
632 IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
633
634 unsigned NumBits = DestTy->getScalarSizeInBits();
636 ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
637 Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
638 if (SrcTy != ExtTy)
639 Masked = Builder.CreateTrunc(Masked, ExtTy);
640
641 if (auto *I = dyn_cast<Instruction>(Masked))
642 NewInsts.insert(I);
643
644 ReplaceAllUsersOfWith(Trunc, Masked);
645 }
646}
647
648void IRPromoter::Mutate() {
649 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
650 << PromotedWidth << "-bits\n");
651
652 // Cache original types of the values that will likely need truncating
653 for (auto *I : Sinks) {
654 if (auto *Call = dyn_cast<CallInst>(I)) {
655 for (Value *Arg : Call->args())
656 TruncTysMap[Call].push_back(Arg->getType());
657 } else if (auto *Switch = dyn_cast<SwitchInst>(I))
658 TruncTysMap[I].push_back(Switch->getCondition()->getType());
659 else {
660 for (unsigned i = 0; i < I->getNumOperands(); ++i)
661 TruncTysMap[I].push_back(I->getOperand(i)->getType());
662 }
663 }
664 for (auto *V : Visited) {
665 if (!isa<TruncInst>(V) || Sources.count(V))
666 continue;
667 auto *Trunc = cast<TruncInst>(V);
668 TruncTysMap[Trunc].push_back(Trunc->getDestTy());
669 }
670
671 // Insert zext instructions between sources and their users.
672 ExtendSources();
673
674 // Promote visited instructions, mutating their types in place.
675 PromoteTree();
676
677 // Convert any truncs, that aren't sources, into AND masks.
678 ConvertTruncs();
679
680 // Insert trunc instructions for use by calls, stores etc...
681 TruncateSinks();
682
683 // Finally, remove unecessary zexts and truncs, delete old instructions and
684 // clear the data structures.
685 Cleanup();
686
687 LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
688}
689
690/// We disallow booleans to make life easier when dealing with icmps but allow
691/// any other integer that fits in a scalar register. Void types are accepted
692/// so we can handle switches.
693bool TypePromotionImpl::isSupportedType(Value *V) {
694 Type *Ty = V->getType();
695
696 // Allow voids and pointers, these won't be promoted.
697 if (Ty->isVoidTy() || Ty->isPointerTy())
698 return true;
699
700 if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
701 cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
702 return false;
703
704 return LessOrEqualTypeSize(V);
705}
706
707/// We accept most instructions, as well as Arguments and ConstantInsts. We
708/// Disallow casts other than zext and truncs and only allow calls if their
709/// return value is zeroext. We don't allow opcodes that can introduce sign
710/// bits.
711bool TypePromotionImpl::isSupportedValue(Value *V) {
712 if (auto *I = dyn_cast<Instruction>(V)) {
713 switch (I->getOpcode()) {
714 default:
715 return isa<BinaryOperator>(I) && isSupportedType(I) &&
717 case Instruction::GetElementPtr:
718 case Instruction::Store:
719 case Instruction::Br:
720 case Instruction::Switch:
721 return true;
722 case Instruction::PHI:
723 case Instruction::Select:
724 case Instruction::Ret:
725 case Instruction::Load:
726 case Instruction::Trunc:
727 return isSupportedType(I);
728 case Instruction::BitCast:
729 return I->getOperand(0)->getType() == I->getType();
730 case Instruction::ZExt:
731 return isSupportedType(I->getOperand(0));
732 case Instruction::ICmp:
733 // Now that we allow small types than TypeSize, only allow icmp of
734 // TypeSize because they will require a trunc to be legalised.
735 // TODO: Allow icmp of smaller types, and calculate at the end
736 // whether the transform would be beneficial.
737 if (isa<PointerType>(I->getOperand(0)->getType()))
738 return true;
739 return EqualTypeSize(I->getOperand(0));
740 case Instruction::Call: {
741 // Special cases for calls as we need to check for zeroext
742 // TODO We should accept calls even if they don't have zeroext, as they
743 // can still be sinks.
744 auto *Call = cast<CallInst>(I);
745 return isSupportedType(Call) &&
746 Call->hasRetAttr(Attribute::AttrKind::ZExt);
747 }
748 }
749 } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
750 return isSupportedType(V);
751 } else if (isa<Argument>(V))
752 return isSupportedType(V);
753
754 return isa<BasicBlock>(V);
755}
756
757/// Check that the type of V would be promoted and that the original type is
758/// smaller than the targeted promoted type. Check that we're not trying to
759/// promote something larger than our base 'TypeSize' type.
760bool TypePromotionImpl::isLegalToPromote(Value *V) {
761 auto *I = dyn_cast<Instruction>(V);
762 if (!I)
763 return true;
764
765 if (SafeToPromote.count(I))
766 return true;
767
768 if (isPromotedResultSafe(I) || isSafeWrap(I)) {
769 SafeToPromote.insert(I);
770 return true;
771 }
772 return false;
773}
774
775bool TypePromotionImpl::TryToPromote(Value *V, unsigned PromotedWidth,
776 const LoopInfo &LI) {
777 Type *OrigTy = V->getType();
779 SafeToPromote.clear();
780 SafeWrap.clear();
781
782 if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
783 return false;
784
785 LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
786 << TypeSize << " bits to " << PromotedWidth << "\n");
787
788 SetVector<Value *> WorkList;
789 SetVector<Value *> Sources;
791 SetVector<Value *> CurrentVisited;
792 WorkList.insert(V);
793
794 // Return true if V was added to the worklist as a supported instruction,
795 // if it was already visited, or if we don't need to explore it (e.g.
796 // pointer values and GEPs), and false otherwise.
797 auto AddLegalInst = [&](Value *V) {
798 if (CurrentVisited.count(V))
799 return true;
800
801 // Ignore GEPs because they don't need promoting and the constant indices
802 // will prevent the transformation.
803 if (isa<GetElementPtrInst>(V))
804 return true;
805
806 if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
807 LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
808 return false;
809 }
810
811 WorkList.insert(V);
812 return true;
813 };
814
815 // Iterate through, and add to, a tree of operands and users in the use-def.
816 while (!WorkList.empty()) {
817 Value *V = WorkList.pop_back_val();
818 if (CurrentVisited.count(V))
819 continue;
820
821 // Ignore non-instructions, other than arguments.
822 if (!isa<Instruction>(V) && !isSource(V))
823 continue;
824
825 // If we've already visited this value from somewhere, bail now because
826 // the tree has already been explored.
827 // TODO: This could limit the transform, ie if we try to promote something
828 // from an i8 and fail first, before trying an i16.
829 if (AllVisited.count(V))
830 return false;
831
832 CurrentVisited.insert(V);
833 AllVisited.insert(V);
834
835 // Calls can be both sources and sinks.
836 if (isSink(V))
837 Sinks.insert(cast<Instruction>(V));
838
839 if (isSource(V))
840 Sources.insert(V);
841
842 if (!isSink(V) && !isSource(V)) {
843 if (auto *I = dyn_cast<Instruction>(V)) {
844 // Visit operands of any instruction visited.
845 for (auto &U : I->operands()) {
846 if (!AddLegalInst(U))
847 return false;
848 }
849 }
850 }
851
852 // Don't visit users of a node which isn't going to be mutated unless its a
853 // source.
854 if (isSource(V) || shouldPromote(V)) {
855 for (Use &U : V->uses()) {
856 if (!AddLegalInst(U.getUser()))
857 return false;
858 }
859 }
860 }
861
862 LLVM_DEBUG({
863 dbgs() << "IR Promotion: Visited nodes:\n";
864 for (auto *I : CurrentVisited)
865 I->dump();
866 });
867
868 unsigned ToPromote = 0;
869 unsigned NonFreeArgs = 0;
870 unsigned NonLoopSources = 0, LoopSinks = 0;
872 for (auto *CV : CurrentVisited) {
873 if (auto *I = dyn_cast<Instruction>(CV))
874 Blocks.insert(I->getParent());
875
876 if (Sources.count(CV)) {
877 if (auto *Arg = dyn_cast<Argument>(CV))
878 if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
879 ++NonFreeArgs;
880 if (!isa<Instruction>(CV) ||
881 !LI.getLoopFor(cast<Instruction>(CV)->getParent()))
882 ++NonLoopSources;
883 continue;
884 }
885
886 if (isa<PHINode>(CV))
887 continue;
888 if (LI.getLoopFor(cast<Instruction>(CV)->getParent()))
889 ++LoopSinks;
890 if (Sinks.count(cast<Instruction>(CV)))
891 continue;
892 ++ToPromote;
893 }
894
895 // DAG optimizations should be able to handle these cases better, especially
896 // for function arguments.
897 if (!isa<PHINode>(V) && !(LoopSinks && NonLoopSources) &&
898 (ToPromote < 2 || (Blocks.size() == 1 && NonFreeArgs > SafeWrap.size())))
899 return false;
900
901 IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
902 SafeWrap, InstsToRemove);
903 Promoter.Mutate();
904 return true;
905}
906
907bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
909 const LoopInfo &LI) {
911 return false;
912
913 LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
914
915 AllVisited.clear();
916 SafeToPromote.clear();
917 SafeWrap.clear();
918 bool MadeChange = false;
919 const DataLayout &DL = F.getParent()->getDataLayout();
920 const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
921 const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
922 RegisterBitWidth =
924 Ctx = &F.getParent()->getContext();
925
926 // Return the preferred integer width of the instruction, or zero if we
927 // shouldn't try.
928 auto GetPromoteWidth = [&](Instruction *I) -> uint32_t {
929 if (!isa<IntegerType>(I->getType()))
930 return 0;
931
932 EVT SrcVT = TLI->getValueType(DL, I->getType());
933 if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
934 return 0;
935
936 if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger)
937 return 0;
938
939 EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
940 if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
941 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
942 << "for promoted type\n");
943 return 0;
944 }
945
946 // TODO: Should we prefer to use RegisterBitWidth instead?
947 return PromotedVT.getFixedSizeInBits();
948 };
949
950 auto BBIsInLoop = [&](BasicBlock *BB) -> bool {
951 for (auto *L : LI)
952 if (L->contains(BB))
953 return true;
954 return false;
955 };
956
957 for (BasicBlock &BB : F) {
958 for (Instruction &I : BB) {
959 if (AllVisited.count(&I))
960 continue;
961
962 if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) &&
963 isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) {
964 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: "
965 << *I.getOperand(0) << "\n");
966 EVT ZExtVT = TLI->getValueType(DL, I.getType());
967 Instruction *Phi = static_cast<Instruction *>(I.getOperand(0));
968 auto PromoteWidth = ZExtVT.getFixedSizeInBits();
969 if (RegisterBitWidth < PromoteWidth) {
970 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target "
971 << "register for ZExt type\n");
972 continue;
973 }
974 MadeChange |= TryToPromote(Phi, PromoteWidth, LI);
975 } else if (auto *ICmp = dyn_cast<ICmpInst>(&I)) {
976 // Search up from icmps to try to promote their operands.
977 // Skip signed or pointer compares
978 if (ICmp->isSigned())
979 continue;
980
981 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
982
983 for (auto &Op : ICmp->operands()) {
984 if (auto *OpI = dyn_cast<Instruction>(Op)) {
985 if (auto PromotedWidth = GetPromoteWidth(OpI)) {
986 MadeChange |= TryToPromote(OpI, PromotedWidth, LI);
987 break;
988 }
989 }
990 }
991 }
992 }
993 if (!InstsToRemove.empty()) {
994 for (auto *I : InstsToRemove)
995 I->eraseFromParent();
996 InstsToRemove.clear();
997 }
998 }
999
1000 AllVisited.clear();
1001 SafeToPromote.clear();
1002 SafeWrap.clear();
1003
1004 return MadeChange;
1005}
1006
1007INITIALIZE_PASS_BEGIN(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false)
1011INITIALIZE_PASS_END(TypePromotionLegacy, DEBUG_TYPE, PASS_NAME, false, false)
1012
1013char TypePromotionLegacy::ID = 0;
1014
1015bool TypePromotionLegacy::runOnFunction(Function &F) {
1016 if (skipFunction(F))
1017 return false;
1018
1019 auto &TPC = getAnalysis<TargetPassConfig>();
1020 auto *TM = &TPC.getTM<TargetMachine>();
1021 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1022 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1023
1024 TypePromotionImpl TP;
1025 return TP.run(F, TM, TTI, LI);
1026}
1027
1029 return new TypePromotionLegacy();
1030}
1031
1034 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1035 auto &LI = AM.getResult<LoopAnalysis>(F);
1036 TypePromotionImpl TP;
1037
1038 bool Changed = TP.run(F, TM, TTI, LI);
1039 if (!Changed)
1040 return PreservedAnalyses::all();
1041
1044 PA.preserve<LoopAnalysis>();
1045 return PA;
1046}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:505
static bool runOnFunction(Function &F, bool PostInlining)
static const HTTPClientCleanup Cleanup
Definition: HTTPClient.cpp:42
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static bool isPromotedResultSafe(Instruction *I)
Return whether we can safely mutate V's type to ExtTy without having to be concerned with zero extend...
static cl::opt< bool > DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), cl::desc("Disable type promotion pass"))
static bool GenerateSignBits(Instruction *I)
#define PASS_NAME
#define DEBUG_TYPE
Defines an IR pass for type promotion.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1173
bool isNonPositive() const
Determine if this APInt Value is non-positive (<= 0).
Definition: APInt.h:339
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:649
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:803
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:446
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:213
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:133
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
const BasicBlock & front() const
Definition: Function.h:801
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2639
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:285
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:591
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:172
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:178
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:208
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:193
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Analysis pass providing the TargetTransformInfo.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Target-Independent Code Generator Pass Configuration Options.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TypeSize getRegisterBitWidth(RegisterKind K) const
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:188
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
FunctionPass * createTypePromotionLegacyPass()
Create IR Type Promotion pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LLVMAttributeRef wrap(Attribute Attr)
Definition: Attributes.h:282
Extended Value Type.
Definition: ValueTypes.h:34
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:359