//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
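/// A worked example of the cases handled below (illustrative): for
/// Val = (X << 2) + 12, where both operations carry the 'nuw' flag required
/// here, this returns X with Scale = 4 and Offset = 12, since
/// (X << 2) + 12 == X*4 + 12. A lone ConstantInt 7 yields Scale = 0,
/// Offset = 7, and a zero base value.
///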
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    // We specifically require nuw because we store the Scale in an unsigned
    // and perform an unsigned divide on it.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C. Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
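/// For example (an illustrative sketch; this path only applies to typed
/// pointers, since opaque pointers bail out immediately below):
///   %a = alloca [4 x i8]
///   %b = bitcast [4 x i8]* %a to i32*
/// becomes
///   %a = alloca i32
/// with users of the bitcast rewritten to use the new alloca directly.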
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                       AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());
  // Opaque pointers don't have an element type we could replace with.
  if (PTy->isOpaque())
    return nullptr;

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type being cast to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getNonOpaquePointerElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the cast type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation. If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize =
      DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
  New->setMetadata(LLVMContext::MD_DIAssignID,
                   AI.getMetadata(LLVMContext::MD_DIAssignID));

  replaceAllDbgUsesWith(AI, *New, *New, DT);

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast. This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
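/// For example (illustrative), evaluating add i32 (zext i16 %a), (zext i16 %b)
/// in type i16 rebuilds the expression as add i16 %a, %b: the extensions
/// become no-ops and the add is recreated directly in the narrower type.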
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}

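/// Determine whether a pair of casts can be folded into a single cast or
/// dropped entirely, deferring to CastInst::isEliminableCastPair for the
/// core decision table. For example (illustrative), zext i8 -> i16 followed
/// by zext i16 -> i32 folds to a single zext i8 -> i32, while an
/// inttoptr/ptrtoint result is rejected below when the integer type differs
/// from the pointer width.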
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
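/// For example (illustrative), V = and i32 %x, %y can be evaluated as i16:
/// and i16 (trunc %x), (trunc %y) produces exactly the low 16 bits that
/// trunc (and i32 %x, %y) would.
///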
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to the sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
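/// For example (an illustrative i8 rotate that was promoted to i32):
///   trunc (or (shl i32 %x, %amt), (lshr i32 %x, (sub 8, %amt))) to i8
/// can become
///   call i8 @llvm.fshl.i8(i8 %t, i8 %t, i8 %a)  ; %t/%a = truncated %x/%amt
/// provided the right-shifted value has only zeros in its high bits.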
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
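/// For example (illustrative):
///   trunc (add i32 %x, 15) to i8 --> add i8 (trunc %x), 15
/// The binary operator is recreated in the narrow type, so the truncate of
/// the wide result disappears.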
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        Value *Shift =
            OldShift->getOpcode() == Instruction::AShr
                ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
        return CastInst::CreateTruncOrBitCast(Shift, DestTy);
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type. Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
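  // For example (illustrative): trunc i64 %v to i8, where %v is evaluable in
  // i16, is first rewritten as trunc i16 (narrowed tree) to i8. The truncate
  // survives, but the arithmetic now happens at a quarter of the width.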
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      MaskC = ConstantExpr::getOr(MaskC, One);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31   true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    if (Op1CV->isZero() && Cmp->isEquality() &&
        (Cmp->getOperand(0)->getType() == Zext.getType() ||
         Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
      // If Op1C is some other power of two, convert:
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);

      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2() &&
          (Zext.getType()->getScalarSizeInBits() !=
           KnownZeroMask.logBase2() + 1)) {
        uint32_t ShAmt = KnownZeroMask.logBase2();
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
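  // For example (illustrative, the SrcSize == DstSize case below):
  //   %t = trunc i32 %a to i16
  //   %z = zext i16 %t to i32
  // becomes
  //   %z = and i32 %a, 65535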
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Constant *ZC = ConstantExpr::getZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZextd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Constant *ZextC = ConstantExpr::getZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Zext, VScale);
        }
      }
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
1391 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1392 ICmpInst::Predicate Pred = Cmp->getPredicate();
1393
1394 // Don't bother if Op1 isn't of vector or integer type.
1395 if (!Op1->getType()->isIntOrIntVectorTy())
1396 return nullptr;
1397
1398 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1399 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1400 Value *Sh = ConstantInt::get(Op0->getType(),
1401 Op0->getType()->getScalarSizeInBits() - 1);
1402 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1403 if (In->getType() != Sext.getType())
1404 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1405
1406 return replaceInstUsesWith(Sext, In);
1407 }
1408
1409 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1410 // If we know that only one bit of the LHS of the icmp can be set and we
1411 // have an equality comparison with zero or a power of 2, we can transform
1412 // the icmp and sext into bitwise/integer operations.
1413 if (Cmp->hasOneUse() &&
1414 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1415 KnownBits Known = computeKnownBits(Op0, 0, &Sext);
1416
1417 APInt KnownZeroMask(~Known.Zero);
1418 if (KnownZeroMask.isPowerOf2()) {
1419 Value *In = Cmp->getOperand(0);
1420
1421 // If the icmp tests for a known zero bit we can constant fold it.
1422 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1423 Value *V = Pred == ICmpInst::ICMP_NE ?
1425 ConstantInt::getNullValue(Sext.getType());
1426 return replaceInstUsesWith(Sext, V);
1427 }
1428
1429 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1430 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1431 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1432 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1433 // Perform a right shift to place the desired bit in the LSB.
1434 if (ShiftAmt)
1435 In = Builder.CreateLShr(In,
1436 ConstantInt::get(In->getType(), ShiftAmt));
1437
1438 // At this point "In" is either 1 or 0. Subtract 1 to turn
1439 // {1, 0} -> {0, -1}.
1440 In = Builder.CreateAdd(In,
1441 ConstantInt::getAllOnesValue(In->getType()),
1442 "sext");
1443 } else {
1444 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1445 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1446 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1447 // Perform a left shift to place the desired bit in the MSB.
1448 if (ShiftAmt)
1449 In = Builder.CreateShl(In,
1450 ConstantInt::get(In->getType(), ShiftAmt));
1451
1452 // Distribute the bit over the whole bit width.
1453 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1454 KnownZeroMask.getBitWidth() - 1), "sext");
1455 }
1456
1457 if (Sext.getType() == In->getType())
1458 return replaceInstUsesWith(Sext, In);
1459 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1460 }
1461 }
1462 }
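 // Worked example for the power-of-2 case above (hypothetical values): if
 // only bit 3 of %x can be set, 'sext ((x & 8) == 0)' becomes (x >> 3) + -1:
 //   x == 8: (8 >> 3) + -1 ==  0 (the compare was false)
 //   x == 0: (0 >> 3) + -1 == -1 (the compare was true)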
1463
1464 return nullptr;
1465}
1466
1467/// Return true if we can take the specified value and return it as type Ty
1468/// without inserting any new casts and without changing the value of the common
1469/// low bits. This is used by code that tries to promote integer operations to
1470/// a wider type when doing so will allow us to eliminate the extension.
1471///
1472/// This function works on both vectors and scalars.
1473///
1474static bool canEvaluateSExtd(Value *V, Type *Ty) {
1475 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1476 "Can't sign extend type to a smaller type");
1477 if (canAlwaysEvaluateInType(V, Ty))
1478 return true;
1479 if (canNotEvaluateInType(V, Ty))
1480 return false;
1481
1482 auto *I = cast<Instruction>(V);
1483 switch (I->getOpcode()) {
1484 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1485 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1486 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1487 return true;
1488 case Instruction::And:
1489 case Instruction::Or:
1490 case Instruction::Xor:
1491 case Instruction::Add:
1492 case Instruction::Sub:
1493 case Instruction::Mul:
1494 // These operators can all arbitrarily be extended if their inputs can.
1495 return canEvaluateSExtd(I->getOperand(0), Ty) &&
1496 canEvaluateSExtd(I->getOperand(1), Ty);
1497
1498 //case Instruction::Shl: TODO
1499 //case Instruction::LShr: TODO
1500
1501 case Instruction::Select:
1502 return canEvaluateSExtd(I->getOperand(1), Ty) &&
1503 canEvaluateSExtd(I->getOperand(2), Ty);
1504
1505 case Instruction::PHI: {
1506 // We can change a phi if we can change all operands. Note that we never
1507 // get into trouble with cyclic PHIs here because we only consider
1508 // instructions with a single use.
1509 PHINode *PN = cast<PHINode>(I);
1510 for (Value *IncValue : PN->incoming_values())
1511 if (!canEvaluateSExtd(IncValue, Ty)) return false;
1512 return true;
1513 }
1514 default:
1515 // TODO: Can handle more cases here.
1516 break;
1517 }
1518
1519 return false;
1520}
1521
1522 Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
1523 // If this sign extend is only used by a truncate, let the truncate be
1524 // eliminated before we try to optimize this sext.
1525 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1526 return nullptr;
1527
1528 if (Instruction *I = commonCastTransforms(Sext))
1529 return I;
1530
1531 Value *Src = Sext.getOperand(0);
1532 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1533 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1534 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1535
1536 // If the value being extended is zero or positive, use a zext instead.
1537 if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT))
1538 return CastInst::Create(Instruction::ZExt, Src, DestTy);
1539
1540 // Try to extend the entire expression tree to the wide destination type.
1541 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1542 // Okay, we can transform this! Insert the new expression now.
1543 LLVM_DEBUG(
1544 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1545 " to avoid sign extend: "
1546 << Sext << '\n');
1547 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1548 assert(Res->getType() == DestTy);
1549
1550 // If the high bits are already filled with sign bit, just replace this
1551 // cast with the result.
1552 if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
1553 return replaceInstUsesWith(Sext, Res);
1554
1555 // We need to emit a shl + ashr to do the sign extend.
1556 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1557 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1558 ShAmt);
1559 }
1560
1561 Value *X;
1562 if (match(Src, m_Trunc(m_Value(X)))) {
1563 // If the input has more sign bits than bits truncated, then convert
1564 // directly to final type.
1565 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1566 if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
1567 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1568
1569 // If input is a trunc from the destination type, then convert into shifts.
1570 if (Src->hasOneUse() && X->getType() == DestTy) {
1571 // sext (trunc X) --> ashr (shl X, C), C
1572 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1573 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1574 }
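 // For example (placeholders), sign-extending an i8 truncated from i32:
 //   %t = trunc i32 %x to i8
 //   %s = sext i8 %t to i32
 // becomes:
 //   %a = shl i32 %x, 24
 //   %s = ashr i32 %a, 24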
1575
1576 // If we are replacing shifted-in high zero bits with sign bits, convert
1577 // the logic shift to arithmetic shift and eliminate the cast to
1578 // intermediate type:
1579 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1580 Value *Y;
1581 if (Src->hasOneUse() &&
1582 match(X, m_LShr(m_Value(Y),
1583 m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
1584 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1585 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1586 }
1587 }
1588
1589 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1590 return transformSExtICmp(Cmp, Sext);
1591
1592 // If the input is a shl/ashr pair of the same constant, then this is a sign
1593 // extension from a smaller value. If we could trust arbitrary bitwidth
1594 // integers, we could turn this into a truncate to the smaller bitwidth and
1595 // use a sext for the whole extension. Since we don't, look deeper and check
1596 // for a truncate. If the source and dest are the same type, eliminate the
1597 // trunc and extend and just do shifts. For example, turn:
1598 // %a = trunc i32 %i to i8
1599 // %b = shl i8 %a, C
1600 // %c = ashr i8 %b, C
1601 // %d = sext i8 %c to i32
1602 // into:
1603 // %a = shl i32 %i, 32-(8-C)
1604 // %d = ashr i32 %a, 32-(8-C)
1605 Value *A = nullptr;
1606 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1607 Constant *BA = nullptr, *CA = nullptr;
1608 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1609 m_Constant(CA))) &&
1610 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1611 Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1612 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1613 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1614 Constant *NewShAmt = ConstantExpr::getSub(
1615 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1616 NumLowbitsLeft);
1617 NewShAmt =
1618 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1619 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1620 return BinaryOperator::CreateAShr(A, NewShAmt);
1621 }
1622
1623 // Splatting a bit of constant-index across a value:
1624 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1625 // If the dest type is different, use a cast (adjust use check).
1626 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1627 m_SpecificInt(SrcBitSize - 1))))) {
1628 Type *XTy = X->getType();
1629 unsigned XBitSize = XTy->getScalarSizeInBits();
1630 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1631 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1632 if (XTy == DestTy)
1633 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1634 AshrAmtC);
1635 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1636 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1637 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1638 }
1639 }
1640
1641 if (match(Src, m_VScale())) {
1642 if (Sext.getFunction() &&
1643 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1644 Attribute Attr =
1645 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1646 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1647 if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1648 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1649 return replaceInstUsesWith(Sext, VScale);
1650 }
1651 }
1652 }
1653 }
1654
1655 return nullptr;
1656}
1657
1658/// Return true if the specified floating-point constant fits in the specified
1659/// FP type without changing its value.
1660static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1661 bool losesInfo;
1662 APFloat F = CFP->getValueAPF();
1663 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1664 return !losesInfo;
1665}
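 // For instance (illustrative constants): a double 2.5 converts to IEEEhalf
 // and back without losing information, so it fits; a double 0.1 does not
 // even fit IEEEsingle, because rounding to float changes its value.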
1666
1667 static Type *shrinkFPConstant(ConstantFP *CFP) {
1668 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1669 return nullptr; // No constant folding of this.
1670 // See if the value can be truncated to half and then reextended.
1671 if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1672 return Type::getHalfTy(CFP->getContext());
1673 // See if the value can be truncated to float and then reextended.
1674 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1675 return Type::getFloatTy(CFP->getContext());
1676 if (CFP->getType()->isDoubleTy())
1677 return nullptr; // Won't shrink.
1678 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1679 return Type::getDoubleTy(CFP->getContext());
1680 // Don't try to shrink to various long double types.
1681 return nullptr;
1682}
1683
1684// Determine if this is a vector of ConstantFPs and if so, return the minimal
1685// type we can safely truncate all elements to.
1686 static Type *shrinkFPConstantVector(Value *V) {
1687 auto *CV = dyn_cast<Constant>(V);
1688 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1689 if (!CV || !CVVTy)
1690 return nullptr;
1691
1692 Type *MinType = nullptr;
1693
1694 unsigned NumElts = CVVTy->getNumElements();
1695
1696 // For fixed-width vectors we find the minimal type by looking
1697 // through the constant values of the vector.
1698 for (unsigned i = 0; i != NumElts; ++i) {
1699 if (isa<UndefValue>(CV->getAggregateElement(i)))
1700 continue;
1701
1702 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1703 if (!CFP)
1704 return nullptr;
1705
1706 Type *T = shrinkFPConstant(CFP);
1707 if (!T)
1708 return nullptr;
1709
1710 // If we haven't found a type yet or this type has a larger mantissa than
1711 // our previous type, this is our new minimal type.
1712 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1713 MinType = T;
1714 }
1715
1716 // Make a vector type from the minimal type.
1717 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1718}
1719
1720/// Find the minimum FP type we can safely truncate to.
1721 static Type *getMinimumFPType(Value *V) {
1722 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1723 return FPExt->getOperand(0)->getType();
1724
1725 // If this value is a constant, return the constant in the smallest FP type
1726 // that can accurately represent it. This allows us to turn
1727 // (float)((double)X+2.0) into x+2.0f.
1728 if (auto *CFP = dyn_cast<ConstantFP>(V))
1729 if (Type *T = shrinkFPConstant(CFP))
1730 return T;
1731
1732 // We can only correctly find a minimum type for a scalable vector when it is
1733 // a splat. For splats of constant values the fpext is wrapped up as a
1734 // ConstantExpr.
1735 if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1736 if (FPCExt->getOpcode() == Instruction::FPExt)
1737 return FPCExt->getOperand(0)->getType();
1738
1739 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1740 // vectors.
1741 if (Type *T = shrinkFPConstantVector(V))
1742 return T;
1743
1744 return V->getType();
1745}
1746
1747/// Return true if the cast from integer to FP can be proven to be exact for all
1748/// possible inputs (the conversion does not lose any precision).
1749 static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1750 CastInst::CastOps Opcode = I.getOpcode();
1751 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1752 "Unexpected cast");
1753 Value *Src = I.getOperand(0);
1754 Type *SrcTy = Src->getType();
1755 Type *FPTy = I.getType();
1756 bool IsSigned = Opcode == Instruction::SIToFP;
1757 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1758
1759 // Easy case - if the source integer type has less bits than the FP mantissa,
1760 // then the cast must be exact.
1761 int DestNumSigBits = FPTy->getFPMantissaWidth();
1762 if (SrcSize <= DestNumSigBits)
1763 return true;
1764
1765 // Cast from FP to integer and back to FP is independent of the intermediate
1766 // integer width because of poison on overflow.
1767 Value *F;
1768 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1769 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1770 // potential rounding of negative FP input values.
1771 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1772 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1773 SrcNumSigBits++;
1774
1775 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1776 // significant bits than the destination (and make sure neither type is
1777 // weird -- ppc_fp128).
1778 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1779 SrcNumSigBits <= DestNumSigBits)
1780 return true;
1781 }
1782
1783 // TODO:
1784 // Try harder to find if the source integer type has fewer significant bits.
1785 // For example, compute number of sign bits.
1786 KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
1787 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1788 SrcKnown.countMinLeadingZeros() -
1789 SrcKnown.countMinTrailingZeros();
1790 if (SigBits <= DestNumSigBits)
1791 return true;
1792
1793 return false;
1794}
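 // Illustrative cases (hypothetical types): 'uitofp i24 %x to float' is
 // always exact, since 24 bits fit float's 24-bit significand, and
 // 'sitofp i25 %x to float' is too (25 bits minus the sign bit). A wider
 // source may still qualify via the known-bits check, e.g. an i32 known to
 // be a multiple of 256 and less than 2^31.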
1795
1796 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1797 if (Instruction *I = commonCastTransforms(FPT))
1798 return I;
1799
1800 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1801 // simplify this expression to avoid one or more of the trunc/extend
1802 // operations if we can do so without changing the numerical results.
1803 //
1804 // The exact manner in which the widths of the operands interact to limit
1805 // what we can and cannot do safely varies from operation to operation, and
1806 // is explained below in the various case statements.
1807 Type *Ty = FPT.getType();
1808 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1809 if (BO && BO->hasOneUse()) {
1810 Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1811 Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1812 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1813 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1814 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1815 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1816 unsigned DstWidth = Ty->getFPMantissaWidth();
1817 switch (BO->getOpcode()) {
1818 default: break;
1819 case Instruction::FAdd:
1820 case Instruction::FSub:
1821 // For addition and subtraction, the infinitely precise result can
1822 // essentially be arbitrarily wide; proving that double rounding
1823 // will not occur because the result of OpI is exact (as we will for
1824 // FMul, for example) is hopeless. However, we *can* nonetheless
1825 // frequently know that double rounding cannot occur (or that it is
1826 // innocuous) by taking advantage of the specific structure of
1827 // infinitely-precise results that admit double rounding.
1828 //
1829 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1830 // to represent both sources, we can guarantee that the double
1831 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1832 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1833 // for proof of this fact).
1834 //
1835 // Note: Figueroa does not consider the case where DstFormat !=
1836 // SrcFormat. It's possible (likely even!) that this analysis
1837 // could be tightened for those cases, but they are rare (the main
1838 // case of interest here is (float)((double)float + float)).
1839 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1840 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1841 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1842 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1843 RI->copyFastMathFlags(BO);
1844 return RI;
1845 }
1846 break;
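 // Example of the width check above (hypothetical types): for
 // 'fptrunc (fadd double (fpext float %a), (fpext float %b)) to float',
 // OpWidth = 53 >= 2 * 24 + 1 and DstWidth = 24 >= SrcWidth = 24, so the
 // fadd can be performed directly on float.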
1847 case Instruction::FMul:
1848 // For multiplication, the infinitely precise result has at most
1849 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1850 // that such a value can be exactly represented, then no double
1851 // rounding can possibly occur; we can safely perform the operation
1852 // in the destination format if it can represent both sources.
1853 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1854 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1855 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1856 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1857 }
1858 break;
1859 case Instruction::FDiv:
1860 // For division, we again use the bound from Figueroa's
1861 // dissertation. I am entirely certain that this bound can be
1862 // tightened in the unbalanced operand case by an analysis based on
1863 // the diophantine rational approximation bound, but the well-known
1864 // condition used here is a good conservative first pass.
1865 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1866 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1867 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1868 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1869 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1870 }
1871 break;
1872 case Instruction::FRem: {
1873 // Remainder is straightforward. Remainder is always exact, so the
1874 // type of OpI doesn't enter into things at all. We simply evaluate
1875 // in whichever source type is larger, then convert to the
1876 // destination type.
1877 if (SrcWidth == OpWidth)
1878 break;
1879 Value *LHS, *RHS;
1880 if (LHSWidth == SrcWidth) {
1881 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1882 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1883 } else {
1884 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1885 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1886 }
1887
1888 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1889 return CastInst::CreateFPCast(ExactResult, Ty);
1890 }
1891 }
1892 }
1893
1894 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1895 Value *X;
1896 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1897 if (Op && Op->hasOneUse()) {
1898 // FIXME: The FMF should propagate from the fptrunc, not the source op.
1899 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1900 if (isa<FPMathOperator>(Op))
1901 Builder.setFastMathFlags(Op->getFastMathFlags());
1902
1903 if (match(Op, m_FNeg(m_Value(X)))) {
1904 Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1905
1906 return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1907 }
1908
1909 // If we are truncating a select that has an extended operand, we can
1910 // narrow the other operand and do the select as a narrow op.
1911 Value *Cond, *X, *Y;
1912 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1913 X->getType() == Ty) {
1914 // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1915 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1916 Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1917 return replaceInstUsesWith(FPT, Sel);
1918 }
1919 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1920 X->getType() == Ty) {
1921 // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1922 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1923 Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1924 return replaceInstUsesWith(FPT, Sel);
1925 }
1926 }
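 // For example (placeholders):
 //   %e = fpext float %x to double
 //   %s = select i1 %c, double %e, double %y
 //   %f = fptrunc double %s to float
 // becomes:
 //   %ny = fptrunc double %y to float
 //   %f = select i1 %c, float %x, float %ny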
1927
1928 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1929 switch (II->getIntrinsicID()) {
1930 default: break;
1931 case Intrinsic::ceil:
1932 case Intrinsic::fabs:
1933 case Intrinsic::floor:
1934 case Intrinsic::nearbyint:
1935 case Intrinsic::rint:
1936 case Intrinsic::round:
1937 case Intrinsic::roundeven:
1938 case Intrinsic::trunc: {
1939 Value *Src = II->getArgOperand(0);
1940 if (!Src->hasOneUse())
1941 break;
1942
1943 // Except for fabs, this transformation requires the input of the unary FP
1944 // operation to be itself an fpext from the type to which we're
1945 // truncating.
1946 if (II->getIntrinsicID() != Intrinsic::fabs) {
1947 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1948 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1949 break;
1950 }
1951
1952 // Do unary FP operation on smaller type.
1953 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1954 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1955 Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1956 II->getIntrinsicID(), Ty);
1957 SmallVector<OperandBundleDef, 1> OpBundles;
1958 II->getOperandBundlesAsDefs(OpBundles);
1959 CallInst *NewCI =
1960 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1961 NewCI->copyFastMathFlags(II);
1962 return NewCI;
1963 }
1964 }
1965 }
1966
1967 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1968 return I;
1969
1970 Value *Src = FPT.getOperand(0);
1971 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1972 auto *FPCast = cast<CastInst>(Src);
1973 if (isKnownExactCastIntToFP(*FPCast, *this))
1974 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1975 }
1976
1977 return nullptr;
1978}
1979
1980 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1981 // If the source operand is a cast from integer to FP and known exact, then
1982 // cast the integer operand directly to the destination type.
1983 Type *Ty = FPExt.getType();
1984 Value *Src = FPExt.getOperand(0);
1985 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1986 auto *FPCast = cast<CastInst>(Src);
1987 if (isKnownExactCastIntToFP(*FPCast, *this))
1988 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1989 }
1990
1991 return commonCastTransforms(FPExt);
1992}
1993
1994/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1995/// This is safe if the intermediate type has enough bits in its mantissa to
1996/// accurately represent all values of X. For example, this won't work with
1997/// i64 -> float -> i64.
1998 Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1999 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
2000 return nullptr;
2001
2002 auto *OpI = cast<CastInst>(FI.getOperand(0));
2003 Value *X = OpI->getOperand(0);
2004 Type *XType = X->getType();
2005 Type *DestType = FI.getType();
2006 bool IsOutputSigned = isa<FPToSIInst>(FI);
2007
2008 // Since we can assume the conversion won't overflow, our decision as to
2009 // whether the input will fit in the float should depend on the minimum
2010 // of the input range and output range.
2011
2012 // This means this is also safe for a signed input and unsigned output, since
2013 // a negative input would lead to undefined behavior.
2014 if (!isKnownExactCastIntToFP(*OpI, *this)) {
2015 // The first cast may not round exactly based on the source integer width
2016 // and FP width, but the overflow UB rules can still allow this to fold.
2017 // If the destination type is narrow, that means the intermediate FP value
2018 // must be large enough to hold the source value exactly.
2019 // For example, (uint8_t)((float)(uint32_t)16777217) is undefined behavior.
2020 int OutputSize = (int)DestType->getScalarSizeInBits();
2021 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2022 return nullptr;
2023 }
2024
2025 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
2026 bool IsInputSigned = isa<SIToFPInst>(OpI);
2027 if (IsInputSigned && IsOutputSigned)
2028 return new SExtInst(X, DestType);
2029 return new ZExtInst(X, DestType);
2030 }
2031 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
2032 return new TruncInst(X, DestType);
2033
2034 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2035 return replaceInstUsesWith(FI, X);
2036}
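 // For example (placeholders): 'fptosi (sitofp i32 %x to double) to i32'
 // folds to %x outright, since double's 53-bit significand holds any i32
 // exactly; the same round trip through float is rejected by the
 // mantissa-width checks above.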
2037
2038 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
2039 if (Instruction *I = foldItoFPtoI(FI))
2040 return I;
2041
2042 return commonCastTransforms(FI);
2043}
2044
2045 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2046 if (Instruction *I = foldItoFPtoI(FI))
2047 return I;
2048
2049 return commonCastTransforms(FI);
2050}
2051
2052 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2053 return commonCastTransforms(CI);
2054}
2055
2056 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2057 return commonCastTransforms(CI);
2058}
2059
2060 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2061 // If the source integer type is not the intptr_t type for this target, do a
2062 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2063 // cast to be exposed to other transforms.
2064 unsigned AS = CI.getAddressSpace();
2065 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2066 DL.getPointerSizeInBits(AS)) {
2067 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2068 DL.getIntPtrType(CI.getContext(), AS));
2069 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2070 return new IntToPtrInst(P, CI.getType());
2071 }
2072
2073 if (Instruction *I = commonCastTransforms(CI))
2074 return I;
2075
2076 return nullptr;
2077}
2078
2079/// Implement the transforms for cast of pointer (bitcast/ptrtoint)
2080 Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
2081 Value *Src = CI.getOperand(0);
2082
2083 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
2084 // If casting the result of a getelementptr instruction with no offset, turn
2085 // this into a cast of the original pointer!
2086 if (GEP->hasAllZeroIndices() &&
2087 // If CI is an addrspacecast and GEP changes the pointer type, merging
2088 // GEP into CI would undo canonicalizing addrspacecast with different
2089 // pointer types, causing infinite loops.
2090 (!isa<AddrSpaceCastInst>(CI) ||
2091 GEP->getType() == GEP->getPointerOperandType())) {
2092 // Changing the cast operand is usually not a good idea but it is safe
2093 // here because the pointer operand is being replaced with another
2094 // pointer operand so the opcode doesn't need to change.
2095 return replaceOperand(CI, 0, GEP->getOperand(0));
2096 }
2097 }
2098
2099 return commonCastTransforms(CI);
2100}
2101
2102 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2103 // If the destination integer type is not the intptr_t type for this target,
2104 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2105 // to be exposed to other transforms.
2106 Value *SrcOp = CI.getPointerOperand();
2107 Type *SrcTy = SrcOp->getType();
2108 Type *Ty = CI.getType();
2109 unsigned AS = CI.getPointerAddressSpace();
2110 unsigned TySize = Ty->getScalarSizeInBits();
2111 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2112 if (TySize != PtrSize) {
2113 Type *IntPtrTy =
2114 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2115 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2116 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2117 }
2118
2119 if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
2120 // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
2121 // While this can increase the number of instructions it doesn't actually
2122 // increase the overall complexity since the arithmetic is just part of
2123 // the GEP otherwise.
2124 if (GEP->hasOneUse() &&
2125 isa<ConstantPointerNull>(GEP->getPointerOperand())) {
2126 return replaceInstUsesWith(CI,
2127 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
2128 /*isSigned=*/false));
2129 }
2130 }
2131
2132 Value *Vec, *Scalar, *Index;
2133 if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
2134 m_Value(Scalar), m_Value(Index)))) &&
2135 Vec->getType() == Ty) {
2136 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2137 // Convert the scalar to int followed by insert to eliminate one cast:
2138 // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2139 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2140 return InsertElementInst::Create(Vec, NewCast, Index);
2141 }
2142
2143 return commonPointerCastTransforms(CI);
2144}
2145
2146/// This input value (which is known to have vector type) is being zero extended
2147/// or truncated to the specified vector type. Since the zext/trunc is done
2148/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2149/// and endianness will impact which end of the vector is extended or
2150/// truncated.
2151///
2152/// A vector is always stored with index 0 at the lowest address, which
2153/// corresponds to the most significant bits for a big endian stored integer and
2154/// the least significant bits for little endian. A trunc/zext of an integer
2155/// impacts the big end of the integer. Thus, we need to add/remove elements at
2156/// the front of the vector for big endian targets, and the back of the vector
2157/// for little endian targets.
2158///
2159/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2160///
2161/// The source and destination vector types may have different element types.
2162static Instruction *
2164 InstCombinerImpl &IC) {
2165 // We can only do this optimization if the output is a multiple of the input
2166 // element size, or the input is a multiple of the output element size.
2167 // Convert the input type to have the same element type as the output.
2168 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2169
2170 if (SrcTy->getElementType() != DestTy->getElementType()) {
2171 // The input types don't need to be identical, but for now they must be the
2172 // same size. There is no specific reason we couldn't handle things like
2173 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2174 // there yet.
2175 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2176 DestTy->getElementType()->getPrimitiveSizeInBits())
2177 return nullptr;
2178
2179 SrcTy =
2180 FixedVectorType::get(DestTy->getElementType(),
2181 cast<FixedVectorType>(SrcTy)->getNumElements());
2182 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2183 }
2184
2185 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2186 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2187 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2188
2189 assert(SrcElts != DestElts && "Element counts should be different.");
2190
2191 // Now that the element types match, get the shuffle mask and RHS of the
2192 // shuffle to use, which depends on whether we're increasing or decreasing the
2193 // size of the input.
2194 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2195 ArrayRef<int> ShuffleMask;
2196 Value *V2;
2197
2198 if (SrcElts > DestElts) {
2199 // If we're shrinking the number of elements (rewriting an integer
2200 // truncate), just shuffle in the elements corresponding to the least
2201 // significant bits from the input and use poison as the second shuffle
2202 // input.
2203 V2 = PoisonValue::get(SrcTy);
2204 // Make sure the shuffle mask selects the "least significant bits" by
2205 // keeping elements from back of the src vector for big endian, and from the
2206 // front for little endian.
2207 ShuffleMask = ShuffleMaskStorage;
2208 if (IsBigEndian)
2209 ShuffleMask = ShuffleMask.take_back(DestElts);
2210 else
2211 ShuffleMask = ShuffleMask.take_front(DestElts);
2212 } else {
2213 // If we're increasing the number of elements (rewriting an integer zext),
2214 // shuffle in all of the elements from InVal. Fill the rest of the result
2215 // elements with zeros from a constant zero.
2216 V2 = Constant::getNullValue(SrcTy);
2217 // Use first elt from V2 when indicating zero in the shuffle mask.
2218 uint32_t NullElt = SrcElts;
2219 // Extend with null values in the "most significant bits" by adding elements
2220 // in front of the src vector for big endian, and at the back for little
2221 // endian.
2222 unsigned DeltaElts = DestElts - SrcElts;
2223 if (IsBigEndian)
2224 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2225 else
2226 ShuffleMaskStorage.append(DeltaElts, NullElt);
2227 ShuffleMask = ShuffleMaskStorage;
2228 }
2229
2230 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2231}
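 // Illustrative example (placeholder types): rewriting
 //   bitcast (trunc (bitcast <4 x i16> %v to i64) to i32) to <2 x i16>
 // keeps elements {0,1} of %v on a little-endian target but elements {2,3}
 // on a big-endian one, since the truncate drops the integer's big end.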
2232
2233static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2234 return Value % Ty->getPrimitiveSizeInBits() == 0;
2235}
2236
2237static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2238 return Value / Ty->getPrimitiveSizeInBits();
2239}
2240
2241/// V is a value which is inserted into a vector of VecEltTy.
2242/// Look through the value to see if we can decompose it into
2243/// insertions into the vector. See the example in the comment for
2244/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2245/// The type of V is always a non-zero multiple of VecEltTy's size.
2246/// Shift is the number of bits between the lsb of V and the lsb of
2247/// the vector.
2248///
2249/// This returns false if the pattern can't be matched or true if it can,
2250/// filling in Elements with the elements found here.
2251static bool collectInsertionElements(Value *V, unsigned Shift,
2252 SmallVectorImpl<Value *> &Elements,
2253 Type *VecEltTy, bool isBigEndian) {
2254 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2255 "Shift should be a multiple of the element type size");
2256
2257 // Undef values never contribute useful bits to the result.
2258 if (isa<UndefValue>(V)) return true;
2259
2260 // If we got down to a value of the right type, we win; try inserting it
2261 // into the right element.
2262 if (V->getType() == VecEltTy) {
2263 // Inserting null doesn't actually insert any elements.
2264 if (Constant *C = dyn_cast<Constant>(V))
2265 if (C->isNullValue())
2266 return true;
2267
2268 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2269 if (isBigEndian)
2270 ElementIndex = Elements.size() - ElementIndex - 1;
2271
2272 // Fail if multiple elements are inserted into this slot.
2273 if (Elements[ElementIndex])
2274 return false;
2275
2276 Elements[ElementIndex] = V;
2277 return true;
2278 }
2279
2280 if (Constant *C = dyn_cast<Constant>(V)) {
2281 // Figure out the # elements this provides, and bitcast it or slice it up
2282 // as required.
2283 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2284 VecEltTy);
2285 // If the constant is the size of a vector element, we just need to bitcast
2286 // it to the right type so it gets properly inserted.
2287 if (NumElts == 1)
2288 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2289 Shift, Elements, VecEltTy, isBigEndian);
2290
2291 // Okay, this is a constant that covers multiple elements. Slice it up into
2292 // pieces and insert each element-sized piece into the vector.
2293 if (!isa<IntegerType>(C->getType()))
2294 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2295 C->getType()->getPrimitiveSizeInBits()));
2296 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2297 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2298
2299 for (unsigned i = 0; i != NumElts; ++i) {
2300 unsigned ShiftI = Shift+i*ElementSize;
2301 Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
2302 ShiftI));
2303 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2304 if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2305 isBigEndian))
2306 return false;
2307 }
2308 return true;
2309 }
2310
2311 if (!V->hasOneUse()) return false;
2312
2313 Instruction *I = dyn_cast<Instruction>(V);
2314 if (!I) return false;
2315 switch (I->getOpcode()) {
2316 default: return false; // Unhandled case.
2317 case Instruction::BitCast:
2318 if (I->getOperand(0)->getType()->isVectorTy())
2319 return false;
2320 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2321 isBigEndian);
2322 case Instruction::ZExt:
2323 if (!isMultipleOfTypeSize(
2324 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2325 VecEltTy))
2326 return false;
2327 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2328 isBigEndian);
2329 case Instruction::Or:
2330 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2331 isBigEndian) &&
2332 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2333 isBigEndian);
2334 case Instruction::Shl: {
2335 // Must be shifting by a constant that is a multiple of the element size.
2336 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2337 if (!CI) return false;
2338 Shift += CI->getZExtValue();
2339 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2340 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2341 isBigEndian);
2342 }
2343
2344 }
2345}
2346
2347
2348/// If the input is an 'or' instruction, we may be doing shifts and ors to
2349/// assemble the elements of the vector manually.
2350/// Try to rip the code out and replace it with insertelements. This is to
2351/// optimize code like this:
2352///
2353/// %tmp37 = bitcast float %inc to i32
2354/// %tmp38 = zext i32 %tmp37 to i64
2355/// %tmp31 = bitcast float %inc5 to i32
2356/// %tmp32 = zext i32 %tmp31 to i64
2357/// %tmp33 = shl i64 %tmp32, 32
2358/// %ins35 = or i64 %tmp33, %tmp38
2359/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2360///
2361/// Into two insertelements that do "buildvector{%inc, %inc5}".
2362 static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2363 InstCombinerImpl &IC) {
2364 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2365 Value *IntInput = CI.getOperand(0);
2366
2367 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2368 if (!collectInsertionElements(IntInput, 0, Elements,
2369 DestVecTy->getElementType(),
2370 IC.getDataLayout().isBigEndian()))
2371 return nullptr;
2372
2373 // If we succeeded, we know that all of the elements are specified by Elements
2374 // or are zero if Elements has a null entry. Recast this as a set of
2375 // insertions.
2376 Value *Result = Constant::getNullValue(CI.getType());
2377 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2378 if (!Elements[i]) continue; // Unset element.
2379
2380 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2381 IC.Builder.getInt32(i));
2382 }
2383
2384 return Result;
2385}
2386
2387/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2388/// vector followed by extract element. The backend tends to handle bitcasts of
2389/// vectors better than bitcasts of scalars because vector registers are
2390/// usually not type-specific like scalar integer or scalar floating-point.
2391 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2392 InstCombinerImpl &IC) {
2393 Value *VecOp, *Index;
2394 if (!match(BitCast.getOperand(0),
2395 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2396 return nullptr;
2397
2398 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2399 // type to extract from.
2400 Type *DestType = BitCast.getType();
2401 VectorType *VecType = cast<VectorType>(VecOp->getType());
2402 if (VectorType::isValidElementType(DestType)) {
2403 auto *NewVecType = VectorType::get(DestType, VecType);
2404 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2405 return ExtractElementInst::Create(NewBC, Index);
2406 }
2407
2408 // Only handle a vector DestType to avoid the inverse transform in visitBitCast.
2409 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2410 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2411 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2412 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2413
2414 return nullptr;
2415}
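 // For example (placeholders):
 //   %e = extractelement <4 x float> %v, i32 1
 //   %b = bitcast float %e to i32
 // becomes:
 //   %bc = bitcast <4 x float> %v to <4 x i32>
 //   %b = extractelement <4 x i32> %bc, i32 1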
2416
2417/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2418 static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2419 InstCombiner::BuilderTy &Builder) {
2420 Type *DestTy = BitCast.getType();
2421 BinaryOperator *BO;
2422
2423 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2424 !BO->isBitwiseLogicOp())
2425 return nullptr;
2426
2427 // FIXME: This transform is restricted to vector types to avoid backend
2428 // problems caused by creating potentially illegal operations. If a fix-up is
2429 // added to handle that situation, we can remove this check.
2430 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2431 return nullptr;
2432
2433 if (DestTy->isFPOrFPVectorTy()) {
2434 Value *X, *Y;
2435 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2436 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2437 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2438 if (X->getType()->isFPOrFPVectorTy() &&
2439 Y->getType()->isIntOrIntVectorTy()) {
2440 Value *CastedOp =
2441 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2442 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2443 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2444 }
2445 if (X->getType()->isIntOrIntVectorTy() &&
2446 Y->getType()->isFPOrFPVectorTy()) {
2447 Value *CastedOp =
2448 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2449 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2450 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2451 }
2452 }
2453 return nullptr;
2454 }
2455
2456 if (!DestTy->isIntOrIntVectorTy())
2457 return nullptr;
2458
2459 Value *X;
2460 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2461 X->getType() == DestTy && !isa<Constant>(X)) {
2462 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2463 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2464 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2465 }
2466
2467 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2468 X->getType() == DestTy && !isa<Constant>(X)) {
2469 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2470 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2471 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2472 }
2473
2474 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2475 // constant. This eases recognition of special constants for later ops.
2476 // Example:
2477 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2478 Constant *C;
2479 if (match(BO->getOperand(1), m_Constant(C))) {
2480 // bitcast (logic X, C) --> logic (bitcast X, C')
2481 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2482 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2483 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2484 }
2485
2486 return nullptr;
2487}
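 // For example (placeholders), with %X already of the destination type:
 //   %bc = bitcast <2 x i32> %X to <4 x i16>
 //   %a = and <4 x i16> %bc, %Y
 //   %r = bitcast <4 x i16> %a to <2 x i32>
 // becomes:
 //   %cy = bitcast <4 x i16> %Y to <2 x i32>
 //   %r = and <2 x i32> %X, %cy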
2488
2489/// Change the type of a select if we can eliminate a bitcast.
2490 static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2491 InstCombiner::BuilderTy &Builder) {
2492 Value *Cond, *TVal, *FVal;
2493 if (!match(BitCast.getOperand(0),
2494 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2495 return nullptr;
2496
2497 // A vector select must maintain the same number of elements in its operands.
2498 Type *CondTy = Cond->getType();
2499 Type *DestTy = BitCast.getType();
2500 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2501 if (!DestTy->isVectorTy() ||
2502 CondVTy->getElementCount() !=
2503 cast<VectorType>(DestTy)->getElementCount())
2504 return nullptr;
2505
2506 // FIXME: This transform is restricted from changing the select between
2507 // scalars and vectors to avoid backend problems caused by creating
2508 // potentially illegal operations. If a fix-up is added to handle that
2509 // situation, we can remove this check.
2510 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2511 return nullptr;
2512
2513 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2514 Value *X;
2515 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2516 !isa<Constant>(X)) {
2517 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2518 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2519 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2520 }
2521
2522 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2523 !isa<Constant>(X)) {
2524 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2525 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2526 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2527 }
2528
2529 return nullptr;
2530}
2531
2532/// Check if all users of CI are StoreInsts.
2533static bool hasStoreUsersOnly(CastInst &CI) {
2534 for (User *U : CI.users()) {
2535 if (!isa<StoreInst>(U))
2536 return false;
2537 }
2538 return true;
2539}
2540
2541/// This function handles the following case:
2542///
2543/// A -> B cast
2544/// PHI
2545/// B -> A cast
2546///
2547/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2548/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2549Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2550 PHINode *PN) {
2551 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2552 if (hasStoreUsersOnly(CI))
2553 return nullptr;
2554
2555 Value *Src = CI.getOperand(0);
2556 Type *SrcTy = Src->getType(); // Type B
2557 Type *DestTy = CI.getType(); // Type A
2558
2559 SmallVector<PHINode *, 4> PhiWorklist;
2560 SmallSetVector<PHINode *, 4> OldPhiNodes;
2561
2562 // Find all of the A->B casts and PHI nodes.
2563 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2564 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2565 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2566 PhiWorklist.push_back(PN);
2567 OldPhiNodes.insert(PN);
2568 while (!PhiWorklist.empty()) {
2569 auto *OldPN = PhiWorklist.pop_back_val();
2570 for (Value *IncValue : OldPN->incoming_values()) {
2571 if (isa<Constant>(IncValue))
2572 continue;
2573
2574 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2575 // If there is a sequence of one or more load instructions, each loaded
2576 // value is used as the address of a later load instruction, and a bitcast
2577 // is necessary to change the value type; don't optimize it. For
2578 // simplicity we give up if the load address comes from another load.
2579 Value *Addr = LI->getOperand(0);
2580 if (Addr == &CI || isa<LoadInst>(Addr))
2581 return nullptr;
2582 // Don't transform "load <256 x i32>, <256 x i32>*" to
2583 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2584 // TODO: Remove this check when bitcast between vector and x86_amx
2585 // is replaced with a specific intrinsic.
2586 if (DestTy->isX86_AMXTy())
2587 return nullptr;
2588 if (LI->hasOneUse() && LI->isSimple())
2589 continue;
2590 // If a LoadInst has more than one use, changing the type of loaded
2591 // value may create another bitcast.
2592 return nullptr;
2593 }
2594
2595 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2596 if (OldPhiNodes.insert(PNode))
2597 PhiWorklist.push_back(PNode);
2598 continue;
2599 }
2600
2601 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2602 // We can't handle other instructions.
2603 if (!BCI)
2604 return nullptr;
2605
2606 // Verify it's a A->B cast.
2607 Type *TyA = BCI->getOperand(0)->getType();
2608 Type *TyB = BCI->getType();
2609 if (TyA != DestTy || TyB != SrcTy)
2610 return nullptr;
2611 }
2612 }
2613
2614 // Check that each user of each old PHI node is something that we can
2615 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2616 for (auto *OldPN : OldPhiNodes) {
2617 for (User *V : OldPN->users()) {
2618 if (auto *SI = dyn_cast<StoreInst>(V)) {
2619 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2620 return nullptr;
2621 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2622 // Verify it's a B->A cast.
2623 Type *TyB = BCI->getOperand(0)->getType();
2624 Type *TyA = BCI->getType();
2625 if (TyA != DestTy || TyB != SrcTy)
2626 return nullptr;
2627 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2628 // As long as the user is another old PHI node, then even if we don't
2629 // rewrite it, the PHI web we're considering won't have any users
2630 // outside itself, so it'll be dead.
2631 if (!OldPhiNodes.contains(PHI))
2632 return nullptr;
2633 } else {
2634 return nullptr;
2635 }
2636 }
2637 }
2638
2639 // For each old PHI node, create a corresponding new PHI node with a type A.
2640 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2641 for (auto *OldPN : OldPhiNodes) {
2642 Builder.SetInsertPoint(OldPN);
2643 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2644 NewPNodes[OldPN] = NewPN;
2645 }
2646
2647 // Fill in the operands of new PHI nodes.
2648 for (auto *OldPN : OldPhiNodes) {
2649 PHINode *NewPN = NewPNodes[OldPN];
2650 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2651 Value *V = OldPN->getOperand(j);
2652 Value *NewV = nullptr;
2653 if (auto *C = dyn_cast<Constant>(V)) {
2654 NewV = ConstantExpr::getBitCast(C, DestTy);
2655 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2656 // Explicitly perform load combine to make sure no opposing transform
2657 // can remove the bitcast in the meantime and trigger an infinite loop.
2658 Builder.SetInsertPoint(LI);
2659 NewV = combineLoadToNewType(*LI, DestTy);
2660 // Remove the old load and its use in the old phi, which itself becomes
2661 // dead once the whole transform finishes.
2662 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2663 eraseInstFromFunction(*LI);
2664 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2665 NewV = BCI->getOperand(0);
2666 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2667 NewV = NewPNodes[PrevPN];
2668 }
2669 assert(NewV);
2670 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2671 }
2672 }
2673
2674 // Traverse all accumulated PHI nodes and process their users,
2675 // which are Stores and BitCasts. Without this processing
2676 // NewPHI nodes could be replicated and could lead to extra
2677 // moves generated after DeSSA.
2678 // If there is a store with type B, change it to type A.
2679
2680
2681 // Replace users of BitCast B->A with NewPHI. These will help
2682 // later to get rid of a closure formed by OldPHI nodes.
2683 Instruction *RetVal = nullptr;
2684 for (auto *OldPN : OldPhiNodes) {
2685 PHINode *NewPN = NewPNodes[OldPN];
2686 for (User *V : make_early_inc_range(OldPN->users())) {
2687 if (auto *SI = dyn_cast<StoreInst>(V)) {
2688 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2689 Builder.SetInsertPoint(SI);
2690 auto *NewBC =
2691 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2692 SI->setOperand(0, NewBC);
2693 Worklist.push(SI);
2694 assert(hasStoreUsersOnly(*NewBC));
2695 }
2696 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2697 Type *TyB = BCI->getOperand(0)->getType();
2698 Type *TyA = BCI->getType();
2699 assert(TyA == DestTy && TyB == SrcTy);
2700 (void) TyA;
2701 (void) TyB;
2702 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2703 if (BCI == &CI)
2704 RetVal = I;
2705 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2706 assert(OldPhiNodes.contains(PHI));
2707 (void) PHI;
2708 } else {
2709 llvm_unreachable("all uses should be handled");
2710 }
2711 }
2712 }
2713
2714 return RetVal;
2715}
2716
2717 static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
2718 const DataLayout &DL) {
2719 Value *Src = CI.getOperand(0);
2720 PointerType *SrcPTy = cast<PointerType>(Src->getType());
2721 PointerType *DstPTy = cast<PointerType>(CI.getType());
2722
2723 // Bitcasts involving opaque pointers cannot be converted into a GEP.
2724 if (SrcPTy->isOpaque() || DstPTy->isOpaque())
2725 return nullptr;
2726
2727 Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
2728 Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
2729
2730 // When the type pointed to is not sized the cast cannot be
2731 // turned into a gep.
2732 if (!SrcElTy->isSized())
2733 return nullptr;
2734
2735 // If the source and destination are pointers, and this cast is equivalent
2736 // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
2737 // This can enhance SROA and other transforms that want type-safe pointers.
2738 unsigned NumZeros = 0;
2739 while (SrcElTy && SrcElTy != DstElTy) {
2740 SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
2741 ++NumZeros;
2742 }
2743
2744 // If we found a path from the src to dest, create the getelementptr now.
2745 if (SrcElTy == DstElTy) {
2746 SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
2747 GetElementPtrInst *GEP = GetElementPtrInst::Create(
2748 SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
2749
2750 // If the source pointer is dereferenceable, then assume it points to an
2751 // allocated object and apply "inbounds" to the GEP.
2752 bool CanBeNull, CanBeFreed;
2753 if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
2754 // In a non-default address space (not 0), a null pointer can not be
2755 // assumed inbounds, so ignore that case (dereferenceable_or_null).
2756 // The reason is that 'null' is not treated differently in these address
2757 // spaces, and we consequently ignore the 'gep inbounds' special case
2758 // for 'null' which allows 'inbounds' on 'null' if the indices are
2759 // zeros.
2760 if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
2761 GEP->setIsInBounds();
2762 }
2763 return GEP;
2764 }
2765 return nullptr;
2766}
2767
2768 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2769 // If the operands are integer typed then apply the integer transforms,
2770 // otherwise just apply the common ones.
2771 Value *Src = CI.getOperand(0);
2772 Type *SrcTy = Src->getType();
2773 Type *DestTy = CI.getType();
2774
2775 // Get rid of casts from one type to the same type. These are useless and can
2776 // be replaced by the operand.
2777 if (DestTy == Src->getType())
2778 return replaceInstUsesWith(CI, Src);
2779
2780 if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
2781 // If we are casting an alloca to a pointer to a type of the same
2782 // size, rewrite the allocation instruction to allocate the "right" type.
2783 // There is no need to modify malloc calls because it is their bitcast that
2784 // needs to be cleaned up.
2785 if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
2786 if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
2787 return V;
2788
2789 if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
2790 return I;
2791 }
2792
2793 if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2794 // Beware: messing with this target-specific oddity may cause trouble.
2795 if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2796 Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2797 return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2798 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2799 }
2800
2801 if (isa<IntegerType>(SrcTy)) {
2802 // If this is a cast from an integer to vector, check to see if the input
2803 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2804 // the casts with a shuffle and (potentially) a bitcast.
2805 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2806 CastInst *SrcCast = cast<CastInst>(Src);
2807 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2808 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2809 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2810 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2811 return I;
2812 }
2813
2814 // If the input is an 'or' instruction, we may be doing shifts and ors to
2815 // assemble the elements of the vector manually. Try to rip the code out
2816 // and replace it with insertelements.
2817 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2818 return replaceInstUsesWith(CI, V);
2819 }
2820 }
2821
2822 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2823 if (SrcVTy->getNumElements() == 1) {
2824 // If our destination is not a vector, then make this a straight
2825 // scalar-scalar cast.
2826 if (!DestTy->isVectorTy()) {
2827 Value *Elem =
2828 Builder.CreateExtractElement(Src,
2829 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2830 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2831 }
2832
2833 // Otherwise, see if our source is an insert. If so, then use the scalar
2834 // component directly:
2835 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2836 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2837 return new BitCastInst(InsElt->getOperand(1), DestTy);
2838 }
2839
2840 // Convert an artificial vector insert into more analyzable bitwise logic.
2841 unsigned BitWidth = DestTy->getScalarSizeInBits();
2842 Value *X, *Y;
2843 uint64_t IndexC;
2844 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2845 m_Value(Y), m_ConstantInt(IndexC)))) &&
2846 DestTy->isIntegerTy() && X->getType() == DestTy &&
2847 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2848 // Adjust for big endian - the LSBs are at the high index.
2849 if (DL.isBigEndian())
2850 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2851
2852 // We only handle (endian-normalized) insert to index 0. Any other insert
2853 // would require a left-shift, so that is an extra instruction.
2854 if (IndexC == 0) {
2855 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2856 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2857 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2858 Value *AndX = Builder.CreateAnd(X, MaskC);
2859 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2860 return BinaryOperator::CreateOr(AndX, ZextY);
2861 }
2862 }
2863 }
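 // For example (placeholders), on a little-endian target:
 //   %v = bitcast i32 %X to <2 x i16>
 //   %i = insertelement <2 x i16> %v, i16 %Y, i64 0
 //   %r = bitcast <2 x i16> %i to i32
 // becomes:
 //   %a = and i32 %X, -65536
 //   %z = zext i16 %Y to i32
 //   %r = or i32 %a, %z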
2864
2865 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2866 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2867 // a bitcast to a vector with the same # elts.
2868 Value *ShufOp0 = Shuf->getOperand(0);
2869 Value *ShufOp1 = Shuf->getOperand(1);
2870 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2871 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2872 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2873 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2874 ShufElts == SrcVecElts) {
2875 BitCastInst *Tmp;
2876 // If either of the operands is a cast from CI.getType(), then
2877 // evaluating the shuffle in the casted destination's type will allow
2878 // us to eliminate at least one cast.
2879 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2880 Tmp->getOperand(0)->getType() == DestTy) ||
2881 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2882 Tmp->getOperand(0)->getType() == DestTy)) {
2883 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2884 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2885 // Return a new shuffle vector. Use the same element IDs, as we
2886 // know the vector types have matching #elts.
2887 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2888 }
2889 }
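// A sketch of the fold above: if %x is already a <4 x float> value,
//   %b = bitcast <4 x float> %x to <4 x i32>
//   %s = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = bitcast <4 x i32> %s to <4 x float>
// can shuffle %x directly in the <4 x float> domain, eliminating both casts.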
2890
2891 // A byte/bit-reversing shuffle whose result is bitcast to a scalar is
2892 // better recognized as a byte/bit swap:
2893 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2894 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2895 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2896 Shuf->hasOneUse() && Shuf->isReverse()) {
2897 unsigned IntrinsicNum = 0;
2898 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2899 SrcTy->getScalarSizeInBits() == 8) {
2900 IntrinsicNum = Intrinsic::bswap;
2901 } else if (SrcTy->getScalarSizeInBits() == 1) {
2902 IntrinsicNum = Intrinsic::bitreverse;
2903 }
2904 if (IntrinsicNum != 0) {
2905 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2906 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2907 Function *BswapOrBitreverse =
2908 Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
2909 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2910 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2911 }
2912 }
2913 }
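// Worked example of the bswap form above (a sketch; assumes i32 is a legal
// integer type for the target):
//   %s = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = bitcast <4 x i8> %s to i32
// -->
//   %b = bitcast <4 x i8> %x to i32
//   %r = call i32 @llvm.bswap.i32(i32 %b)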
2914
2915 // Handle the A->B->A cast when there is an intervening PHI node.
2916 if (PHINode *PN = dyn_cast<PHINode>(Src))
2917 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2918 return I;
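// A simplified sketch of the A->B->A shape handled here (the real transform
// checks additional constraints on the PHI's inputs and users):
//   %phi = phi i64 [ %a, %pred1 ], [ %b, %pred2 ]  ; %a, %b bitcast from double
//   %d   = bitcast i64 %phi to double
// may be rewritten as a PHI over double so the bitcasts cancel.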
2919
2920 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2921 return I;
2922
2923 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2924 return I;
2925
2926 if (Instruction *I = foldBitCastSelect(CI, Builder))
2927 return I;
2928
2929 if (SrcTy->isPointerTy())
2930 return commonPointerCastTransforms(CI);
2931 return commonCastTransforms(CI);
2932}
2933
2934 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2935 // If the destination pointer element type is not the same as the source's,
2936 // first do a bitcast to the destination type, and then the addrspacecast.
2937 // This allows the cast to be exposed to other transforms.
2938 Value *Src = CI.getOperand(0);
2939 PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
2940 PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
2941
2942 if (!SrcTy->hasSameElementTypeAs(DestTy)) {
2943 Type *MidTy =
2944 PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
2945 // Handle vectors of pointers.
2946 if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
2947 MidTy = VectorType::get(MidTy, VT->getElementCount());
2948
2949 Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
2950 return new AddrSpaceCastInst(NewBitCast, CI.getType());
2951 }
2952
2953 return commonPointerCastTransforms(CI);
2954}
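// Illustrative sketch under typed-pointer semantics (hand-written):
//   addrspacecast i32 addrspace(1)* %p to i8*
// -->
//   %b = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
//   addrspacecast i8 addrspace(1)* %b to i8*
// exposing the new bitcast to the transforms above.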