//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantFoldIntegerCast(C, Ty, isSigned, DL);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumOperands());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
      static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
    auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
    auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
    Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
    Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
    Res = new ShuffleVectorInst(Op0, Op1,
                                cast<ShuffleVectorInst>(I)->getShuffleMask());
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
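  // For example, with 64-bit pointers we should not collapse
  // (zext i32 %x to i64) + (inttoptr i64) into (inttoptr i32 %x): the
  // combined cast would go through an i32, narrower than the pointer.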
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {

      // If it's a bitcast involving vectors, make sure it has the same number
      // of elements on both sides.
      if (CI.getOpcode() != Instruction::BitCast ||
          match(&CI, m_ElementWiseBitCast(m_Value()))) {
        if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
          replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
          return NV;
        }
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return match(V, m_ImmConstant());

  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
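    // For example, (trunc (udiv (and i32 %x, 65535), (and i32 %y, 65535)) to
    // i16) can be computed directly in i16 because the bits above bit 15 of
    // both division operands are known zero.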
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
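    // For example, (trunc (shl i32 %x, 3) to i8) can become
    // (shl (trunc %x to i8), 3), since a shift amount known to be less than 8
    // is in range for the narrow type.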
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
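    // For example, (trunc (ashr (sext i16 %x to i32), 4) to i16) can be
    // computed as an i16 ashr: the 16 discarded high bits are all copies of
    // the sign bit, so the narrow shift produces the same low bits.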
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  case Instruction::ShuffleVector:
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
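/// For example, a 16-bit rotate that was promoted to i32:
///   trunc (or (shl (zext %x), %amt), (lshr (zext %x), (sub 16, %amt))) to i16
///   --->
///   call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 (trunc %amt))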
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // Adjust the width of ShAmt for narrowed funnel shift operation:
  // - Zero-extend if ShAmt is narrower than the destination type.
  // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
  // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
  // zext/trunc(ShAmt)).
  Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);

  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
                                                      /*IsSigned*/ true, DL)) {
          ShAmt = Constant::mergeUndefsWith(ShAmt, C);
          Value *Shift =
              OldShift->getOpcode() == Instruction::AShr
                  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
          return CastInst::CreateTruncOrBitCast(Shift, DestTy);
        }
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type. Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);

    Value *X;
    const APInt *C1;
    Constant *C2;
    if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
                                  m_ImmConstant(C2))))) {
      // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
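      // For example, trunc ((4 << X) >> 5) to i1 is true only when X == 3,
      // since cttz(4) == 2 and 5 - 2 == 3.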
      Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
      Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
      return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
    }

    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    {
      const APInt *C;
      if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
        // trunc (C << X) to i1 --> X == 0, where C is odd
        return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
      }
    }

    if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
      Value *X, *Y;
      if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
        return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
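      // The truncate keeps the low bits of wide element VecOpIdx. On a
      // little-endian target those live in narrow element VecOpIdx * Ratio;
      // on a big-endian target they are the last narrow element of that
      // group, (VecOpIdx + 1) * Ratio - 1.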
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
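  // The zext pads A with SrcWidth - AWidth leading zero bits, which the wide
  // ctlz counts but the narrow ctlz cannot see, so add that difference back
  // as a constant.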
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  bool Changed = false;
  if (!Trunc.hasNoSignedWrap() &&
      ComputeMaxSignificantBits(Src, /*Depth=*/0, &Trunc) <= DestWidth) {
    Trunc.setHasNoSignedWrap(true);
    Changed = true;
  }
  if (!Trunc.hasNoUnsignedWrap() &&
      MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
                        /*Depth=*/0, &Trunc)) {
    Trunc.setHasNoUnsignedWrap(true);
    Changed = true;
  }

  return Changed ? &Trunc : nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.

    if (Op1CV->isZero() && Cmp->isEquality()) {
      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
      APInt KnownZeroMask(~Known.Zero);
      uint32_t ShAmt = KnownZeroMask.logBase2();
      bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
                           (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
      if (IsExpectShAmt &&
          (Cmp->getOperand(0)->getType() == Zext.getType() ||
           Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
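    // For example, if x is correct except for its top 8 bits (BitsToClear ==
    // 8) and we shift left by 3, the top 3 dirty bits are shifted out and
    // only 5 dirty bits remain.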
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // zext nneg bool x -> 0
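  // The nneg flag promises the source is non-negative as a signed value; an
  // i1 with value 1 is signed -1, so the only possible value here is 0.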
  if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
    return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType()));

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize,
                                                        SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
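    // For example, zext (trunc i32 %a to i16) to i32 becomes
    // "and i32 %a, 65535" (the SrcSize == DstSize case).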
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Value *ZC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZextd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Value *ZextC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Zext, VScale);
        }
      }
    }
  }

  if (!Zext.hasNonNeg()) {
    // If this zero extend is only used by a shift, add nneg flag.
    if (Zext.hasOneUse() &&
        SrcTy->getScalarSizeInBits() >
            Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
        match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
      Zext.setNonNeg();
      return &Zext;
    }

    if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
      Zext.setNonNeg();
      return &Zext;
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())) {
      KnownBits Known = computeKnownBits(Op0, 0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(Sext.getType()) :
                       ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true /*SExt*/);
      }
    }
  }

  return nullptr;
}

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the common
/// low bits. This is used by code that tries to promote integer operations to
/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
  // If this sign extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this sext.
  if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
    return nullptr;

  if (Instruction *I = commonCastTransforms(Sext))
    return I;

  Value *Src = Sext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
  unsigned DestBitSize = DestTy->getScalarSizeInBits();

  // If the value being extended is zero or positive, use a zext instead.
  if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
    auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
    CI->setNonNeg(true);
    return CI;
  }

  // Try to extend the entire expression tree to the wide destination type.
  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid sign extend: "
               << Sext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, true);
    assert(Res->getType() == DestTy);

    // If the high bits are already filled with sign bit, just replace this
    // cast with the result.
    if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
      return replaceInstUsesWith(Sext, Res);

    // We need to emit a shl + ashr to do the sign extend.
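    // For example, for an i8 source evaluated in i32, this emits
    // (ashr (shl Res, 24), 24) to re-create the 24 high sign bits.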
    Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
    return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
                                      ShAmt);
  }
1467
1468 Value *X;
1469 if (match(Src, m_Trunc(m_Value(X)))) {
1470 // If the input has more sign bits than bits truncated, then convert
1471 // directly to final type.
1472 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1473 if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
1474 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1475
1476 // If input is a trunc from the destination type, then convert into shifts.
1477 if (Src->hasOneUse() && X->getType() == DestTy) {
1478 // sext (trunc X) --> ashr (shl X, C), C
1479 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1480 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1481 }
1482
1483 // If we are replacing shifted-in high zero bits with sign bits, convert
1484 // the logic shift to arithmetic shift and eliminate the cast to
1485 // intermediate type:
1486 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1487 Value *Y;
1488 if (Src->hasOneUse() &&
1489 match(X, m_LShr(m_Value(Y),
1490 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1491 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1492 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1493 }
1494 }
1495
1496 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1497 return transformSExtICmp(Cmp, Sext);
1498
1499 // If the input is a shl/ashr pair of the same constant, then this is a sign
1500 // extension from a smaller value. If we could trust arbitrary bitwidth
1501 // integers, we could turn this into a truncate to the smaller bitwidth and then
1502 // use a sext for the whole extension. Since we don't, look deeper and check
1503 // for a truncate. If the source and dest are the same type, eliminate the
1504 // trunc and extend and just do shifts. For example, turn:
1505 // %a = trunc i32 %i to i8
1506 // %b = shl i8 %a, C
1507 // %c = ashr i8 %b, C
1508 // %d = sext i8 %c to i32
1509 // into:
1510 // %a = shl i32 %i, 32-(8-C)
1511 // %d = ashr i32 %a, 32-(8-C)
1512 Value *A = nullptr;
1513 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1514 Constant *BA = nullptr, *CA = nullptr;
1515 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1516 m_ImmConstant(CA))) &&
1517 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1518 Constant *WideCurrShAmt =
1519 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1520 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1521 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1522 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1523 Constant *NewShAmt = ConstantExpr::getSub(
1524 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1525 NumLowbitsLeft);
1526 NewShAmt =
1527 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1528 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1529 return BinaryOperator::CreateAShr(A, NewShAmt);
1530 }
1531
1532 // Splatting a bit of constant-index across a value:
1533 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1534 // If the dest type is different, use a cast (adjust use check).
1535 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1536 m_SpecificInt(SrcBitSize - 1))))) {
1537 Type *XTy = X->getType();
1538 unsigned XBitSize = XTy->getScalarSizeInBits();
1539 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1540 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1541 if (XTy == DestTy)
1542 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1543 AshrAmtC);
1544 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1545 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1546 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1547 }
1548 }
1549
1550 if (match(Src, m_VScale())) {
1551 if (Sext.getFunction() &&
1552 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1553 Attribute Attr =
1554 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1555 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1556 if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1557 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1558 return replaceInstUsesWith(Sext, VScale);
1559 }
1560 }
1561 }
1562 }
1563
1564 return nullptr;
1565}
1566
1567/// Return true if the specified floating-point constant fits in the specified
1568/// FP type without changing its value.
1569static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1570 bool losesInfo;
1571 APFloat F = CFP->getValueAPF();
1572 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1573 return !losesInfo;
1574}
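// [Editor's note] Illustrative behavior (not from the source): converting the
// ConstantFP 2.5 to IEEEhalf is exact, so fitsInFPType returns true; 0.1 (as
// a double) cannot be represented exactly in half, losesInfo is set, and it
// returns false. shrinkFPConstant below relies on this to pick the narrowest
// safe type.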
1575
1576static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1577 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1578 return nullptr; // No constant folding of this.
1579 // See if the value can be truncated to bfloat and then reextended.
1580 if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
1581 return Type::getBFloatTy(CFP->getContext());
1582 // See if the value can be truncated to half and then reextended.
1583 if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
1584 return Type::getHalfTy(CFP->getContext());
1585 // See if the value can be truncated to float and then reextended.
1586 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1587 return Type::getFloatTy(CFP->getContext());
1588 if (CFP->getType()->isDoubleTy())
1589 return nullptr; // Won't shrink.
1590 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1591 return Type::getDoubleTy(CFP->getContext());
1592 // Don't try to shrink to various long double types.
1593 return nullptr;
1594}
1595
1596// Determine if this is a vector of ConstantFPs and if so, return the minimal
1597// type we can safely truncate all elements to.
1598static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1599 auto *CV = dyn_cast<Constant>(V);
1600 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1601 if (!CV || !CVVTy)
1602 return nullptr;
1603
1604 Type *MinType = nullptr;
1605
1606 unsigned NumElts = CVVTy->getNumElements();
1607
1608 // For fixed-width vectors we find the minimal type by looking
1609 // through the constant values of the vector.
1610 for (unsigned i = 0; i != NumElts; ++i) {
1611 if (isa<UndefValue>(CV->getAggregateElement(i)))
1612 continue;
1613
1614 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1615 if (!CFP)
1616 return nullptr;
1617
1618 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1619 if (!T)
1620 return nullptr;
1621
1622 // If we haven't found a type yet or this type has a larger mantissa than
1623 // our previous type, this is our new minimal type.
1624 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1625 MinType = T;
1626 }
1627
1628 // Make a vector type from the minimal type.
1629 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1630}
1631
1632/// Find the minimum FP type we can safely truncate to.
1633static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1634 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1635 return FPExt->getOperand(0)->getType();
1636
1637 // If this value is a constant, return the constant in the smallest FP type
1638 // that can accurately represent it. This allows us to turn
1639 // (float)((double)X+2.0) into x+2.0f.
1640 if (auto *CFP = dyn_cast<ConstantFP>(V))
1641 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1642 return T;
1643
1644 // We can only correctly find a minimum type for a scalable vector when it is
1645 // a splat. For splats of constant values the fpext is wrapped up as a
1646 // ConstantExpr.
1647 if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1648 if (FPCExt->getOpcode() == Instruction::FPExt)
1649 return FPCExt->getOperand(0)->getType();
1650
1651 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1652 // vectors.
1653 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1654 return T;
1655
1656 return V->getType();
1657}
1658
1659/// Return true if the cast from integer to FP can be proven to be exact for all
1660/// possible inputs (the conversion does not lose any precision).
1661static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1662 CastInst::CastOps Opcode = I.getOpcode();
1663 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1664 "Unexpected cast");
1665 Value *Src = I.getOperand(0);
1666 Type *SrcTy = Src->getType();
1667 Type *FPTy = I.getType();
1668 bool IsSigned = Opcode == Instruction::SIToFP;
1669 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1670
1671 // Easy case - if the source integer type has less bits than the FP mantissa,
1672 // then the cast must be exact.
1673 int DestNumSigBits = FPTy->getFPMantissaWidth();
1674 if (SrcSize <= DestNumSigBits)
1675 return true;
1676
1677 // Cast from FP to integer and back to FP is independent of the intermediate
1678 // integer width because of poison on overflow.
1679 Value *F;
1680 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1681 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1682 // potential rounding of negative FP input values.
1683 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1684 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1685 SrcNumSigBits++;
1686
1687 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1688 // significant bits than the destination (and make sure neither type is
1689 // weird -- ppc_fp128).
1690 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1691 SrcNumSigBits <= DestNumSigBits)
1692 return true;
1693 }
1694
1695 // TODO:
1696 // Try harder to find if the source integer type has less significant bits.
1697 // For example, compute number of sign bits.
1698 KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
1699 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1700 SrcKnown.countMinLeadingZeros() -
1701 SrcKnown.countMinTrailingZeros();
1702 if (SigBits <= DestNumSigBits)
1703 return true;
1704
1705 return false;
1706}
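// [Editor's note] Example (not from the source): "sitofp i16 %x to float" is
// always exact, since 15 value bits fit in float's 24-bit significand, while
// "uitofp i32 %x to float" is exact only if the known-bits query above proves
// that the set bits of %x span at most 24 bits.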
1707
1708Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1709 if (Instruction *I = commonCastTransforms(FPT))
1710 return I;
1711
1712 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1713 // simplify this expression to avoid one or more of the trunc/extend
1714 // operations if we can do so without changing the numerical results.
1715 //
1716 // The exact manner in which the widths of the operands interact to limit
1717 // what we can and cannot do safely varies from operation to operation, and
1718 // is explained below in the various case statements.
1719 Type *Ty = FPT.getType();
1720 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1721 if (BO && BO->hasOneUse()) {
1722 Type *LHSMinType =
1723 getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
1724 Type *RHSMinType =
1725 getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
1726 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1727 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1728 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1729 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1730 unsigned DstWidth = Ty->getFPMantissaWidth();
1731 switch (BO->getOpcode()) {
1732 default: break;
1733 case Instruction::FAdd:
1734 case Instruction::FSub:
1735 // For addition and subtraction, the infinitely precise result can
1736 // essentially be arbitrarily wide; proving that double rounding
1737 // will not occur because the result of OpI is exact (as we will for
1738 // FMul, for example) is hopeless. However, we *can* nonetheless
1739 // frequently know that double rounding cannot occur (or that it is
1740 // innocuous) by taking advantage of the specific structure of
1741 // infinitely-precise results that admit double rounding.
1742 //
1743 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1744 // to represent both sources, we can guarantee that the double
1745 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1746 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1747 // for proof of this fact).
1748 //
1749 // Note: Figueroa does not consider the case where DstFormat !=
1750 // SrcFormat. It's possible (likely even!) that this analysis
1751 // could be tightened for those cases, but they are rare (the main
1752 // case of interest here is (float)((double)float + float)).
1753 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1754 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1755 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1756 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1757 RI->copyFastMathFlags(BO);
1758 return RI;
1759 }
1760 break;
1761 case Instruction::FMul:
1762 // For multiplication, the infinitely precise result has at most
1763 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1764 // that such a value can be exactly represented, then no double
1765 // rounding can possibly occur; we can safely perform the operation
1766 // in the destination format if it can represent both sources.
1767 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1768 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1769 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1770 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1771 }
1772 break;
1773 case Instruction::FDiv:
1774 // For division, we again use the bound from Figueroa's
1775 // dissertation. I am entirely certain that this bound can be
1776 // tightened in the unbalanced operand case by an analysis based on
1777 // the diophantine rational approximation bound, but the well-known
1778 // condition used here is a good conservative first pass.
1779 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1780 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1781 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1782 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1783 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1784 }
1785 break;
1786 case Instruction::FRem: {
1787 // Remainder is straightforward: it is always exact, so the
1788 // type of OpI doesn't enter into things at all. We simply evaluate
1789 // in whichever source type is larger, then convert to the
1790 // destination type.
1791 if (SrcWidth == OpWidth)
1792 break;
1793 Value *LHS, *RHS;
1794 if (LHSWidth == SrcWidth) {
1795 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1796 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1797 } else {
1798 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1799 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1800 }
1801
1802 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1803 return CastInst::CreateFPCast(ExactResult, Ty);
1804 }
1805 }
1806 }
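// [Editor's note] A worked example for the FMul case above (illustrative, not
// from the source):
//   %e1 = fpext float %a to double
//   %e2 = fpext float %b to double
//   %m = fmul double %e1, %e2
//   %t = fptrunc double %m to float
// Here OpWidth = 53 >= LHSWidth + RHSWidth = 48 and DstWidth = 24 >= SrcWidth,
// so the product cannot double-round, and the sequence narrows to a float fmul
// of %a and %b (the fptrunc(fpext) pairs cancel).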
1807
1808 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1809 Value *X;
1810 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1811 if (Op && Op->hasOneUse()) {
1812 // FIXME: The FMF should propagate from the fptrunc, not the source op.
1813 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1814 if (isa<FPMathOperator>(Op))
1815 Builder.setFastMathFlags(Op->getFastMathFlags());
1816
1817 if (match(Op, m_FNeg(m_Value(X)))) {
1818 Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1819
1820 return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1821 }
1822
1823 // If we are truncating a select that has an extended operand, we can
1824 // narrow the other operand and do the select as a narrow op.
1825 Value *Cond, *X, *Y;
1826 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1827 X->getType() == Ty) {
1828 // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1829 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1830 Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1831 return replaceInstUsesWith(FPT, Sel);
1832 }
1833 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1834 X->getType() == Ty) {
1835 // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1836 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1837 Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1838 return replaceInstUsesWith(FPT, Sel);
1839 }
1840 }
1841
1842 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1843 switch (II->getIntrinsicID()) {
1844 default: break;
1845 case Intrinsic::ceil:
1846 case Intrinsic::fabs:
1847 case Intrinsic::floor:
1848 case Intrinsic::nearbyint:
1849 case Intrinsic::rint:
1850 case Intrinsic::round:
1851 case Intrinsic::roundeven:
1852 case Intrinsic::trunc: {
1853 Value *Src = II->getArgOperand(0);
1854 if (!Src->hasOneUse())
1855 break;
1856
1857 // Except for fabs, this transformation requires the input of the unary FP
1858 // operation to be itself an fpext from the type to which we're
1859 // truncating.
1860 if (II->getIntrinsicID() != Intrinsic::fabs) {
1861 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1862 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1863 break;
1864 }
1865
1866 // Do unary FP operation on smaller type.
1867 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1868 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1869 Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1870 II->getIntrinsicID(), Ty);
1871 SmallVector<OperandBundleDef, 1> OpBundles;
1872 II->getOperandBundlesAsDefs(OpBundles);
1873 CallInst *NewCI =
1874 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1875 NewCI->copyFastMathFlags(II);
1876 return NewCI;
1877 }
1878 }
1879 }
1880
1881 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1882 return I;
1883
1884 Value *Src = FPT.getOperand(0);
1885 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1886 auto *FPCast = cast<CastInst>(Src);
1887 if (isKnownExactCastIntToFP(*FPCast, *this))
1888 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1889 }
1890
1891 return nullptr;
1892}
1893
1894Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1895 // If the source operand is a cast from integer to FP and known exact, then
1896 // cast the integer operand directly to the destination type.
1897 Type *Ty = FPExt.getType();
1898 Value *Src = FPExt.getOperand(0);
1899 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1900 auto *FPCast = cast<CastInst>(Src);
1901 if (isKnownExactCastIntToFP(*FPCast, *this))
1902 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1903 }
1904
1905 return commonCastTransforms(FPExt);
1906}
1907
1908/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1909/// This is safe if the intermediate type has enough bits in its mantissa to
1910/// accurately represent all values of X. For example, this won't work with
1911/// i64 -> float -> i64.
1912Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1913 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1914 return nullptr;
1915
1916 auto *OpI = cast<CastInst>(FI.getOperand(0));
1917 Value *X = OpI->getOperand(0);
1918 Type *XType = X->getType();
1919 Type *DestType = FI.getType();
1920 bool IsOutputSigned = isa<FPToSIInst>(FI);
1921
1922 // Since we can assume the conversion won't overflow, our decision as to
1923 // whether the input will fit in the float should depend on the minimum
1924 // of the input range and output range.
1925
1926 // This means this is also safe for a signed input and unsigned output, since
1927 // a negative input would lead to undefined behavior.
1928 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1929 // The first cast may not round exactly based on the source integer width
1930 // and FP width, but the overflow UB rules can still allow this to fold.
1931 // If the destination type is narrow, that means the intermediate FP value
1932 // must be large enough to hold the source value exactly.
1933 // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1934 int OutputSize = (int)DestType->getScalarSizeInBits();
1935 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1936 return nullptr;
1937 }
1938
1939 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1940 bool IsInputSigned = isa<SIToFPInst>(OpI);
1941 if (IsInputSigned && IsOutputSigned)
1942 return new SExtInst(X, DestType);
1943 return new ZExtInst(X, DestType);
1944 }
1945 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1946 return new TruncInst(X, DestType);
1947
1948 assert(XType == DestType && "Unexpected types for int to FP to int casts");
1949 return replaceInstUsesWith(FI, X);
1950}
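// [Editor's note] Example (not from the source): for
//   %f = sitofp i16 %x to float
//   %i = fptosi float %f to i32
// the intermediate float is exact (isKnownExactCastIntToFP), and since the
// destination is wider than i16 with both casts signed, foldItoFPtoI emits
// "sext i16 %x to i32" and the FP round trip disappears.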
1951
1952static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
1953 // fpto{u/s}i non-norm --> 0
1954 FPClassTest Mask =
1955 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
1956 KnownFPClass FPClass =
1957 computeKnownFPClass(FI.getOperand(0), Mask, /*Depth=*/0,
1958 IC.getSimplifyQuery().getWithInstruction(&FI));
1959 if (FPClass.isKnownNever(Mask))
1960 return IC.replaceInstUsesWith(FI, ConstantInt::getNullValue(FI.getType()));
1961
1962 return nullptr;
1963}
1964
1965Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1966 if (Instruction *I = foldItoFPtoI(FI))
1967 return I;
1968
1969 if (Instruction *I = foldFPtoI(FI, *this))
1970 return I;
1971
1972 return commonCastTransforms(FI);
1973}
1974
1975Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
1976 if (Instruction *I = foldItoFPtoI(FI))
1977 return I;
1978
1979 if (Instruction *I = foldFPtoI(FI, *this))
1980 return I;
1981
1982 return commonCastTransforms(FI);
1983}
1984
1985Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
1986 if (Instruction *R = commonCastTransforms(CI))
1987 return R;
1988 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
1989 CI.setNonNeg();
1990 return &CI;
1991 }
1992 return nullptr;
1993}
1994
1995Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
1996 if (Instruction *R = commonCastTransforms(CI))
1997 return R;
1998 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
1999 auto *UI =
2000 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2001 UI->setNonNeg(true);
2002 return UI;
2003 }
2004 return nullptr;
2005}
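// [Editor's note] Illustrative effect of visitSIToFP (not from the source):
// if %x is known non-negative, e.g. "%x = lshr i32 %y, 1", then
// "sitofp i32 %x to float" is rewritten as a uitofp with the nneg flag set,
// which later folds can exploit.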
2006
2007Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2008 // If the source integer type is not the intptr_t type for this target, do a
2009 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2010 // cast to be exposed to other transforms.
2011 unsigned AS = CI.getAddressSpace();
2012 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2013 DL.getPointerSizeInBits(AS)) {
2014 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2015 DL.getIntPtrType(CI.getContext(), AS));
2016 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2017 return new IntToPtrInst(P, CI.getType());
2018 }
2019
2020 if (Instruction *I = commonCastTransforms(CI))
2021 return I;
2022
2023 return nullptr;
2024}
2025
2026Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2027 // If the destination integer type is not the intptr_t type for this target,
2028 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2029 // to be exposed to other transforms.
2030 Value *SrcOp = CI.getPointerOperand();
2031 Type *SrcTy = SrcOp->getType();
2032 Type *Ty = CI.getType();
2033 unsigned AS = CI.getPointerAddressSpace();
2034 unsigned TySize = Ty->getScalarSizeInBits();
2035 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2036 if (TySize != PtrSize) {
2037 Type *IntPtrTy =
2038 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2039 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2040 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2041 }
2042
2043 // (ptrtoint (ptrmask P, M))
2044 // -> (and (ptrtoint P), M)
2045 // This is generally beneficial as `and` is better supported than `ptrmask`.
2046 Value *Ptr, *Mask;
2047 if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
2048 m_Value(Mask)))) &&
2049 Mask->getType() == Ty)
2050 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2051
2052 if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
2053 // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
2054 // While this can increase the number of instructions it doesn't actually
2055 // increase the overall complexity since the arithmetic is just part of
2056 // the GEP otherwise.
2057 if (GEP->hasOneUse() &&
2058 isa<ConstantPointerNull>(GEP->getPointerOperand())) {
2059 return replaceInstUsesWith(
2060 CI, Builder.CreateIntCast(EmitGEPOffset(cast<GEPOperator>(GEP)), Ty,
2061 /*isSigned=*/false));
2062 }
2063 }
2064
2065 Value *Vec, *Scalar, *Index;
2066 if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
2067 m_Value(Scalar), m_Value(Index)))) &&
2068 Vec->getType() == Ty) {
2069 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2070 // Convert the scalar to int followed by insert to eliminate one cast:
2071 // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2072 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2073 return InsertElementInst::Create(Vec, NewCast, Index);
2074 }
2075
2076 return commonCastTransforms(CI);
2077}
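// [Editor's note] Example for the ptrmask fold above (not from the source),
// assuming 64-bit pointers:
//   %p = call ptr @llvm.ptrmask.p0.i64(ptr %q, i64 -16)
//   %i = ptrtoint ptr %p to i64
// becomes "and i64 %int_q, -16" where %int_q is "ptrtoint ptr %q to i64",
// since a plain 'and' is more widely optimized than the intrinsic.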
2078
2079/// This input value (which is known to have vector type) is being zero extended
2080/// or truncated to the specified vector type. Since the zext/trunc is done
2081/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2082/// endianness will impact which end of the vector that is extended or
2083/// truncated.
2084///
2085/// A vector is always stored with index 0 at the lowest address, which
2086/// corresponds to the most significant bits for a big endian stored integer and
2087/// the least significant bits for little endian. A trunc/zext of an integer
2088/// impacts the big end of the integer. Thus, we need to add/remove elements at
2089/// the front of the vector for big endian targets, and the back of the vector
2090/// for little endian targets.
2091///
2092/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2093///
2094/// The source and destination vector types may have different element types.
2095static Instruction *
2096optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2097 InstCombinerImpl &IC) {
2098 // We can only do this optimization if the output is a multiple of the input
2099 // element size, or the input is a multiple of the output element size.
2100 // Convert the input type to have the same element type as the output.
2101 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2102
2103 if (SrcTy->getElementType() != DestTy->getElementType()) {
2104 // The input types don't need to be identical, but for now they must be the
2105 // same size. There is no specific reason we couldn't handle things like
2106 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2107 // there yet.
2108 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2109 DestTy->getElementType()->getPrimitiveSizeInBits())
2110 return nullptr;
2111
2112 SrcTy =
2113 FixedVectorType::get(DestTy->getElementType(),
2114 cast<FixedVectorType>(SrcTy)->getNumElements());
2115 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2116 }
2117
2118 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2119 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2120 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2121
2122 assert(SrcElts != DestElts && "Element counts should be different.");
2123
2124 // Now that the element types match, get the shuffle mask and RHS of the
2125 // shuffle to use, which depends on whether we're increasing or decreasing the
2126 // size of the input.
2127 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2128 ArrayRef<int> ShuffleMask;
2129 Value *V2;
2130
2131 if (SrcElts > DestElts) {
2132 // If we're shrinking the number of elements (rewriting an integer
2133 // truncate), just shuffle in the elements corresponding to the least
2134 // significant bits from the input and use poison as the second shuffle
2135 // input.
2136 V2 = PoisonValue::get(SrcTy);
2137 // Make sure the shuffle mask selects the "least significant bits" by
2138 // keeping elements from the back of the src vector for big endian, and
2139 // from the front for little endian.
2140 ShuffleMask = ShuffleMaskStorage;
2141 if (IsBigEndian)
2142 ShuffleMask = ShuffleMask.take_back(DestElts);
2143 else
2144 ShuffleMask = ShuffleMask.take_front(DestElts);
2145 } else {
2146 // If we're increasing the number of elements (rewriting an integer zext),
2147 // shuffle in all of the elements from InVal. Fill the rest of the result
2148 // elements with zeros from a constant zero.
2149 V2 = Constant::getNullValue(SrcTy);
2150 // Use first elt from V2 when indicating zero in the shuffle mask.
2151 uint32_t NullElt = SrcElts;
2152 // Extend with null values in the "most significant bits" by adding elements
2153 // in front of the src vector for big endian, and at the back for little
2154 // endian.
2155 unsigned DeltaElts = DestElts - SrcElts;
2156 if (IsBigEndian)
2157 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2158 else
2159 ShuffleMaskStorage.append(DeltaElts, NullElt);
2160 ShuffleMask = ShuffleMaskStorage;
2161 }
2162
2163 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2164}
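// [Editor's note] A worked example (not from the source): shrinking
// <4 x i16> to <2 x i16> through an i64 trunc on a little-endian target keeps
// the low elements, so the mask above is <0, 1>; on big endian the low bits
// live at the back of the vector, giving <2, 3>.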
2165
2166static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2167 return Value % Ty->getPrimitiveSizeInBits() == 0;
2168}
2169
2170static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2171 return Value / Ty->getPrimitiveSizeInBits();
2172}
2173
2174/// V is a value which is inserted into a vector of VecEltTy.
2175/// Look through the value to see if we can decompose it into
2176/// insertions into the vector. See the example in the comment for
2177/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2178/// The type of V is always a non-zero multiple of VecEltTy's size.
2179/// Shift is the number of bits between the lsb of V and the lsb of
2180/// the vector.
2181///
2182/// This returns false if the pattern can't be matched or true if it can,
2183/// filling in Elements with the elements found here.
2184static bool collectInsertionElements(Value *V, unsigned Shift,
2185 SmallVectorImpl<Value *> &Elements,
2186 Type *VecEltTy, bool isBigEndian) {
2187 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2188 "Shift should be a multiple of the element type size");
2189
2190 // Undef values never contribute useful bits to the result.
2191 if (isa<UndefValue>(V)) return true;
2192
2193 // If we got down to a value of the right type, we win: try inserting it
2194 // into the right element.
2195 if (V->getType() == VecEltTy) {
2196 // Inserting null doesn't actually insert any elements.
2197 if (Constant *C = dyn_cast<Constant>(V))
2198 if (C->isNullValue())
2199 return true;
2200
2201 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2202 if (isBigEndian)
2203 ElementIndex = Elements.size() - ElementIndex - 1;
2204
2205 // Fail if multiple elements are inserted into this slot.
2206 if (Elements[ElementIndex])
2207 return false;
2208
2209 Elements[ElementIndex] = V;
2210 return true;
2211 }
2212
2213 if (Constant *C = dyn_cast<Constant>(V)) {
2214 // Figure out the # elements this provides, and bitcast it or slice it up
2215 // as required.
2216 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2217 VecEltTy);
2218 // If the constant is the size of a vector element, we just need to bitcast
2219 // it to the right type so it gets properly inserted.
2220 if (NumElts == 1)
2221 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2222 Shift, Elements, VecEltTy, isBigEndian);
2223
2224 // Okay, this is a constant that covers multiple elements. Slice it up into
2225 // pieces and insert each element-sized piece into the vector.
2226 if (!isa<IntegerType>(C->getType()))
2227 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2228 C->getType()->getPrimitiveSizeInBits()));
2229 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2230 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2231
2232 for (unsigned i = 0; i != NumElts; ++i) {
2233 unsigned ShiftI = i * ElementSize;
2234 Constant *Piece = ConstantFoldBinaryInstruction(
2235 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2236 if (!Piece)
2237 return false;
2238
2239 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2240 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2241 isBigEndian))
2242 return false;
2243 }
2244 return true;
2245 }
2246
2247 if (!V->hasOneUse()) return false;
2248
2249 Instruction *I = dyn_cast<Instruction>(V);
2250 if (!I) return false;
2251 switch (I->getOpcode()) {
2252 default: return false; // Unhandled case.
2253 case Instruction::BitCast:
2254 if (I->getOperand(0)->getType()->isVectorTy())
2255 return false;
2256 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2257 isBigEndian);
2258 case Instruction::ZExt:
2259 if (!isMultipleOfTypeSize(
2260 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2261 VecEltTy))
2262 return false;
2263 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2264 isBigEndian);
2265 case Instruction::Or:
2266 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2267 isBigEndian) &&
2268 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2269 isBigEndian);
2270 case Instruction::Shl: {
2271 // Must be shifting by a constant that is a multiple of the element size.
2272 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2273 if (!CI) return false;
2274 Shift += CI->getZExtValue();
2275 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2276 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2277 isBigEndian);
2278 }
2279
2280 }
2281}
2282
2283
2284/// If the input is an 'or' instruction, we may be doing shifts and ors to
2285/// assemble the elements of the vector manually.
2286/// Try to rip the code out and replace it with insertelements. This is to
2287/// optimize code like this:
2288///
2289/// %tmp37 = bitcast float %inc to i32
2290/// %tmp38 = zext i32 %tmp37 to i64
2291/// %tmp31 = bitcast float %inc5 to i32
2292/// %tmp32 = zext i32 %tmp31 to i64
2293/// %tmp33 = shl i64 %tmp32, 32
2294/// %ins35 = or i64 %tmp33, %tmp38
2295/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2296///
2297/// Into two insertelements that do "buildvector{%inc, %inc5}".
2298static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2299 InstCombinerImpl &IC) {
2300 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2301 Value *IntInput = CI.getOperand(0);
2302
2303 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2304 if (!collectInsertionElements(IntInput, 0, Elements,
2305 DestVecTy->getElementType(),
2306 IC.getDataLayout().isBigEndian()))
2307 return nullptr;
2308
2309 // If we succeeded, we know that all of the element are specified by Elements
2310 // or are zero if Elements has a null entry. Recast this as a set of
2311 // insertions.
2312 Value *Result = Constant::getNullValue(CI.getType());
2313 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2314 if (!Elements[i]) continue; // Unset element.
2315
2316 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2317 IC.Builder.getInt32(i));
2318 }
2319
2320 return Result;
2321}
2322
2323/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2324/// vector followed by extract element. The backend tends to handle bitcasts of
2325/// vectors better than bitcasts of scalars because vector registers are
2326/// usually not type-specific like scalar integer or scalar floating-point.
2327static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2328 InstCombinerImpl &IC) {
2329 Value *VecOp, *Index;
2330 if (!match(BitCast.getOperand(0),
2331 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2332 return nullptr;
2333
2334 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2335 // type to extract from.
2336 Type *DestType = BitCast.getType();
2337 VectorType *VecType = cast<VectorType>(VecOp->getType());
2338 if (VectorType::isValidElementType(DestType)) {
2339 auto *NewVecType = VectorType::get(DestType, VecType);
2340 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2341 return ExtractElementInst::Create(NewBC, Index);
2342 }
2343
2344 // Only handle a vector DestType, to avoid the inverse transform in visitBitCast.
2345 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2346 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2347 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2348 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2349
2350 return nullptr;
2351}
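// [Editor's note] Example (not from the source):
//   %e = extractelement <4 x i32> %v, i64 %i
//   %f = bitcast i32 %e to float
// is canonicalized to "extractelement (bitcast %v to <4 x float>), %i",
// trading a scalar int-to-FP move for a vector bitcast that is usually free.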
2352
2353/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2354static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2355 InstCombiner::BuilderTy &Builder) {
2356 Type *DestTy = BitCast.getType();
2357 BinaryOperator *BO;
2358
2359 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2360 !BO->isBitwiseLogicOp())
2361 return nullptr;
2362
2363 // FIXME: This transform is restricted to vector types to avoid backend
2364 // problems caused by creating potentially illegal operations. If a fix-up is
2365 // added to handle that situation, we can remove this check.
2366 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2367 return nullptr;
2368
2369 if (DestTy->isFPOrFPVectorTy()) {
2370 Value *X, *Y;
2371 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2372 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2373 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2374 if (X->getType()->isFPOrFPVectorTy() &&
2375 Y->getType()->isIntOrIntVectorTy()) {
2376 Value *CastedOp =
2377 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2378 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2379 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2380 }
2381 if (X->getType()->isIntOrIntVectorTy() &&
2382 Y->getType()->isFPOrFPVectorTy()) {
2383 Value *CastedOp =
2384 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2385 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2386 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2387 }
2388 }
2389 return nullptr;
2390 }
2391
2392 if (!DestTy->isIntOrIntVectorTy())
2393 return nullptr;
2394
2395 Value *X;
2396 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2397 X->getType() == DestTy && !isa<Constant>(X)) {
2398 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2399 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2400 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2401 }
2402
2403 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2404 X->getType() == DestTy && !isa<Constant>(X)) {
2405 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2406 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2407 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2408 }
2409
2410 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2411 // constant. This eases recognition of special constants for later ops.
2412 // Example:
2413 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2414 Constant *C;
2415 if (match(BO->getOperand(1), m_Constant(C))) {
2416 // bitcast (logic X, C) --> logic (bitcast X, C')
2417 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2418 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2419 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2420 }
2421
2422 return nullptr;
2423}
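// [Editor's note] Example for the integer-DestTy path above (not from the
// source), with X already of the destination type <4 x i32>:
//   %b = bitcast <4 x i32> %X to <2 x i64>
//   %a = and <2 x i64> %b, %Y
//   %c = bitcast <2 x i64> %a to <4 x i32>
// folds to "and <4 x i32> %X, (bitcast %Y to <4 x i32>)", removing a bitcast.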
2424
2425/// Change the type of a select if we can eliminate a bitcast.
2426static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2427 InstCombiner::BuilderTy &Builder) {
2428 Value *Cond, *TVal, *FVal;
2429 if (!match(BitCast.getOperand(0),
2430 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2431 return nullptr;
2432
2433 // A vector select must maintain the same number of elements in its operands.
2434 Type *CondTy = Cond->getType();
2435 Type *DestTy = BitCast.getType();
2436 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2437 if (!DestTy->isVectorTy() ||
2438 CondVTy->getElementCount() !=
2439 cast<VectorType>(DestTy)->getElementCount())
2440 return nullptr;
2441
2442 // FIXME: This transform is restricted from changing the select between
2443 // scalars and vectors to avoid backend problems caused by creating
2444 // potentially illegal operations. If a fix-up is added to handle that
2445 // situation, we can remove this check.
2446 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2447 return nullptr;
2448
2449 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2450 Value *X;
2451 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2452 !isa<Constant>(X)) {
2453 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2454 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2455 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2456 }
2457
2458 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2459 !isa<Constant>(X)) {
2460 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2461 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2462 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2463 }
2464
2465 return nullptr;
2466}
2467
2468/// Check if all users of CI are StoreInsts.
2469static bool hasStoreUsersOnly(CastInst &CI) {
2470 for (User *U : CI.users()) {
2471 if (!isa<StoreInst>(U))
2472 return false;
2473 }
2474 return true;
2475}
2476
2477/// This function handles following case
2478///
2479/// A -> B cast
2480/// PHI
2481/// B -> A cast
2482///
2483/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2484/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2485Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2486 PHINode *PN) {
2487 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2488 if (hasStoreUsersOnly(CI))
2489 return nullptr;
2490
2491 Value *Src = CI.getOperand(0);
2492 Type *SrcTy = Src->getType(); // Type B
2493 Type *DestTy = CI.getType(); // Type A
2494
2495 SmallVector<PHINode *, 4> PhiWorklist;
2496 SmallSetVector<PHINode *, 4> OldPhiNodes;
2497
2498 // Find all of the A->B casts and PHI nodes.
2499 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2500 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2501 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2502 PhiWorklist.push_back(PN);
2503 OldPhiNodes.insert(PN);
2504 while (!PhiWorklist.empty()) {
2505 auto *OldPN = PhiWorklist.pop_back_val();
2506 for (Value *IncValue : OldPN->incoming_values()) {
2507 if (isa<Constant>(IncValue))
2508 continue;
2509
2510 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2511 // If there is a sequence of one or more load instructions, where each
2512 // loaded value is used as the address of a later load, a bitcast is
2513 // necessary to change the value type, so don't optimize it. For
2514 // simplicity we give up if the load address comes from another load.
2515 Value *Addr = LI->getOperand(0);
2516 if (Addr == &CI || isa<LoadInst>(Addr))
2517 return nullptr;
2518 // Don't transform "load <256 x i32>, <256 x i32>*" to
2519 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2520 // TODO: Remove this check when bitcast between vector and x86_amx
2521 // is replaced with a specific intrinsic.
2522 if (DestTy->isX86_AMXTy())
2523 return nullptr;
2524 if (LI->hasOneUse() && LI->isSimple())
2525 continue;
2526 // If a LoadInst has more than one use, changing the type of loaded
2527 // value may create another bitcast.
2528 return nullptr;
2529 }
2530
2531 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2532 if (OldPhiNodes.insert(PNode))
2533 PhiWorklist.push_back(PNode);
2534 continue;
2535 }
2536
2537 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2538 // We can't handle other instructions.
2539 if (!BCI)
2540 return nullptr;
2541
2542 // Verify it's a A->B cast.
2543 Type *TyA = BCI->getOperand(0)->getType();
2544 Type *TyB = BCI->getType();
2545 if (TyA != DestTy || TyB != SrcTy)
2546 return nullptr;
2547 }
2548 }
2549
2550 // Check that each user of each old PHI node is something that we can
2551 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2552 for (auto *OldPN : OldPhiNodes) {
2553 for (User *V : OldPN->users()) {
2554 if (auto *SI = dyn_cast<StoreInst>(V)) {
2555 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2556 return nullptr;
2557 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2558 // Verify it's a B->A cast.
2559 Type *TyB = BCI->getOperand(0)->getType();
2560 Type *TyA = BCI->getType();
2561 if (TyA != DestTy || TyB != SrcTy)
2562 return nullptr;
2563 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2564 // As long as the user is another old PHI node, then even if we don't
2565 // rewrite it, the PHI web we're considering won't have any users
2566 // outside itself, so it'll be dead.
2567 if (!OldPhiNodes.contains(PHI))
2568 return nullptr;
2569 } else {
2570 return nullptr;
2571 }
2572 }
2573 }
2574
2575 // For each old PHI node, create a corresponding new PHI node with a type A.
2576 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2577 for (auto *OldPN : OldPhiNodes) {
2578 Builder.SetInsertPoint(OldPN);
2579 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2580 NewPNodes[OldPN] = NewPN;
2581 }
2582
2583 // Fill in the operands of new PHI nodes.
2584 for (auto *OldPN : OldPhiNodes) {
2585 PHINode *NewPN = NewPNodes[OldPN];
2586 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2587 Value *V = OldPN->getOperand(j);
2588 Value *NewV = nullptr;
2589 if (auto *C = dyn_cast<Constant>(V)) {
2590 NewV = ConstantExpr::getBitCast(C, DestTy);
2591 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2592 // Explicitly perform load combine to make sure no opposing transform
2593 // can remove the bitcast in the meantime and trigger an infinite loop.
2594 Builder.SetInsertPoint(LI);
2595 NewV = combineLoadToNewType(*LI, DestTy);
2596 // Remove the old load and its use in the old phi, which itself becomes
2597 // dead once the whole transform finishes.
2598 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2599 eraseInstFromFunction(*LI);
2600 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2601 NewV = BCI->getOperand(0);
2602 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2603 NewV = NewPNodes[PrevPN];
2604 }
2605 assert(NewV);
2606 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2607 }
2608 }
2609
2610 // Traverse all accumulated PHI nodes and process their users,
2611 // which are Stores and BitCasts. Without this processing
2612 // NewPHI nodes could be replicated and could lead to extra
2613 // moves generated after DeSSA.
2614 // If there is a store with type B, change it to type A.
2615
2616
2617 // Replace users of BitCast B->A with NewPHI. These will help
2618 // later to get rid of a closure formed by OldPHI nodes.
2619 Instruction *RetVal = nullptr;
2620 for (auto *OldPN : OldPhiNodes) {
2621 PHINode *NewPN = NewPNodes[OldPN];
2622 for (User *V : make_early_inc_range(OldPN->users())) {
2623 if (auto *SI = dyn_cast<StoreInst>(V)) {
2624 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2625 Builder.SetInsertPoint(SI);
2626 auto *NewBC =
2627 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2628 SI->setOperand(0, NewBC);
2629 Worklist.push(SI);
2630 assert(hasStoreUsersOnly(*NewBC));
2631 }
2632 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2633 Type *TyB = BCI->getOperand(0)->getType();
2634 Type *TyA = BCI->getType();
2635 assert(TyA == DestTy && TyB == SrcTy);
2636 (void) TyA;
2637 (void) TyB;
2638 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2639 if (BCI == &CI)
2640 RetVal = I;
2641 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2642 assert(OldPhiNodes.contains(PHI));
2643 (void) PHI;
2644 } else {
2645 llvm_unreachable("all uses should be handled");
2646 }
2647 }
2648 }
2649
2650 return RetVal;
2651}
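// [Editor's note] Sketch of the A->B->A web this handles (not from the
// source), with A = <2 x float> and B = i64:
//   %b = bitcast <2 x float> %a to i64
//   %p = phi i64 [ %b, %bb1 ], [ %c, %bb2 ]
//   %r = bitcast i64 %p to <2 x float>
// The PHI is rebuilt directly on <2 x float> and the B->A bitcasts collapse
// into uses of the new PHI, assuming every incoming value and user passes the
// checks above.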
2652
2653Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2654 // If the operands are integer typed then apply the integer transforms,
2655 // otherwise just apply the common ones.
2656 Value *Src = CI.getOperand(0);
2657 Type *SrcTy = Src->getType();
2658 Type *DestTy = CI.getType();
2659
2660 // Get rid of casts from one type to the same type. These are useless and can
2661 // be replaced by the operand.
2662 if (DestTy == Src->getType())
2663 return replaceInstUsesWith(CI, Src);
2664
2665 if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2666 // Beware: messing with this target-specific oddity may cause trouble.
2667 if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2668 Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2669 return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2670 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2671 }
2672
2673 if (isa<IntegerType>(SrcTy)) {
2674 // If this is a cast from an integer to vector, check to see if the input
2675 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2676 // the casts with a shuffle and (potentially) a bitcast.
2677 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2678 CastInst *SrcCast = cast<CastInst>(Src);
2679 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2680 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2681 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2682 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2683 return I;
2684 }
2685
2686 // If the input is an 'or' instruction, we may be doing shifts and ors to
2687 // assemble the elements of the vector manually. Try to rip the code out
2688 // and replace it with insertelements.
2689 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2690 return replaceInstUsesWith(CI, V);
2691 }
2692 }
2693
2694 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2695 if (SrcVTy->getNumElements() == 1) {
2696 // If our destination is not a vector, then make this a straight
2697 // scalar-scalar cast.
2698 if (!DestTy->isVectorTy()) {
2699 Value *Elem =
2700 Builder.CreateExtractElement(Src,
2701 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2702 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2703 }
2704
2705 // Otherwise, see if our source is an insert. If so, then use the scalar
2706 // component directly:
2707 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2708 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2709 return new BitCastInst(InsElt->getOperand(1), DestTy);
2710 }
2711
2712 // Convert an artificial vector insert into more analyzable bitwise logic.
2713 unsigned BitWidth = DestTy->getScalarSizeInBits();
2714 Value *X, *Y;
2715 uint64_t IndexC;
2716 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2717 m_Value(Y), m_ConstantInt(IndexC)))) &&
2718 DestTy->isIntegerTy() && X->getType() == DestTy &&
2719 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2720 // Adjust for big endian - the LSBs are at the high index.
2721 if (DL.isBigEndian())
2722 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2723
2724 // We only handle (endian-normalized) insert to index 0. Any other insert
2725 // would require a left-shift, so that is an extra instruction.
2726 if (IndexC == 0) {
2727 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2728 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2729 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2730 Value *AndX = Builder.CreateAnd(X, MaskC);
2731 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2732 return BinaryOperator::CreateOr(AndX, ZextY);
2733 }
2734 }
2735 }
2736
2737 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2738 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2739 // a bitcast to a vector with the same # elts.
2740 Value *ShufOp0 = Shuf->getOperand(0);
2741 Value *ShufOp1 = Shuf->getOperand(1);
2742 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2743 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2744 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2745 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2746 ShufElts == SrcVecElts) {
2747 BitCastInst *Tmp;
2748 // If either of the operands is a cast from CI.getType(), then
2749 // evaluating the shuffle in the casted destination's type will allow
2750 // us to eliminate at least one cast.
2751 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2752 Tmp->getOperand(0)->getType() == DestTy) ||
2753 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2754 Tmp->getOperand(0)->getType() == DestTy)) {
2755 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2756 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2757 // Return a new shuffle vector. Use the same element ID's, as we
2758 // know the vector types match #elts.
2759 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2760 }
2761 }
2762
2763 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2764 // as a byte/bit swap:
2765 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2766 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2767 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2768 Shuf->hasOneUse() && Shuf->isReverse()) {
2769 unsigned IntrinsicNum = 0;
2770 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2771 SrcTy->getScalarSizeInBits() == 8) {
2772 IntrinsicNum = Intrinsic::bswap;
2773 } else if (SrcTy->getScalarSizeInBits() == 1) {
2774 IntrinsicNum = Intrinsic::bitreverse;
2775 }
2776 if (IntrinsicNum != 0) {
2777 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2778 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2779 Function *BswapOrBitreverse =
2780 Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
2781 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2782 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2783 }
2784 }
2785 }
2786
2787 // Handle the A->B->A cast, and there is an intervening PHI node.
2788 if (PHINode *PN = dyn_cast<PHINode>(Src))
2789 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2790 return I;
2791
2792 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2793 return I;
2794
2795 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2796 return I;
2797
2798 if (Instruction *I = foldBitCastSelect(CI, Builder))
2799 return I;
2800
2801 return commonCastTransforms(CI);
2802}
2803
2804Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2805 return commonCastTransforms(CI);
2806}
Rewrite undef for PHI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool canEvaluateSExtd(Value *V, Type *Ty)
Return true if we can take the specified value and return it as type Ty without inserting any new cas...
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombinerImpl &IC, Instruction *CxtI)
Determine if the specified value can be computed in the specified wider type and produce the same low...
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Type * shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat)
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
static Type * getMinimumFPType(Value *V, bool PreferBFloat)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static bool canAlwaysEvaluateInType(Value *V, Type *Ty)
Constants and extensions/truncates from the destination type are always free to be evaluated in that ...
static bool canNotEvaluateInType(Value *V, Type *Ty)
Filter out values that we can not evaluate in the destination type for free.
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static unsigned getScalarSizeInBits(Type *Ty)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1725
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
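A quick illustration of the three mask constructors above (widths and bit counts chosen arbitrarily):

  #include "llvm/ADT/APInt.h"
  using llvm::APInt;

  APInt Low  = APInt::getLowBitsSet(32, 8);   // 0x000000FF: bottom 8 bits
  APInt High = APInt::getHighBitsSet(32, 8);  // 0xFF000000: top 8 bits
  APInt From = APInt::getBitsSetFrom(32, 16); // 0xFFFF0000: bits 16..31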
This class represents a conversion between pointers from one address space to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:235
std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
Definition: Attributes.cpp:417
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition: InstrTypes.h:332
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition: InstrTypes.h:336
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:935
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:930
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
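For example, one can ask whether a zext i8 -> i16 followed by a zext i16 -> i32 folds into a single cast; the return value is the opcode of the combined cast, or 0 if the pair cannot be eliminated. A sketch, assuming Int8Ty/Int16Ty/Int32Ty are previously obtained IntegerType pointers; the IntPtrTy arguments only matter when pointer casts are involved:

  unsigned Opc = CastInst::isEliminableCastPair(
      Instruction::ZExt, Instruction::ZExt, Int8Ty, Int16Ty, Int32Ty,
      /*SrcIntPtrTy=*/nullptr, /*MidIntPtrTy=*/nullptr, /*DstIntPtrTy=*/nullptr);
  // Here Opc == Instruction::ZExt: zext(zext x) becomes one zext i8 -> i32.
  // A result of 0 would mean the pair cannot be eliminated.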
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Create a Trunc or BitCast cast instruction.
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:937
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name, BasicBlock::iterator InsertBefore)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2542
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2560
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2140
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2098
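These constant-expression factories fold eagerly when the operands are plain constants. A small sketch, assuming Ctx is an available LLVMContext (names illustrative):

  Type *I64 = Type::getInt64Ty(Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *C  = ConstantInt::get(I64, 0x11223344AABBCCDDULL);
  Constant *Lo = ConstantExpr::getTrunc(C, I32); // folds to i32 0xAABBCCDD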
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APFloat & getValueAPF() const
Definition: Constants.h:311
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:154
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:791
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:298
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients are updated.
Definition: DataLayout.h:410
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
bool isBigEndian() const
Definition: DataLayout.h:239
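Typical DataLayout queries made by the cast transforms in this file, sketched with illustrative variables (IC is an InstCombinerImpl, Ctx an LLVMContext):

  const DataLayout &DL = IC.getDataLayout();
  bool NativeI32   = DL.isLegalInteger(32);       // does the CPU have native i32?
  unsigned PtrBits = DL.getPointerSizeInBits(/*AS=*/0);
  IntegerType *IntPtrTy = DL.getIntPtrType(Ctx, /*AddressSpace=*/0);
  bool BE = DL.isBigEndian();                     // affects vector element indexing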
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr, BasicBlock::iterator InsertBefore)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2472
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2460
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2101
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
Value * CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1654
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:311
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2127
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1666
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2161
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1456
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1519
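A short chain showing how the builder methods above compose. A sketch, assuming B is an IRBuilder<> already positioned at the insertion point and V is an i64 Value (illustrative names):

  Value *T = B.CreateTrunc(V, B.getInt32Ty());  // i64 -> i32
  Value *M = B.CreateAnd(T, B.getInt32(0xFF));  // mask off all but the low byte
  Value *Z = B.CreateZExt(M, B.getInt64Ty());   // i32 -> i64 again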
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand, try to fold the binary operator into the select arguments.
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0, see if we can fold the instruction into the PHI (which is only possible if all operands to the PHI are constants).
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X). This is safe if the intermediate type has enough bits in its mantissa to accurately represent all values of X.
Instruction * visitFPTrunc(FPTruncInst &CI)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
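The usual visitor contract: return the rewritten instruction, or route a replacement value through replaceInstUsesWith so the worklist stays consistent. A sketch with a hypothetical helper (simplifyCastSomehow is not a real function in this file):

  // Inside a visitXxx member, where CI is the cast being combined:
  if (Value *NewV = simplifyCastSomehow(CI)) // hypothetical simplification
    return replaceInstUsesWith(CI, NewV);    // combiner-aware RAUW
  return nullptr;                            // no transform applied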
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:375
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:452
DominatorTree & DT
Definition: InstCombiner.h:74
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:447
const SimplifyQuery & getSimplifyQuery() const
Definition: InstCombiner.h:342
unsigned ComputeMaxSignificantBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:457
void push(Instruction *I)
Push the instruction onto the worklist stack.
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:301
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:83
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
bool hasNonNeg() const LLVM_READONLY
Determine whether the nneg flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
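Building a replacement PHI follows the same shape as the PHI case in this file's narrowing code: reserve one slot per incoming edge of the original node, then copy each predecessor over. A sketch, assuming OldPN is an existing PHINode, Ty the new type, and NarrowedVals holds per-edge values already converted to Ty (all names illustrative):

  PHINode *NewPN = PHINode::Create(Ty, OldPN->getNumIncomingValues());
  for (unsigned i = 0, e = OldPN->getNumIncomingValues(); i != e; ++i)
    NewPN->addIncoming(NarrowedVals[i], OldPN->getIncomingBlock(i));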
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static Type * getBFloatTy(LLVMContext &C)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
bool isX86_MMXTy() const
Return true if this is X86 MMX.
Definition: Type.h:201
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:204
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static Type * getPPC_FP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name, BasicBlock::iterator InsertBefore)
Definition: InstrTypes.h:191
'undef' values are things that do not have specified contents.
Definition: Constants.h:1348
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:683
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:676
This class represents zero extension of integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:613
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:966
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match only the specific value given.
Definition: PatternMatch.h:869
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations (lshr or ashr).
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
Definition: PatternMatch.h:974
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:586
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
Definition: PatternMatch.h:887
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:593
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:848
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
VScaleVal_match m_VScale()
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
Definition: PatternMatch.h:692
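The matchers above compose into declarative tree patterns. A sketch of recognizing trunc(lshr(X, ShAmt)) around an instruction I, binding the sub-values as it matches:

  using namespace llvm::PatternMatch;

  Value *X;
  const APInt *ShAmt;
  if (match(&I, m_Trunc(m_LShr(m_Value(X), m_APInt(ShAmt))))) {
    // X is the value being shifted; *ShAmt is the constant (or splat)
    // shift amount. Both were captured by the single match() call.
  }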
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition: MathExtras.h:343
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:656
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
@ SPF_UNKNOWN
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out parameter results if we successfully match.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2711
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2039
KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, unsigned Depth, const SimplifyQuery &SQ)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C's type.
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:247
static const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:248
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
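How the cast combines typically consume KnownBits: compute the known bits of a value, then check whether enough leading bits are zero for a narrower type to be lossless. A sketch, assuming IC is an InstCombinerImpl, V an i32 Value, and I the context instruction (illustrative names):

  #include "llvm/Support/KnownBits.h"

  KnownBits Known(32);
  IC.computeKnownBits(V, Known, /*Depth=*/0, &I);
  if (Known.countMinLeadingZeros() >= 16) {
    // The top 16 bits are known zero, so truncating V to i16 and
    // zero-extending back would reproduce the original value.
  }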
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96