LLVM 19.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/SetVector.h"
16#include "llvm/IR/DataLayout.h"
17#include "llvm/IR/DebugInfo.h"
21#include <optional>
22
23using namespace llvm;
24using namespace PatternMatch;
25
26#define DEBUG_TYPE "instcombine"
27
28/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
29/// true for, actually insert the code to evaluate the expression.
31 bool isSigned) {
32 if (Constant *C = dyn_cast<Constant>(V))
34
35 // Otherwise, it must be an instruction.
36 Instruction *I = cast<Instruction>(V);
37 Instruction *Res = nullptr;
38 unsigned Opc = I->getOpcode();
39 switch (Opc) {
40 case Instruction::Add:
41 case Instruction::Sub:
42 case Instruction::Mul:
43 case Instruction::And:
44 case Instruction::Or:
45 case Instruction::Xor:
46 case Instruction::AShr:
47 case Instruction::LShr:
48 case Instruction::Shl:
49 case Instruction::UDiv:
50 case Instruction::URem: {
51 Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
52 Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
54 break;
55 }
56 case Instruction::Trunc:
57 case Instruction::ZExt:
58 case Instruction::SExt:
59 // If the source type of the cast is the type we're trying for then we can
60 // just return the source. There's no need to insert it because it is not
61 // new.
62 if (I->getOperand(0)->getType() == Ty)
63 return I->getOperand(0);
64
65 // Otherwise, must be the same type of cast, so just reinsert a new one.
66 // This also handles the case of zext(trunc(x)) -> zext(x).
67 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
68 Opc == Instruction::SExt);
69 break;
70 case Instruction::Select: {
71 Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
72 Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
73 Res = SelectInst::Create(I->getOperand(0), True, False);
74 break;
75 }
76 case Instruction::PHI: {
77 PHINode *OPN = cast<PHINode>(I);
79 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
80 Value *V =
82 NPN->addIncoming(V, OPN->getIncomingBlock(i));
83 }
84 Res = NPN;
85 break;
86 }
87 case Instruction::FPToUI:
88 case Instruction::FPToSI:
89 Res = CastInst::Create(
90 static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
91 break;
92 case Instruction::Call:
93 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
94 switch (II->getIntrinsicID()) {
95 default:
96 llvm_unreachable("Unsupported call!");
97 case Intrinsic::vscale: {
98 Function *Fn =
99 Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
100 Res = CallInst::Create(Fn->getFunctionType(), Fn);
101 break;
102 }
103 }
104 }
105 break;
106 case Instruction::ShuffleVector: {
107 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
108 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
109 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
110 Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
111 Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
112 Res = new ShuffleVectorInst(Op0, Op1,
113 cast<ShuffleVectorInst>(I)->getShuffleMask());
114 break;
115 }
116 default:
117 // TODO: Can handle more cases here.
118 llvm_unreachable("Unreachable!");
119 }
120
121 Res->takeName(I);
122 return InsertNewInstWith(Res, I->getIterator());
123}
124
126InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
127 const CastInst *CI2) {
128 Type *SrcTy = CI1->getSrcTy();
129 Type *MidTy = CI1->getDestTy();
130 Type *DstTy = CI2->getDestTy();
131
132 Instruction::CastOps firstOp = CI1->getOpcode();
133 Instruction::CastOps secondOp = CI2->getOpcode();
134 Type *SrcIntPtrTy =
135 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
136 Type *MidIntPtrTy =
137 MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
138 Type *DstIntPtrTy =
139 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
140 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
141 DstTy, SrcIntPtrTy, MidIntPtrTy,
142 DstIntPtrTy);
143
144 // We don't want to form an inttoptr or ptrtoint that converts to an integer
145 // type that differs from the pointer size.
146 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
147 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
148 Res = 0;
149
150 return Instruction::CastOps(Res);
151}
152
153/// Implement the transforms common to all CastInst visitors.
155 Value *Src = CI.getOperand(0);
156 Type *Ty = CI.getType();
157
158 if (auto *SrcC = dyn_cast<Constant>(Src))
159 if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
160 return replaceInstUsesWith(CI, Res);
161
162 // Try to eliminate a cast of a cast.
163 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
164 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
165 // The first cast (CSrc) is eliminable so we need to fix up or replace
166 // the second cast (CI). CSrc will then have a good chance of being dead.
167 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
168 // Point debug users of the dying cast to the new one.
169 if (CSrc->hasOneUse())
170 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
171 return Res;
172 }
173 }
174
175 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
176 // We are casting a select. Try to fold the cast into the select if the
177 // select does not have a compare instruction with matching operand types
178 // or the select is likely better done in a narrow type.
179 // Creating a select with operands that are different sizes than its
180 // condition may inhibit other folds and lead to worse codegen.
181 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
182 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
183 (CI.getOpcode() == Instruction::Trunc &&
184 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
185
186 // If it's a bitcast involving vectors, make sure it has the same number
187 // of elements on both sides.
188 if (CI.getOpcode() != Instruction::BitCast ||
190 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
191 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
192 return NV;
193 }
194 }
195 }
196 }
197
198 // If we are casting a PHI, then fold the cast into the PHI.
199 if (auto *PN = dyn_cast<PHINode>(Src)) {
200 // Don't do this if it would create a PHI node with an illegal type from a
201 // legal type.
202 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
203 shouldChangeType(CI.getSrcTy(), CI.getType()))
204 if (Instruction *NV = foldOpIntoPhi(CI, PN))
205 return NV;
206 }
207
208 // Canonicalize a unary shuffle after the cast if neither operation changes
209 // the size or element size of the input vector.
210 // TODO: We could allow size-changing ops if that doesn't harm codegen.
211 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
212 Value *X;
213 ArrayRef<int> Mask;
214 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
215 // TODO: Allow scalable vectors?
216 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
217 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
218 if (SrcTy && DestTy &&
219 SrcTy->getNumElements() == DestTy->getNumElements() &&
220 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
221 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
222 return new ShuffleVectorInst(CastX, Mask);
223 }
224 }
225
226 return nullptr;
227}
228
229/// Constants and extensions/truncates from the destination type are always
230/// free to be evaluated in that type. This is a helper for canEvaluate*.
231static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
232 if (isa<Constant>(V))
233 return match(V, m_ImmConstant());
234
235 Value *X;
236 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
237 X->getType() == Ty)
238 return true;
239
240 return false;
241}
242
243/// Filter out values that we can not evaluate in the destination type for free.
244/// This is a helper for canEvaluate*.
245static bool canNotEvaluateInType(Value *V, Type *Ty) {
246 if (!isa<Instruction>(V))
247 return true;
248 // We don't extend or shrink something that has multiple uses -- doing so
249 // would require duplicating the instruction which isn't profitable.
250 if (!V->hasOneUse())
251 return true;
252
253 return false;
254}
255
256/// Return true if we can evaluate the specified expression tree as type Ty
257/// instead of its larger type, and arrive with the same value.
258/// This is used by code that tries to eliminate truncates.
259///
260/// Ty will always be a type smaller than V. We should return true if trunc(V)
261/// can be computed by computing V in the smaller type. If V is an instruction,
262/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
263/// makes sense if x and y can be efficiently truncated.
264///
265/// This function works on both vectors and scalars.
266///
268 Instruction *CxtI) {
269 if (canAlwaysEvaluateInType(V, Ty))
270 return true;
271 if (canNotEvaluateInType(V, Ty))
272 return false;
273
274 auto *I = cast<Instruction>(V);
275 Type *OrigTy = V->getType();
276 switch (I->getOpcode()) {
277 case Instruction::Add:
278 case Instruction::Sub:
279 case Instruction::Mul:
280 case Instruction::And:
281 case Instruction::Or:
282 case Instruction::Xor:
283 // These operators can all arbitrarily be extended or truncated.
284 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
285 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
286
287 case Instruction::UDiv:
288 case Instruction::URem: {
289 // UDiv and URem can be truncated if all the truncated bits are zero.
290 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
292 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
293 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
294 // Do not preserve the original context instruction. Simplifying div/rem
295 // based on later context may introduce a trap.
296 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, I) &&
297 IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, I)) {
298 return canEvaluateTruncated(I->getOperand(0), Ty, IC, I) &&
299 canEvaluateTruncated(I->getOperand(1), Ty, IC, I);
300 }
301 break;
302 }
303 case Instruction::Shl: {
304 // If we are truncating the result of this SHL, and if it's a shift of an
305 // inrange amount, we can always perform a SHL in a smaller type.
307 KnownBits AmtKnownBits =
308 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
309 if (AmtKnownBits.getMaxValue().ult(BitWidth))
310 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
311 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
312 break;
313 }
314 case Instruction::LShr: {
315 // If this is a truncate of a logical shr, we can truncate it to a smaller
316 // lshr iff we know that the bits we would otherwise be shifting in are
317 // already zeros.
318 // TODO: It is enough to check that the bits we would be shifting in are
319 // zero - use AmtKnownBits.getMaxValue().
320 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
322 KnownBits AmtKnownBits =
323 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
324 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
325 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
326 IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
327 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
328 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
329 }
330 break;
331 }
332 case Instruction::AShr: {
333 // If this is a truncate of an arithmetic shr, we can truncate it to a
334 // smaller ashr iff we know that all the bits from the sign bit of the
335 // original type and the sign bit of the truncate type are similar.
336 // TODO: It is enough to check that the bits we would be shifting in are
337 // similar to sign bit of the truncate type.
338 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
340 KnownBits AmtKnownBits =
341 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
342 unsigned ShiftedBits = OrigBitWidth - BitWidth;
343 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
344 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
345 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
346 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
347 break;
348 }
349 case Instruction::Trunc:
350 // trunc(trunc(x)) -> trunc(x)
351 return true;
352 case Instruction::ZExt:
353 case Instruction::SExt:
354 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
355 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
356 return true;
357 case Instruction::Select: {
358 SelectInst *SI = cast<SelectInst>(I);
359 return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
360 canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
361 }
362 case Instruction::PHI: {
363 // We can change a phi if we can change all operands. Note that we never
364 // get into trouble with cyclic PHIs here because we only consider
365 // instructions with a single use.
366 PHINode *PN = cast<PHINode>(I);
367 for (Value *IncValue : PN->incoming_values())
368 if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
369 return false;
370 return true;
371 }
372 case Instruction::FPToUI:
373 case Instruction::FPToSI: {
374 // If the integer type can hold the max FP value, it is safe to cast
375 // directly to that type. Otherwise, we may create poison via overflow
376 // that did not exist in the original code.
377 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
378 const fltSemantics &Semantics = InputTy->getFltSemantics();
379 uint32_t MinBitWidth =
381 I->getOpcode() == Instruction::FPToSI);
382 return Ty->getScalarSizeInBits() >= MinBitWidth;
383 }
384 case Instruction::ShuffleVector:
385 return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
386 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
387 default:
388 // TODO: Can handle more cases here.
389 break;
390 }
391
392 return false;
393}
394
395/// Given a vector that is bitcast to an integer, optionally logically
396/// right-shifted, and truncated, convert it to an extractelement.
397/// Example (big endian):
398/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
399/// --->
400/// extractelement <4 x i32> %X, 1
402 InstCombinerImpl &IC) {
403 Value *TruncOp = Trunc.getOperand(0);
404 Type *DestType = Trunc.getType();
405 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
406 return nullptr;
407
408 Value *VecInput = nullptr;
409 ConstantInt *ShiftVal = nullptr;
410 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
411 m_LShr(m_BitCast(m_Value(VecInput)),
412 m_ConstantInt(ShiftVal)))) ||
413 !isa<VectorType>(VecInput->getType()))
414 return nullptr;
415
416 VectorType *VecType = cast<VectorType>(VecInput->getType());
417 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
418 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
419 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
420
421 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
422 return nullptr;
423
424 // If the element type of the vector doesn't match the result type,
425 // bitcast it to a vector type that we can extract from.
426 unsigned NumVecElts = VecWidth / DestWidth;
427 if (VecType->getElementType() != DestType) {
428 VecType = FixedVectorType::get(DestType, NumVecElts);
429 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
430 }
431
432 unsigned Elt = ShiftAmount / DestWidth;
433 if (IC.getDataLayout().isBigEndian())
434 Elt = NumVecElts - 1 - Elt;
435
436 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
437}
438
439/// Funnel/Rotate left/right may occur in a wider type than necessary because of
440/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
441Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
442 assert((isa<VectorType>(Trunc.getSrcTy()) ||
443 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
444 "Don't narrow to an illegal scalar type");
445
446 // Bail out on strange types. It is possible to handle some of these patterns
447 // even with non-power-of-2 sizes, but it is not a likely scenario.
448 Type *DestTy = Trunc.getType();
449 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
450 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
451 if (!isPowerOf2_32(NarrowWidth))
452 return nullptr;
453
454 // First, find an or'd pair of opposite shifts:
455 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
456 BinaryOperator *Or0, *Or1;
457 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
458 return nullptr;
459
460 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
461 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
462 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
463 Or0->getOpcode() == Or1->getOpcode())
464 return nullptr;
465
466 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
467 if (Or0->getOpcode() == BinaryOperator::LShr) {
468 std::swap(Or0, Or1);
469 std::swap(ShVal0, ShVal1);
470 std::swap(ShAmt0, ShAmt1);
471 }
472 assert(Or0->getOpcode() == BinaryOperator::Shl &&
473 Or1->getOpcode() == BinaryOperator::LShr &&
474 "Illegal or(shift,shift) pair");
475
476 // Match the shift amount operands for a funnel/rotate pattern. This always
477 // matches a subtraction on the R operand.
478 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
479 // The shift amounts may add up to the narrow bit width:
480 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
481 // If this is a funnel shift (different operands are shifted), then the
482 // shift amount can not over-shift (create poison) in the narrow type.
483 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
484 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
485 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
486 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
487 return L;
488
489 // The following patterns currently only work for rotation patterns.
490 // TODO: Add more general funnel-shift compatible patterns.
491 if (ShVal0 != ShVal1)
492 return nullptr;
493
494 // The shift amount may be masked with negation:
495 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
496 Value *X;
497 unsigned Mask = Width - 1;
498 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
500 return X;
501
502 // Same as above, but the shift amount may be extended after masking:
503 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
505 return X;
506
507 return nullptr;
508 };
509
510 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
511 bool IsFshl = true; // Sub on LSHR.
512 if (!ShAmt) {
513 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
514 IsFshl = false; // Sub on SHL.
515 }
516 if (!ShAmt)
517 return nullptr;
518
519 // The right-shifted value must have high zeros in the wide type (for example
520 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
521 // truncated, so those do not matter.
522 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
523 if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
524 return nullptr;
525
526 // Adjust the width of ShAmt for narrowed funnel shift operation:
527 // - Zero-extend if ShAmt is narrower than the destination type.
528 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
529 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
530 // zext/trunc(ShAmt)).
531 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
532
533 Value *X, *Y;
534 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
535 if (ShVal0 != ShVal1)
536 Y = Builder.CreateTrunc(ShVal1, DestTy);
537 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
538 Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
539 return CallInst::Create(F, {X, Y, NarrowShAmt});
540}
541
542/// Try to narrow the width of math or bitwise logic instructions by pulling a
543/// truncate ahead of binary operators.
544Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
545 Type *SrcTy = Trunc.getSrcTy();
546 Type *DestTy = Trunc.getType();
547 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
548 unsigned DestWidth = DestTy->getScalarSizeInBits();
549
550 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
551 return nullptr;
552
553 BinaryOperator *BinOp;
554 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
555 return nullptr;
556
557 Value *BinOp0 = BinOp->getOperand(0);
558 Value *BinOp1 = BinOp->getOperand(1);
559 switch (BinOp->getOpcode()) {
560 case Instruction::And:
561 case Instruction::Or:
562 case Instruction::Xor:
563 case Instruction::Add:
564 case Instruction::Sub:
565 case Instruction::Mul: {
566 Constant *C;
567 if (match(BinOp0, m_Constant(C))) {
568 // trunc (binop C, X) --> binop (trunc C', X)
569 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
570 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
571 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
572 }
573 if (match(BinOp1, m_Constant(C))) {
574 // trunc (binop X, C) --> binop (trunc X, C')
575 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
576 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
577 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
578 }
579 Value *X;
580 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
581 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
582 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
583 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
584 }
585 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
586 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
587 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
588 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
589 }
590 break;
591 }
592 case Instruction::LShr:
593 case Instruction::AShr: {
594 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
595 Value *A;
596 Constant *C;
597 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
598 unsigned MaxShiftAmt = SrcWidth - DestWidth;
599 // If the shift is small enough, all zero/sign bits created by the shift
600 // are removed by the trunc.
602 APInt(SrcWidth, MaxShiftAmt)))) {
603 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
604 bool IsExact = OldShift->isExact();
605 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
606 /*IsSigned*/ true, DL)) {
607 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
608 Value *Shift =
609 OldShift->getOpcode() == Instruction::AShr
610 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
611 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
612 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
613 }
614 }
615 }
616 break;
617 }
618 default: break;
619 }
620
621 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
622 return NarrowOr;
623
624 return nullptr;
625}
626
627/// Try to narrow the width of a splat shuffle. This could be generalized to any
628/// shuffle with a constant operand, but we limit the transform to avoid
629/// creating a shuffle type that targets may not be able to lower effectively.
631 InstCombiner::BuilderTy &Builder) {
632 auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
633 if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
634 all_equal(Shuf->getShuffleMask()) &&
635 Shuf->getType() == Shuf->getOperand(0)->getType()) {
636 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
637 // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
638 Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
639 return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
640 }
641
642 return nullptr;
643}
644
645/// Try to narrow the width of an insert element. This could be generalized for
646/// any vector constant, but we limit the transform to insertion into undef to
647/// avoid potential backend problems from unsupported insertion widths. This
648/// could also be extended to handle the case of inserting a scalar constant
649/// into a vector variable.
651 InstCombiner::BuilderTy &Builder) {
652 Instruction::CastOps Opcode = Trunc.getOpcode();
653 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
654 "Unexpected instruction for shrinking");
655
656 auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
657 if (!InsElt || !InsElt->hasOneUse())
658 return nullptr;
659
660 Type *DestTy = Trunc.getType();
661 Type *DestScalarTy = DestTy->getScalarType();
662 Value *VecOp = InsElt->getOperand(0);
663 Value *ScalarOp = InsElt->getOperand(1);
664 Value *Index = InsElt->getOperand(2);
665
666 if (match(VecOp, m_Undef())) {
667 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
668 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
669 UndefValue *NarrowUndef = UndefValue::get(DestTy);
670 Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
671 return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
672 }
673
674 return nullptr;
675}
676
678 if (Instruction *Result = commonCastTransforms(Trunc))
679 return Result;
680
681 Value *Src = Trunc.getOperand(0);
682 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
683 unsigned DestWidth = DestTy->getScalarSizeInBits();
684 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
685
686 // Attempt to truncate the entire input expression tree to the destination
687 // type. Only do this if the dest type is a simple type, don't convert the
688 // expression tree to something weird like i93 unless the source is also
689 // strange.
690 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
691 canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
692
693 // If this cast is a truncate, evaluting in a different type always
694 // eliminates the cast, so it is always a win.
696 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
697 " to avoid cast: "
698 << Trunc << '\n');
699 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
700 assert(Res->getType() == DestTy);
701 return replaceInstUsesWith(Trunc, Res);
702 }
703
704 // For integer types, check if we can shorten the entire input expression to
705 // DestWidth * 2, which won't allow removing the truncate, but reducing the
706 // width may enable further optimizations, e.g. allowing for larger
707 // vectorization factors.
708 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
709 if (DestWidth * 2 < SrcWidth) {
710 auto *NewDestTy = DestITy->getExtendedType();
711 if (shouldChangeType(SrcTy, NewDestTy) &&
712 canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
714 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
715 " to reduce the width of operand of"
716 << Trunc << '\n');
717 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
718 return new TruncInst(Res, DestTy);
719 }
720 }
721 }
722
723 // Test if the trunc is the user of a select which is part of a
724 // minimum or maximum operation. If so, don't do any more simplification.
725 // Even simplifying demanded bits can break the canonical form of a
726 // min/max.
727 Value *LHS, *RHS;
728 if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
729 if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
730 return nullptr;
731
732 // See if we can simplify any instructions used by the input whose sole
733 // purpose is to compute bits we don't care about.
735 return &Trunc;
736
737 if (DestWidth == 1) {
738 Value *Zero = Constant::getNullValue(SrcTy);
739
740 Value *X;
741 const APInt *C1;
742 Constant *C2;
743 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
744 m_ImmConstant(C2))))) {
745 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
746 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
747 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
748 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
749 }
750
751 Constant *C;
752 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
753 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
754 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
755 Value *MaskC = Builder.CreateShl(One, C);
756 Value *And = Builder.CreateAnd(X, MaskC);
757 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
758 }
760 m_Deferred(X))))) {
761 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
762 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
763 Value *MaskC = Builder.CreateShl(One, C);
764 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
765 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
766 }
767
768 {
769 const APInt *C;
770 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
771 // trunc (C << X) to i1 --> X == 0, where C is odd
772 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
773 }
774 }
775
776 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
777 Value *X, *Y;
778 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
779 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
780 }
781 }
782
783 Value *A, *B;
784 Constant *C;
785 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
786 unsigned AWidth = A->getType()->getScalarSizeInBits();
787 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
788 auto *OldSh = cast<Instruction>(Src);
789 bool IsExact = OldSh->isExact();
790
791 // If the shift is small enough, all zero bits created by the shift are
792 // removed by the trunc.
794 APInt(SrcWidth, MaxShiftAmt)))) {
795 auto GetNewShAmt = [&](unsigned Width) {
796 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
797 Constant *Cmp =
799 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
800 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
801 DL);
802 };
803
804 // trunc (lshr (sext A), C) --> ashr A, C
805 if (A->getType() == DestTy) {
806 Constant *ShAmt = GetNewShAmt(DestWidth);
807 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
808 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
809 : BinaryOperator::CreateAShr(A, ShAmt);
810 }
811 // The types are mismatched, so create a cast after shifting:
812 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
813 if (Src->hasOneUse()) {
814 Constant *ShAmt = GetNewShAmt(AWidth);
815 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
816 return CastInst::CreateIntegerCast(Shift, DestTy, true);
817 }
818 }
819 // TODO: Mask high bits with 'and'.
820 }
821
822 if (Instruction *I = narrowBinOp(Trunc))
823 return I;
824
826 return I;
827
828 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
829 return I;
830
831 if (Src->hasOneUse() &&
832 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
833 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
834 // dest type is native and cst < dest size.
835 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
836 !match(A, m_Shr(m_Value(), m_Constant()))) {
837 // Skip shifts of shift by constants. It undoes a combine in
838 // FoldShiftByConstant and is the extend in reg pattern.
839 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
840 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
841 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
842 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
843 ConstantExpr::getTrunc(C, DestTy));
844 }
845 }
846 }
847
848 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
849 return I;
850
851 // Whenever an element is extracted from a vector, and then truncated,
852 // canonicalize by converting it to a bitcast followed by an
853 // extractelement.
854 //
855 // Example (little endian):
856 // trunc (extractelement <4 x i64> %X, 0) to i32
857 // --->
858 // extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
859 Value *VecOp;
860 ConstantInt *Cst;
861 if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
862 auto *VecOpTy = cast<VectorType>(VecOp->getType());
863 auto VecElts = VecOpTy->getElementCount();
864
865 // A badly fit destination size would result in an invalid cast.
866 if (SrcWidth % DestWidth == 0) {
867 uint64_t TruncRatio = SrcWidth / DestWidth;
868 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
869 uint64_t VecOpIdx = Cst->getZExtValue();
870 uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
871 : VecOpIdx * TruncRatio;
872 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
873 "overflow 32-bits");
874
875 auto *BitCastTo =
876 VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
877 Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
878 return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
879 }
880 }
881
882 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
883 if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
884 m_Value(B))))) {
885 unsigned AWidth = A->getType()->getScalarSizeInBits();
886 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
887 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
888 Value *NarrowCtlz =
889 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
890 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
891 }
892 }
893
894 if (match(Src, m_VScale())) {
895 if (Trunc.getFunction() &&
896 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
897 Attribute Attr =
898 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
899 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
900 if (Log2_32(*MaxVScale) < DestWidth) {
901 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
902 return replaceInstUsesWith(Trunc, VScale);
903 }
904 }
905 }
906 }
907
908 bool Changed = false;
909 if (!Trunc.hasNoSignedWrap() &&
910 ComputeMaxSignificantBits(Src, /*Depth=*/0, &Trunc) <= DestWidth) {
911 Trunc.setHasNoSignedWrap(true);
912 Changed = true;
913 }
914 if (!Trunc.hasNoUnsignedWrap() &&
915 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
916 /*Depth=*/0, &Trunc)) {
917 Trunc.setHasNoUnsignedWrap(true);
918 Changed = true;
919 }
920
921 return Changed ? &Trunc : nullptr;
922}
923
924Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
925 ZExtInst &Zext) {
926 // If we are just checking for a icmp eq of a single bit and zext'ing it
927 // to an integer, then shift the bit to the appropriate place and then
928 // cast to integer to avoid the comparison.
929
930 // FIXME: This set of transforms does not check for extra uses and/or creates
931 // an extra instruction (an optional final cast is not included
932 // in the transform comments). We may also want to favor icmp over
933 // shifts in cases of equal instructions because icmp has better
934 // analysis in general (invert the transform).
935
936 const APInt *Op1CV;
937 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
938
939 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
940 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
941 Value *In = Cmp->getOperand(0);
942 Value *Sh = ConstantInt::get(In->getType(),
943 In->getType()->getScalarSizeInBits() - 1);
944 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
945 if (In->getType() != Zext.getType())
946 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
947
948 return replaceInstUsesWith(Zext, In);
949 }
950
951 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
952 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
953 // zext (X != 0) to i32 --> X iff X has only the low bit set.
954 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
955
956 if (Op1CV->isZero() && Cmp->isEquality()) {
957 // Exactly 1 possible 1? But not the high-bit because that is
958 // canonicalized to this form.
959 KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
960 APInt KnownZeroMask(~Known.Zero);
961 uint32_t ShAmt = KnownZeroMask.logBase2();
962 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
963 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
964 if (IsExpectShAmt &&
965 (Cmp->getOperand(0)->getType() == Zext.getType() ||
966 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
967 Value *In = Cmp->getOperand(0);
968 if (ShAmt) {
969 // Perform a logical shr by shiftamt.
970 // Insert the shift to put the result in the low bit.
971 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
972 In->getName() + ".lobit");
973 }
974
975 // Toggle the low bit for "X == 0".
976 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
977 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
978
979 if (Zext.getType() == In->getType())
980 return replaceInstUsesWith(Zext, In);
981
982 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
983 return replaceInstUsesWith(Zext, IntCast);
984 }
985 }
986 }
987
988 if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
989 // Test if a bit is clear/set using a shifted-one mask:
990 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
991 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
992 Value *X, *ShAmt;
993 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
994 match(Cmp->getOperand(0),
995 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
996 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
997 X = Builder.CreateNot(X);
998 Value *Lshr = Builder.CreateLShr(X, ShAmt);
999 Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1000 return replaceInstUsesWith(Zext, And1);
1001 }
1002 }
1003
1004 return nullptr;
1005}
1006
1007/// Determine if the specified value can be computed in the specified wider type
1008/// and produce the same low bits. If not, return false.
1009///
1010/// If this function returns true, it can also return a non-zero number of bits
1011/// (in BitsToClear) which indicates that the value it computes is correct for
1012/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1013/// out. For example, to promote something like:
1014///
1015/// %B = trunc i64 %A to i32
1016/// %C = lshr i32 %B, 8
1017/// %E = zext i32 %C to i64
1018///
1019/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1020/// set to 8 to indicate that the promoted value needs to have bits 24-31
1021/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1022/// clear the top bits anyway, doing this has no extra cost.
1023///
1024/// This function works on both vectors and scalars.
1025static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
1026 InstCombinerImpl &IC, Instruction *CxtI) {
1027 BitsToClear = 0;
1028 if (canAlwaysEvaluateInType(V, Ty))
1029 return true;
1030 if (canNotEvaluateInType(V, Ty))
1031 return false;
1032
1033 auto *I = cast<Instruction>(V);
1034 unsigned Tmp;
1035 switch (I->getOpcode()) {
1036 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1037 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1038 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1039 return true;
1040 case Instruction::And:
1041 case Instruction::Or:
1042 case Instruction::Xor:
1043 case Instruction::Add:
1044 case Instruction::Sub:
1045 case Instruction::Mul:
1046 if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1047 !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
1048 return false;
1049 // These can all be promoted if neither operand has 'bits to clear'.
1050 if (BitsToClear == 0 && Tmp == 0)
1051 return true;
1052
1053 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1054 // other side, BitsToClear is ok.
1055 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1056 // We use MaskedValueIsZero here for generality, but the case we care
1057 // about the most is constant RHS.
1058 unsigned VSize = V->getType()->getScalarSizeInBits();
1059 if (IC.MaskedValueIsZero(I->getOperand(1),
1060 APInt::getHighBitsSet(VSize, BitsToClear),
1061 0, CxtI)) {
1062 // If this is an And instruction and all of the BitsToClear are
1063 // known to be zero we can reset BitsToClear.
1064 if (I->getOpcode() == Instruction::And)
1065 BitsToClear = 0;
1066 return true;
1067 }
1068 }
1069
1070 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1071 return false;
1072
1073 case Instruction::Shl: {
1074 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1075 // upper bits we can reduce BitsToClear by the shift amount.
1076 const APInt *Amt;
1077 if (match(I->getOperand(1), m_APInt(Amt))) {
1078 if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1079 return false;
1080 uint64_t ShiftAmt = Amt->getZExtValue();
1081 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1082 return true;
1083 }
1084 return false;
1085 }
1086 case Instruction::LShr: {
1087 // We can promote lshr(x, cst) if we can promote x. This requires the
1088 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1089 const APInt *Amt;
1090 if (match(I->getOperand(1), m_APInt(Amt))) {
1091 if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1092 return false;
1093 BitsToClear += Amt->getZExtValue();
1094 if (BitsToClear > V->getType()->getScalarSizeInBits())
1095 BitsToClear = V->getType()->getScalarSizeInBits();
1096 return true;
1097 }
1098 // Cannot promote variable LSHR.
1099 return false;
1100 }
1101 case Instruction::Select:
1102 if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1103 !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1104 // TODO: If important, we could handle the case when the BitsToClear are
1105 // known zero in the disagreeing side.
1106 Tmp != BitsToClear)
1107 return false;
1108 return true;
1109
1110 case Instruction::PHI: {
1111 // We can change a phi if we can change all operands. Note that we never
1112 // get into trouble with cyclic PHIs here because we only consider
1113 // instructions with a single use.
1114 PHINode *PN = cast<PHINode>(I);
1115 if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
1116 return false;
1117 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1118 if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1119 // TODO: If important, we could handle the case when the BitsToClear
1120 // are known zero in the disagreeing input.
1121 Tmp != BitsToClear)
1122 return false;
1123 return true;
1124 }
1125 case Instruction::Call:
1126 // llvm.vscale() can always be executed in larger type, because the
1127 // value is automatically zero-extended.
1128 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1129 if (II->getIntrinsicID() == Intrinsic::vscale)
1130 return true;
1131 return false;
1132 default:
1133 // TODO: Can handle more cases here.
1134 return false;
1135 }
1136}
1137
1139 // If this zero extend is only used by a truncate, let the truncate be
1140 // eliminated before we try to optimize this zext.
1141 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1142 !isa<Constant>(Zext.getOperand(0)))
1143 return nullptr;
1144
1145 // If one of the common conversion will work, do it.
1146 if (Instruction *Result = commonCastTransforms(Zext))
1147 return Result;
1148
1149 Value *Src = Zext.getOperand(0);
1150 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1151
1152 // zext nneg bool x -> 0
1153 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1155
1156 // Try to extend the entire expression tree to the wide destination type.
1157 unsigned BitsToClear;
1158 if (shouldChangeType(SrcTy, DestTy) &&
1159 canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
1160 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1161 "Can't clear more bits than in SrcTy");
1162
1163 // Okay, we can transform this! Insert the new expression now.
1164 LLVM_DEBUG(
1165 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1166 " to avoid zero extend: "
1167 << Zext << '\n');
1168 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1169 assert(Res->getType() == DestTy);
1170
1171 // Preserve debug values referring to Src if the zext is its last use.
1172 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1173 if (SrcOp->hasOneUse())
1174 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1175
1176 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1177 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1178
1179 // If the high bits are already filled with zeros, just replace this
1180 // cast with the result.
1181 if (MaskedValueIsZero(Res,
1182 APInt::getHighBitsSet(DestBitSize,
1183 DestBitSize - SrcBitsKept),
1184 0, &Zext))
1185 return replaceInstUsesWith(Zext, Res);
1186
1187 // We need to emit an AND to clear the high bits.
1188 Constant *C = ConstantInt::get(Res->getType(),
1189 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1190 return BinaryOperator::CreateAnd(Res, C);
1191 }
1192
1193 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1194 // types and if the sizes are just right we can convert this into a logical
1195 // 'and' which will be much cheaper than the pair of casts.
1196 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1197 // TODO: Subsume this into EvaluateInDifferentType.
1198
1199 // Get the sizes of the types involved. We know that the intermediate type
1200 // will be smaller than A or C, but don't know the relation between A and C.
1201 Value *A = CSrc->getOperand(0);
1202 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1203 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1204 unsigned DstSize = DestTy->getScalarSizeInBits();
1205 // If we're actually extending zero bits, then if
1206 // SrcSize < DstSize: zext(a & mask)
1207 // SrcSize == DstSize: a & mask
1208 // SrcSize > DstSize: trunc(a) & mask
1209 if (SrcSize < DstSize) {
1210 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1211 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1212 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1213 return new ZExtInst(And, DestTy);
1214 }
1215
1216 if (SrcSize == DstSize) {
1217 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1218 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1219 AndValue));
1220 }
1221 if (SrcSize > DstSize) {
1222 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1223 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1224 return BinaryOperator::CreateAnd(Trunc,
1225 ConstantInt::get(Trunc->getType(),
1226 AndValue));
1227 }
1228 }
1229
1230 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1231 return transformZExtICmp(Cmp, Zext);
1232
1233 // zext(trunc(X) & C) -> (X & zext(C)).
1234 Constant *C;
1235 Value *X;
1236 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1237 X->getType() == DestTy)
1238 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1239
1240 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1241 Value *And;
1242 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1244 X->getType() == DestTy) {
1245 Value *ZC = Builder.CreateZExt(C, DestTy);
1246 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1247 }
1248
1249 // If we are truncating, masking, and then zexting back to the original type,
1250 // that's just a mask. This is not handled by canEvaluateZextd if the
1251 // intermediate values have extra uses. This could be generalized further for
1252 // a non-constant mask operand.
1253 // zext (and (trunc X), C) --> and X, (zext C)
1254 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1255 X->getType() == DestTy) {
1256 Value *ZextC = Builder.CreateZExt(C, DestTy);
1257 return BinaryOperator::CreateAnd(X, ZextC);
1258 }
1259
1260 if (match(Src, m_VScale())) {
1261 if (Zext.getFunction() &&
1262 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1263 Attribute Attr =
1264 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1265 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1266 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1267 if (Log2_32(*MaxVScale) < TypeWidth) {
1268 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1269 return replaceInstUsesWith(Zext, VScale);
1270 }
1271 }
1272 }
1273 }
1274
1275 if (!Zext.hasNonNeg()) {
1276 // If this zero extend is only used by a shift, add nneg flag.
1277 if (Zext.hasOneUse() &&
1278 SrcTy->getScalarSizeInBits() >
1279 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1280 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1281 Zext.setNonNeg();
1282 return &Zext;
1283 }
1284
1285 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1286 Zext.setNonNeg();
1287 return &Zext;
1288 }
1289 }
1290
1291 return nullptr;
1292}
1293
1294/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1295Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1296 SExtInst &Sext) {
1297 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1298 ICmpInst::Predicate Pred = Cmp->getPredicate();
1299
1300 // Don't bother if Op1 isn't of vector or integer type.
1301 if (!Op1->getType()->isIntOrIntVectorTy())
1302 return nullptr;
1303
1304 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1305 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1306 Value *Sh = ConstantInt::get(Op0->getType(),
1307 Op0->getType()->getScalarSizeInBits() - 1);
1308 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1309 if (In->getType() != Sext.getType())
1310 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1311
1312 return replaceInstUsesWith(Sext, In);
1313 }
1314
1315 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1316 // If we know that only one bit of the LHS of the icmp can be set and we
1317 // have an equality comparison with zero or a power of 2, we can transform
1318 // the icmp and sext into bitwise/integer operations.
1319 if (Cmp->hasOneUse() &&
1320 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1321 KnownBits Known = computeKnownBits(Op0, 0, &Sext);
1322
1323 APInt KnownZeroMask(~Known.Zero);
1324 if (KnownZeroMask.isPowerOf2()) {
1325 Value *In = Cmp->getOperand(0);
1326
1327 // If the icmp tests for a known zero bit we can constant fold it.
1328 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1329 Value *V = Pred == ICmpInst::ICMP_NE ?
1331 ConstantInt::getNullValue(Sext.getType());
1332 return replaceInstUsesWith(Sext, V);
1333 }
1334
1335 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1336 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1337 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1338 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1339 // Perform a right shift to place the desired bit in the LSB.
1340 if (ShiftAmt)
1341 In = Builder.CreateLShr(In,
1342 ConstantInt::get(In->getType(), ShiftAmt));
1343
1344 // At this point "In" is either 1 or 0. Subtract 1 to turn
1345 // {1, 0} -> {0, -1}.
1346 In = Builder.CreateAdd(In,
1347 ConstantInt::getAllOnesValue(In->getType()),
1348 "sext");
1349 } else {
1350 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1351 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1352 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1353 // Perform a left shift to place the desired bit in the MSB.
1354 if (ShiftAmt)
1355 In = Builder.CreateShl(In,
1356 ConstantInt::get(In->getType(), ShiftAmt));
1357
1358 // Distribute the bit over the whole bit width.
1359 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1360 KnownZeroMask.getBitWidth() - 1), "sext");
1361 }
1362
1363 if (Sext.getType() == In->getType())
1364 return replaceInstUsesWith(Sext, In);
1365 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1366 }
1367 }
1368 }
1369
1370 return nullptr;
1371}
1372
1373/// Return true if we can take the specified value and return it as type Ty
1374/// without inserting any new casts and without changing the value of the common
1375/// low bits. This is used by code that tries to promote integer operations to
1376/// a wider types will allow us to eliminate the extension.
1377///
1378/// This function works on both vectors and scalars.
1379///
1380static bool canEvaluateSExtd(Value *V, Type *Ty) {
1381 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1382 "Can't sign extend type to a smaller type");
1383 if (canAlwaysEvaluateInType(V, Ty))
1384 return true;
1385 if (canNotEvaluateInType(V, Ty))
1386 return false;
1387
1388 auto *I = cast<Instruction>(V);
1389 switch (I->getOpcode()) {
1390 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1391 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1392 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1393 return true;
1394 case Instruction::And:
1395 case Instruction::Or:
1396 case Instruction::Xor:
1397 case Instruction::Add:
1398 case Instruction::Sub:
1399 case Instruction::Mul:
1400 // These operators can all arbitrarily be extended if their inputs can.
1401 return canEvaluateSExtd(I->getOperand(0), Ty) &&
1402 canEvaluateSExtd(I->getOperand(1), Ty);
1403
1404 //case Instruction::Shl: TODO
1405 //case Instruction::LShr: TODO
1406
1407 case Instruction::Select:
1408 return canEvaluateSExtd(I->getOperand(1), Ty) &&
1409 canEvaluateSExtd(I->getOperand(2), Ty);
1410
1411 case Instruction::PHI: {
1412 // We can change a phi if we can change all operands. Note that we never
1413 // get into trouble with cyclic PHIs here because we only consider
1414 // instructions with a single use.
1415 PHINode *PN = cast<PHINode>(I);
1416 for (Value *IncValue : PN->incoming_values())
1417 if (!canEvaluateSExtd(IncValue, Ty)) return false;
1418 return true;
1419 }
1420 default:
1421 // TODO: Can handle more cases here.
1422 break;
1423 }
1424
1425 return false;
1426}
1427
1429 // If this sign extend is only used by a truncate, let the truncate be
1430 // eliminated before we try to optimize this sext.
1431 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1432 return nullptr;
1433
1434 if (Instruction *I = commonCastTransforms(Sext))
1435 return I;
1436
1437 Value *Src = Sext.getOperand(0);
1438 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1439 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1440 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1441
1442 // If the value being extended is zero or positive, use a zext instead.
1443 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1444 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1445 CI->setNonNeg(true);
1446 return CI;
1447 }
1448
1449 // Try to extend the entire expression tree to the wide destination type.
1450 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1451 // Okay, we can transform this! Insert the new expression now.
1452 LLVM_DEBUG(
1453 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1454 " to avoid sign extend: "
1455 << Sext << '\n');
1456 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1457 assert(Res->getType() == DestTy);
1458
1459 // If the high bits are already filled with sign bit, just replace this
1460 // cast with the result.
1461 if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
1462 return replaceInstUsesWith(Sext, Res);
1463
1464 // We need to emit a shl + ashr to do the sign extend.
1465 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1466 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1467 ShAmt);
1468 }
1469
1470 Value *X;
1471 if (match(Src, m_Trunc(m_Value(X)))) {
1472 // If the input has more sign bits than bits truncated, then convert
1473 // directly to final type.
1474 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1475 if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
1476 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1477
1478 // If input is a trunc from the destination type, then convert into shifts.
1479 if (Src->hasOneUse() && X->getType() == DestTy) {
1480 // sext (trunc X) --> ashr (shl X, C), C
1481 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1482 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1483 }
1484
1485 // If we are replacing shifted-in high zero bits with sign bits, convert
1486 // the logic shift to arithmetic shift and eliminate the cast to
1487 // intermediate type:
1488 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1489 Value *Y;
1490 if (Src->hasOneUse() &&
1492 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1493 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1494 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1495 }
1496 }
1497
1498 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1499 return transformSExtICmp(Cmp, Sext);
1500
1501 // If the input is a shl/ashr pair of a same constant, then this is a sign
1502 // extension from a smaller value. If we could trust arbitrary bitwidth
1503 // integers, we could turn this into a truncate to the smaller bit and then
1504 // use a sext for the whole extension. Since we don't, look deeper and check
1505 // for a truncate. If the source and dest are the same type, eliminate the
1506 // trunc and extend and just do shifts. For example, turn:
1507 // %a = trunc i32 %i to i8
1508 // %b = shl i8 %a, C
1509 // %c = ashr i8 %b, C
1510 // %d = sext i8 %c to i32
1511 // into:
1512 // %a = shl i32 %i, 32-(8-C)
1513 // %d = ashr i32 %a, 32-(8-C)
1514 Value *A = nullptr;
1515 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1516 Constant *BA = nullptr, *CA = nullptr;
1517 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1518 m_ImmConstant(CA))) &&
1519 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1520 Constant *WideCurrShAmt =
1521 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1522 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1523 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1524 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1525 Constant *NewShAmt = ConstantExpr::getSub(
1526 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1527 NumLowbitsLeft);
1528 NewShAmt =
1530 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1531 return BinaryOperator::CreateAShr(A, NewShAmt);
1532 }
1533
1534 // Splatting a bit of constant-index across a value:
1535 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1536 // If the dest type is different, use a cast (adjust use check).
1537 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1538 m_SpecificInt(SrcBitSize - 1))))) {
1539 Type *XTy = X->getType();
1540 unsigned XBitSize = XTy->getScalarSizeInBits();
1541 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1542 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1543 if (XTy == DestTy)
1544 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1545 AshrAmtC);
1546 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1547 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1548 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1549 }
1550 }
1551
1552 if (match(Src, m_VScale())) {
1553 if (Sext.getFunction() &&
1554 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1555 Attribute Attr =
1556 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1557 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1558 if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1559 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1560 return replaceInstUsesWith(Sext, VScale);
1561 }
1562 }
1563 }
1564 }
1565
1566 return nullptr;
1567}
1568
1569/// Return a Constant* for the specified floating-point constant if it fits
1570/// in the specified FP type without changing its value.
1571static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1572 bool losesInfo;
1573 APFloat F = CFP->getValueAPF();
1574 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1575 return !losesInfo;
1576}
1577
1578static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1579 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1580 return nullptr; // No constant folding of this.
1581 // See if the value can be truncated to bfloat and then reextended.
1582 if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
1583 return Type::getBFloatTy(CFP->getContext());
1584 // See if the value can be truncated to half and then reextended.
1585 if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
1586 return Type::getHalfTy(CFP->getContext());
1587 // See if the value can be truncated to float and then reextended.
1589 return Type::getFloatTy(CFP->getContext());
1590 if (CFP->getType()->isDoubleTy())
1591 return nullptr; // Won't shrink.
1593 return Type::getDoubleTy(CFP->getContext());
1594 // Don't try to shrink to various long double types.
1595 return nullptr;
1596}
1597
1598// Determine if this is a vector of ConstantFPs and if so, return the minimal
1599// type we can safely truncate all elements to.
1600static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1601 auto *CV = dyn_cast<Constant>(V);
1602 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1603 if (!CV || !CVVTy)
1604 return nullptr;
1605
1606 Type *MinType = nullptr;
1607
1608 unsigned NumElts = CVVTy->getNumElements();
1609
1610 // For fixed-width vectors we find the minimal type by looking
1611 // through the constant values of the vector.
1612 for (unsigned i = 0; i != NumElts; ++i) {
1613 if (isa<UndefValue>(CV->getAggregateElement(i)))
1614 continue;
1615
1616 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1617 if (!CFP)
1618 return nullptr;
1619
1620 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1621 if (!T)
1622 return nullptr;
1623
1624 // If we haven't found a type yet or this type has a larger mantissa than
1625 // our previous type, this is our new minimal type.
1626 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1627 MinType = T;
1628 }
1629
1630 // Make a vector type from the minimal type.
1631 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1632}
1633
1634/// Find the minimum FP type we can safely truncate to.
1635static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1636 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1637 return FPExt->getOperand(0)->getType();
1638
1639 // If this value is a constant, return the constant in the smallest FP type
1640 // that can accurately represent it. This allows us to turn
1641 // (float)((double)X+2.0) into x+2.0f.
1642 if (auto *CFP = dyn_cast<ConstantFP>(V))
1643 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1644 return T;
1645
1646 // We can only correctly find a minimum type for a scalable vector when it is
1647 // a splat. For splats of constant values the fpext is wrapped up as a
1648 // ConstantExpr.
1649 if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1650 if (FPCExt->getOpcode() == Instruction::FPExt)
1651 return FPCExt->getOperand(0)->getType();
1652
1653 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1654 // vectors
1655 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1656 return T;
1657
1658 return V->getType();
1659}
1660
1661/// Return true if the cast from integer to FP can be proven to be exact for all
1662/// possible inputs (the conversion does not lose any precision).
1664 CastInst::CastOps Opcode = I.getOpcode();
1665 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1666 "Unexpected cast");
1667 Value *Src = I.getOperand(0);
1668 Type *SrcTy = Src->getType();
1669 Type *FPTy = I.getType();
1670 bool IsSigned = Opcode == Instruction::SIToFP;
1671 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1672
1673 // Easy case - if the source integer type has less bits than the FP mantissa,
1674 // then the cast must be exact.
1675 int DestNumSigBits = FPTy->getFPMantissaWidth();
1676 if (SrcSize <= DestNumSigBits)
1677 return true;
1678
1679 // Cast from FP to integer and back to FP is independent of the intermediate
1680 // integer width because of poison on overflow.
1681 Value *F;
1682 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1683 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1684 // potential rounding of negative FP input values.
1685 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1686 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1687 SrcNumSigBits++;
1688
1689 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1690 // significant bits than the destination (and make sure neither type is
1691 // weird -- ppc_fp128).
1692 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1693 SrcNumSigBits <= DestNumSigBits)
1694 return true;
1695 }
1696
1697 // TODO:
1698 // Try harder to find if the source integer type has less significant bits.
1699 // For example, compute number of sign bits.
1700 KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
1701 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1702 SrcKnown.countMinLeadingZeros() -
1703 SrcKnown.countMinTrailingZeros();
1704 if (SigBits <= DestNumSigBits)
1705 return true;
1706
1707 return false;
1708}
1709
1712 return I;
1713
1714 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1715 // simplify this expression to avoid one or more of the trunc/extend
1716 // operations if we can do so without changing the numerical results.
1717 //
1718 // The exact manner in which the widths of the operands interact to limit
1719 // what we can and cannot do safely varies from operation to operation, and
1720 // is explained below in the various case statements.
1721 Type *Ty = FPT.getType();
1722 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1723 if (BO && BO->hasOneUse()) {
1724 Type *LHSMinType =
1725 getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
1726 Type *RHSMinType =
1727 getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
1728 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1729 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1730 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1731 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1732 unsigned DstWidth = Ty->getFPMantissaWidth();
1733 switch (BO->getOpcode()) {
1734 default: break;
1735 case Instruction::FAdd:
1736 case Instruction::FSub:
1737 // For addition and subtraction, the infinitely precise result can
1738 // essentially be arbitrarily wide; proving that double rounding
1739 // will not occur because the result of OpI is exact (as we will for
1740 // FMul, for example) is hopeless. However, we *can* nonetheless
1741 // frequently know that double rounding cannot occur (or that it is
1742 // innocuous) by taking advantage of the specific structure of
1743 // infinitely-precise results that admit double rounding.
1744 //
1745 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
1746 // to represent both sources, we can guarantee that the double
1747 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1748 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1749 // for proof of this fact).
1750 //
1751 // Note: Figueroa does not consider the case where DstFormat !=
1752 // SrcFormat. It's possible (likely even!) that this analysis
1753 // could be tightened for those cases, but they are rare (the main
1754 // case of interest here is (float)((double)float + float)).
1755 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1756 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1757 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1758 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1759 RI->copyFastMathFlags(BO);
1760 return RI;
1761 }
1762 break;
1763 case Instruction::FMul:
1764 // For multiplication, the infinitely precise result has at most
1765 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1766 // that such a value can be exactly represented, then no double
1767 // rounding can possibly occur; we can safely perform the operation
1768 // in the destination format if it can represent both sources.
1769 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1770 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1771 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1773 }
1774 break;
1775 case Instruction::FDiv:
1776 // For division, we use again use the bound from Figueroa's
1777 // dissertation. I am entirely certain that this bound can be
1778 // tightened in the unbalanced operand case by an analysis based on
1779 // the diophantine rational approximation bound, but the well-known
1780 // condition used here is a good conservative first pass.
1781 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1782 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1783 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1784 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1786 }
1787 break;
1788 case Instruction::FRem: {
1789 // Remainder is straightforward. Remainder is always exact, so the
1790 // type of OpI doesn't enter into things at all. We simply evaluate
1791 // in whichever source type is larger, then convert to the
1792 // destination type.
1793 if (SrcWidth == OpWidth)
1794 break;
1795 Value *LHS, *RHS;
1796 if (LHSWidth == SrcWidth) {
1797 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1798 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1799 } else {
1800 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1801 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1802 }
1803
1804 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1805 return CastInst::CreateFPCast(ExactResult, Ty);
1806 }
1807 }
1808 }
1809
1810 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1811 Value *X;
1812 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1813 if (Op && Op->hasOneUse()) {
1814 // FIXME: The FMF should propagate from the fptrunc, not the source op.
1816 if (isa<FPMathOperator>(Op))
1817 Builder.setFastMathFlags(Op->getFastMathFlags());
1818
1819 if (match(Op, m_FNeg(m_Value(X)))) {
1820 Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1821
1822 return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1823 }
1824
1825 // If we are truncating a select that has an extended operand, we can
1826 // narrow the other operand and do the select as a narrow op.
1827 Value *Cond, *X, *Y;
1829 X->getType() == Ty) {
1830 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
1831 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1832 Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1833 return replaceInstUsesWith(FPT, Sel);
1834 }
1836 X->getType() == Ty) {
1837 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
1838 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1839 Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1840 return replaceInstUsesWith(FPT, Sel);
1841 }
1842 }
1843
1844 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1845 switch (II->getIntrinsicID()) {
1846 default: break;
1847 case Intrinsic::ceil:
1848 case Intrinsic::fabs:
1849 case Intrinsic::floor:
1850 case Intrinsic::nearbyint:
1851 case Intrinsic::rint:
1852 case Intrinsic::round:
1853 case Intrinsic::roundeven:
1854 case Intrinsic::trunc: {
1855 Value *Src = II->getArgOperand(0);
1856 if (!Src->hasOneUse())
1857 break;
1858
1859 // Except for fabs, this transformation requires the input of the unary FP
1860 // operation to be itself an fpext from the type to which we're
1861 // truncating.
1862 if (II->getIntrinsicID() != Intrinsic::fabs) {
1863 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1864 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1865 break;
1866 }
1867
1868 // Do unary FP operation on smaller type.
1869 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1870 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1872 II->getIntrinsicID(), Ty);
1874 II->getOperandBundlesAsDefs(OpBundles);
1875 CallInst *NewCI =
1876 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1877 NewCI->copyFastMathFlags(II);
1878 return NewCI;
1879 }
1880 }
1881 }
1882
1883 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1884 return I;
1885
1886 Value *Src = FPT.getOperand(0);
1887 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1888 auto *FPCast = cast<CastInst>(Src);
1889 if (isKnownExactCastIntToFP(*FPCast, *this))
1890 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1891 }
1892
1893 return nullptr;
1894}
1895
1897 // If the source operand is a cast from integer to FP and known exact, then
1898 // cast the integer operand directly to the destination type.
1899 Type *Ty = FPExt.getType();
1900 Value *Src = FPExt.getOperand(0);
1901 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1902 auto *FPCast = cast<CastInst>(Src);
1903 if (isKnownExactCastIntToFP(*FPCast, *this))
1904 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1905 }
1906
1907 return commonCastTransforms(FPExt);
1908}
1909
1910/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1911/// This is safe if the intermediate type has enough bits in its mantissa to
1912/// accurately represent all values of X. For example, this won't work with
1913/// i64 -> float -> i64.
1915 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1916 return nullptr;
1917
1918 auto *OpI = cast<CastInst>(FI.getOperand(0));
1919 Value *X = OpI->getOperand(0);
1920 Type *XType = X->getType();
1921 Type *DestType = FI.getType();
1922 bool IsOutputSigned = isa<FPToSIInst>(FI);
1923
1924 // Since we can assume the conversion won't overflow, our decision as to
1925 // whether the input will fit in the float should depend on the minimum
1926 // of the input range and output range.
1927
1928 // This means this is also safe for a signed input and unsigned output, since
1929 // a negative input would lead to undefined behavior.
1930 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1931 // The first cast may not round exactly based on the source integer width
1932 // and FP width, but the overflow UB rules can still allow this to fold.
1933 // If the destination type is narrow, that means the intermediate FP value
1934 // must be large enough to hold the source value exactly.
1935 // For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
1936 int OutputSize = (int)DestType->getScalarSizeInBits();
1937 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1938 return nullptr;
1939 }
1940
1941 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1942 bool IsInputSigned = isa<SIToFPInst>(OpI);
1943 if (IsInputSigned && IsOutputSigned)
1944 return new SExtInst(X, DestType);
1945 return new ZExtInst(X, DestType);
1946 }
1947 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1948 return new TruncInst(X, DestType);
1949
1950 assert(XType == DestType && "Unexpected types for int to FP to int casts");
1951 return replaceInstUsesWith(FI, X);
1952}
1953
1955 // fpto{u/s}i non-norm --> 0
1956 FPClassTest Mask =
1957 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
1958 KnownFPClass FPClass =
1959 computeKnownFPClass(FI.getOperand(0), Mask, /*Depth=*/0,
1961 if (FPClass.isKnownNever(Mask))
1963
1964 return nullptr;
1965}
1966
1968 if (Instruction *I = foldItoFPtoI(FI))
1969 return I;
1970
1971 if (Instruction *I = foldFPtoI(FI, *this))
1972 return I;
1973
1974 return commonCastTransforms(FI);
1975}
1976
1978 if (Instruction *I = foldItoFPtoI(FI))
1979 return I;
1980
1981 if (Instruction *I = foldFPtoI(FI, *this))
1982 return I;
1983
1984 return commonCastTransforms(FI);
1985}
1986
1988 if (Instruction *R = commonCastTransforms(CI))
1989 return R;
1990 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
1991 CI.setNonNeg();
1992 return &CI;
1993 }
1994 return nullptr;
1995}
1996
1998 if (Instruction *R = commonCastTransforms(CI))
1999 return R;
2000 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2001 auto *UI =
2002 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2003 UI->setNonNeg(true);
2004 return UI;
2005 }
2006 return nullptr;
2007}
2008
2010 // If the source integer type is not the intptr_t type for this target, do a
2011 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2012 // cast to be exposed to other transforms.
2013 unsigned AS = CI.getAddressSpace();
2014 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2016 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2017 DL.getIntPtrType(CI.getContext(), AS));
2019 return new IntToPtrInst(P, CI.getType());
2020 }
2021
2023 return I;
2024
2025 return nullptr;
2026}
2027
2029 // If the destination integer type is not the intptr_t type for this target,
2030 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2031 // to be exposed to other transforms.
2033 Type *SrcTy = SrcOp->getType();
2034 Type *Ty = CI.getType();
2035 unsigned AS = CI.getPointerAddressSpace();
2036 unsigned TySize = Ty->getScalarSizeInBits();
2037 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2038 if (TySize != PtrSize) {
2039 Type *IntPtrTy =
2040 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2041 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2042 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2043 }
2044
2045 // (ptrtoint (ptrmask P, M))
2046 // -> (and (ptrtoint P), M)
2047 // This is generally beneficial as `and` is better supported than `ptrmask`.
2048 Value *Ptr, *Mask;
2049 if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
2050 m_Value(Mask)))) &&
2051 Mask->getType() == Ty)
2052 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2053
2054 if (auto *GEP = dyn_cast<GEPOperator>(SrcOp)) {
2055 // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
2056 // While this can increase the number of instructions it doesn't actually
2057 // increase the overall complexity since the arithmetic is just part of
2058 // the GEP otherwise.
2059 if (GEP->hasOneUse() &&
2060 isa<ConstantPointerNull>(GEP->getPointerOperand())) {
2061 return replaceInstUsesWith(CI,
2062 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
2063 /*isSigned=*/false));
2064 }
2065
2066 // (ptrtoint (gep (inttoptr Base), ...)) -> Base + Offset
2067 Value *Base;
2068 if (GEP->hasOneUse() &&
2069 match(GEP->getPointerOperand(), m_OneUse(m_IntToPtr(m_Value(Base)))) &&
2070 Base->getType() == Ty) {
2071 Value *Offset = EmitGEPOffset(GEP);
2072 auto *NewOp = BinaryOperator::CreateAdd(Base, Offset);
2073 if (GEP->hasNoUnsignedWrap() ||
2074 (GEP->hasNoUnsignedSignedWrap() &&
2076 NewOp->setHasNoUnsignedWrap(true);
2077 return NewOp;
2078 }
2079 }
2080
2081 Value *Vec, *Scalar, *Index;
2083 m_Value(Scalar), m_Value(Index)))) &&
2084 Vec->getType() == Ty) {
2085 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2086 // Convert the scalar to int followed by insert to eliminate one cast:
2087 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2088 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2089 return InsertElementInst::Create(Vec, NewCast, Index);
2090 }
2091
2092 return commonCastTransforms(CI);
2093}
2094
2095/// This input value (which is known to have vector type) is being zero extended
2096/// or truncated to the specified vector type. Since the zext/trunc is done
2097/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2098/// endianness will impact which end of the vector that is extended or
2099/// truncated.
2100///
2101/// A vector is always stored with index 0 at the lowest address, which
2102/// corresponds to the most significant bits for a big endian stored integer and
2103/// the least significant bits for little endian. A trunc/zext of an integer
2104/// impacts the big end of the integer. Thus, we need to add/remove elements at
2105/// the front of the vector for big endian targets, and the back of the vector
2106/// for little endian targets.
2107///
2108/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2109///
2110/// The source and destination vector types may have different element types.
2111static Instruction *
2113 InstCombinerImpl &IC) {
2114 // We can only do this optimization if the output is a multiple of the input
2115 // element size, or the input is a multiple of the output element size.
2116 // Convert the input type to have the same element type as the output.
2117 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2118
2119 if (SrcTy->getElementType() != DestTy->getElementType()) {
2120 // The input types don't need to be identical, but for now they must be the
2121 // same size. There is no specific reason we couldn't handle things like
2122 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2123 // there yet.
2124 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2125 DestTy->getElementType()->getPrimitiveSizeInBits())
2126 return nullptr;
2127
2128 SrcTy =
2129 FixedVectorType::get(DestTy->getElementType(),
2130 cast<FixedVectorType>(SrcTy)->getNumElements());
2131 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2132 }
2133
2134 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2135 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2136 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2137
2138 assert(SrcElts != DestElts && "Element counts should be different.");
2139
2140 // Now that the element types match, get the shuffle mask and RHS of the
2141 // shuffle to use, which depends on whether we're increasing or decreasing the
2142 // size of the input.
2143 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2144 ArrayRef<int> ShuffleMask;
2145 Value *V2;
2146
2147 if (SrcElts > DestElts) {
2148 // If we're shrinking the number of elements (rewriting an integer
2149 // truncate), just shuffle in the elements corresponding to the least
2150 // significant bits from the input and use poison as the second shuffle
2151 // input.
2152 V2 = PoisonValue::get(SrcTy);
2153 // Make sure the shuffle mask selects the "least significant bits" by
2154 // keeping elements from back of the src vector for big endian, and from the
2155 // front for little endian.
2156 ShuffleMask = ShuffleMaskStorage;
2157 if (IsBigEndian)
2158 ShuffleMask = ShuffleMask.take_back(DestElts);
2159 else
2160 ShuffleMask = ShuffleMask.take_front(DestElts);
2161 } else {
2162 // If we're increasing the number of elements (rewriting an integer zext),
2163 // shuffle in all of the elements from InVal. Fill the rest of the result
2164 // elements with zeros from a constant zero.
2165 V2 = Constant::getNullValue(SrcTy);
2166 // Use first elt from V2 when indicating zero in the shuffle mask.
2167 uint32_t NullElt = SrcElts;
2168 // Extend with null values in the "most significant bits" by adding elements
2169 // in front of the src vector for big endian, and at the back for little
2170 // endian.
2171 unsigned DeltaElts = DestElts - SrcElts;
2172 if (IsBigEndian)
2173 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2174 else
2175 ShuffleMaskStorage.append(DeltaElts, NullElt);
2176 ShuffleMask = ShuffleMaskStorage;
2177 }
2178
2179 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2180}
2181
2182static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2183 return Value % Ty->getPrimitiveSizeInBits() == 0;
2184}
2185
2186static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2187 return Value / Ty->getPrimitiveSizeInBits();
2188}
2189
2190/// V is a value which is inserted into a vector of VecEltTy.
2191/// Look through the value to see if we can decompose it into
2192/// insertions into the vector. See the example in the comment for
2193/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2194/// The type of V is always a non-zero multiple of VecEltTy's size.
2195/// Shift is the number of bits between the lsb of V and the lsb of
2196/// the vector.
2197///
2198/// This returns false if the pattern can't be matched or true if it can,
2199/// filling in Elements with the elements found here.
2200static bool collectInsertionElements(Value *V, unsigned Shift,
2201 SmallVectorImpl<Value *> &Elements,
2202 Type *VecEltTy, bool isBigEndian) {
2203 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2204 "Shift should be a multiple of the element type size");
2205
2206 // Undef values never contribute useful bits to the result.
2207 if (isa<UndefValue>(V)) return true;
2208
2209 // If we got down to a value of the right type, we win, try inserting into the
2210 // right element.
2211 if (V->getType() == VecEltTy) {
2212 // Inserting null doesn't actually insert any elements.
2213 if (Constant *C = dyn_cast<Constant>(V))
2214 if (C->isNullValue())
2215 return true;
2216
2217 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2218 if (isBigEndian)
2219 ElementIndex = Elements.size() - ElementIndex - 1;
2220
2221 // Fail if multiple elements are inserted into this slot.
2222 if (Elements[ElementIndex])
2223 return false;
2224
2225 Elements[ElementIndex] = V;
2226 return true;
2227 }
2228
2229 if (Constant *C = dyn_cast<Constant>(V)) {
2230 // Figure out the # elements this provides, and bitcast it or slice it up
2231 // as required.
2232 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2233 VecEltTy);
2234 // If the constant is the size of a vector element, we just need to bitcast
2235 // it to the right type so it gets properly inserted.
2236 if (NumElts == 1)
2238 Shift, Elements, VecEltTy, isBigEndian);
2239
2240 // Okay, this is a constant that covers multiple elements. Slice it up into
2241 // pieces and insert each element-sized piece into the vector.
2242 if (!isa<IntegerType>(C->getType()))
2243 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2244 C->getType()->getPrimitiveSizeInBits()));
2245 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2246 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2247
2248 for (unsigned i = 0; i != NumElts; ++i) {
2249 unsigned ShiftI = i * ElementSize;
2251 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2252 if (!Piece)
2253 return false;
2254
2255 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2256 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2257 isBigEndian))
2258 return false;
2259 }
2260 return true;
2261 }
2262
2263 if (!V->hasOneUse()) return false;
2264
2265 Instruction *I = dyn_cast<Instruction>(V);
2266 if (!I) return false;
2267 switch (I->getOpcode()) {
2268 default: return false; // Unhandled case.
2269 case Instruction::BitCast:
2270 if (I->getOperand(0)->getType()->isVectorTy())
2271 return false;
2272 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2273 isBigEndian);
2274 case Instruction::ZExt:
2276 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2277 VecEltTy))
2278 return false;
2279 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2280 isBigEndian);
2281 case Instruction::Or:
2282 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2283 isBigEndian) &&
2284 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2285 isBigEndian);
2286 case Instruction::Shl: {
2287 // Must be shifting by a constant that is a multiple of the element size.
2288 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2289 if (!CI) return false;
2290 Shift += CI->getZExtValue();
2291 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2292 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2293 isBigEndian);
2294 }
2295
2296 }
2297}
2298
2299
2300/// If the input is an 'or' instruction, we may be doing shifts and ors to
2301/// assemble the elements of the vector manually.
2302/// Try to rip the code out and replace it with insertelements. This is to
2303/// optimize code like this:
2304///
2305/// %tmp37 = bitcast float %inc to i32
2306/// %tmp38 = zext i32 %tmp37 to i64
2307/// %tmp31 = bitcast float %inc5 to i32
2308/// %tmp32 = zext i32 %tmp31 to i64
2309/// %tmp33 = shl i64 %tmp32, 32
2310/// %ins35 = or i64 %tmp33, %tmp38
2311/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2312///
2313/// Into two insertelements that do "buildvector{%inc, %inc5}".
2315 InstCombinerImpl &IC) {
2316 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2317 Value *IntInput = CI.getOperand(0);
2318
2319 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2320 if (!collectInsertionElements(IntInput, 0, Elements,
2321 DestVecTy->getElementType(),
2322 IC.getDataLayout().isBigEndian()))
2323 return nullptr;
2324
2325 // If we succeeded, we know that all of the element are specified by Elements
2326 // or are zero if Elements has a null entry. Recast this as a set of
2327 // insertions.
2328 Value *Result = Constant::getNullValue(CI.getType());
2329 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2330 if (!Elements[i]) continue; // Unset element.
2331
2332 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2333 IC.Builder.getInt32(i));
2334 }
2335
2336 return Result;
2337}
2338
2339/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2340/// vector followed by extract element. The backend tends to handle bitcasts of
2341/// vectors better than bitcasts of scalars because vector registers are
2342/// usually not type-specific like scalar integer or scalar floating-point.
2344 InstCombinerImpl &IC) {
2345 Value *VecOp, *Index;
2346 if (!match(BitCast.getOperand(0),
2348 return nullptr;
2349
2350 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2351 // type to extract from.
2352 Type *DestType = BitCast.getType();
2353 VectorType *VecType = cast<VectorType>(VecOp->getType());
2354 if (VectorType::isValidElementType(DestType)) {
2355 auto *NewVecType = VectorType::get(DestType, VecType);
2356 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2357 return ExtractElementInst::Create(NewBC, Index);
2358 }
2359
2360 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2361 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2362 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2363 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2364 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2365
2366 return nullptr;
2367}
2368
2369/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2371 InstCombiner::BuilderTy &Builder) {
2372 Type *DestTy = BitCast.getType();
2373 BinaryOperator *BO;
2374
2375 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2376 !BO->isBitwiseLogicOp())
2377 return nullptr;
2378
2379 // FIXME: This transform is restricted to vector types to avoid backend
2380 // problems caused by creating potentially illegal operations. If a fix-up is
2381 // added to handle that situation, we can remove this check.
2382 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2383 return nullptr;
2384
2385 if (DestTy->isFPOrFPVectorTy()) {
2386 Value *X, *Y;
2387 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2388 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2390 if (X->getType()->isFPOrFPVectorTy() &&
2391 Y->getType()->isIntOrIntVectorTy()) {
2392 Value *CastedOp =
2393 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2394 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2395 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2396 }
2397 if (X->getType()->isIntOrIntVectorTy() &&
2398 Y->getType()->isFPOrFPVectorTy()) {
2399 Value *CastedOp =
2400 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2401 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2402 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2403 }
2404 }
2405 return nullptr;
2406 }
2407
2408 if (!DestTy->isIntOrIntVectorTy())
2409 return nullptr;
2410
2411 Value *X;
2412 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2413 X->getType() == DestTy && !isa<Constant>(X)) {
2414 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2415 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2416 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2417 }
2418
2419 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2420 X->getType() == DestTy && !isa<Constant>(X)) {
2421 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2422 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2423 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2424 }
2425
2426 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2427 // constant. This eases recognition of special constants for later ops.
2428 // Example:
2429 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2430 Constant *C;
2431 if (match(BO->getOperand(1), m_Constant(C))) {
2432 // bitcast (logic X, C) --> logic (bitcast X, C')
2433 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2434 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2435 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2436 }
2437
2438 return nullptr;
2439}
2440
2441/// Change the type of a select if we can eliminate a bitcast.
2443 InstCombiner::BuilderTy &Builder) {
2444 Value *Cond, *TVal, *FVal;
2445 if (!match(BitCast.getOperand(0),
2446 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2447 return nullptr;
2448
2449 // A vector select must maintain the same number of elements in its operands.
2450 Type *CondTy = Cond->getType();
2451 Type *DestTy = BitCast.getType();
2452 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2453 if (!DestTy->isVectorTy() ||
2454 CondVTy->getElementCount() !=
2455 cast<VectorType>(DestTy)->getElementCount())
2456 return nullptr;
2457
2458 // FIXME: This transform is restricted from changing the select between
2459 // scalars and vectors to avoid backend problems caused by creating
2460 // potentially illegal operations. If a fix-up is added to handle that
2461 // situation, we can remove this check.
2462 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2463 return nullptr;
2464
2465 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2466 Value *X;
2467 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2468 !isa<Constant>(X)) {
2469 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2470 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2471 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2472 }
2473
2474 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2475 !isa<Constant>(X)) {
2476 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2477 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2478 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2479 }
2480
2481 return nullptr;
2482}
2483
2484/// Check if all users of CI are StoreInsts.
2485static bool hasStoreUsersOnly(CastInst &CI) {
2486 for (User *U : CI.users()) {
2487 if (!isa<StoreInst>(U))
2488 return false;
2489 }
2490 return true;
2491}
2492
2493/// This function handles following case
2494///
2495/// A -> B cast
2496/// PHI
2497/// B -> A cast
2498///
2499/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2500/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2501Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2502 PHINode *PN) {
2503 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2504 if (hasStoreUsersOnly(CI))
2505 return nullptr;
2506
2507 Value *Src = CI.getOperand(0);
2508 Type *SrcTy = Src->getType(); // Type B
2509 Type *DestTy = CI.getType(); // Type A
2510
2511 SmallVector<PHINode *, 4> PhiWorklist;
2512 SmallSetVector<PHINode *, 4> OldPhiNodes;
2513
2514 // Find all of the A->B casts and PHI nodes.
2515 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2516 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2517 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2518 PhiWorklist.push_back(PN);
2519 OldPhiNodes.insert(PN);
2520 while (!PhiWorklist.empty()) {
2521 auto *OldPN = PhiWorklist.pop_back_val();
2522 for (Value *IncValue : OldPN->incoming_values()) {
2523 if (isa<Constant>(IncValue))
2524 continue;
2525
2526 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2527 // If there is a sequence of one or more load instructions, each loaded
2528 // value is used as address of later load instruction, bitcast is
2529 // necessary to change the value type, don't optimize it. For
2530 // simplicity we give up if the load address comes from another load.
2531 Value *Addr = LI->getOperand(0);
2532 if (Addr == &CI || isa<LoadInst>(Addr))
2533 return nullptr;
2534 // Don't tranform "load <256 x i32>, <256 x i32>*" to
2535 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2536 // TODO: Remove this check when bitcast between vector and x86_amx
2537 // is replaced with a specific intrinsic.
2538 if (DestTy->isX86_AMXTy())
2539 return nullptr;
2540 if (LI->hasOneUse() && LI->isSimple())
2541 continue;
2542 // If a LoadInst has more than one use, changing the type of loaded
2543 // value may create another bitcast.
2544 return nullptr;
2545 }
2546
2547 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2548 if (OldPhiNodes.insert(PNode))
2549 PhiWorklist.push_back(PNode);
2550 continue;
2551 }
2552
2553 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2554 // We can't handle other instructions.
2555 if (!BCI)
2556 return nullptr;
2557
2558 // Verify it's a A->B cast.
2559 Type *TyA = BCI->getOperand(0)->getType();
2560 Type *TyB = BCI->getType();
2561 if (TyA != DestTy || TyB != SrcTy)
2562 return nullptr;
2563 }
2564 }
2565
2566 // Check that each user of each old PHI node is something that we can
2567 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2568 for (auto *OldPN : OldPhiNodes) {
2569 for (User *V : OldPN->users()) {
2570 if (auto *SI = dyn_cast<StoreInst>(V)) {
2571 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2572 return nullptr;
2573 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2574 // Verify it's a B->A cast.
2575 Type *TyB = BCI->getOperand(0)->getType();
2576 Type *TyA = BCI->getType();
2577 if (TyA != DestTy || TyB != SrcTy)
2578 return nullptr;
2579 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2580 // As long as the user is another old PHI node, then even if we don't
2581 // rewrite it, the PHI web we're considering won't have any users
2582 // outside itself, so it'll be dead.
2583 if (!OldPhiNodes.contains(PHI))
2584 return nullptr;
2585 } else {
2586 return nullptr;
2587 }
2588 }
2589 }
2590
2591 // For each old PHI node, create a corresponding new PHI node with a type A.
2593 for (auto *OldPN : OldPhiNodes) {
2594 Builder.SetInsertPoint(OldPN);
2595 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2596 NewPNodes[OldPN] = NewPN;
2597 }
2598
2599 // Fill in the operands of new PHI nodes.
2600 for (auto *OldPN : OldPhiNodes) {
2601 PHINode *NewPN = NewPNodes[OldPN];
2602 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2603 Value *V = OldPN->getOperand(j);
2604 Value *NewV = nullptr;
2605 if (auto *C = dyn_cast<Constant>(V)) {
2606 NewV = ConstantExpr::getBitCast(C, DestTy);
2607 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2608 // Explicitly perform load combine to make sure no opposing transform
2609 // can remove the bitcast in the meantime and trigger an infinite loop.
2611 NewV = combineLoadToNewType(*LI, DestTy);
2612 // Remove the old load and its use in the old phi, which itself becomes
2613 // dead once the whole transform finishes.
2616 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2617 NewV = BCI->getOperand(0);
2618 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2619 NewV = NewPNodes[PrevPN];
2620 }
2621 assert(NewV);
2622 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2623 }
2624 }
2625
2626 // Traverse all accumulated PHI nodes and process its users,
2627 // which are Stores and BitcCasts. Without this processing
2628 // NewPHI nodes could be replicated and could lead to extra
2629 // moves generated after DeSSA.
2630 // If there is a store with type B, change it to type A.
2631
2632
2633 // Replace users of BitCast B->A with NewPHI. These will help
2634 // later to get rid off a closure formed by OldPHI nodes.
2635 Instruction *RetVal = nullptr;
2636 for (auto *OldPN : OldPhiNodes) {
2637 PHINode *NewPN = NewPNodes[OldPN];
2638 for (User *V : make_early_inc_range(OldPN->users())) {
2639 if (auto *SI = dyn_cast<StoreInst>(V)) {
2640 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2642 auto *NewBC =
2643 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2644 SI->setOperand(0, NewBC);
2645 Worklist.push(SI);
2646 assert(hasStoreUsersOnly(*NewBC));
2647 }
2648 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2649 Type *TyB = BCI->getOperand(0)->getType();
2650 Type *TyA = BCI->getType();
2651 assert(TyA == DestTy && TyB == SrcTy);
2652 (void) TyA;
2653 (void) TyB;
2654 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2655 if (BCI == &CI)
2656 RetVal = I;
2657 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2658 assert(OldPhiNodes.contains(PHI));
2659 (void) PHI;
2660 } else {
2661 llvm_unreachable("all uses should be handled");
2662 }
2663 }
2664 }
2665
2666 return RetVal;
2667}
2668
2670 // If the operands are integer typed then apply the integer transforms,
2671 // otherwise just apply the common ones.
2672 Value *Src = CI.getOperand(0);
2673 Type *SrcTy = Src->getType();
2674 Type *DestTy = CI.getType();
2675
2676 // Get rid of casts from one type to the same type. These are useless and can
2677 // be replaced by the operand.
2678 if (DestTy == Src->getType())
2679 return replaceInstUsesWith(CI, Src);
2680
2681 if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2682 // Beware: messing with this target-specific oddity may cause trouble.
2683 if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2684 Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2685 return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2687 }
2688
2689 if (isa<IntegerType>(SrcTy)) {
2690 // If this is a cast from an integer to vector, check to see if the input
2691 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2692 // the casts with a shuffle and (potentially) a bitcast.
2693 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2694 CastInst *SrcCast = cast<CastInst>(Src);
2695 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2696 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2698 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2699 return I;
2700 }
2701
2702 // If the input is an 'or' instruction, we may be doing shifts and ors to
2703 // assemble the elements of the vector manually. Try to rip the code out
2704 // and replace it with insertelements.
2705 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2706 return replaceInstUsesWith(CI, V);
2707 }
2708 }
2709
2710 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2711 if (SrcVTy->getNumElements() == 1) {
2712 // If our destination is not a vector, then make this a straight
2713 // scalar-scalar cast.
2714 if (!DestTy->isVectorTy()) {
2715 Value *Elem =
2718 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2719 }
2720
2721 // Otherwise, see if our source is an insert. If so, then use the scalar
2722 // component directly:
2723 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2724 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2725 return new BitCastInst(InsElt->getOperand(1), DestTy);
2726 }
2727
2728 // Convert an artificial vector insert into more analyzable bitwise logic.
2729 unsigned BitWidth = DestTy->getScalarSizeInBits();
2730 Value *X, *Y;
2731 uint64_t IndexC;
2733 m_Value(Y), m_ConstantInt(IndexC)))) &&
2734 DestTy->isIntegerTy() && X->getType() == DestTy &&
2735 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2736 // Adjust for big endian - the LSBs are at the high index.
2737 if (DL.isBigEndian())
2738 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2739
2740 // We only handle (endian-normalized) insert to index 0. Any other insert
2741 // would require a left-shift, so that is an extra instruction.
2742 if (IndexC == 0) {
2743 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2744 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2745 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2746 Value *AndX = Builder.CreateAnd(X, MaskC);
2747 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2748 return BinaryOperator::CreateOr(AndX, ZextY);
2749 }
2750 }
2751 }
2752
2753 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2754 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2755 // a bitcast to a vector with the same # elts.
2756 Value *ShufOp0 = Shuf->getOperand(0);
2757 Value *ShufOp1 = Shuf->getOperand(1);
2758 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2759 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2760 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2761 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2762 ShufElts == SrcVecElts) {
2763 BitCastInst *Tmp;
2764 // If either of the operands is a cast from CI.getType(), then
2765 // evaluating the shuffle in the casted destination's type will allow
2766 // us to eliminate at least one cast.
2767 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2768 Tmp->getOperand(0)->getType() == DestTy) ||
2769 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2770 Tmp->getOperand(0)->getType() == DestTy)) {
2771 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2772 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2773 // Return a new shuffle vector. Use the same element ID's, as we
2774 // know the vector types match #elts.
2775 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2776 }
2777 }
2778
2779 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2780 // as a byte/bit swap:
2781 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2782 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2783 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2784 Shuf->hasOneUse() && Shuf->isReverse()) {
2785 unsigned IntrinsicNum = 0;
2786 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2787 SrcTy->getScalarSizeInBits() == 8) {
2788 IntrinsicNum = Intrinsic::bswap;
2789 } else if (SrcTy->getScalarSizeInBits() == 1) {
2790 IntrinsicNum = Intrinsic::bitreverse;
2791 }
2792 if (IntrinsicNum != 0) {
2793 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2794 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2795 Function *BswapOrBitreverse =
2796 Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
2797 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2798 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2799 }
2800 }
2801 }
2802
2803 // Handle the A->B->A cast, and there is an intervening PHI node.
2804 if (PHINode *PN = dyn_cast<PHINode>(Src))
2805 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2806 return I;
2807
2808 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2809 return I;
2810
2812 return I;
2813
2815 return I;
2816
2817 return commonCastTransforms(CI);
2818}
2819
2821 return commonCastTransforms(CI);
2822}
Rewrite undef for PHI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool canEvaluateSExtd(Value *V, Type *Ty)
Return true if we can take the specified value and return it as type Ty without inserting any new cas...
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombinerImpl &IC, Instruction *CxtI)
Determine if the specified value can be computed in the specified wider type and produce the same low...
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Type * shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat)
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
static Type * getMinimumFPType(Value *V, bool PreferBFloat)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static bool canAlwaysEvaluateInType(Value *V, Type *Ty)
Constants and extensions/truncates from the destination type are always free to be evaluated in that ...
static bool canNotEvaluateInType(Value *V, Type *Ty)
Filter out values that we can not evaluate in the destination type for free.
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
int32_t exactLogBase2() const
Definition: APInt.h:1741
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:266
This class represents a conversion between pointers from one address space to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:235
std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
Definition: Attributes.cpp:465
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition: InstrTypes.h:271
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition: InstrTypes.h:275
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:699
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:701
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2606
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2283
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2241
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:792
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:298
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
bool isBigEndian() const
Definition: DataLayout.h:239
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:745
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:89
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2477
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2465
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2044
Value * CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1659
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2402
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2132
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2026
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1332
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1671
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2166
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2201
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1461
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1524
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate ty...
Instruction * visitFPTrunc(FPTruncInst &CI)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:341
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:386
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:375
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:452
DominatorTree & DT
Definition: InstCombiner.h:74
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:431
BuilderTy & Builder
Definition: InstCombiner.h:60
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:447
const SimplifyQuery & getSimplifyQuery() const
Definition: InstCombiner.h:342
unsigned ComputeMaxSignificantBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:457
void push(Instruction *I)
Push the instruction onto the worklist stack.
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:323
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static Type * getBFloatTy(LLVMContext &C)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
bool isX86_MMXTy() const
Return true if this is X86 MMX.
Definition: Type.h:201
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:204
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static Type * getPPC_FP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:164
'undef' values are things that do not have specified contents.
Definition: Constants.h:1385
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1833
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:683
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:676
This class represents zero extension of integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
Definition: PatternMatch.h:980
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
VScaleVal_match m_VScale()
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:698
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition: MathExtras.h:359
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
@ SPF_UNKNOWN
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2715
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:2039
KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, unsigned Depth, const SimplifyQuery &SQ)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:276
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:250
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:277
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:274
static const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:275
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:337
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
SimplifyQuery getWithInstruction(const Instruction *I) const